Commit 9cb7528e728624c7f2b361f6a8696f655ab4cc99 (1 parent: 41e1f8df)
店匠体系数据的搜索:mock data -> mysql, mysql->ES
Showing 11 changed files with 41 additions and 52 deletions
Show diff stats
api/app.py
| ... | ... | @@ -39,13 +39,13 @@ limiter = Limiter(key_func=get_remote_address) |
| 39 | 39 | # Add parent directory to path |
| 40 | 40 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 41 | 41 | |
| 42 | -from config import ConfigLoader, CustomerConfig | |
| 42 | +from config import ConfigLoader, SearchConfig | |
| 43 | 43 | from utils import ESClient |
| 44 | 44 | from search import Searcher |
| 45 | 45 | from query import QueryParser |
| 46 | 46 | |
| 47 | 47 | # Global instances |
| 48 | -_config: Optional[CustomerConfig] = None | |
| 48 | +_config: Optional[SearchConfig] = None | |
| 49 | 49 | _es_client: Optional[ESClient] = None |
| 50 | 50 | _searcher: Optional[Searcher] = None |
| 51 | 51 | _query_parser: Optional[QueryParser] = None |
| ... | ... | @@ -111,7 +111,7 @@ def init_service(es_host: str = "http://localhost:9200"): |
| 111 | 111 | print("Search service ready!") |
| 112 | 112 | |
| 113 | 113 | |
| 114 | -def get_config() -> CustomerConfig: | |
| 114 | +def get_config() -> SearchConfig: | |
| 115 | 115 | """Get search engine configuration.""" |
| 116 | 116 | if _config is None: |
| 117 | 117 | raise RuntimeError("Service not initialized") | ... | ... |
config/__init__.py
| ... | ... | @@ -14,7 +14,7 @@ from .field_types import ( |
| 14 | 14 | |
| 15 | 15 | from .config_loader import ( |
| 16 | 16 | ConfigLoader, |
| 17 | - CustomerConfig, | |
| 17 | + SearchConfig, | |
| 18 | 18 | IndexConfig, |
| 19 | 19 | RankingConfig, |
| 20 | 20 | QueryConfig, |
| ... | ... | @@ -38,7 +38,7 @@ __all__ = [ |
| 38 | 38 | |
| 39 | 39 | # Config loader |
| 40 | 40 | 'ConfigLoader', |
| 41 | - 'CustomerConfig', | |
| 41 | + 'SearchConfig', | |
| 42 | 42 | 'IndexConfig', |
| 43 | 43 | 'RankingConfig', |
| 44 | 44 | 'QueryConfig', | ... | ... |
config/config_loader.py
| 1 | 1 | """ |
| 2 | -Configuration loader and validator for customer-specific search configurations. | |
| 2 | +Configuration loader and validator for search engine configurations. | |
| 3 | 3 | |
| 4 | 4 | This module handles loading, parsing, and validating YAML configuration files |
| 5 | -that define how each customer's data should be indexed and searched. | |
| 5 | +that define how search engine data should be indexed and searched. | |
| 6 | 6 | """ |
| 7 | 7 | |
| 8 | 8 | import yaml |
| ... | ... | @@ -85,7 +85,7 @@ class RerankConfig: |
| 85 | 85 | |
| 86 | 86 | |
| 87 | 87 | @dataclass |
| 88 | -class CustomerConfig: | |
| 88 | +class SearchConfig: | |
| 89 | 89 | """Complete configuration for search engine (multi-tenant).""" |
| 90 | 90 | # Field definitions |
| 91 | 91 | fields: List[FieldConfig] |
| ... | ... | @@ -161,12 +161,12 @@ class ConfigLoader: |
| 161 | 161 | |
| 162 | 162 | return rewrite_dict |
| 163 | 163 | |
| 164 | - def load_config(self) -> CustomerConfig: | |
| 164 | + def load_config(self) -> SearchConfig: | |
| 165 | 165 | """ |
| 166 | 166 | Load unified configuration from YAML file. |
| 167 | 167 | |
| 168 | 168 | Returns: |
| 169 | - CustomerConfig object | |
| 169 | + SearchConfig object | |
| 170 | 170 | |
| 171 | 171 | Raises: |
| 172 | 172 | ConfigurationError: If config file not found or invalid |
| ... | ... | @@ -182,8 +182,8 @@ class ConfigLoader: |
| 182 | 182 | |
| 183 | 183 | return self._parse_config(config_data) |
| 184 | 184 | |
| 185 | - def _parse_config(self, config_data: Dict[str, Any]) -> CustomerConfig: | |
| 186 | - """Parse configuration dictionary into CustomerConfig object.""" | |
| 185 | + def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig: | |
| 186 | + """Parse configuration dictionary into SearchConfig object.""" | |
| 187 | 187 | |
| 188 | 188 | # Parse fields |
| 189 | 189 | fields = [] |
| ... | ... | @@ -243,7 +243,7 @@ class ConfigLoader: |
| 243 | 243 | inner_hits_size=spu_data.get("inner_hits_size", 3) |
| 244 | 244 | ) |
| 245 | 245 | |
| 246 | - return CustomerConfig( | |
| 246 | + return SearchConfig( | |
| 247 | 247 | fields=fields, |
| 248 | 248 | indexes=indexes, |
| 249 | 249 | query_config=query_config, |
| ... | ... | @@ -312,12 +312,12 @@ class ConfigLoader: |
| 312 | 312 | language_field_mapping=language_field_mapping |
| 313 | 313 | ) |
| 314 | 314 | |
| 315 | - def validate_config(self, config: CustomerConfig) -> List[str]: | |
| 315 | + def validate_config(self, config: SearchConfig) -> List[str]: | |
| 316 | 316 | """ |
| 317 | - Validate customer configuration. | |
| 317 | + Validate search configuration. | |
| 318 | 318 | |
| 319 | 319 | Args: |
| 320 | - config: Customer configuration to validate | |
| 320 | + config: Search configuration to validate | |
| 321 | 321 | |
| 322 | 322 | Returns: |
| 323 | 323 | List of validation error messages (empty if valid) |
| ... | ... | @@ -407,7 +407,7 @@ class ConfigLoader: |
| 407 | 407 | |
| 408 | 408 | return errors |
| 409 | 409 | |
| 410 | - def save_config(self, config: CustomerConfig, output_path: Optional[str] = None) -> None: | |
| 410 | + def save_config(self, config: SearchConfig, output_path: Optional[str] = None) -> None: | |
| 411 | 411 | """ |
| 412 | 412 | Save configuration to YAML file. |
| 413 | 413 | ... | ... |
config/env_config.py
| ... | ... | @@ -31,9 +31,6 @@ REDIS_CONFIG = { |
| 31 | 31 | # DeepL API Key |
| 32 | 32 | DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY') |
| 33 | 33 | |
| 34 | -# Customer Configuration | |
| 35 | -CUSTOMER_ID = os.getenv('CUSTOMER_ID', 'customer1') | |
| 36 | - | |
| 37 | 34 | # API Service Configuration |
| 38 | 35 | API_HOST = os.getenv('API_HOST', '0.0.0.0') |
| 39 | 36 | API_PORT = int(os.getenv('API_PORT', 6002)) |
| ... | ... | @@ -74,11 +71,6 @@ def get_deepl_key() -> str: |
| 74 | 71 | return DEEPL_AUTH_KEY |
| 75 | 72 | |
| 76 | 73 | |
| 77 | -def get_customer_id() -> str: | |
| 78 | - """Get default customer ID.""" | |
| 79 | - return CUSTOMER_ID | |
| 80 | - | |
| 81 | - | |
| 82 | 74 | def get_db_config() -> Dict[str, Any]: |
| 83 | 75 | """Get MySQL database configuration.""" |
| 84 | 76 | return DB_CONFIG.copy() |
| ... | ... | @@ -103,9 +95,6 @@ def print_config(): |
| 103 | 95 | print("\nDeepL:") |
| 104 | 96 | print(f" API Key: {'*' * 10 if DEEPL_AUTH_KEY else 'None (translation disabled)'}") |
| 105 | 97 | |
| 106 | - print("\nCustomer:") | |
| 107 | - print(f" Customer ID: {CUSTOMER_ID}") | |
| 108 | - | |
| 109 | 98 | print("\nAPI Service:") |
| 110 | 99 | print(f" Host: {API_HOST}") |
| 111 | 100 | print(f" Port: {API_PORT}") | ... | ... |
indexer/data_transformer.py
| ... | ... | @@ -8,7 +8,7 @@ import pandas as pd |
| 8 | 8 | import numpy as np |
| 9 | 9 | import datetime |
| 10 | 10 | from typing import Dict, Any, List, Optional |
| 11 | -from config import CustomerConfig, FieldConfig, FieldType | |
| 11 | +from config import SearchConfig, FieldConfig, FieldType | |
| 12 | 12 | from embeddings import BgeEncoder, CLIPImageEncoder |
| 13 | 13 | from utils.cache import EmbeddingCache |
| 14 | 14 | |
| ... | ... | @@ -18,7 +18,7 @@ class DataTransformer: |
| 18 | 18 | |
| 19 | 19 | def __init__( |
| 20 | 20 | self, |
| 21 | - config: CustomerConfig, | |
| 21 | + config: SearchConfig, | |
| 22 | 22 | text_encoder: Optional[BgeEncoder] = None, |
| 23 | 23 | image_encoder: Optional[CLIPImageEncoder] = None, |
| 24 | 24 | use_cache: bool = True |
| ... | ... | @@ -27,7 +27,7 @@ class DataTransformer: |
| 27 | 27 | Initialize data transformer. |
| 28 | 28 | |
| 29 | 29 | Args: |
| 30 | - config: Customer configuration | |
| 30 | + config: Search configuration | |
| 31 | 31 | text_encoder: Text embedding encoder (lazy loaded if not provided) |
| 32 | 32 | image_encoder: Image embedding encoder (lazy loaded if not provided) |
| 33 | 33 | use_cache: Whether to use embedding cache | ... | ... |
indexer/mapping_generator.py
| 1 | 1 | """ |
| 2 | 2 | Elasticsearch mapping generator. |
| 3 | 3 | |
| 4 | -Generates Elasticsearch index mappings from customer configuration. | |
| 4 | +Generates Elasticsearch index mappings from search configuration. | |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | 7 | from typing import Dict, Any |
| 8 | 8 | from config import ( |
| 9 | - CustomerConfig, | |
| 9 | + SearchConfig, | |
| 10 | 10 | FieldConfig, |
| 11 | 11 | get_es_mapping_for_field, |
| 12 | 12 | get_default_analyzers, |
| ... | ... | @@ -15,9 +15,9 @@ from config import ( |
| 15 | 15 | |
| 16 | 16 | |
| 17 | 17 | class MappingGenerator: |
| 18 | - """Generates Elasticsearch mapping from customer configuration.""" | |
| 18 | + """Generates Elasticsearch mapping from search configuration.""" | |
| 19 | 19 | |
| 20 | - def __init__(self, config: CustomerConfig): | |
| 20 | + def __init__(self, config: SearchConfig): | |
| 21 | 21 | self.config = config |
| 22 | 22 | |
| 23 | 23 | def generate_mapping(self) -> Dict[str, Any]: | ... | ... |
query/query_parser.py
| ... | ... | @@ -7,7 +7,7 @@ Handles query rewriting, translation, and embedding generation. |
| 7 | 7 | from typing import Dict, List, Optional, Any |
| 8 | 8 | import numpy as np |
| 9 | 9 | |
| 10 | -from config import CustomerConfig, QueryConfig | |
| 10 | +from config import SearchConfig, QueryConfig | |
| 11 | 11 | from embeddings import BgeEncoder |
| 12 | 12 | from .language_detector import LanguageDetector |
| 13 | 13 | from .translator import Translator |
| ... | ... | @@ -61,7 +61,7 @@ class QueryParser: |
| 61 | 61 | |
| 62 | 62 | def __init__( |
| 63 | 63 | self, |
| 64 | - config: CustomerConfig, | |
| 64 | + config: SearchConfig, | |
| 65 | 65 | text_encoder: Optional[BgeEncoder] = None, |
| 66 | 66 | translator: Optional[Translator] = None |
| 67 | 67 | ): |
| ... | ... | @@ -69,7 +69,7 @@ class QueryParser: |
| 69 | 69 | Initialize query parser. |
| 70 | 70 | |
| 71 | 71 | Args: |
| 72 | - config: Customer configuration | |
| 72 | + config: Search configuration | |
| 73 | 73 | text_encoder: Text embedding encoder (lazy loaded if not provided) |
| 74 | 74 | translator: Translator instance (lazy loaded if not provided) |
| 75 | 75 | """ | ... | ... |
search/multilang_query_builder.py
| ... | ... | @@ -9,7 +9,7 @@ maintaining a unified external interface. |
| 9 | 9 | from typing import Dict, Any, List, Optional |
| 10 | 10 | import numpy as np |
| 11 | 11 | |
| 12 | -from config import CustomerConfig, IndexConfig | |
| 12 | +from config import SearchConfig, IndexConfig | |
| 13 | 13 | from query import ParsedQuery |
| 14 | 14 | from .es_query_builder import ESQueryBuilder |
| 15 | 15 | |
| ... | ... | @@ -26,7 +26,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 26 | 26 | |
| 27 | 27 | def __init__( |
| 28 | 28 | self, |
| 29 | - config: CustomerConfig, | |
| 29 | + config: SearchConfig, | |
| 30 | 30 | index_name: str, |
| 31 | 31 | text_embedding_field: Optional[str] = None, |
| 32 | 32 | image_embedding_field: Optional[str] = None, | ... | ... |
search/searcher.py
| ... | ... | @@ -7,7 +7,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. |
| 7 | 7 | from typing import Dict, Any, List, Optional, Union |
| 8 | 8 | import time |
| 9 | 9 | |
| 10 | -from config import CustomerConfig | |
| 10 | +from config import SearchConfig | |
| 11 | 11 | from utils.es_client import ESClient |
| 12 | 12 | from query import QueryParser, ParsedQuery |
| 13 | 13 | from indexer import MappingGenerator |
| ... | ... | @@ -75,7 +75,7 @@ class Searcher: |
| 75 | 75 | |
| 76 | 76 | def __init__( |
| 77 | 77 | self, |
| 78 | - config: CustomerConfig, | |
| 78 | + config: SearchConfig, | |
| 79 | 79 | es_client: ESClient, |
| 80 | 80 | query_parser: Optional[QueryParser] = None |
| 81 | 81 | ): |
| ... | ... | @@ -83,7 +83,7 @@ class Searcher: |
| 83 | 83 | Initialize searcher. |
| 84 | 84 | |
| 85 | 85 | Args: |
| 86 | - config: Customer configuration | |
| 86 | + config: Search configuration | |
| 87 | 87 | es_client: Elasticsearch client |
| 88 | 88 | query_parser: Query parser (created if not provided) |
| 89 | 89 | """ | ... | ... |
test_source_fields.py
| ... | ... | @@ -7,7 +7,7 @@ import sys |
| 7 | 7 | import os |
| 8 | 8 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) |
| 9 | 9 | |
| 10 | -from config import ConfigLoader, CustomerConfig | |
| 10 | +from config import ConfigLoader, SearchConfig | |
| 11 | 11 | |
| 12 | 12 | def test_source_fields_config(): |
| 13 | 13 | """测试source_fields配置是否正确加载""" | ... | ... |
tests/conftest.py
| ... | ... | @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock |
| 15 | 15 | project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
| 16 | 16 | sys.path.insert(0, project_root) |
| 17 | 17 | |
| 18 | -from config import CustomerConfig, QueryConfig, IndexConfig, FieldConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig | |
| 18 | +from config import SearchConfig, QueryConfig, IndexConfig, FieldConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig | |
| 19 | 19 | from config.field_types import FieldType, AnalyzerType |
| 20 | 20 | from utils.es_client import ESClient |
| 21 | 21 | from search import Searcher |
| ... | ... | @@ -51,8 +51,8 @@ def sample_index_config() -> IndexConfig: |
| 51 | 51 | |
| 52 | 52 | |
| 53 | 53 | @pytest.fixture |
| 54 | -def sample_customer_config(sample_index_config) -> CustomerConfig: | |
| 55 | - """样例客户配置""" | |
| 54 | +def sample_search_config(sample_index_config) -> SearchConfig: | |
| 55 | + """样例搜索配置""" | |
| 56 | 56 | query_config = QueryConfig( |
| 57 | 57 | enable_query_rewrite=True, |
| 58 | 58 | enable_translation=True, |
| ... | ... | @@ -74,7 +74,7 @@ def sample_customer_config(sample_index_config) -> CustomerConfig: |
| 74 | 74 | function_score_config = FunctionScoreConfig() |
| 75 | 75 | rerank_config = RerankConfig() |
| 76 | 76 | |
| 77 | - return CustomerConfig( | |
| 77 | + return SearchConfig( | |
| 78 | 78 | es_index_name="test_products", |
| 79 | 79 | fields=[ |
| 80 | 80 | FieldConfig(name="tenant_id", field_type=FieldType.KEYWORD, required=True), |
| ... | ... | @@ -134,18 +134,18 @@ def mock_es_client() -> Mock: |
| 134 | 134 | |
| 135 | 135 | |
| 136 | 136 | @pytest.fixture |
| 137 | -def test_searcher(sample_customer_config, mock_es_client) -> Searcher: | |
| 137 | +def test_searcher(sample_search_config, mock_es_client) -> Searcher: | |
| 138 | 138 | """测试用Searcher实例""" |
| 139 | 139 | return Searcher( |
| 140 | - config=sample_customer_config, | |
| 140 | + config=sample_search_config, | |
| 141 | 141 | es_client=mock_es_client |
| 142 | 142 | ) |
| 143 | 143 | |
| 144 | 144 | |
| 145 | 145 | @pytest.fixture |
| 146 | -def test_query_parser(sample_customer_config) -> QueryParser: | |
| 146 | +def test_query_parser(sample_search_config) -> QueryParser: | |
| 147 | 147 | """测试用QueryParser实例""" |
| 148 | - return QueryParser(sample_customer_config) | |
| 148 | + return QueryParser(sample_search_config) | |
| 149 | 149 | |
| 150 | 150 | |
| 151 | 151 | @pytest.fixture | ... | ... |