Commit 9cb7528e728624c7f2b361f6a8696f655ab4cc99

Authored by tangwang
1 parent 41e1f8df

店匠体系数据的搜索:mock data -> MySQL, MySQL -> ES

@@ -39,13 +39,13 @@ limiter = Limiter(key_func=get_remote_address) @@ -39,13 +39,13 @@ limiter = Limiter(key_func=get_remote_address)
39 # Add parent directory to path 39 # Add parent directory to path
40 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 40 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
41 41
42 -from config import ConfigLoader, CustomerConfig 42 +from config import ConfigLoader, SearchConfig
43 from utils import ESClient 43 from utils import ESClient
44 from search import Searcher 44 from search import Searcher
45 from query import QueryParser 45 from query import QueryParser
46 46
47 # Global instances 47 # Global instances
48 -_config: Optional[CustomerConfig] = None 48 +_config: Optional[SearchConfig] = None
49 _es_client: Optional[ESClient] = None 49 _es_client: Optional[ESClient] = None
50 _searcher: Optional[Searcher] = None 50 _searcher: Optional[Searcher] = None
51 _query_parser: Optional[QueryParser] = None 51 _query_parser: Optional[QueryParser] = None
@@ -111,7 +111,7 @@ def init_service(es_host: str = "http://localhost:9200"): @@ -111,7 +111,7 @@ def init_service(es_host: str = "http://localhost:9200"):
111 print("Search service ready!") 111 print("Search service ready!")
112 112
113 113
114 -def get_config() -> CustomerConfig: 114 +def get_config() -> SearchConfig:
115 """Get search engine configuration.""" 115 """Get search engine configuration."""
116 if _config is None: 116 if _config is None:
117 raise RuntimeError("Service not initialized") 117 raise RuntimeError("Service not initialized")
config/__init__.py
@@ -14,7 +14,7 @@ from .field_types import ( @@ -14,7 +14,7 @@ from .field_types import (
14 14
15 from .config_loader import ( 15 from .config_loader import (
16 ConfigLoader, 16 ConfigLoader,
17 - CustomerConfig, 17 + SearchConfig,
18 IndexConfig, 18 IndexConfig,
19 RankingConfig, 19 RankingConfig,
20 QueryConfig, 20 QueryConfig,
@@ -38,7 +38,7 @@ __all__ = [ @@ -38,7 +38,7 @@ __all__ = [
38 38
39 # Config loader 39 # Config loader
40 'ConfigLoader', 40 'ConfigLoader',
41 - 'CustomerConfig', 41 + 'SearchConfig',
42 'IndexConfig', 42 'IndexConfig',
43 'RankingConfig', 43 'RankingConfig',
44 'QueryConfig', 44 'QueryConfig',
config/config_loader.py
1 """ 1 """
2 -Configuration loader and validator for customer-specific search configurations. 2 +Configuration loader and validator for search engine configurations.
3 3
4 This module handles loading, parsing, and validating YAML configuration files 4 This module handles loading, parsing, and validating YAML configuration files
5 -that define how each customer's data should be indexed and searched. 5 +that define how search engine data should be indexed and searched.
6 """ 6 """
7 7
8 import yaml 8 import yaml
@@ -85,7 +85,7 @@ class RerankConfig: @@ -85,7 +85,7 @@ class RerankConfig:
85 85
86 86
87 @dataclass 87 @dataclass
88 -class CustomerConfig: 88 +class SearchConfig:
89 """Complete configuration for search engine (multi-tenant).""" 89 """Complete configuration for search engine (multi-tenant)."""
90 # Field definitions 90 # Field definitions
91 fields: List[FieldConfig] 91 fields: List[FieldConfig]
@@ -161,12 +161,12 @@ class ConfigLoader: @@ -161,12 +161,12 @@ class ConfigLoader:
161 161
162 return rewrite_dict 162 return rewrite_dict
163 163
164 - def load_config(self) -> CustomerConfig: 164 + def load_config(self) -> SearchConfig:
165 """ 165 """
166 Load unified configuration from YAML file. 166 Load unified configuration from YAML file.
167 167
168 Returns: 168 Returns:
169 - CustomerConfig object 169 + SearchConfig object
170 170
171 Raises: 171 Raises:
172 ConfigurationError: If config file not found or invalid 172 ConfigurationError: If config file not found or invalid
@@ -182,8 +182,8 @@ class ConfigLoader: @@ -182,8 +182,8 @@ class ConfigLoader:
182 182
183 return self._parse_config(config_data) 183 return self._parse_config(config_data)
184 184
185 - def _parse_config(self, config_data: Dict[str, Any]) -> CustomerConfig:  
186 - """Parse configuration dictionary into CustomerConfig object.""" 185 + def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig:
  186 + """Parse configuration dictionary into SearchConfig object."""
187 187
188 # Parse fields 188 # Parse fields
189 fields = [] 189 fields = []
@@ -243,7 +243,7 @@ class ConfigLoader: @@ -243,7 +243,7 @@ class ConfigLoader:
243 inner_hits_size=spu_data.get("inner_hits_size", 3) 243 inner_hits_size=spu_data.get("inner_hits_size", 3)
244 ) 244 )
245 245
246 - return CustomerConfig( 246 + return SearchConfig(
247 fields=fields, 247 fields=fields,
248 indexes=indexes, 248 indexes=indexes,
249 query_config=query_config, 249 query_config=query_config,
@@ -312,12 +312,12 @@ class ConfigLoader: @@ -312,12 +312,12 @@ class ConfigLoader:
312 language_field_mapping=language_field_mapping 312 language_field_mapping=language_field_mapping
313 ) 313 )
314 314
315 - def validate_config(self, config: CustomerConfig) -> List[str]: 315 + def validate_config(self, config: SearchConfig) -> List[str]:
316 """ 316 """
317 - Validate customer configuration. 317 + Validate search configuration.
318 318
319 Args: 319 Args:
320 - config: Customer configuration to validate 320 + config: Search configuration to validate
321 321
322 Returns: 322 Returns:
323 List of validation error messages (empty if valid) 323 List of validation error messages (empty if valid)
@@ -407,7 +407,7 @@ class ConfigLoader: @@ -407,7 +407,7 @@ class ConfigLoader:
407 407
408 return errors 408 return errors
409 409
410 - def save_config(self, config: CustomerConfig, output_path: Optional[str] = None) -> None: 410 + def save_config(self, config: SearchConfig, output_path: Optional[str] = None) -> None:
411 """ 411 """
412 Save configuration to YAML file. 412 Save configuration to YAML file.
413 413
config/env_config.py
@@ -31,9 +31,6 @@ REDIS_CONFIG = { @@ -31,9 +31,6 @@ REDIS_CONFIG = {
31 # DeepL API Key 31 # DeepL API Key
32 DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY') 32 DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY')
33 33
34 -# Customer Configuration  
35 -CUSTOMER_ID = os.getenv('CUSTOMER_ID', 'customer1')  
36 -  
37 # API Service Configuration 34 # API Service Configuration
38 API_HOST = os.getenv('API_HOST', '0.0.0.0') 35 API_HOST = os.getenv('API_HOST', '0.0.0.0')
39 API_PORT = int(os.getenv('API_PORT', 6002)) 36 API_PORT = int(os.getenv('API_PORT', 6002))
@@ -74,11 +71,6 @@ def get_deepl_key() -> str: @@ -74,11 +71,6 @@ def get_deepl_key() -> str:
74 return DEEPL_AUTH_KEY 71 return DEEPL_AUTH_KEY
75 72
76 73
77 -def get_customer_id() -> str:  
78 - """Get default customer ID."""  
79 - return CUSTOMER_ID  
80 -  
81 -  
82 def get_db_config() -> Dict[str, Any]: 74 def get_db_config() -> Dict[str, Any]:
83 """Get MySQL database configuration.""" 75 """Get MySQL database configuration."""
84 return DB_CONFIG.copy() 76 return DB_CONFIG.copy()
@@ -103,9 +95,6 @@ def print_config(): @@ -103,9 +95,6 @@ def print_config():
103 print("\nDeepL:") 95 print("\nDeepL:")
104 print(f" API Key: {'*' * 10 if DEEPL_AUTH_KEY else 'None (translation disabled)'}") 96 print(f" API Key: {'*' * 10 if DEEPL_AUTH_KEY else 'None (translation disabled)'}")
105 97
106 - print("\nCustomer:")  
107 - print(f" Customer ID: {CUSTOMER_ID}")  
108 -  
109 print("\nAPI Service:") 98 print("\nAPI Service:")
110 print(f" Host: {API_HOST}") 99 print(f" Host: {API_HOST}")
111 print(f" Port: {API_PORT}") 100 print(f" Port: {API_PORT}")
indexer/data_transformer.py
@@ -8,7 +8,7 @@ import pandas as pd @@ -8,7 +8,7 @@ import pandas as pd
8 import numpy as np 8 import numpy as np
9 import datetime 9 import datetime
10 from typing import Dict, Any, List, Optional 10 from typing import Dict, Any, List, Optional
11 -from config import CustomerConfig, FieldConfig, FieldType 11 +from config import SearchConfig, FieldConfig, FieldType
12 from embeddings import BgeEncoder, CLIPImageEncoder 12 from embeddings import BgeEncoder, CLIPImageEncoder
13 from utils.cache import EmbeddingCache 13 from utils.cache import EmbeddingCache
14 14
@@ -18,7 +18,7 @@ class DataTransformer: @@ -18,7 +18,7 @@ class DataTransformer:
18 18
19 def __init__( 19 def __init__(
20 self, 20 self,
21 - config: CustomerConfig, 21 + config: SearchConfig,
22 text_encoder: Optional[BgeEncoder] = None, 22 text_encoder: Optional[BgeEncoder] = None,
23 image_encoder: Optional[CLIPImageEncoder] = None, 23 image_encoder: Optional[CLIPImageEncoder] = None,
24 use_cache: bool = True 24 use_cache: bool = True
@@ -27,7 +27,7 @@ class DataTransformer: @@ -27,7 +27,7 @@ class DataTransformer:
27 Initialize data transformer. 27 Initialize data transformer.
28 28
29 Args: 29 Args:
30 - config: Customer configuration 30 + config: Search configuration
31 text_encoder: Text embedding encoder (lazy loaded if not provided) 31 text_encoder: Text embedding encoder (lazy loaded if not provided)
32 image_encoder: Image embedding encoder (lazy loaded if not provided) 32 image_encoder: Image embedding encoder (lazy loaded if not provided)
33 use_cache: Whether to use embedding cache 33 use_cache: Whether to use embedding cache
indexer/mapping_generator.py
1 """ 1 """
2 Elasticsearch mapping generator. 2 Elasticsearch mapping generator.
3 3
4 -Generates Elasticsearch index mappings from customer configuration. 4 +Generates Elasticsearch index mappings from search configuration.
5 """ 5 """
6 6
7 from typing import Dict, Any 7 from typing import Dict, Any
8 from config import ( 8 from config import (
9 - CustomerConfig, 9 + SearchConfig,
10 FieldConfig, 10 FieldConfig,
11 get_es_mapping_for_field, 11 get_es_mapping_for_field,
12 get_default_analyzers, 12 get_default_analyzers,
@@ -15,9 +15,9 @@ from config import ( @@ -15,9 +15,9 @@ from config import (
15 15
16 16
17 class MappingGenerator: 17 class MappingGenerator:
18 - """Generates Elasticsearch mapping from customer configuration.""" 18 + """Generates Elasticsearch mapping from search configuration."""
19 19
20 - def __init__(self, config: CustomerConfig): 20 + def __init__(self, config: SearchConfig):
21 self.config = config 21 self.config = config
22 22
23 def generate_mapping(self) -> Dict[str, Any]: 23 def generate_mapping(self) -> Dict[str, Any]:
query/query_parser.py
@@ -7,7 +7,7 @@ Handles query rewriting, translation, and embedding generation. @@ -7,7 +7,7 @@ Handles query rewriting, translation, and embedding generation.
7 from typing import Dict, List, Optional, Any 7 from typing import Dict, List, Optional, Any
8 import numpy as np 8 import numpy as np
9 9
10 -from config import CustomerConfig, QueryConfig 10 +from config import SearchConfig, QueryConfig
11 from embeddings import BgeEncoder 11 from embeddings import BgeEncoder
12 from .language_detector import LanguageDetector 12 from .language_detector import LanguageDetector
13 from .translator import Translator 13 from .translator import Translator
@@ -61,7 +61,7 @@ class QueryParser: @@ -61,7 +61,7 @@ class QueryParser:
61 61
62 def __init__( 62 def __init__(
63 self, 63 self,
64 - config: CustomerConfig, 64 + config: SearchConfig,
65 text_encoder: Optional[BgeEncoder] = None, 65 text_encoder: Optional[BgeEncoder] = None,
66 translator: Optional[Translator] = None 66 translator: Optional[Translator] = None
67 ): 67 ):
@@ -69,7 +69,7 @@ class QueryParser: @@ -69,7 +69,7 @@ class QueryParser:
69 Initialize query parser. 69 Initialize query parser.
70 70
71 Args: 71 Args:
72 - config: Customer configuration 72 + config: Search configuration
73 text_encoder: Text embedding encoder (lazy loaded if not provided) 73 text_encoder: Text embedding encoder (lazy loaded if not provided)
74 translator: Translator instance (lazy loaded if not provided) 74 translator: Translator instance (lazy loaded if not provided)
75 """ 75 """
search/multilang_query_builder.py
@@ -9,7 +9,7 @@ maintaining a unified external interface. @@ -9,7 +9,7 @@ maintaining a unified external interface.
9 from typing import Dict, Any, List, Optional 9 from typing import Dict, Any, List, Optional
10 import numpy as np 10 import numpy as np
11 11
12 -from config import CustomerConfig, IndexConfig 12 +from config import SearchConfig, IndexConfig
13 from query import ParsedQuery 13 from query import ParsedQuery
14 from .es_query_builder import ESQueryBuilder 14 from .es_query_builder import ESQueryBuilder
15 15
@@ -26,7 +26,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -26,7 +26,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
26 26
27 def __init__( 27 def __init__(
28 self, 28 self,
29 - config: CustomerConfig, 29 + config: SearchConfig,
30 index_name: str, 30 index_name: str,
31 text_embedding_field: Optional[str] = None, 31 text_embedding_field: Optional[str] = None,
32 image_embedding_field: Optional[str] = None, 32 image_embedding_field: Optional[str] = None,
search/searcher.py
@@ -7,7 +7,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. @@ -7,7 +7,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting.
7 from typing import Dict, Any, List, Optional, Union 7 from typing import Dict, Any, List, Optional, Union
8 import time 8 import time
9 9
10 -from config import CustomerConfig 10 +from config import SearchConfig
11 from utils.es_client import ESClient 11 from utils.es_client import ESClient
12 from query import QueryParser, ParsedQuery 12 from query import QueryParser, ParsedQuery
13 from indexer import MappingGenerator 13 from indexer import MappingGenerator
@@ -75,7 +75,7 @@ class Searcher: @@ -75,7 +75,7 @@ class Searcher:
75 75
76 def __init__( 76 def __init__(
77 self, 77 self,
78 - config: CustomerConfig, 78 + config: SearchConfig,
79 es_client: ESClient, 79 es_client: ESClient,
80 query_parser: Optional[QueryParser] = None 80 query_parser: Optional[QueryParser] = None
81 ): 81 ):
@@ -83,7 +83,7 @@ class Searcher: @@ -83,7 +83,7 @@ class Searcher:
83 Initialize searcher. 83 Initialize searcher.
84 84
85 Args: 85 Args:
86 - config: Customer configuration 86 + config: Search configuration
87 es_client: Elasticsearch client 87 es_client: Elasticsearch client
88 query_parser: Query parser (created if not provided) 88 query_parser: Query parser (created if not provided)
89 """ 89 """
test_source_fields.py
@@ -7,7 +7,7 @@ import sys @@ -7,7 +7,7 @@ import sys
7 import os 7 import os
8 sys.path.append(os.path.dirname(os.path.abspath(__file__))) 8 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
9 9
10 -from config import ConfigLoader, CustomerConfig 10 +from config import ConfigLoader, SearchConfig
11 11
12 def test_source_fields_config(): 12 def test_source_fields_config():
13 """测试source_fields配置是否正确加载""" 13 """测试source_fields配置是否正确加载"""
@@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock
15 project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 15 project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
16 sys.path.insert(0, project_root) 16 sys.path.insert(0, project_root)
17 17
18 -from config import CustomerConfig, QueryConfig, IndexConfig, FieldConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig 18 +from config import SearchConfig, QueryConfig, IndexConfig, FieldConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig
19 from config.field_types import FieldType, AnalyzerType 19 from config.field_types import FieldType, AnalyzerType
20 from utils.es_client import ESClient 20 from utils.es_client import ESClient
21 from search import Searcher 21 from search import Searcher
@@ -51,8 +51,8 @@ def sample_index_config() -> IndexConfig: @@ -51,8 +51,8 @@ def sample_index_config() -> IndexConfig:
51 51
52 52
53 @pytest.fixture 53 @pytest.fixture
54 -def sample_customer_config(sample_index_config) -> CustomerConfig:  
55 - """样例客户配置""" 54 +def sample_search_config(sample_index_config) -> SearchConfig:
  55 + """样例搜索配置"""
56 query_config = QueryConfig( 56 query_config = QueryConfig(
57 enable_query_rewrite=True, 57 enable_query_rewrite=True,
58 enable_translation=True, 58 enable_translation=True,
@@ -74,7 +74,7 @@ def sample_customer_config(sample_index_config) -> CustomerConfig: @@ -74,7 +74,7 @@ def sample_customer_config(sample_index_config) -> CustomerConfig:
74 function_score_config = FunctionScoreConfig() 74 function_score_config = FunctionScoreConfig()
75 rerank_config = RerankConfig() 75 rerank_config = RerankConfig()
76 76
77 - return CustomerConfig( 77 + return SearchConfig(
78 es_index_name="test_products", 78 es_index_name="test_products",
79 fields=[ 79 fields=[
80 FieldConfig(name="tenant_id", field_type=FieldType.KEYWORD, required=True), 80 FieldConfig(name="tenant_id", field_type=FieldType.KEYWORD, required=True),
@@ -134,18 +134,18 @@ def mock_es_client() -> Mock: @@ -134,18 +134,18 @@ def mock_es_client() -> Mock:
134 134
135 135
136 @pytest.fixture 136 @pytest.fixture
137 -def test_searcher(sample_customer_config, mock_es_client) -> Searcher: 137 +def test_searcher(sample_search_config, mock_es_client) -> Searcher:
138 """测试用Searcher实例""" 138 """测试用Searcher实例"""
139 return Searcher( 139 return Searcher(
140 - config=sample_customer_config, 140 + config=sample_search_config,
141 es_client=mock_es_client 141 es_client=mock_es_client
142 ) 142 )
143 143
144 144
145 @pytest.fixture 145 @pytest.fixture
146 -def test_query_parser(sample_customer_config) -> QueryParser: 146 +def test_query_parser(sample_search_config) -> QueryParser:
147 """测试用QueryParser实例""" 147 """测试用QueryParser实例"""
148 - return QueryParser(sample_customer_config) 148 + return QueryParser(sample_search_config)
149 149
150 150
151 @pytest.fixture 151 @pytest.fixture