Compare View

switch
from
...
to
 
@@ -40,14 +40,13 @@ limiter = Limiter(key_func=get_remote_address) @@ -40,14 +40,13 @@ limiter = Limiter(key_func=get_remote_address)
40 # Add parent directory to path 40 # Add parent directory to path
41 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 41 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
42 42
43 -from config import ConfigLoader, SearchConfig  
44 from config.env_config import ES_CONFIG 43 from config.env_config import ES_CONFIG
45 from utils import ESClient 44 from utils import ESClient
46 from search import Searcher 45 from search import Searcher
  46 +from search.query_config import DEFAULT_INDEX_NAME
47 from query import QueryParser 47 from query import QueryParser
48 48
49 # Global instances 49 # Global instances
50 -_config: Optional[SearchConfig] = None  
51 _es_client: Optional[ESClient] = None 50 _es_client: Optional[ESClient] = None
52 _searcher: Optional[Searcher] = None 51 _searcher: Optional[Searcher] = None
53 _query_parser: Optional[QueryParser] = None 52 _query_parser: Optional[QueryParser] = None
@@ -60,20 +59,11 @@ def init_service(es_host: str = "http://localhost:9200"): @@ -60,20 +59,11 @@ def init_service(es_host: str = "http://localhost:9200"):
60 Args: 59 Args:
61 es_host: Elasticsearch host URL 60 es_host: Elasticsearch host URL
62 """ 61 """
63 - global _config, _es_client, _searcher, _query_parser 62 + global _es_client, _searcher, _query_parser
64 63
65 start_time = time.time() 64 start_time = time.time()
66 logger.info("Initializing search service (multi-tenant)") 65 logger.info("Initializing search service (multi-tenant)")
67 66
68 - # Load and validate configuration  
69 - logger.info("Loading configuration...")  
70 - config_loader = ConfigLoader("config/config.yaml")  
71 - _config = config_loader.load_config()  
72 - errors = config_loader.validate_config(_config)  
73 - if errors:  
74 - raise ValueError(f"Configuration validation failed: {errors}")  
75 - logger.info(f"Configuration loaded: {_config.es_index_name}")  
76 -  
77 # Get ES credentials 67 # Get ES credentials
78 es_username = os.getenv('ES_USERNAME') or ES_CONFIG.get('username') 68 es_username = os.getenv('ES_USERNAME') or ES_CONFIG.get('username')
79 es_password = os.getenv('ES_PASSWORD') or ES_CONFIG.get('password') 69 es_password = os.getenv('ES_PASSWORD') or ES_CONFIG.get('password')
@@ -91,20 +81,15 @@ def init_service(es_host: str = "http://localhost:9200"): @@ -91,20 +81,15 @@ def init_service(es_host: str = "http://localhost:9200"):
91 81
92 # Initialize components 82 # Initialize components
93 logger.info("Initializing query parser...") 83 logger.info("Initializing query parser...")
94 - _query_parser = QueryParser(_config) 84 + _query_parser = QueryParser()
95 85
96 logger.info("Initializing searcher...") 86 logger.info("Initializing searcher...")
97 - _searcher = Searcher(_config, _es_client, _query_parser) 87 + _searcher = Searcher(_es_client, _query_parser, index_name=DEFAULT_INDEX_NAME)
98 88
99 elapsed = time.time() - start_time 89 elapsed = time.time() - start_time
100 - logger.info(f"Search service ready! (took {elapsed:.2f}s)") 90 + logger.info(f"Search service ready! (took {elapsed:.2f}s) | Index: {DEFAULT_INDEX_NAME}")
101 91
102 92
103 -def get_config() -> SearchConfig:  
104 - """Get search engine configuration."""  
105 - if _config is None:  
106 - raise RuntimeError("Service not initialized")  
107 - return _config  
108 93
109 94
110 def get_es_client() -> ESClient: 95 def get_es_client() -> ESClient:
@@ -243,8 +228,8 @@ async def health_check(request: Request): @@ -243,8 +228,8 @@ async def health_check(request: Request):
243 """Health check endpoint.""" 228 """Health check endpoint."""
244 try: 229 try:
245 # Check if services are initialized 230 # Check if services are initialized
246 - get_config()  
247 get_es_client() 231 get_es_client()
  232 + get_searcher()
248 233
249 return { 234 return {
250 "status": "healthy", 235 "status": "healthy",
@@ -69,6 +69,10 @@ class SearchRequest(BaseModel): @@ -69,6 +69,10 @@ class SearchRequest(BaseModel):
69 query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") 69 query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)")
70 size: int = Field(10, ge=1, le=100, description="返回结果数量") 70 size: int = Field(10, ge=1, le=100, description="返回结果数量")
71 from_: int = Field(0, ge=0, alias="from", description="分页偏移量") 71 from_: int = Field(0, ge=0, alias="from", description="分页偏移量")
  72 + language: Literal["zh", "en"] = Field(
  73 + "zh",
  74 + description="响应语言:'zh'(中文)或 'en'(英文),用于选择 title/description/vendor 等多语言字段"
  75 + )
72 76
73 # 过滤器 - 精确匹配和多值匹配 77 # 过滤器 - 精确匹配和多值匹配
74 filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( 78 filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field(
@@ -175,28 +179,53 @@ class FacetResult(BaseModel): @@ -175,28 +179,53 @@ class FacetResult(BaseModel):
175 class SkuResult(BaseModel): 179 class SkuResult(BaseModel):
176 """SKU 结果""" 180 """SKU 结果"""
177 sku_id: str = Field(..., description="SKU ID") 181 sku_id: str = Field(..., description="SKU ID")
178 - title: Optional[str] = Field(None, description="SKU标题") 182 + # 与 ES nested skus 结构对齐
179 price: Optional[float] = Field(None, description="价格") 183 price: Optional[float] = Field(None, description="价格")
180 compare_at_price: Optional[float] = Field(None, description="原价") 184 compare_at_price: Optional[float] = Field(None, description="原价")
181 - sku: Optional[str] = Field(None, description="SKU编码") 185 + sku_code: Optional[str] = Field(None, description="SKU编码")
182 stock: int = Field(0, description="库存数量") 186 stock: int = Field(0, description="库存数量")
183 - options: Optional[Dict[str, Any]] = Field(None, description="选项(颜色、尺寸等)") 187 + weight: Optional[float] = Field(None, description="重量")
  188 + weight_unit: Optional[str] = Field(None, description="重量单位")
  189 + option1_value: Optional[str] = Field(None, description="选项1取值(如颜色)")
  190 + option2_value: Optional[str] = Field(None, description="选项2取值(如尺码)")
  191 + option3_value: Optional[str] = Field(None, description="选项3取值")
  192 + image_src: Optional[str] = Field(None, description="SKU图片地址")
184 193
185 194
186 class SpuResult(BaseModel): 195 class SpuResult(BaseModel):
187 """SPU 搜索结果""" 196 """SPU 搜索结果"""
188 spu_id: str = Field(..., description="SPU ID") 197 spu_id: str = Field(..., description="SPU ID")
189 title: Optional[str] = Field(None, description="商品标题") 198 title: Optional[str] = Field(None, description="商品标题")
  199 + brief: Optional[str] = Field(None, description="商品短描述")
190 handle: Optional[str] = Field(None, description="商品handle") 200 handle: Optional[str] = Field(None, description="商品handle")
191 description: Optional[str] = Field(None, description="商品描述") 201 description: Optional[str] = Field(None, description="商品描述")
192 vendor: Optional[str] = Field(None, description="供应商/品牌") 202 vendor: Optional[str] = Field(None, description="供应商/品牌")
193 - category: Optional[str] = Field(None, description="类目")  
194 - tags: Optional[str] = Field(None, description="标签") 203 + category: Optional[str] = Field(None, description="类目(兼容字段,等同于category_name)")
  204 + category_path: Optional[str] = Field(None, description="类目路径(多级,用于面包屑)")
  205 + category_name: Optional[str] = Field(None, description="类目名称(展示用)")
  206 + category_id: Optional[str] = Field(None, description="类目ID")
  207 + category_level: Optional[int] = Field(None, description="类目层级")
  208 + category1_name: Optional[str] = Field(None, description="一级类目名称")
  209 + category2_name: Optional[str] = Field(None, description="二级类目名称")
  210 + category3_name: Optional[str] = Field(None, description="三级类目名称")
  211 + tags: Optional[List[str]] = Field(None, description="标签列表")
195 price: Optional[float] = Field(None, description="价格(min_price)") 212 price: Optional[float] = Field(None, description="价格(min_price)")
196 compare_at_price: Optional[float] = Field(None, description="原价") 213 compare_at_price: Optional[float] = Field(None, description="原价")
197 currency: str = Field("USD", description="货币单位") 214 currency: str = Field("USD", description="货币单位")
198 image_url: Optional[str] = Field(None, description="主图URL") 215 image_url: Optional[str] = Field(None, description="主图URL")
199 in_stock: bool = Field(True, description="是否有库存") 216 in_stock: bool = Field(True, description="是否有库存")
  217 + # SKU 扁平化信息
  218 + sku_prices: Optional[List[float]] = Field(None, description="所有SKU价格列表")
  219 + sku_weights: Optional[List[int]] = Field(None, description="所有SKU重量列表")
  220 + sku_weight_units: Optional[List[str]] = Field(None, description="所有SKU重量单位列表")
  221 + total_inventory: Optional[int] = Field(None, description="总库存")
  222 + option1_name: Optional[str] = Field(None, description="选项1名称(如颜色)")
  223 + option2_name: Optional[str] = Field(None, description="选项2名称(如尺码)")
  224 + option3_name: Optional[str] = Field(None, description="选项3名称")
  225 + specifications: Optional[List[Dict[str, Any]]] = Field(
  226 + None,
  227 + description="规格列表(与 ES specifications 字段对应)"
  228 + )
200 skus: List[SkuResult] = Field(default_factory=list, description="SKU列表") 229 skus: List[SkuResult] = Field(default_factory=list, description="SKU列表")
201 relevance_score: float = Field(..., ge=0.0, description="相关性分数(ES原始分数)") 230 relevance_score: float = Field(..., ge=0.0, description="相关性分数(ES原始分数)")
202 231
api/result_formatter.py
@@ -12,7 +12,8 @@ class ResultFormatter: @@ -12,7 +12,8 @@ class ResultFormatter:
12 @staticmethod 12 @staticmethod
13 def format_search_results( 13 def format_search_results(
14 es_hits: List[Dict[str, Any]], 14 es_hits: List[Dict[str, Any]],
15 - max_score: float = 1.0 15 + max_score: float = 1.0,
  16 + language: str = "zh"
16 ) -> List[SpuResult]: 17 ) -> List[SpuResult]:
17 """ 18 """
18 Convert ES hits to SpuResult list. 19 Convert ES hits to SpuResult list.
@@ -25,6 +26,18 @@ class ResultFormatter: @@ -25,6 +26,18 @@ class ResultFormatter:
25 List of SpuResult objects 26 List of SpuResult objects
26 """ 27 """
27 results = [] 28 results = []
  29 + lang = (language or "zh").lower()
  30 + if lang not in ("zh", "en"):
  31 + lang = "en"
  32 +
  33 + def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]:
  34 + """从 *_zh / *_en 字段中按语言选择一个值,若目标语言缺失则回退到另一种。"""
  35 + zh_val = src.get(f"{base}_zh")
  36 + en_val = src.get(f"{base}_en")
  37 + if lang == "zh":
  38 + return zh_val or en_val
  39 + else:
  40 + return en_val or zh_val
28 41
29 for hit in es_hits: 42 for hit in es_hits:
30 source = hit.get('_source', {}) 43 source = hit.get('_source', {})
@@ -40,6 +53,14 @@ class ResultFormatter: @@ -40,6 +53,14 @@ class ResultFormatter:
40 except (ValueError, TypeError): 53 except (ValueError, TypeError):
41 relevance_score = 0.0 54 relevance_score = 0.0
42 55
  56 + # Multi-language fields
  57 + title = pick_lang_field(source, "title")
  58 + brief = pick_lang_field(source, "brief")
  59 + description = pick_lang_field(source, "description")
  60 + vendor = pick_lang_field(source, "vendor")
  61 + category_path = pick_lang_field(source, "category_path")
  62 + category_name = pick_lang_field(source, "category_name")
  63 +
43 # Extract SKUs 64 # Extract SKUs
44 skus = [] 65 skus = []
45 skus_data = source.get('skus', []) 66 skus_data = source.get('skus', [])
@@ -62,17 +83,33 @@ class ResultFormatter: @@ -62,17 +83,33 @@ class ResultFormatter:
62 # Build SpuResult 83 # Build SpuResult
63 spu = SpuResult( 84 spu = SpuResult(
64 spu_id=str(source.get('spu_id', '')), 85 spu_id=str(source.get('spu_id', '')),
65 - title=source.get('title'), 86 + title=title,
  87 + brief=brief,
66 handle=source.get('handle'), 88 handle=source.get('handle'),
67 - description=source.get('description'),  
68 - vendor=source.get('vendor'),  
69 - category=source.get('category'), 89 + description=description,
  90 + vendor=vendor,
  91 + category=category_name,
  92 + category_path=category_path,
  93 + category_name=category_name,
  94 + category_id=source.get('category_id'),
  95 + category_level=source.get('category_level'),
  96 + category1_name=source.get('category1_name'),
  97 + category2_name=source.get('category2_name'),
  98 + category3_name=source.get('category3_name'),
70 tags=source.get('tags'), 99 tags=source.get('tags'),
71 price=source.get('min_price'), 100 price=source.get('min_price'),
72 compare_at_price=source.get('compare_at_price'), 101 compare_at_price=source.get('compare_at_price'),
73 currency="USD", # Default currency 102 currency="USD", # Default currency
74 image_url=source.get('image_url'), 103 image_url=source.get('image_url'),
75 in_stock=in_stock, 104 in_stock=in_stock,
  105 + sku_prices=source.get('sku_prices'),
  106 + sku_weights=source.get('sku_weights'),
  107 + sku_weight_units=source.get('sku_weight_units'),
  108 + total_inventory=source.get('total_inventory'),
  109 + option1_name=source.get('option1_name'),
  110 + option2_name=source.get('option2_name'),
  111 + option3_name=source.get('option3_name'),
  112 + specifications=source.get('specifications'),
76 skus=skus, 113 skus=skus,
77 relevance_score=relevance_score 114 relevance_score=relevance_score
78 ) 115 )
@@ -89,6 +126,11 @@ class ResultFormatter: @@ -89,6 +126,11 @@ class ResultFormatter:
89 """ 126 """
90 Format ES aggregations to FacetResult list. 127 Format ES aggregations to FacetResult list.
91 128
  129 + 支持:
  130 + 1. 普通terms聚合
  131 + 2. range聚合
  132 + 3. specifications嵌套聚合(按name分组,然后按value聚合)
  133 +
92 Args: 134 Args:
93 es_aggregations: ES aggregations response 135 es_aggregations: ES aggregations response
94 facet_configs: Facet configurations (optional) 136 facet_configs: Facet configurations (optional)
@@ -100,6 +142,38 @@ class ResultFormatter: @@ -100,6 +142,38 @@ class ResultFormatter:
100 142
101 for field_name, agg_data in es_aggregations.items(): 143 for field_name, agg_data in es_aggregations.items():
102 display_field = field_name[:-6] if field_name.endswith("_facet") else field_name 144 display_field = field_name[:-6] if field_name.endswith("_facet") else field_name
  145 +
  146 + # 处理specifications嵌套分面
  147 + if field_name == "specifications_facet" and 'by_name' in agg_data:
  148 + # specifications嵌套聚合:按name分组,每个name下有value_counts
  149 + by_name_agg = agg_data['by_name']
  150 + if 'buckets' in by_name_agg:
  151 + for name_bucket in by_name_agg['buckets']:
  152 + name = name_bucket['key']
  153 + value_counts = name_bucket.get('value_counts', {})
  154 +
  155 + values = []
  156 + if 'buckets' in value_counts:
  157 + for value_bucket in value_counts['buckets']:
  158 + value = FacetValue(
  159 + value=value_bucket['key'],
  160 + label=str(value_bucket['key']),
  161 + count=value_bucket['doc_count'],
  162 + selected=False
  163 + )
  164 + values.append(value)
  165 +
  166 + # 为每个name创建一个分面结果
  167 + facet = FacetResult(
  168 + field=f"specifications.{name}",
  169 + label=str(name), # 使用name作为label,如"颜色"、"尺寸"
  170 + type="terms",
  171 + values=values,
  172 + total_count=name_bucket['doc_count']
  173 + )
  174 + facets.append(facet)
  175 + continue
  176 +
103 # Handle terms aggregation 177 # Handle terms aggregation
104 if 'buckets' in agg_data: 178 if 'buckets' in agg_data:
105 values = [] 179 values = []
api/routes/search.py
@@ -94,7 +94,8 @@ async def search(request: SearchRequest, http_request: Request): @@ -94,7 +94,8 @@ async def search(request: SearchRequest, http_request: Request):
94 context=context, 94 context=context,
95 sort_by=request.sort_by, 95 sort_by=request.sort_by,
96 sort_order=request.sort_order, 96 sort_order=request.sort_order,
97 - debug=request.debug 97 + debug=request.debug,
  98 + language=request.language,
98 ) 99 )
99 100
100 # Include performance summary in response 101 # Include performance summary in response
docs/ES常用表达式.md
@@ -7,3 +7,14 @@ GET /search_products/_search @@ -7,3 +7,14 @@ GET /search_products/_search
7 } 7 }
8 } 8 }
9 9
  10 +
  11 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  12 + "size": 5,
  13 + "query": {
  14 + "bool": {
  15 + "filter": [
  16 + { "term": { "tenant_id": "162" } }
  17 + ]
  18 + }
  19 + }
  20 + }'
10 \ No newline at end of file 21 \ No newline at end of file
indexer/mapping_generator.py
@@ -51,23 +51,26 @@ def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, An @@ -51,23 +51,26 @@ def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, An
51 Create Elasticsearch index if it doesn't exist. 51 Create Elasticsearch index if it doesn't exist.
52 52
53 Args: 53 Args:
54 - es_client: Elasticsearch client instance 54 + es_client: ESClient instance
55 index_name: Name of the index to create 55 index_name: Name of the index to create
56 mapping: Index mapping configuration. If None, loads from default file. 56 mapping: Index mapping configuration. If None, loads from default file.
57 57
58 Returns: 58 Returns:
59 True if index was created, False if it already exists 59 True if index was created, False if it already exists
60 """ 60 """
61 - if es_client.indices.exists(index=index_name): 61 + if es_client.index_exists(index_name):
62 logger.info(f"Index '{index_name}' already exists") 62 logger.info(f"Index '{index_name}' already exists")
63 return False 63 return False
64 64
65 if mapping is None: 65 if mapping is None:
66 mapping = load_mapping() 66 mapping = load_mapping()
67 67
68 - es_client.indices.create(index=index_name, body=mapping)  
69 - logger.info(f"Index '{index_name}' created successfully")  
70 - return True 68 + if es_client.create_index(index_name, mapping):
  69 + logger.info(f"Index '{index_name}' created successfully")
  70 + return True
  71 + else:
  72 + logger.error(f"Failed to create index '{index_name}'")
  73 + return False
71 74
72 75
73 def delete_index_if_exists(es_client, index_name: str) -> bool: 76 def delete_index_if_exists(es_client, index_name: str) -> bool:
@@ -75,19 +78,22 @@ def delete_index_if_exists(es_client, index_name: str) -> bool: @@ -75,19 +78,22 @@ def delete_index_if_exists(es_client, index_name: str) -> bool:
75 Delete Elasticsearch index if it exists. 78 Delete Elasticsearch index if it exists.
76 79
77 Args: 80 Args:
78 - es_client: Elasticsearch client instance 81 + es_client: ESClient instance
79 index_name: Name of the index to delete 82 index_name: Name of the index to delete
80 83
81 Returns: 84 Returns:
82 True if index was deleted, False if it didn't exist 85 True if index was deleted, False if it didn't exist
83 """ 86 """
84 - if not es_client.indices.exists(index=index_name): 87 + if not es_client.index_exists(index_name):
85 logger.warning(f"Index '{index_name}' does not exist") 88 logger.warning(f"Index '{index_name}' does not exist")
86 return False 89 return False
87 90
88 - es_client.indices.delete(index=index_name)  
89 - logger.info(f"Index '{index_name}' deleted successfully")  
90 - return True 91 + if es_client.delete_index(index_name):
  92 + logger.info(f"Index '{index_name}' deleted successfully")
  93 + return True
  94 + else:
  95 + logger.error(f"Failed to delete index '{index_name}'")
  96 + return False
91 97
92 98
93 def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bool: 99 def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bool:
@@ -95,18 +101,21 @@ def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bo @@ -95,18 +101,21 @@ def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bo
95 Update mapping for existing index (only adding new fields). 101 Update mapping for existing index (only adding new fields).
96 102
97 Args: 103 Args:
98 - es_client: Elasticsearch client instance 104 + es_client: ESClient instance
99 index_name: Name of the index 105 index_name: Name of the index
100 new_fields: New field mappings to add 106 new_fields: New field mappings to add
101 107
102 Returns: 108 Returns:
103 True if successful 109 True if successful
104 """ 110 """
105 - if not es_client.indices.exists(index=index_name): 111 + if not es_client.index_exists(index_name):
106 logger.error(f"Index '{index_name}' does not exist") 112 logger.error(f"Index '{index_name}' does not exist")
107 return False 113 return False
108 114
109 mapping = {"properties": new_fields} 115 mapping = {"properties": new_fields}
110 - es_client.indices.put_mapping(index=index_name, body=mapping)  
111 - logger.info(f"Mapping updated for index '{index_name}'")  
112 - return True 116 + if es_client.update_mapping(index_name, mapping):
  117 + logger.info(f"Mapping updated for index '{index_name}'")
  118 + return True
  119 + else:
  120 + logger.error(f"Failed to update mapping for index '{index_name}'")
  121 + return False
indexer/spu_transformer.py
@@ -124,7 +124,7 @@ class SPUTransformer: @@ -124,7 +124,7 @@ class SPUTransformer:
124 query = text(""" 124 query = text("""
125 SELECT 125 SELECT
126 id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, 126 id, spu_id, shop_id, shoplazza_id, shoplazza_product_id,
127 - position, name, values, tenant_id, 127 + position, name, `values`, tenant_id,
128 creator, create_time, updater, update_time, deleted 128 creator, create_time, updater, update_time, deleted
129 FROM shoplazza_product_option 129 FROM shoplazza_product_option
130 WHERE tenant_id = :tenant_id AND deleted = 0 130 WHERE tenant_id = :tenant_id AND deleted = 0
query/query_parser.py
@@ -8,8 +8,14 @@ from typing import Dict, List, Optional, Any @@ -8,8 +8,14 @@ from typing import Dict, List, Optional, Any
8 import numpy as np 8 import numpy as np
9 import logging 9 import logging
10 10
11 -from config import SearchConfig, QueryConfig  
12 from embeddings import BgeEncoder 11 from embeddings import BgeEncoder
  12 +from search.query_config import (
  13 + ENABLE_TEXT_EMBEDDING,
  14 + ENABLE_TRANSLATION,
  15 + REWRITE_DICTIONARY,
  16 + TRANSLATION_API_KEY,
  17 + TRANSLATION_SERVICE
  18 +)
13 from .language_detector import LanguageDetector 19 from .language_detector import LanguageDetector
14 from .translator import Translator 20 from .translator import Translator
15 from .query_rewriter import QueryRewriter, QueryNormalizer 21 from .query_rewriter import QueryRewriter, QueryNormalizer
@@ -64,7 +70,6 @@ class QueryParser: @@ -64,7 +70,6 @@ class QueryParser:
64 70
65 def __init__( 71 def __init__(
66 self, 72 self,
67 - config: SearchConfig,  
68 text_encoder: Optional[BgeEncoder] = None, 73 text_encoder: Optional[BgeEncoder] = None,
69 translator: Optional[Translator] = None 74 translator: Optional[Translator] = None
70 ): 75 ):
@@ -72,24 +77,21 @@ class QueryParser: @@ -72,24 +77,21 @@ class QueryParser:
72 Initialize query parser. 77 Initialize query parser.
73 78
74 Args: 79 Args:
75 - config: Search configuration  
76 text_encoder: Text embedding encoder (lazy loaded if not provided) 80 text_encoder: Text embedding encoder (lazy loaded if not provided)
77 translator: Translator instance (lazy loaded if not provided) 81 translator: Translator instance (lazy loaded if not provided)
78 """ 82 """
79 - self.config = config  
80 - self.query_config = config.query_config  
81 self._text_encoder = text_encoder 83 self._text_encoder = text_encoder
82 self._translator = translator 84 self._translator = translator
83 85
84 # Initialize components 86 # Initialize components
85 self.normalizer = QueryNormalizer() 87 self.normalizer = QueryNormalizer()
86 self.language_detector = LanguageDetector() 88 self.language_detector = LanguageDetector()
87 - self.rewriter = QueryRewriter(self.query_config.rewrite_dictionary) 89 + self.rewriter = QueryRewriter(REWRITE_DICTIONARY)
88 90
89 @property 91 @property
90 def text_encoder(self) -> BgeEncoder: 92 def text_encoder(self) -> BgeEncoder:
91 """Lazy load text encoder.""" 93 """Lazy load text encoder."""
92 - if self._text_encoder is None and self.query_config.enable_text_embedding: 94 + if self._text_encoder is None and ENABLE_TEXT_EMBEDDING:
93 logger.info("Initializing text encoder (lazy load)...") 95 logger.info("Initializing text encoder (lazy load)...")
94 self._text_encoder = BgeEncoder() 96 self._text_encoder = BgeEncoder()
95 return self._text_encoder 97 return self._text_encoder
@@ -97,13 +99,13 @@ class QueryParser: @@ -97,13 +99,13 @@ class QueryParser:
97 @property 99 @property
98 def translator(self) -> Translator: 100 def translator(self) -> Translator:
99 """Lazy load translator.""" 101 """Lazy load translator."""
100 - if self._translator is None and self.query_config.enable_translation: 102 + if self._translator is None and ENABLE_TRANSLATION:
101 logger.info("Initializing translator (lazy load)...") 103 logger.info("Initializing translator (lazy load)...")
102 self._translator = Translator( 104 self._translator = Translator(
103 - api_key=self.query_config.translation_api_key, 105 + api_key=TRANSLATION_API_KEY,
104 use_cache=True, 106 use_cache=True,
105 - glossary_id=getattr(self.query_config, 'translation_glossary_id', None),  
106 - translation_context=getattr(self.query_config, 'translation_context', 'e-commerce product search') 107 + glossary_id=None, # Can be added to query_config if needed
  108 + translation_context='e-commerce product search'
107 ) 109 )
108 return self._translator 110 return self._translator
109 111
@@ -154,7 +156,7 @@ class QueryParser: @@ -154,7 +156,7 @@ class QueryParser:
154 156
155 # Stage 2: Query rewriting 157 # Stage 2: Query rewriting
156 rewritten = None 158 rewritten = None
157 - if self.query_config.enable_query_rewrite: 159 + if REWRITE_DICTIONARY: # Enable rewrite if dictionary exists
158 rewritten = self.rewriter.rewrite(query_text) 160 rewritten = self.rewriter.rewrite(query_text)
159 if rewritten != query_text: 161 if rewritten != query_text:
160 log_info(f"查询重写 | '{query_text}' -> '{rewritten}'") 162 log_info(f"查询重写 | '{query_text}' -> '{rewritten}'")
@@ -171,26 +173,11 @@ class QueryParser: @@ -171,26 +173,11 @@ class QueryParser:
171 173
172 # Stage 4: Translation 174 # Stage 4: Translation
173 translations = {} 175 translations = {}
174 - if self.query_config.enable_translation: 176 + if ENABLE_TRANSLATION:
175 try: 177 try:
176 # Determine target languages for translation 178 # Determine target languages for translation
177 - # If domain has language_field_mapping, only translate to languages in the mapping  
178 - # Otherwise, use all supported languages  
179 - target_langs_for_translation = self.query_config.supported_languages  
180 -  
181 - # Check if domain has language_field_mapping  
182 - domain_config = next(  
183 - (idx for idx in self.config.indexes if idx.name == domain),  
184 - None  
185 - )  
186 - if domain_config and domain_config.language_field_mapping:  
187 - # Only translate to languages that exist in the mapping  
188 - available_languages = set(domain_config.language_field_mapping.keys())  
189 - target_langs_for_translation = [  
190 - lang for lang in self.query_config.supported_languages  
191 - if lang in available_languages  
192 - ]  
193 - log_debug(f"域 '{domain}' 有语言字段映射,将翻译到: {target_langs_for_translation}") 179 + # Simplified: always translate to Chinese and English
  180 + target_langs_for_translation = ['zh', 'en']
194 181
195 target_langs = self.translator.get_translation_needs( 182 target_langs = self.translator.get_translation_needs(
196 detected_lang, 183 detected_lang,
@@ -200,7 +187,7 @@ class QueryParser: @@ -200,7 +187,7 @@ class QueryParser:
200 if target_langs: 187 if target_langs:
201 log_info(f"开始翻译 | 源语言: {detected_lang} | 目标语言: {target_langs}") 188 log_info(f"开始翻译 | 源语言: {detected_lang} | 目标语言: {target_langs}")
202 # Use e-commerce context for better disambiguation 189 # Use e-commerce context for better disambiguation
203 - translation_context = getattr(self.query_config, 'translation_context', 'e-commerce product search') 190 + translation_context = 'e-commerce product search'
204 translations = self.translator.translate_multi( 191 translations = self.translator.translate_multi(
205 query_text, 192 query_text,
206 target_langs, 193 target_langs,
@@ -223,7 +210,7 @@ class QueryParser: @@ -223,7 +210,7 @@ class QueryParser:
223 # Stage 5: Text embedding 210 # Stage 5: Text embedding
224 query_vector = None 211 query_vector = None
225 if (generate_vector and 212 if (generate_vector and
226 - self.query_config.enable_text_embedding and 213 + ENABLE_TEXT_EMBEDDING and
227 domain == "default"): # Only generate vector for default domain 214 domain == "default"): # Only generate vector for default domain
228 try: 215 try:
229 log_debug("开始生成查询向量") 216 log_debug("开始生成查询向量")
scripts/recreate_and_import.py 0 → 100755
@@ -0,0 +1,184 @@ @@ -0,0 +1,184 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +重建索引并导入数据的脚本。
  4 +
  5 +清除旧索引,使用新的mapping重建索引,然后导入数据。
  6 +"""
  7 +
  8 +import sys
  9 +import os
  10 +import argparse
  11 +from pathlib import Path
  12 +
  13 +# Add parent directory to path
  14 +sys.path.insert(0, str(Path(__file__).parent.parent))
  15 +
  16 +from utils.db_connector import create_db_connection
  17 +from utils.es_client import ESClient
  18 +from indexer.mapping_generator import load_mapping, delete_index_if_exists, DEFAULT_INDEX_NAME
  19 +from indexer.spu_transformer import SPUTransformer
  20 +from indexer.bulk_indexer import BulkIndexer
  21 +
  22 +
  23 +def main():
  24 + parser = argparse.ArgumentParser(description='重建ES索引并导入数据')
  25 +
  26 + # Database connection
  27 + parser.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)')
  28 + parser.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)')
  29 + parser.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)')
  30 + parser.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)')
  31 + parser.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)')
  32 +
  33 + # Tenant and ES
  34 + parser.add_argument('--tenant-id', required=True, help='Tenant ID (必需)')
  35 + parser.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)')
  36 +
  37 + # Options
  38 + parser.add_argument('--batch-size', type=int, default=500, help='批量导入大小 (默认: 500)')
  39 + parser.add_argument('--skip-delete', action='store_true', help='跳过删除旧索引步骤')
  40 +
  41 + args = parser.parse_args()
  42 +
  43 + print("=" * 60)
  44 + print("重建ES索引并导入数据")
  45 + print("=" * 60)
  46 +
  47 + # 加载mapping
  48 + print("\n[1/4] 加载mapping配置...")
  49 + try:
  50 + mapping = load_mapping()
  51 + print(f"✓ 成功加载mapping配置")
  52 + except Exception as e:
  53 + print(f"✗ 加载mapping失败: {e}")
  54 + return 1
  55 +
  56 + index_name = DEFAULT_INDEX_NAME
  57 + print(f"索引名称: {index_name}")
  58 +
  59 + # 连接Elasticsearch
  60 + print("\n[2/4] 连接Elasticsearch...")
  61 + es_host = args.es_host or os.environ.get('ES_HOST', 'http://localhost:9200')
  62 + es_username = os.environ.get('ES_USERNAME')
  63 + es_password = os.environ.get('ES_PASSWORD')
  64 +
  65 + print(f"ES地址: {es_host}")
  66 + if es_username:
  67 + print(f"ES用户名: {es_username}")
  68 +
  69 + try:
  70 + if es_username and es_password:
  71 + es_client = ESClient(hosts=[es_host], username=es_username, password=es_password)
  72 + else:
  73 + es_client = ESClient(hosts=[es_host])
  74 +
  75 + if not es_client.ping():
  76 + print(f"✗ 无法连接到Elasticsearch: {es_host}")
  77 + return 1
  78 + print("✓ Elasticsearch连接成功")
  79 + except Exception as e:
  80 + print(f"✗ 连接Elasticsearch失败: {e}")
  81 + return 1
  82 +
  83 + # 删除旧索引
  84 + if not args.skip_delete:
  85 + print("\n[3/4] 删除旧索引...")
  86 + if es_client.index_exists(index_name):
  87 + print(f"发现已存在的索引: {index_name}")
  88 + if delete_index_if_exists(es_client, index_name):
  89 + print(f"✓ 成功删除索引: {index_name}")
  90 + else:
  91 + print(f"✗ 删除索引失败: {index_name}")
  92 + return 1
  93 + else:
  94 + print(f"索引不存在,跳过删除: {index_name}")
  95 + else:
  96 + print("\n[3/4] 跳过删除旧索引步骤")
  97 +
  98 + # 创建新索引
  99 + print("\n[4/4] 创建新索引...")
  100 + try:
  101 + if es_client.index_exists(index_name):
  102 + print(f"✓ 索引已存在: {index_name},跳过创建")
  103 + else:
  104 + print(f"创建索引: {index_name}")
  105 + if es_client.create_index(index_name, mapping):
  106 + print(f"✓ 成功创建索引: {index_name}")
  107 + else:
  108 + print(f"✗ 创建索引失败: {index_name}")
  109 + return 1
  110 + except Exception as e:
  111 + print(f"✗ 创建索引失败: {e}")
  112 + import traceback
  113 + traceback.print_exc()
  114 + return 1
  115 +
  116 + # 连接MySQL
  117 + print("\n[5/5] 连接MySQL...")
  118 + db_host = args.db_host or os.environ.get('DB_HOST')
  119 + db_port = args.db_port or int(os.environ.get('DB_PORT', 3306))
  120 + db_database = args.db_database or os.environ.get('DB_DATABASE')
  121 + db_username = args.db_username or os.environ.get('DB_USERNAME')
  122 + db_password = args.db_password or os.environ.get('DB_PASSWORD')
  123 +
  124 + if not all([db_host, db_database, db_username, db_password]):
  125 + print("✗ MySQL连接参数不完整")
  126 + print("请提供 --db-host, --db-database, --db-username, --db-password")
  127 + print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD")
  128 + return 1
  129 +
  130 + print(f"MySQL: {db_host}:{db_port}/{db_database}")
  131 + try:
  132 + db_engine = create_db_connection(
  133 + host=db_host,
  134 + port=db_port,
  135 + database=db_database,
  136 + username=db_username,
  137 + password=db_password
  138 + )
  139 + print("✓ MySQL连接成功")
  140 + except Exception as e:
  141 + print(f"✗ 连接MySQL失败: {e}")
  142 + return 1
  143 +
  144 + # 导入数据
  145 + print("\n[6/6] 导入数据...")
  146 + print(f"Tenant ID: {args.tenant_id}")
  147 + print(f"批量大小: {args.batch_size}")
  148 +
  149 + try:
  150 + transformer = SPUTransformer(db_engine, args.tenant_id)
  151 + print("正在转换数据...")
  152 + documents = transformer.transform_batch()
  153 + print(f"✓ 转换完成: {len(documents)} 个文档")
  154 +
  155 + if not documents:
  156 + print("⚠ 没有数据需要导入")
  157 + return 0
  158 +
  159 + print(f"正在导入数据到ES (批量大小: {args.batch_size})...")
  160 + indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size)
  161 + results = indexer.index_documents(documents, id_field="spu_id", show_progress=True)
  162 +
  163 + print(f"\n{'='*60}")
  164 + print("导入完成!")
  165 + print(f"{'='*60}")
  166 + print(f"成功: {results['success']}")
  167 + print(f"失败: {results['failed']}")
  168 + print(f"耗时: {results.get('elapsed_time', 0):.2f}秒")
  169 +
  170 + if results['failed'] > 0:
  171 + print(f"\n⚠ 警告: {results['failed']} 个文档导入失败")
  172 + return 1
  173 +
  174 + return 0
  175 + except Exception as e:
  176 + print(f"✗ 导入数据失败: {e}")
  177 + import traceback
  178 + traceback.print_exc()
  179 + return 1
  180 +
  181 +
  182 +if __name__ == '__main__':
  183 + sys.exit(main())
  184 +
search/es_query_builder.py
@@ -330,10 +330,15 @@ class ESQueryBuilder: @@ -330,10 +330,15 @@ class ESQueryBuilder:
330 """ 330 """
331 构建分面聚合。 331 构建分面聚合。
332 332
  333 + 支持:
  334 + 1. 分类分面:category1_name, category2_name, category3_name, category_name
  335 + 2. specifications分面:嵌套聚合,按name聚合,然后按value聚合
  336 +
333 Args: 337 Args:
334 facet_configs: 分面配置列表(标准格式): 338 facet_configs: 分面配置列表(标准格式):
335 - str: 字段名,使用默认 terms 配置 339 - str: 字段名,使用默认 terms 配置
336 - FacetConfig: 详细的分面配置对象 340 - FacetConfig: 详细的分面配置对象
  341 + - 特殊值 "specifications": 构建specifications嵌套分面
337 342
338 Returns: 343 Returns:
339 ES aggregations 字典 344 ES aggregations 字典
@@ -344,6 +349,34 @@ class ESQueryBuilder: @@ -344,6 +349,34 @@ class ESQueryBuilder:
344 aggs = {} 349 aggs = {}
345 350
346 for config in facet_configs: 351 for config in facet_configs:
  352 + # 特殊处理:specifications嵌套分面
  353 + if isinstance(config, str) and config == "specifications":
  354 + # 构建specifications嵌套分面(按name聚合,然后按value聚合)
  355 + aggs["specifications_facet"] = {
  356 + "nested": {
  357 + "path": "specifications"
  358 + },
  359 + "aggs": {
  360 + "by_name": {
  361 + "terms": {
  362 + "field": "specifications.name",
  363 + "size": 20,
  364 + "order": {"_count": "desc"}
  365 + },
  366 + "aggs": {
  367 + "value_counts": {
  368 + "terms": {
  369 + "field": "specifications.value",
  370 + "size": 10,
  371 + "order": {"_count": "desc"}
  372 + }
  373 + }
  374 + }
  375 + }
  376 + }
  377 + }
  378 + continue
  379 +
347 # 简单模式:只有字段名(字符串) 380 # 简单模式:只有字段名(字符串)
348 if isinstance(config, str): 381 if isinstance(config, str):
349 field = config 382 field = config
search/multilang_query_builder.py
@@ -11,9 +11,9 @@ import numpy as np @@ -11,9 +11,9 @@ import numpy as np
11 import logging 11 import logging
12 import re 12 import re
13 13
14 -from config import SearchConfig, IndexConfig  
15 from query import ParsedQuery 14 from query import ParsedQuery
16 from .es_query_builder import ESQueryBuilder 15 from .es_query_builder import ESQueryBuilder
  16 +from .query_config import DEFAULT_MATCH_FIELDS, DOMAIN_FIELDS, FUNCTION_SCORE_CONFIG
17 17
18 logger = logging.getLogger(__name__) 18 logger = logging.getLogger(__name__)
19 19
@@ -30,8 +30,8 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -30,8 +30,8 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
30 30
31 def __init__( 31 def __init__(
32 self, 32 self,
33 - config: SearchConfig,  
34 index_name: str, 33 index_name: str,
  34 + match_fields: Optional[List[str]] = None,
35 text_embedding_field: Optional[str] = None, 35 text_embedding_field: Optional[str] = None,
36 image_embedding_field: Optional[str] = None, 36 image_embedding_field: Optional[str] = None,
37 source_fields: Optional[List[str]] = None 37 source_fields: Optional[List[str]] = None
@@ -40,53 +40,32 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -40,53 +40,32 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
40 Initialize multi-language query builder. 40 Initialize multi-language query builder.
41 41
42 Args: 42 Args:
43 - config: Search configuration  
44 index_name: ES index name 43 index_name: ES index name
  44 + match_fields: Fields to search for text matching (default: from query_config)
45 text_embedding_field: Field name for text embeddings 45 text_embedding_field: Field name for text embeddings
46 image_embedding_field: Field name for image embeddings 46 image_embedding_field: Field name for image embeddings
47 source_fields: Fields to return in search results (_source includes) 47 source_fields: Fields to return in search results (_source includes)
48 """ 48 """
49 - self.config = config  
50 - self.function_score_config = config.function_score 49 + self.function_score_config = FUNCTION_SCORE_CONFIG
51 50
52 - # For default domain, use all fields as fallback  
53 - default_fields = self._get_domain_fields("default") 51 + # Use provided match_fields or default
  52 + if match_fields is None:
  53 + match_fields = DEFAULT_MATCH_FIELDS
54 54
55 super().__init__( 55 super().__init__(
56 index_name=index_name, 56 index_name=index_name,
57 - match_fields=default_fields, 57 + match_fields=match_fields,
58 text_embedding_field=text_embedding_field, 58 text_embedding_field=text_embedding_field,
59 image_embedding_field=image_embedding_field, 59 image_embedding_field=image_embedding_field,
60 source_fields=source_fields 60 source_fields=source_fields
61 ) 61 )
62 62
63 - # Build domain configurations  
64 - self.domain_configs = self._build_domain_configs()  
65 -  
66 - def _build_domain_configs(self) -> Dict[str, IndexConfig]:  
67 - """Build mapping of domain name to IndexConfig."""  
68 - return {index.name: index for index in self.config.indexes} 63 + # Build domain configurations from query_config
  64 + self.domain_configs = DOMAIN_FIELDS
69 65
70 def _get_domain_fields(self, domain_name: str) -> List[str]: 66 def _get_domain_fields(self, domain_name: str) -> List[str]:
71 """Get fields for a specific domain with boost notation.""" 67 """Get fields for a specific domain with boost notation."""
72 - for index in self.config.indexes:  
73 - if index.name == domain_name:  
74 - result = []  
75 - for field_name in index.fields:  
76 - field = self._get_field_by_name(field_name)  
77 - if field and field.boost != 1.0:  
78 - result.append(f"{field_name}^{field.boost}")  
79 - else:  
80 - result.append(field_name)  
81 - return result  
82 - return []  
83 -  
84 - def _get_field_by_name(self, field_name: str):  
85 - """Get field configuration by name."""  
86 - for field in self.config.fields:  
87 - if field.name == field_name:  
88 - return field  
89 - return None 68 + return self.domain_configs.get(domain_name, DEFAULT_MATCH_FIELDS)
90 69
91 def build_multilang_query( 70 def build_multilang_query(
92 self, 71 self,
@@ -103,7 +82,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -103,7 +82,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
103 min_score: Optional[float] = None 82 min_score: Optional[float] = None
104 ) -> Dict[str, Any]: 83 ) -> Dict[str, Any]:
105 """ 84 """
106 - Build ES query with multi-language support (重构版). 85 + Build ES query with multi-language support (简化版).
107 86
108 Args: 87 Args:
109 parsed_query: Parsed query with language info and translations 88 parsed_query: Parsed query with language info and translations
@@ -120,19 +99,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -120,19 +99,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
120 Returns: 99 Returns:
121 ES query DSL dictionary 100 ES query DSL dictionary
122 """ 101 """
123 - domain = parsed_query.domain  
124 - domain_config = self.domain_configs.get(domain)  
125 -  
126 - if not domain_config:  
127 - # Fallback to default domain  
128 - domain = "default"  
129 - domain_config = self.domain_configs.get("default")  
130 -  
131 - if not domain_config:  
132 - # Use original behavior 102 + # 1. 根据域选择匹配字段(默认域使用 DEFAULT_MATCH_FIELDS)
  103 + domain = parsed_query.domain or "default"
  104 + domain_fields = self.domain_configs.get(domain) or DEFAULT_MATCH_FIELDS
  105 +
  106 + # 2. 临时切换 match_fields,复用基类 build_query 逻辑
  107 + original_match_fields = self.match_fields
  108 + self.match_fields = domain_fields
  109 + try:
133 return super().build_query( 110 return super().build_query(
134 - query_text=parsed_query.rewritten_query, 111 + query_text=parsed_query.rewritten_query or parsed_query.normalized_query,
135 query_vector=query_vector, 112 query_vector=query_vector,
  113 + query_node=query_node,
136 filters=filters, 114 filters=filters,
137 range_filters=range_filters, 115 range_filters=range_filters,
138 size=size, 116 size=size,
@@ -142,95 +120,9 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -142,95 +120,9 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
142 knn_num_candidates=knn_num_candidates, 120 knn_num_candidates=knn_num_candidates,
143 min_score=min_score 121 min_score=min_score
144 ) 122 )
145 -  
146 - logger.debug(f"Building query for domain: {domain}, language: {parsed_query.detected_language}")  
147 -  
148 - # Build query clause with multi-language support  
149 - if query_node and isinstance(query_node, tuple) and len(query_node) > 0:  
150 - # Handle boolean query from tuple (AST, score)  
151 - ast_node = query_node[0]  
152 - query_clause = self._build_boolean_query_from_tuple(ast_node)  
153 - logger.debug(f"Using boolean query")  
154 - elif query_node and hasattr(query_node, 'operator') and query_node.operator != 'TERM':  
155 - # Handle boolean query using base class method  
156 - query_clause = self._build_boolean_query(query_node)  
157 - logger.debug(f"Using boolean query")  
158 - else:  
159 - # Handle text query with multi-language support  
160 - query_clause = self._build_multilang_text_query(parsed_query, domain_config)  
161 -  
162 - # 构建内层bool: 文本和KNN二选一  
163 - inner_bool_should = [query_clause]  
164 -  
165 - # 如果启用KNN,添加到should  
166 - if enable_knn and query_vector is not None and self.text_embedding_field:  
167 - knn_query = {  
168 - "knn": {  
169 - "field": self.text_embedding_field,  
170 - "query_vector": query_vector.tolist(),  
171 - "k": knn_k,  
172 - "num_candidates": knn_num_candidates  
173 - }  
174 - }  
175 - inner_bool_should.append(knn_query)  
176 - logger.info(f"KNN query added: field={self.text_embedding_field}, k={knn_k}")  
177 - else:  
178 - # Debug why KNN is not added  
179 - reasons = []  
180 - if not enable_knn:  
181 - reasons.append("enable_knn=False")  
182 - if query_vector is None:  
183 - reasons.append("query_vector is None")  
184 - if not self.text_embedding_field:  
185 - reasons.append(f"text_embedding_field is not set (current: {self.text_embedding_field})")  
186 - logger.debug(f"KNN query NOT added. Reasons: {', '.join(reasons) if reasons else 'unknown'}")  
187 -  
188 - # 构建内层bool结构  
189 - inner_bool = {  
190 - "bool": {  
191 - "should": inner_bool_should,  
192 - "minimum_should_match": 1  
193 - }  
194 - }  
195 -  
196 - # 构建外层bool: 包含filter  
197 - filter_clauses = self._build_filters(filters, range_filters) if (filters or range_filters) else []  
198 -  
199 - outer_bool = {  
200 - "bool": {  
201 - "must": [inner_bool]  
202 - }  
203 - }  
204 -  
205 - if filter_clauses:  
206 - outer_bool["bool"]["filter"] = filter_clauses  
207 -  
208 - # 包裹function_score(从配置读取score_mode和boost_mode)  
209 - function_score_query = {  
210 - "function_score": {  
211 - "query": outer_bool,  
212 - "functions": self._build_score_functions(),  
213 - "score_mode": self.function_score_config.score_mode if self.function_score_config else "sum",  
214 - "boost_mode": self.function_score_config.boost_mode if self.function_score_config else "multiply"  
215 - }  
216 - }  
217 -  
218 - es_query = {  
219 - "size": size,  
220 - "from": from_,  
221 - "query": function_score_query  
222 - }  
223 -  
224 - # Add _source filtering if source_fields are configured  
225 - if self.source_fields:  
226 - es_query["_source"] = {  
227 - "includes": self.source_fields  
228 - }  
229 -  
230 - if min_score is not None:  
231 - es_query["min_score"] = min_score  
232 -  
233 - return es_query 123 + finally:
  124 + # 恢复原始配置,避免影响后续查询
  125 + self.match_fields = original_match_fields
234 126
235 def _build_score_functions(self) -> List[Dict[str, Any]]: 127 def _build_score_functions(self) -> List[Dict[str, Any]]:
236 """ 128 """
@@ -291,7 +183,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -291,7 +183,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
291 def _build_multilang_text_query( 183 def _build_multilang_text_query(
292 self, 184 self,
293 parsed_query: ParsedQuery, 185 parsed_query: ParsedQuery,
294 - domain_config: IndexConfig 186 + domain_config: Dict[str, Any]
295 ) -> Dict[str, Any]: 187 ) -> Dict[str, Any]:
296 """ 188 """
297 Build text query with multi-language field routing. 189 Build text query with multi-language field routing.
search/query_config.py
@@ -37,17 +37,32 @@ DOMAIN_FIELDS: Dict[str, List[str]] = { @@ -37,17 +37,32 @@ DOMAIN_FIELDS: Dict[str, List[str]] = {
37 } 37 }
38 38
39 # Source fields to return in search results 39 # Source fields to return in search results
  40 +# 注意:为了在后端做多语言选择,_zh / _en 字段仍然需要从 ES 取出,
  41 +# 但不会原样透出给前端,而是统一映射到 title / description / vendor 等字段。
40 SOURCE_FIELDS = [ 42 SOURCE_FIELDS = [
  43 + # 基本标识
41 "tenant_id", 44 "tenant_id",
42 "spu_id", 45 "spu_id",
  46 + "create_time",
  47 + "update_time",
  48 +
  49 + # 多语言文本字段(仅用于后端选择,不直接返回给前端)
43 "title_zh", 50 "title_zh",
  51 + "title_en",
44 "brief_zh", 52 "brief_zh",
  53 + "brief_en",
45 "description_zh", 54 "description_zh",
  55 + "description_en",
46 "vendor_zh", 56 "vendor_zh",
47 - "tags",  
48 - "image_url", 57 + "vendor_en",
49 "category_path_zh", 58 "category_path_zh",
  59 + "category_path_en",
50 "category_name_zh", 60 "category_name_zh",
  61 + "category_name_en",
  62 +
  63 + # 语言无关字段(直接返回给前端)
  64 + "tags",
  65 + "image_url",
51 "category_id", 66 "category_id",
52 "category_name", 67 "category_name",
53 "category_level", 68 "category_level",
@@ -60,11 +75,12 @@ SOURCE_FIELDS = [ @@ -60,11 +75,12 @@ SOURCE_FIELDS = [
60 "min_price", 75 "min_price",
61 "max_price", 76 "max_price",
62 "compare_at_price", 77 "compare_at_price",
  78 + "sku_prices",
  79 + "sku_weights",
  80 + "sku_weight_units",
63 "total_inventory", 81 "total_inventory",
64 - "create_time",  
65 - "update_time",  
66 "skus", 82 "skus",
67 - "specifications" 83 + "specifications",
68 ] 84 ]
69 85
70 # Query processing settings 86 # Query processing settings
@@ -112,3 +128,13 @@ def load_rewrite_dictionary() -> Dict[str, str]: @@ -112,3 +128,13 @@ def load_rewrite_dictionary() -> Dict[str, str]:
112 128
113 REWRITE_DICTIONARY = load_rewrite_dictionary() 129 REWRITE_DICTIONARY = load_rewrite_dictionary()
114 130
  131 +# Default facets for faceted search
  132 +# 分类分面:使用category1_name, category2_name, category3_name
  133 +# specifications分面:使用嵌套聚合,按name分组,然后按value聚合
  134 +DEFAULT_FACETS = [
  135 + "category1_name", # 一级分类
  136 + "category2_name", # 二级分类
  137 + "category3_name", # 三级分类
  138 + "specifications" # 规格分面(特殊处理:嵌套聚合)
  139 +]
  140 +
search/searcher.py
@@ -8,15 +8,23 @@ from typing import Dict, Any, List, Optional, Union @@ -8,15 +8,23 @@ from typing import Dict, Any, List, Optional, Union
8 import time 8 import time
9 import logging 9 import logging
10 10
11 -from config import SearchConfig  
12 from utils.es_client import ESClient 11 from utils.es_client import ESClient
13 from query import QueryParser, ParsedQuery 12 from query import QueryParser, ParsedQuery
14 -from indexer import MappingGenerator  
15 from embeddings import CLIPImageEncoder 13 from embeddings import CLIPImageEncoder
16 from .boolean_parser import BooleanParser, QueryNode 14 from .boolean_parser import BooleanParser, QueryNode
17 from .es_query_builder import ESQueryBuilder 15 from .es_query_builder import ESQueryBuilder
18 from .multilang_query_builder import MultiLanguageQueryBuilder 16 from .multilang_query_builder import MultiLanguageQueryBuilder
19 from .rerank_engine import RerankEngine 17 from .rerank_engine import RerankEngine
  18 +from .query_config import (
  19 + DEFAULT_INDEX_NAME,
  20 + DEFAULT_MATCH_FIELDS,
  21 + TEXT_EMBEDDING_FIELD,
  22 + IMAGE_EMBEDDING_FIELD,
  23 + SOURCE_FIELDS,
  24 + ENABLE_TRANSLATION,
  25 + ENABLE_TEXT_EMBEDDING,
  26 + RANKING_EXPRESSION
  27 +)
20 from context.request_context import RequestContext, RequestContextStage, create_request_context 28 from context.request_context import RequestContext, RequestContextStage, create_request_context
21 from api.models import FacetResult, FacetValue 29 from api.models import FacetResult, FacetValue
22 from api.result_formatter import ResultFormatter 30 from api.result_formatter import ResultFormatter
@@ -79,39 +87,38 @@ class Searcher: @@ -79,39 +87,38 @@ class Searcher:
79 87
80 def __init__( 88 def __init__(
81 self, 89 self,
82 - config: SearchConfig,  
83 es_client: ESClient, 90 es_client: ESClient,
84 - query_parser: Optional[QueryParser] = None 91 + query_parser: Optional[QueryParser] = None,
  92 + index_name: str = DEFAULT_INDEX_NAME
85 ): 93 ):
86 """ 94 """
87 Initialize searcher. 95 Initialize searcher.
88 96
89 Args: 97 Args:
90 - config: Search configuration  
91 es_client: Elasticsearch client 98 es_client: Elasticsearch client
92 query_parser: Query parser (created if not provided) 99 query_parser: Query parser (created if not provided)
  100 + index_name: ES index name (default: search_products)
93 """ 101 """
94 - self.config = config  
95 self.es_client = es_client 102 self.es_client = es_client
96 - self.query_parser = query_parser or QueryParser(config) 103 + self.index_name = index_name
  104 + self.query_parser = query_parser or QueryParser()
97 105
98 # Initialize components 106 # Initialize components
99 self.boolean_parser = BooleanParser() 107 self.boolean_parser = BooleanParser()
100 - self.rerank_engine = RerankEngine(config.ranking.expression, enabled=False) 108 + self.rerank_engine = RerankEngine(RANKING_EXPRESSION, enabled=False)
101 109
102 - # Get mapping info  
103 - mapping_gen = MappingGenerator(config)  
104 - self.match_fields = mapping_gen.get_match_fields_for_domain("default")  
105 - self.text_embedding_field = mapping_gen.get_text_embedding_field()  
106 - self.image_embedding_field = mapping_gen.get_image_embedding_field() 110 + # Use constants from query_config
  111 + self.match_fields = DEFAULT_MATCH_FIELDS
  112 + self.text_embedding_field = TEXT_EMBEDDING_FIELD
  113 + self.image_embedding_field = IMAGE_EMBEDDING_FIELD
107 114
108 # Query builder - use multi-language version 115 # Query builder - use multi-language version
109 self.query_builder = MultiLanguageQueryBuilder( 116 self.query_builder = MultiLanguageQueryBuilder(
110 - config=config,  
111 - index_name=config.es_index_name, 117 + index_name=index_name,
  118 + match_fields=self.match_fields,
112 text_embedding_field=self.text_embedding_field, 119 text_embedding_field=self.text_embedding_field,
113 image_embedding_field=self.image_embedding_field, 120 image_embedding_field=self.image_embedding_field,
114 - source_fields=config.query_config.source_fields 121 + source_fields=SOURCE_FIELDS
115 ) 122 )
116 123
117 def search( 124 def search(
@@ -127,7 +134,8 @@ class Searcher: @@ -127,7 +134,8 @@ class Searcher:
127 context: Optional[RequestContext] = None, 134 context: Optional[RequestContext] = None,
128 sort_by: Optional[str] = None, 135 sort_by: Optional[str] = None,
129 sort_order: Optional[str] = "desc", 136 sort_order: Optional[str] = "desc",
130 - debug: bool = False 137 + debug: bool = False,
  138 + language: str = "zh",
131 ) -> SearchResult: 139 ) -> SearchResult:
132 """ 140 """
133 Execute search query (外部友好格式). 141 Execute search query (外部友好格式).
@@ -154,8 +162,8 @@ class Searcher: @@ -154,8 +162,8 @@ class Searcher:
154 context = create_request_context() 162 context = create_request_context()
155 163
156 # Always use config defaults (these are backend configuration, not user parameters) 164 # Always use config defaults (these are backend configuration, not user parameters)
157 - enable_translation = self.config.query_config.enable_translation  
158 - enable_embedding = self.config.query_config.enable_text_embedding 165 + enable_translation = ENABLE_TRANSLATION
  166 + enable_embedding = ENABLE_TEXT_EMBEDDING
159 enable_rerank = False # Temporarily disabled 167 enable_rerank = False # Temporarily disabled
160 168
161 # Start timing 169 # Start timing
@@ -278,14 +286,6 @@ class Searcher: @@ -278,14 +286,6 @@ class Searcher:
278 min_score=min_score 286 min_score=min_score
279 ) 287 )
280 288
281 - # Add SPU collapse if configured  
282 - if self.config.spu_config.enabled:  
283 - es_query = self.query_builder.add_spu_collapse(  
284 - es_query,  
285 - self.config.spu_config.spu_field,  
286 - self.config.spu_config.inner_hits_size  
287 - )  
288 -  
289 # Add facets for faceted search 289 # Add facets for faceted search
290 if facets: 290 if facets:
291 facet_aggs = self.query_builder.build_facets(facets) 291 facet_aggs = self.query_builder.build_facets(facets)
@@ -329,7 +329,7 @@ class Searcher: @@ -329,7 +329,7 @@ class Searcher:
329 context.start_stage(RequestContextStage.ELASTICSEARCH_SEARCH) 329 context.start_stage(RequestContextStage.ELASTICSEARCH_SEARCH)
330 try: 330 try:
331 es_response = self.es_client.search( 331 es_response = self.es_client.search(
332 - index_name=self.config.es_index_name, 332 + index_name=self.index_name,
333 body=body_for_es, 333 body=body_for_es,
334 size=size, 334 size=size,
335 from_=from_ 335 from_=from_
@@ -374,7 +374,11 @@ class Searcher: @@ -374,7 +374,11 @@ class Searcher:
374 max_score = es_response.get('hits', {}).get('max_score') or 0.0 374 max_score = es_response.get('hits', {}).get('max_score') or 0.0
375 375
376 # Format results using ResultFormatter 376 # Format results using ResultFormatter
377 - formatted_results = ResultFormatter.format_search_results(es_hits, max_score) 377 + formatted_results = ResultFormatter.format_search_results(
  378 + es_hits,
  379 + max_score,
  380 + language=language
  381 + )
378 382
379 # Format facets 383 # Format facets
380 standardized_facets = None 384 standardized_facets = None
@@ -503,9 +507,9 @@ class Searcher: @@ -503,9 +507,9 @@ class Searcher:
503 } 507 }
504 508
505 # Add _source filtering if source_fields are configured 509 # Add _source filtering if source_fields are configured
506 - if self.config.query_config.source_fields: 510 + if SOURCE_FIELDS:
507 es_query["_source"] = { 511 es_query["_source"] = {
508 - "includes": self.config.query_config.source_fields 512 + "includes": SOURCE_FIELDS
509 } 513 }
510 514
511 if filters or range_filters: 515 if filters or range_filters:
@@ -519,7 +523,7 @@ class Searcher: @@ -519,7 +523,7 @@ class Searcher:
519 523
520 # Execute search 524 # Execute search
521 es_response = self.es_client.search( 525 es_response = self.es_client.search(
522 - index_name=self.config.es_index_name, 526 + index_name=self.index_name,
523 body=es_query, 527 body=es_query,
524 size=size 528 size=size
525 ) 529 )
@@ -573,7 +577,7 @@ class Searcher: @@ -573,7 +577,7 @@ class Searcher:
573 """ 577 """
574 try: 578 try:
575 response = self.es_client.client.get( 579 response = self.es_client.client.get(
576 - index=self.config.es_index_name, 580 + index=self.index_name,
577 id=doc_id 581 id=doc_id
578 ) 582 )
579 return response.get('_source') 583 return response.get('_source')
@@ -657,10 +661,11 @@ class Searcher: @@ -657,10 +661,11 @@ class Searcher:
657 661
658 def _get_field_label(self, field: str) -> str: 662 def _get_field_label(self, field: str) -> str:
659 """获取字段的显示标签""" 663 """获取字段的显示标签"""
660 - # 从配置中获取字段标签  
661 - for field_config in self.config.fields:  
662 - if field_config.name == field:  
663 - # 尝试获取 label 属性  
664 - return getattr(field_config, 'label', field)  
665 - # 如果没有配置,返回字段名  
666 - return field 664 + # 字段标签映射(简化版,不再从配置读取)
  665 + field_labels = {
  666 + "category1_name": "一级分类",
  667 + "category2_name": "二级分类",
  668 + "category3_name": "三级分类",
  669 + "specifications": "规格"
  670 + }
  671 + return field_labels.get(field, field)
utils/es_client.py
@@ -172,6 +172,18 @@ class ESClient: @@ -172,6 +172,18 @@ class ESClient:
172 Returns: 172 Returns:
173 Search results 173 Search results
174 """ 174 """
  175 + # Safety guard: collapse is no longer needed (index is already SPU-level).
  176 + # If any caller accidentally adds a collapse clause (e.g. on product_id),
  177 + # strip it here to avoid 400 errors like:
  178 + # "no mapping found for `product_id` in order to collapse on"
  179 + if isinstance(body, dict) and "collapse" in body:
  180 + logger.warning(
  181 + "Removing unsupported 'collapse' clause from ES query body: %s",
  182 + body.get("collapse")
  183 + )
  184 + body = dict(body) # shallow copy to avoid mutating caller
  185 + body.pop("collapse", None)
  186 +
175 try: 187 try:
176 return self.client.search( 188 return self.client.search(
177 index=index_name, 189 index=index_name,