Commit 3a5fda00462a3880e5db3790944a25fd226db03c

Authored by tangwang
1 parent 6c5ee5bc

1. 读取 ES 字段 skus 的 image_src 时兼容 imageSrc(ES 本应写入 image_src,但实际写入了 imageSrc;暂时不做全量重建,先在读取侧做兼容)

2. 返回 query_normalized
api/result_formatter.py
@@ -82,7 +82,7 @@ class ResultFormatter: @@ -82,7 +82,7 @@ class ResultFormatter:
82 option1_value=sku_entry.get('option1_value'), 82 option1_value=sku_entry.get('option1_value'),
83 option2_value=sku_entry.get('option2_value'), 83 option2_value=sku_entry.get('option2_value'),
84 option3_value=sku_entry.get('option3_value'), 84 option3_value=sku_entry.get('option3_value'),
85 - image_src=sku_entry.get('image_src'), 85 + image_src=sku_entry.get('image_src') or sku_entry.get('imageSrc'),
86 options=sku_entry.get('options') 86 options=sku_entry.get('options')
87 ) 87 )
88 skus.append(sku) 88 skus.append(sku)
context/request_context.py
@@ -28,7 +28,7 @@ class RequestContextStage(Enum): @@ -28,7 +28,7 @@ class RequestContextStage(Enum):
28 class QueryAnalysisResult: 28 class QueryAnalysisResult:
29 """查询分析结果""" 29 """查询分析结果"""
30 original_query: Optional[str] = None 30 original_query: Optional[str] = None
31 - normalized_query: Optional[str] = None 31 + query_normalized: Optional[str] = None
32 rewritten_query: Optional[str] = None 32 rewritten_query: Optional[str] = None
33 detected_language: Optional[str] = None 33 detected_language: Optional[str] = None
34 translations: Dict[str, str] = field(default_factory=dict) 34 translations: Dict[str, str] = field(default_factory=dict)
@@ -264,7 +264,7 @@ class RequestContext: @@ -264,7 +264,7 @@ class RequestContext:
264 }, 264 },
265 'query_analysis': { 265 'query_analysis': {
266 'original_query': self.query_analysis.original_query, 266 'original_query': self.query_analysis.original_query,
267 - 'normalized_query': self.query_analysis.normalized_query, 267 + 'query_normalized': self.query_analysis.query_normalized,
268 'rewritten_query': self.query_analysis.rewritten_query, 268 'rewritten_query': self.query_analysis.rewritten_query,
269 'detected_language': self.query_analysis.detected_language, 269 'detected_language': self.query_analysis.detected_language,
270 'domain': self.query_analysis.domain, 270 'domain': self.query_analysis.domain,
docs/Search-API-Examples.md
@@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \ @@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \
977 "debug_info": { 977 "debug_info": {
978 "query_analysis": { 978 "query_analysis": {
979 "original_query": "玩具", 979 "original_query": "玩具",
980 - "normalized_query": "玩具", 980 + "query_normalized": "玩具",
981 "rewritten_query": "玩具", 981 "rewritten_query": "玩具",
982 "detected_language": "zh", 982 "detected_language": "zh",
983 "translations": {"en": "toy"} 983 "translations": {"en": "toy"}
docs/搜索API对接指南.md
@@ -26,6 +26,7 @@ @@ -26,6 +26,7 @@
26 4. [响应格式说明](#响应格式说明) 26 4. [响应格式说明](#响应格式说明)
27 - 4.1 [标准响应结构](#41-标准响应结构) 27 - 4.1 [标准响应结构](#41-标准响应结构)
28 - 4.2 [响应字段说明](#42-响应字段说明) 28 - 4.2 [响应字段说明](#42-响应字段说明)
  29 + - 4.2.1 [query_info 说明](#421-query_info-说明)
29 - 4.3 [SpuResult字段说明](#43-spuresult字段说明) 30 - 4.3 [SpuResult字段说明](#43-spuresult字段说明)
30 - 4.4 [SkuResult字段说明](#44-skuresult字段说明) 31 - 4.4 [SkuResult字段说明](#44-skuresult字段说明)
31 - 4.5 [多语言字段说明](#45-多语言字段说明) 32 - 4.5 [多语言字段说明](#45-多语言字段说明)
@@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345" @@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345"
708 ], 709 ],
709 "query_info": { 710 "query_info": {
710 "original_query": "芭比娃娃", 711 "original_query": "芭比娃娃",
  712 + "query_normalized": "芭比娃娃",
  713 + "rewritten_query": "芭比娃娃",
711 "detected_language": "zh", 714 "detected_language": "zh",
712 "translations": { 715 "translations": {
713 "en": "barbie doll" 716 "en": "barbie doll"
714 - } 717 + },
  718 + "domain": "default",
  719 + "has_vector": true
715 }, 720 },
716 "suggestions": [], 721 "suggestions": [],
717 "related_searches": [], 722 "related_searches": [],
@@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345" @@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345"
734 | `total` | integer | 匹配的总文档数 | 739 | `total` | integer | 匹配的总文档数 |
735 | `max_score` | float | 最高相关性分数 | 740 | `max_score` | float | 最高相关性分数 |
736 | `facets` | array | 分面统计结果 | 741 | `facets` | array | 分面统计结果 |
737 -| `query_info` | object | 查询处理信息 | 742 +| `query_info` | object | 查询处理信息,见 [4.2.1 query_info 说明](#421-query_info-说明) |
738 | `took_ms` | integer | 搜索耗时(毫秒) | 743 | `took_ms` | integer | 搜索耗时(毫秒) |
739 744
  745 +#### 4.2.1 query_info 说明
  746 +
  747 +`query_info` 包含本次搜索的查询解析与处理结果:
  748 +
  749 +| 子字段 | 类型 | 说明 |
  750 +|--------|------|------|
  751 +| `original_query` | string | 用户原始查询 |
  752 +| `query_normalized` | string | 归一化后的查询(去空白、大小写等预处理,用于后续解析与改写) |
  753 +| `rewritten_query` | string | 重写后的查询(同义词/词典扩展等) |
  754 +| `detected_language` | string | 检测到的查询语言(如 `zh`、`en`) |
  755 +| `translations` | object | 翻译结果,键为语言代码,值为翻译文本 |
  756 +| `domain` | string | 查询域(如 `default`、`title`、`brand` 等) |
  757 +| `has_vector` | boolean | 是否生成了查询向量(用于语义检索) |
  758 +
740 ### 4.3 SpuResult字段说明 759 ### 4.3 SpuResult字段说明
741 760
742 | 字段 | 类型 | 说明 | 761 | 字段 | 类型 | 说明 |
docs/搜索推荐数据.md
@@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query、 @@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query、
291 - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…) 291 - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…)
292 - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase 292 - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase
293 - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency 293 - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency
294 -- `dws_query_daily`(tenant_id, date, normalized_query) 294 +- `dws_query_daily`(tenant_id, date, query_normalized)
295 - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt 295 - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt
296 - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version) 296 - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version)
297 - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression) 297 - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression)
@@ -32,7 +32,7 @@ def example_basic_usage(): @@ -32,7 +32,7 @@ def example_basic_usage():
32 # 存储查询分析结果 32 # 存储查询分析结果
33 context.store_query_analysis( 33 context.store_query_analysis(
34 original_query="红色连衣裙", 34 original_query="红色连衣裙",
35 - normalized_query="红色 连衣裙", 35 + query_normalized="红色 连衣裙",
36 rewritten_query="红色 女 连衣裙", 36 rewritten_query="红色 女 连衣裙",
37 detected_language="zh", 37 detected_language="zh",
38 translations={"en": "red dress"} 38 translations={"en": "red dress"}
frontend/static/js/app.js
@@ -747,7 +747,7 @@ function displayDebugInfo(data) { @@ -747,7 +747,7 @@ function displayDebugInfo(data) {
747 if (debugInfo.query_analysis) { 747 if (debugInfo.query_analysis) {
748 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>'; 748 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>';
749 html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; 749 html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`;
750 - html += `<div>normalized_query: ${escapeHtml(debugInfo.query_analysis.normalized_query || 'N/A')}</div>`; 750 + html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`;
751 html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; 751 html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`;
752 html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`; 752 html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`;
753 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; 753 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`;
query/query_parser.py
@@ -26,7 +26,7 @@ class ParsedQuery: @@ -26,7 +26,7 @@ class ParsedQuery:
26 def __init__( 26 def __init__(
27 self, 27 self,
28 original_query: str, 28 original_query: str,
29 - normalized_query: str, 29 + query_normalized: str,
30 rewritten_query: Optional[str] = None, 30 rewritten_query: Optional[str] = None,
31 detected_language: Optional[str] = None, 31 detected_language: Optional[str] = None,
32 translations: Dict[str, str] = None, 32 translations: Dict[str, str] = None,
@@ -38,8 +38,8 @@ class ParsedQuery: @@ -38,8 +38,8 @@ class ParsedQuery:
38 is_long_query: bool = False 38 is_long_query: bool = False
39 ): 39 ):
40 self.original_query = original_query 40 self.original_query = original_query
41 - self.normalized_query = normalized_query  
42 - self.rewritten_query = rewritten_query or normalized_query 41 + self.query_normalized = query_normalized
  42 + self.rewritten_query = rewritten_query or query_normalized
43 self.detected_language = detected_language 43 self.detected_language = detected_language
44 self.translations = translations or {} 44 self.translations = translations or {}
45 self.query_vector = query_vector 45 self.query_vector = query_vector
@@ -54,7 +54,7 @@ class ParsedQuery: @@ -54,7 +54,7 @@ class ParsedQuery:
54 """Convert to dictionary representation.""" 54 """Convert to dictionary representation."""
55 result = { 55 result = {
56 "original_query": self.original_query, 56 "original_query": self.original_query,
57 - "normalized_query": self.normalized_query, 57 + "query_normalized": self.query_normalized,
58 "rewritten_query": self.rewritten_query, 58 "rewritten_query": self.rewritten_query,
59 "detected_language": self.detected_language, 59 "detected_language": self.detected_language,
60 "translations": self.translations, 60 "translations": self.translations,
@@ -195,7 +195,7 @@ class QueryParser: @@ -195,7 +195,7 @@ class QueryParser:
195 normalized = self.normalizer.normalize(query) 195 normalized = self.normalizer.normalize(query)
196 log_debug(f"Normalization completed | '{query}' -> '{normalized}'") 196 log_debug(f"Normalization completed | '{query}' -> '{normalized}'")
197 if context: 197 if context:
198 - context.store_intermediate_result('normalized_query', normalized) 198 + context.store_intermediate_result('query_normalized', normalized)
199 199
200 # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike") 200 # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike")
201 domain, query_text = self.normalizer.extract_domain_query(normalized) 201 domain, query_text = self.normalizer.extract_domain_query(normalized)
@@ -402,7 +402,7 @@ class QueryParser: @@ -402,7 +402,7 @@ class QueryParser:
402 # Build result 402 # Build result
403 result = ParsedQuery( 403 result = ParsedQuery(
404 original_query=query, 404 original_query=query,
405 - normalized_query=normalized, 405 + query_normalized=normalized,
406 rewritten_query=rewritten, 406 rewritten_query=rewritten,
407 detected_language=detected_lang, 407 detected_language=detected_lang,
408 translations=translations, 408 translations=translations,
search/searcher.py
@@ -212,7 +212,7 @@ class Searcher: @@ -212,7 +212,7 @@ class Searcher:
212 # Store query analysis results in context 212 # Store query analysis results in context
213 context.store_query_analysis( 213 context.store_query_analysis(
214 original_query=parsed_query.original_query, 214 original_query=parsed_query.original_query,
215 - normalized_query=parsed_query.normalized_query, 215 + query_normalized=parsed_query.query_normalized,
216 rewritten_query=parsed_query.rewritten_query, 216 rewritten_query=parsed_query.rewritten_query,
217 detected_language=parsed_query.detected_language, 217 detected_language=parsed_query.detected_language,
218 translations=parsed_query.translations, 218 translations=parsed_query.translations,
@@ -280,7 +280,7 @@ class Searcher: @@ -280,7 +280,7 @@ class Searcher:
280 # No longer need to add tenant_id to filters since each tenant has its own index 280 # No longer need to add tenant_id to filters since each tenant has its own index
281 281
282 es_query = self.query_builder.build_query( 282 es_query = self.query_builder.build_query(
283 - query_text=parsed_query.rewritten_query or parsed_query.normalized_query, 283 + query_text=parsed_query.rewritten_query or parsed_query.query_normalized,
284 query_vector=parsed_query.query_vector if enable_embedding else None, 284 query_vector=parsed_query.query_vector if enable_embedding else None,
285 query_node=query_node, 285 query_node=query_node,
286 filters=filters, 286 filters=filters,
@@ -428,7 +428,7 @@ class Searcher: @@ -428,7 +428,7 @@ class Searcher:
428 debug_info = { 428 debug_info = {
429 "query_analysis": { 429 "query_analysis": {
430 "original_query": context.query_analysis.original_query, 430 "original_query": context.query_analysis.original_query,
431 - "normalized_query": context.query_analysis.normalized_query, 431 + "query_normalized": context.query_analysis.query_normalized,
432 "rewritten_query": context.query_analysis.rewritten_query, 432 "rewritten_query": context.query_analysis.rewritten_query,
433 "detected_language": context.query_analysis.detected_language, 433 "detected_language": context.query_analysis.detected_language,
434 "translations": context.query_analysis.translations, 434 "translations": context.query_analysis.translations,