Commit 3a5fda00462a3880e5db3790944a25fd226db03c

Authored by tangwang
1 parent 6c5ee5bc

1. 读取 ES 字段 skus 的 image_src 时兼容 imageSrc(ES 本应写入 image_src,但实际写入了 imageSrc;暂时不做全量重建,这里先做兼容)

2. 返回 query_normalized
api/result_formatter.py
... ... @@ -82,7 +82,7 @@ class ResultFormatter:
82 82 option1_value=sku_entry.get('option1_value'),
83 83 option2_value=sku_entry.get('option2_value'),
84 84 option3_value=sku_entry.get('option3_value'),
85   - image_src=sku_entry.get('image_src'),
  85 + image_src=sku_entry.get('image_src') or sku_entry.get('imageSrc'),
86 86 options=sku_entry.get('options')
87 87 )
88 88 skus.append(sku)
... ...
context/request_context.py
... ... @@ -28,7 +28,7 @@ class RequestContextStage(Enum):
28 28 class QueryAnalysisResult:
29 29 """查询分析结果"""
30 30 original_query: Optional[str] = None
31   - normalized_query: Optional[str] = None
  31 + query_normalized: Optional[str] = None
32 32 rewritten_query: Optional[str] = None
33 33 detected_language: Optional[str] = None
34 34 translations: Dict[str, str] = field(default_factory=dict)
... ... @@ -264,7 +264,7 @@ class RequestContext:
264 264 },
265 265 'query_analysis': {
266 266 'original_query': self.query_analysis.original_query,
267   - 'normalized_query': self.query_analysis.normalized_query,
  267 + 'query_normalized': self.query_analysis.query_normalized,
268 268 'rewritten_query': self.query_analysis.rewritten_query,
269 269 'detected_language': self.query_analysis.detected_language,
270 270 'domain': self.query_analysis.domain,
... ...
docs/Search-API-Examples.md
... ... @@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \
977 977 "debug_info": {
978 978 "query_analysis": {
979 979 "original_query": "玩具",
980   - "normalized_query": "玩具",
  980 + "query_normalized": "玩具",
981 981 "rewritten_query": "玩具",
982 982 "detected_language": "zh",
983 983 "translations": {"en": "toy"}
... ...
docs/搜索API对接指南.md
... ... @@ -26,6 +26,7 @@
26 26 4. [响应格式说明](#响应格式说明)
27 27 - 4.1 [标准响应结构](#41-标准响应结构)
28 28 - 4.2 [响应字段说明](#42-响应字段说明)
  29 + - 4.2.1 [query_info 说明](#421-query_info-说明)
29 30 - 4.3 [SpuResult字段说明](#43-spuresult字段说明)
30 31 - 4.4 [SkuResult字段说明](#44-skuresult字段说明)
31 32 - 4.5 [多语言字段说明](#45-多语言字段说明)
... ... @@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345"
708 709 ],
709 710 "query_info": {
710 711 "original_query": "芭比娃娃",
  712 + "query_normalized": "芭比娃娃",
  713 + "rewritten_query": "芭比娃娃",
711 714 "detected_language": "zh",
712 715 "translations": {
713 716 "en": "barbie doll"
714   - }
  717 + },
  718 + "domain": "default",
  719 + "has_vector": true
715 720 },
716 721 "suggestions": [],
717 722 "related_searches": [],
... ... @@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345"
734 739 | `total` | integer | 匹配的总文档数 |
735 740 | `max_score` | float | 最高相关性分数 |
736 741 | `facets` | array | 分面统计结果 |
737   -| `query_info` | object | 查询处理信息 |
  742 +| `query_info` | object | 查询处理信息,见 [4.2.1 query_info 说明](#421-query_info-说明) |
738 743 | `took_ms` | integer | 搜索耗时(毫秒) |
739 744  
  745 +#### 4.2.1 query_info 说明
  746 +
  747 +`query_info` 包含本次搜索的查询解析与处理结果:
  748 +
  749 +| 子字段 | 类型 | 说明 |
  750 +|--------|------|------|
  751 +| `original_query` | string | 用户原始查询 |
  752 +| `query_normalized` | string | 归一化后的查询(去除多余空白、统一大小写等预处理,用于后续解析与重写) |
  753 +| `rewritten_query` | string | 重写后的查询(同义词/词典扩展等) |
  754 +| `detected_language` | string | 检测到的查询语言(如 `zh`、`en`) |
  755 +| `translations` | object | 翻译结果,键为语言代码,值为翻译文本 |
  756 +| `domain` | string | 查询域(如 `default`、`title`、`brand` 等) |
  757 +| `has_vector` | boolean | 是否生成了查询向量(用于语义检索) |
  758 +
740 759 ### 4.3 SpuResult字段说明
741 760  
742 761 | 字段 | 类型 | 说明 |
... ...
docs/搜索推荐数据.md
... ... @@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query…
291 291 - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…)
292 292 - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase
293 293 - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency
294   -- `dws_query_daily`(tenant_id, date, normalized_query)
  294 +- `dws_query_daily`(tenant_id, date, query_normalized)
295 295 - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt
296 296 - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version)
297 297 - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression)
... ...
example_usage.py
... ... @@ -32,7 +32,7 @@ def example_basic_usage():
32 32 # 存储查询分析结果
33 33 context.store_query_analysis(
34 34 original_query="红色连衣裙",
35   - normalized_query="红色 连衣裙",
  35 + query_normalized="红色 连衣裙",
36 36 rewritten_query="红色 女 连衣裙",
37 37 detected_language="zh",
38 38 translations={"en": "red dress"}
... ...
frontend/static/js/app.js
... ... @@ -747,7 +747,7 @@ function displayDebugInfo(data) {
747 747 if (debugInfo.query_analysis) {
748 748 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>';
749 749 html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`;
750   - html += `<div>normalized_query: ${escapeHtml(debugInfo.query_analysis.normalized_query || 'N/A')}</div>`;
  750 + html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`;
751 751 html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`;
752 752 html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`;
753 753 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`;
... ...
query/query_parser.py
... ... @@ -26,7 +26,7 @@ class ParsedQuery:
26 26 def __init__(
27 27 self,
28 28 original_query: str,
29   - normalized_query: str,
  29 + query_normalized: str,
30 30 rewritten_query: Optional[str] = None,
31 31 detected_language: Optional[str] = None,
32 32 translations: Dict[str, str] = None,
... ... @@ -38,8 +38,8 @@ class ParsedQuery:
38 38 is_long_query: bool = False
39 39 ):
40 40 self.original_query = original_query
41   - self.normalized_query = normalized_query
42   - self.rewritten_query = rewritten_query or normalized_query
  41 + self.query_normalized = query_normalized
  42 + self.rewritten_query = rewritten_query or query_normalized
43 43 self.detected_language = detected_language
44 44 self.translations = translations or {}
45 45 self.query_vector = query_vector
... ... @@ -54,7 +54,7 @@ class ParsedQuery:
54 54 """Convert to dictionary representation."""
55 55 result = {
56 56 "original_query": self.original_query,
57   - "normalized_query": self.normalized_query,
  57 + "query_normalized": self.query_normalized,
58 58 "rewritten_query": self.rewritten_query,
59 59 "detected_language": self.detected_language,
60 60 "translations": self.translations,
... ... @@ -195,7 +195,7 @@ class QueryParser:
195 195 normalized = self.normalizer.normalize(query)
196 196 log_debug(f"Normalization completed | '{query}' -> '{normalized}'")
197 197 if context:
198   - context.store_intermediate_result('normalized_query', normalized)
  198 + context.store_intermediate_result('query_normalized', normalized)
199 199  
200 200 # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike")
201 201 domain, query_text = self.normalizer.extract_domain_query(normalized)
... ... @@ -402,7 +402,7 @@ class QueryParser:
402 402 # Build result
403 403 result = ParsedQuery(
404 404 original_query=query,
405   - normalized_query=normalized,
  405 + query_normalized=normalized,
406 406 rewritten_query=rewritten,
407 407 detected_language=detected_lang,
408 408 translations=translations,
... ...
search/searcher.py
... ... @@ -212,7 +212,7 @@ class Searcher:
212 212 # Store query analysis results in context
213 213 context.store_query_analysis(
214 214 original_query=parsed_query.original_query,
215   - normalized_query=parsed_query.normalized_query,
  215 + query_normalized=parsed_query.query_normalized,
216 216 rewritten_query=parsed_query.rewritten_query,
217 217 detected_language=parsed_query.detected_language,
218 218 translations=parsed_query.translations,
... ... @@ -280,7 +280,7 @@ class Searcher:
280 280 # No longer need to add tenant_id to filters since each tenant has its own index
281 281  
282 282 es_query = self.query_builder.build_query(
283   - query_text=parsed_query.rewritten_query or parsed_query.normalized_query,
  283 + query_text=parsed_query.rewritten_query or parsed_query.query_normalized,
284 284 query_vector=parsed_query.query_vector if enable_embedding else None,
285 285 query_node=query_node,
286 286 filters=filters,
... ... @@ -428,7 +428,7 @@ class Searcher:
428 428 debug_info = {
429 429 "query_analysis": {
430 430 "original_query": context.query_analysis.original_query,
431   - "normalized_query": context.query_analysis.normalized_query,
  431 + "query_normalized": context.query_analysis.query_normalized,
432 432 "rewritten_query": context.query_analysis.rewritten_query,
433 433 "detected_language": context.query_analysis.detected_language,
434 434 "translations": context.query_analysis.translations,
... ...