Commit 3a5fda00462a3880e5db3790944a25fd226db03c
1 parent
6c5ee5bc
1. ES字段 skus的 image_src 字段读取 兼容 imageSrc (ES 本应该写入image_src,但是写入了imageSrc,暂时不做全量了,这里兼容下)
2. 返回query_normalized
Showing
9 changed files
with
37 additions
and
18 deletions
Show diff stats
api/result_formatter.py
| ... | ... | @@ -82,7 +82,7 @@ class ResultFormatter: |
| 82 | 82 | option1_value=sku_entry.get('option1_value'), |
| 83 | 83 | option2_value=sku_entry.get('option2_value'), |
| 84 | 84 | option3_value=sku_entry.get('option3_value'), |
| 85 | - image_src=sku_entry.get('image_src'), | |
| 85 | + image_src=sku_entry.get('image_src') or sku_entry.get('imageSrc'), | |
| 86 | 86 | options=sku_entry.get('options') |
| 87 | 87 | ) |
| 88 | 88 | skus.append(sku) | ... | ... |
context/request_context.py
| ... | ... | @@ -28,7 +28,7 @@ class RequestContextStage(Enum): |
| 28 | 28 | class QueryAnalysisResult: |
| 29 | 29 | """查询分析结果""" |
| 30 | 30 | original_query: Optional[str] = None |
| 31 | - normalized_query: Optional[str] = None | |
| 31 | + query_normalized: Optional[str] = None | |
| 32 | 32 | rewritten_query: Optional[str] = None |
| 33 | 33 | detected_language: Optional[str] = None |
| 34 | 34 | translations: Dict[str, str] = field(default_factory=dict) |
| ... | ... | @@ -264,7 +264,7 @@ class RequestContext: |
| 264 | 264 | }, |
| 265 | 265 | 'query_analysis': { |
| 266 | 266 | 'original_query': self.query_analysis.original_query, |
| 267 | - 'normalized_query': self.query_analysis.normalized_query, | |
| 267 | + 'query_normalized': self.query_analysis.query_normalized, | |
| 268 | 268 | 'rewritten_query': self.query_analysis.rewritten_query, |
| 269 | 269 | 'detected_language': self.query_analysis.detected_language, |
| 270 | 270 | 'domain': self.query_analysis.domain, | ... | ... |
docs/Search-API-Examples.md
| ... | ... | @@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 977 | 977 | "debug_info": { |
| 978 | 978 | "query_analysis": { |
| 979 | 979 | "original_query": "玩具", |
| 980 | - "normalized_query": "玩具", | |
| 980 | + "query_normalized": "玩具", | |
| 981 | 981 | "rewritten_query": "玩具", |
| 982 | 982 | "detected_language": "zh", |
| 983 | 983 | "translations": {"en": "toy"} | ... | ... |
docs/搜索API对接指南.md
| ... | ... | @@ -26,6 +26,7 @@ |
| 26 | 26 | 4. [响应格式说明](#响应格式说明) |
| 27 | 27 | - 4.1 [标准响应结构](#41-标准响应结构) |
| 28 | 28 | - 4.2 [响应字段说明](#42-响应字段说明) |
| 29 | + - 4.2.1 [query_info 说明](#421-query_info-说明) | |
| 29 | 30 | - 4.3 [SpuResult字段说明](#43-spuresult字段说明) |
| 30 | 31 | - 4.4 [SkuResult字段说明](#44-skuresult字段说明) |
| 31 | 32 | - 4.5 [多语言字段说明](#45-多语言字段说明) |
| ... | ... | @@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345" |
| 708 | 709 | ], |
| 709 | 710 | "query_info": { |
| 710 | 711 | "original_query": "芭比娃娃", |
| 712 | + "query_normalized": "芭比娃娃", | |
| 713 | + "rewritten_query": "芭比娃娃", | |
| 711 | 714 | "detected_language": "zh", |
| 712 | 715 | "translations": { |
| 713 | 716 | "en": "barbie doll" |
| 714 | - } | |
| 717 | + }, | |
| 718 | + "domain": "default", | |
| 719 | + "has_vector": true | |
| 715 | 720 | }, |
| 716 | 721 | "suggestions": [], |
| 717 | 722 | "related_searches": [], |
| ... | ... | @@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345" |
| 734 | 739 | | `total` | integer | 匹配的总文档数 | |
| 735 | 740 | | `max_score` | float | 最高相关性分数 | |
| 736 | 741 | | `facets` | array | 分面统计结果 | |
| 737 | -| `query_info` | object | 查询处理信息 | | |
| 742 | +| `query_info` | object | 查询处理信息,见 [4.2.1 query_info 说明](#421-query_info-说明) | | |
| 738 | 743 | | `took_ms` | integer | 搜索耗时(毫秒) | |
| 739 | 744 | |
| 745 | +#### 4.2.1 query_info 说明 | |
| 746 | + | |
| 747 | +`query_info` 包含本次搜索的查询解析与处理结果: | |
| 748 | + | |
| 749 | +| 子字段 | 类型 | 说明 | | |
| 750 | +|--------|------|------| | |
| 751 | +| `original_query` | string | 用户原始查询 | | |
| 752 | +| `query_normalized` | string | 归一化后的查询(去空白、大小写等预处理,用于后续解析与改写) | | |
| 753 | +| `rewritten_query` | string | 重写后的查询(同义词/词典扩展等) | | |
| 754 | +| `detected_language` | string | 检测到的查询语言(如 `zh`、`en`) | | |
| 755 | +| `translations` | object | 翻译结果,键为语言代码,值为翻译文本 | | |
| 756 | +| `domain` | string | 查询域(如 `default`、`title`、`brand` 等) | | |
| 757 | +| `has_vector` | boolean | 是否生成了查询向量(用于语义检索) | | |
| 758 | + | |
| 740 | 759 | ### 4.3 SpuResult字段说明 |
| 741 | 760 | |
| 742 | 761 | | 字段 | 类型 | 说明 | | ... | ... |
docs/搜索推荐数据.md
| ... | ... | @@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query、 |
| 291 | 291 | - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…) |
| 292 | 292 | - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase |
| 293 | 293 | - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency |
| 294 | -- `dws_query_daily`(tenant_id, date, normalized_query) | |
| 294 | +- `dws_query_daily`(tenant_id, date, query_normalized) | |
| 295 | 295 | - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt |
| 296 | 296 | - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version) |
| 297 | 297 | - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression) | ... | ... |
example_usage.py
| ... | ... | @@ -32,7 +32,7 @@ def example_basic_usage(): |
| 32 | 32 | # 存储查询分析结果 |
| 33 | 33 | context.store_query_analysis( |
| 34 | 34 | original_query="红色连衣裙", |
| 35 | - normalized_query="红色 连衣裙", | |
| 35 | + query_normalized="红色 连衣裙", | |
| 36 | 36 | rewritten_query="红色 女 连衣裙", |
| 37 | 37 | detected_language="zh", |
| 38 | 38 | translations={"en": "red dress"} | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -747,7 +747,7 @@ function displayDebugInfo(data) { |
| 747 | 747 | if (debugInfo.query_analysis) { |
| 748 | 748 | html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>'; |
| 749 | 749 | html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; |
| 750 | - html += `<div>normalized_query: ${escapeHtml(debugInfo.query_analysis.normalized_query || 'N/A')}</div>`; | |
| 750 | + html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`; | |
| 751 | 751 | html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; |
| 752 | 752 | html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`; |
| 753 | 753 | html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; | ... | ... |
query/query_parser.py
| ... | ... | @@ -26,7 +26,7 @@ class ParsedQuery: |
| 26 | 26 | def __init__( |
| 27 | 27 | self, |
| 28 | 28 | original_query: str, |
| 29 | - normalized_query: str, | |
| 29 | + query_normalized: str, | |
| 30 | 30 | rewritten_query: Optional[str] = None, |
| 31 | 31 | detected_language: Optional[str] = None, |
| 32 | 32 | translations: Dict[str, str] = None, |
| ... | ... | @@ -38,8 +38,8 @@ class ParsedQuery: |
| 38 | 38 | is_long_query: bool = False |
| 39 | 39 | ): |
| 40 | 40 | self.original_query = original_query |
| 41 | - self.normalized_query = normalized_query | |
| 42 | - self.rewritten_query = rewritten_query or normalized_query | |
| 41 | + self.query_normalized = query_normalized | |
| 42 | + self.rewritten_query = rewritten_query or query_normalized | |
| 43 | 43 | self.detected_language = detected_language |
| 44 | 44 | self.translations = translations or {} |
| 45 | 45 | self.query_vector = query_vector |
| ... | ... | @@ -54,7 +54,7 @@ class ParsedQuery: |
| 54 | 54 | """Convert to dictionary representation.""" |
| 55 | 55 | result = { |
| 56 | 56 | "original_query": self.original_query, |
| 57 | - "normalized_query": self.normalized_query, | |
| 57 | + "query_normalized": self.query_normalized, | |
| 58 | 58 | "rewritten_query": self.rewritten_query, |
| 59 | 59 | "detected_language": self.detected_language, |
| 60 | 60 | "translations": self.translations, |
| ... | ... | @@ -195,7 +195,7 @@ class QueryParser: |
| 195 | 195 | normalized = self.normalizer.normalize(query) |
| 196 | 196 | log_debug(f"Normalization completed | '{query}' -> '{normalized}'") |
| 197 | 197 | if context: |
| 198 | - context.store_intermediate_result('normalized_query', normalized) | |
| 198 | + context.store_intermediate_result('query_normalized', normalized) | |
| 199 | 199 | |
| 200 | 200 | # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike") |
| 201 | 201 | domain, query_text = self.normalizer.extract_domain_query(normalized) |
| ... | ... | @@ -402,7 +402,7 @@ class QueryParser: |
| 402 | 402 | # Build result |
| 403 | 403 | result = ParsedQuery( |
| 404 | 404 | original_query=query, |
| 405 | - normalized_query=normalized, | |
| 405 | + query_normalized=normalized, | |
| 406 | 406 | rewritten_query=rewritten, |
| 407 | 407 | detected_language=detected_lang, |
| 408 | 408 | translations=translations, | ... | ... |
search/searcher.py
| ... | ... | @@ -212,7 +212,7 @@ class Searcher: |
| 212 | 212 | # Store query analysis results in context |
| 213 | 213 | context.store_query_analysis( |
| 214 | 214 | original_query=parsed_query.original_query, |
| 215 | - normalized_query=parsed_query.normalized_query, | |
| 215 | + query_normalized=parsed_query.query_normalized, | |
| 216 | 216 | rewritten_query=parsed_query.rewritten_query, |
| 217 | 217 | detected_language=parsed_query.detected_language, |
| 218 | 218 | translations=parsed_query.translations, |
| ... | ... | @@ -280,7 +280,7 @@ class Searcher: |
| 280 | 280 | # No longer need to add tenant_id to filters since each tenant has its own index |
| 281 | 281 | |
| 282 | 282 | es_query = self.query_builder.build_query( |
| 283 | - query_text=parsed_query.rewritten_query or parsed_query.normalized_query, | |
| 283 | + query_text=parsed_query.rewritten_query or parsed_query.query_normalized, | |
| 284 | 284 | query_vector=parsed_query.query_vector if enable_embedding else None, |
| 285 | 285 | query_node=query_node, |
| 286 | 286 | filters=filters, |
| ... | ... | @@ -428,7 +428,7 @@ class Searcher: |
| 428 | 428 | debug_info = { |
| 429 | 429 | "query_analysis": { |
| 430 | 430 | "original_query": context.query_analysis.original_query, |
| 431 | - "normalized_query": context.query_analysis.normalized_query, | |
| 431 | + "query_normalized": context.query_analysis.query_normalized, | |
| 432 | 432 | "rewritten_query": context.query_analysis.rewritten_query, |
| 433 | 433 | "detected_language": context.query_analysis.detected_language, |
| 434 | 434 | "translations": context.query_analysis.translations, | ... | ... |