From 3a5fda00462a3880e5db3790944a25fd226db03c Mon Sep 17 00:00:00 2001 From: tangwang Date: Tue, 27 Jan 2026 10:24:14 +0800 Subject: [PATCH] 1. ES字段 skus的 image_src 字段读取 兼容 imageSrc (ES 本应该写入image_src,但是写入了imageSrc,暂时不做全量了,这里兼容下) 2. 返回query_normalized --- api/result_formatter.py | 2 +- context/request_context.py | 4 ++-- docs/Search-API-Examples.md | 2 +- docs/搜索API对接指南.md | 23 +++++++++++++++++++++-- docs/搜索推荐数据.md | 2 +- example_usage.py | 2 +- frontend/static/js/app.js | 2 +- query/query_parser.py | 12 ++++++------ search/searcher.py | 6 +++--- 9 files changed, 37 insertions(+), 18 deletions(-) diff --git a/api/result_formatter.py b/api/result_formatter.py index 88cf61e..830d38e 100644 --- a/api/result_formatter.py +++ b/api/result_formatter.py @@ -82,7 +82,7 @@ class ResultFormatter: option1_value=sku_entry.get('option1_value'), option2_value=sku_entry.get('option2_value'), option3_value=sku_entry.get('option3_value'), - image_src=sku_entry.get('image_src'), + image_src=sku_entry.get('image_src') or sku_entry.get('imageSrc'), options=sku_entry.get('options') ) skus.append(sku) diff --git a/context/request_context.py b/context/request_context.py index 71dc60e..9f33a3b 100644 --- a/context/request_context.py +++ b/context/request_context.py @@ -28,7 +28,7 @@ class RequestContextStage(Enum): class QueryAnalysisResult: """查询分析结果""" original_query: Optional[str] = None - normalized_query: Optional[str] = None + query_normalized: Optional[str] = None rewritten_query: Optional[str] = None detected_language: Optional[str] = None translations: Dict[str, str] = field(default_factory=dict) @@ -264,7 +264,7 @@ class RequestContext: }, 'query_analysis': { 'original_query': self.query_analysis.original_query, - 'normalized_query': self.query_analysis.normalized_query, + 'query_normalized': self.query_analysis.query_normalized, 'rewritten_query': self.query_analysis.rewritten_query, 'detected_language': self.query_analysis.detected_language, 'domain': 
self.query_analysis.domain, diff --git a/docs/Search-API-Examples.md b/docs/Search-API-Examples.md index 2427edb..b13d723 100644 --- a/docs/Search-API-Examples.md +++ b/docs/Search-API-Examples.md @@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \ "debug_info": { "query_analysis": { "original_query": "玩具", - "normalized_query": "玩具", + "query_normalized": "玩具", "rewritten_query": "玩具", "detected_language": "zh", "translations": {"en": "toy"} diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index 98dd3eb..3e11f18 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -26,6 +26,7 @@ 4. [响应格式说明](#响应格式说明) - 4.1 [标准响应结构](#41-标准响应结构) - 4.2 [响应字段说明](#42-响应字段说明) + - 4.2.1 [query_info 说明](#421-query_info-说明) - 4.3 [SpuResult字段说明](#43-spuresult字段说明) - 4.4 [SkuResult字段说明](#44-skuresult字段说明) - 4.5 [多语言字段说明](#45-多语言字段说明) @@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345" ], "query_info": { "original_query": "芭比娃娃", + "query_normalized": "芭比娃娃", + "rewritten_query": "芭比娃娃", "detected_language": "zh", "translations": { "en": "barbie doll" - } + }, + "domain": "default", + "has_vector": true }, "suggestions": [], "related_searches": [], @@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345" | `total` | integer | 匹配的总文档数 | | `max_score` | float | 最高相关性分数 | | `facets` | array | 分面统计结果 | -| `query_info` | object | 查询处理信息 | +| `query_info` | object | 查询处理信息,见 [4.2.1 query_info 说明](#421-query_info-说明) | | `took_ms` | integer | 搜索耗时(毫秒) | +#### 4.2.1 query_info 说明 + +`query_info` 包含本次搜索的查询解析与处理结果: + +| 子字段 | 类型 | 说明 | +|--------|------|------| +| `original_query` | string | 用户原始查询 | +| `query_normalized` | string | 归一化后的查询(去空白、大小写等预处理,用于后续解析与改写) | +| `rewritten_query` | string | 重写后的查询(同义词/词典扩展等) | +| `detected_language` | string | 检测到的查询语言(如 `zh`、`en`) | +| `translations` | object | 翻译结果,键为语言代码,值为翻译文本 | +| `domain` | string | 查询域(如 `default`、`title`、`brand` 等) | +| `has_vector` | boolean | 是否生成了查询向量(用于语义检索) | + ### 4.3 SpuResult字段说明 | 字段 | 类型 | 
说明 | diff --git a/docs/搜索推荐数据.md b/docs/搜索推荐数据.md index 1de4da0..ccc76e5 100644 --- a/docs/搜索推荐数据.md +++ b/docs/搜索推荐数据.md @@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…) - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency -- `dws_query_daily`(tenant_id, date, normalized_query) +- `dws_query_daily`(tenant_id, date, query_normalized) - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version) - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression) diff --git a/example_usage.py b/example_usage.py index a053ecf..e1d7f25 100644 --- a/example_usage.py +++ b/example_usage.py @@ -32,7 +32,7 @@ def example_basic_usage(): # 存储查询分析结果 context.store_query_analysis( original_query="红色连衣裙", - normalized_query="红色 连衣裙", + query_normalized="红色 连衣裙", rewritten_query="红色 女 连衣裙", detected_language="zh", translations={"en": "red dress"} diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index 33cc3c9..7ef39f2 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -747,7 +747,7 @@ function displayDebugInfo(data) { if (debugInfo.query_analysis) { html += '
Query Analysis:'; html += `
original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}
`; - html += `
normalized_query: ${escapeHtml(debugInfo.query_analysis.normalized_query || 'N/A')}
`; + html += `
query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}
`; html += `
rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}
`; html += `
detected_language: ${debugInfo.query_analysis.detected_language}
`; html += `
domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}
`; diff --git a/query/query_parser.py b/query/query_parser.py index f294efd..28416d9 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -26,7 +26,7 @@ class ParsedQuery: def __init__( self, original_query: str, - normalized_query: str, + query_normalized: str, rewritten_query: Optional[str] = None, detected_language: Optional[str] = None, translations: Dict[str, str] = None, @@ -38,8 +38,8 @@ class ParsedQuery: is_long_query: bool = False ): self.original_query = original_query - self.normalized_query = normalized_query - self.rewritten_query = rewritten_query or normalized_query + self.query_normalized = query_normalized + self.rewritten_query = rewritten_query or query_normalized self.detected_language = detected_language self.translations = translations or {} self.query_vector = query_vector @@ -54,7 +54,7 @@ class ParsedQuery: """Convert to dictionary representation.""" result = { "original_query": self.original_query, - "normalized_query": self.normalized_query, + "query_normalized": self.query_normalized, "rewritten_query": self.rewritten_query, "detected_language": self.detected_language, "translations": self.translations, @@ -195,7 +195,7 @@ class QueryParser: normalized = self.normalizer.normalize(query) log_debug(f"Normalization completed | '{query}' -> '{normalized}'") if context: - context.store_intermediate_result('normalized_query', normalized) + context.store_intermediate_result('query_normalized', normalized) # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike") domain, query_text = self.normalizer.extract_domain_query(normalized) @@ -402,7 +402,7 @@ class QueryParser: # Build result result = ParsedQuery( original_query=query, - normalized_query=normalized, + query_normalized=normalized, rewritten_query=rewritten, detected_language=detected_lang, translations=translations, diff --git a/search/searcher.py b/search/searcher.py index 66799d0..98b6033 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ 
-212,7 +212,7 @@ class Searcher: # Store query analysis results in context context.store_query_analysis( original_query=parsed_query.original_query, - normalized_query=parsed_query.normalized_query, + query_normalized=parsed_query.query_normalized, rewritten_query=parsed_query.rewritten_query, detected_language=parsed_query.detected_language, translations=parsed_query.translations, @@ -280,7 +280,7 @@ class Searcher: # No longer need to add tenant_id to filters since each tenant has its own index es_query = self.query_builder.build_query( - query_text=parsed_query.rewritten_query or parsed_query.normalized_query, + query_text=parsed_query.rewritten_query or parsed_query.query_normalized, query_vector=parsed_query.query_vector if enable_embedding else None, query_node=query_node, filters=filters, @@ -428,7 +428,7 @@ class Searcher: debug_info = { "query_analysis": { "original_query": context.query_analysis.original_query, - "normalized_query": context.query_analysis.normalized_query, + "query_normalized": context.query_analysis.query_normalized, "rewritten_query": context.query_analysis.rewritten_query, "detected_language": context.query_analysis.detected_language, "translations": context.query_analysis.translations, -- libgit2 0.21.2