Commit 3a5fda00462a3880e5db3790944a25fd226db03c
1 parent
6c5ee5bc
1. ES字段 skus的 image_src 字段读取 兼容 imageSrc (ES 本应该写入image_src,但是写入了imageSrc,暂时不做全量了,这里兼容下)
2. 返回query_normalized
Showing
9 changed files
with
37 additions
and
18 deletions
Show diff stats
api/result_formatter.py
| @@ -82,7 +82,7 @@ class ResultFormatter: | @@ -82,7 +82,7 @@ class ResultFormatter: | ||
| 82 | option1_value=sku_entry.get('option1_value'), | 82 | option1_value=sku_entry.get('option1_value'), |
| 83 | option2_value=sku_entry.get('option2_value'), | 83 | option2_value=sku_entry.get('option2_value'), |
| 84 | option3_value=sku_entry.get('option3_value'), | 84 | option3_value=sku_entry.get('option3_value'), |
| 85 | - image_src=sku_entry.get('image_src'), | 85 | + image_src=sku_entry.get('image_src') or sku_entry.get('imageSrc'), |
| 86 | options=sku_entry.get('options') | 86 | options=sku_entry.get('options') |
| 87 | ) | 87 | ) |
| 88 | skus.append(sku) | 88 | skus.append(sku) |
context/request_context.py
| @@ -28,7 +28,7 @@ class RequestContextStage(Enum): | @@ -28,7 +28,7 @@ class RequestContextStage(Enum): | ||
| 28 | class QueryAnalysisResult: | 28 | class QueryAnalysisResult: |
| 29 | """查询分析结果""" | 29 | """查询分析结果""" |
| 30 | original_query: Optional[str] = None | 30 | original_query: Optional[str] = None |
| 31 | - normalized_query: Optional[str] = None | 31 | + query_normalized: Optional[str] = None |
| 32 | rewritten_query: Optional[str] = None | 32 | rewritten_query: Optional[str] = None |
| 33 | detected_language: Optional[str] = None | 33 | detected_language: Optional[str] = None |
| 34 | translations: Dict[str, str] = field(default_factory=dict) | 34 | translations: Dict[str, str] = field(default_factory=dict) |
| @@ -264,7 +264,7 @@ class RequestContext: | @@ -264,7 +264,7 @@ class RequestContext: | ||
| 264 | }, | 264 | }, |
| 265 | 'query_analysis': { | 265 | 'query_analysis': { |
| 266 | 'original_query': self.query_analysis.original_query, | 266 | 'original_query': self.query_analysis.original_query, |
| 267 | - 'normalized_query': self.query_analysis.normalized_query, | 267 | + 'query_normalized': self.query_analysis.query_normalized, |
| 268 | 'rewritten_query': self.query_analysis.rewritten_query, | 268 | 'rewritten_query': self.query_analysis.rewritten_query, |
| 269 | 'detected_language': self.query_analysis.detected_language, | 269 | 'detected_language': self.query_analysis.detected_language, |
| 270 | 'domain': self.query_analysis.domain, | 270 | 'domain': self.query_analysis.domain, |
docs/Search-API-Examples.md
| @@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \ | @@ -977,7 +977,7 @@ curl -X POST "http://localhost:6002/search/" \ | ||
| 977 | "debug_info": { | 977 | "debug_info": { |
| 978 | "query_analysis": { | 978 | "query_analysis": { |
| 979 | "original_query": "玩具", | 979 | "original_query": "玩具", |
| 980 | - "normalized_query": "玩具", | 980 | + "query_normalized": "玩具", |
| 981 | "rewritten_query": "玩具", | 981 | "rewritten_query": "玩具", |
| 982 | "detected_language": "zh", | 982 | "detected_language": "zh", |
| 983 | "translations": {"en": "toy"} | 983 | "translations": {"en": "toy"} |
docs/搜索API对接指南.md
| @@ -26,6 +26,7 @@ | @@ -26,6 +26,7 @@ | ||
| 26 | 4. [响应格式说明](#响应格式说明) | 26 | 4. [响应格式说明](#响应格式说明) |
| 27 | - 4.1 [标准响应结构](#41-标准响应结构) | 27 | - 4.1 [标准响应结构](#41-标准响应结构) |
| 28 | - 4.2 [响应字段说明](#42-响应字段说明) | 28 | - 4.2 [响应字段说明](#42-响应字段说明) |
| 29 | + - 4.2.1 [query_info 说明](#421-query_info-说明) | ||
| 29 | - 4.3 [SpuResult字段说明](#43-spuresult字段说明) | 30 | - 4.3 [SpuResult字段说明](#43-spuresult字段说明) |
| 30 | - 4.4 [SkuResult字段说明](#44-skuresult字段说明) | 31 | - 4.4 [SkuResult字段说明](#44-skuresult字段说明) |
| 31 | - 4.5 [多语言字段说明](#45-多语言字段说明) | 32 | - 4.5 [多语言字段说明](#45-多语言字段说明) |
| @@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345" | @@ -708,10 +709,14 @@ curl "http://localhost:6002/search/12345" | ||
| 708 | ], | 709 | ], |
| 709 | "query_info": { | 710 | "query_info": { |
| 710 | "original_query": "芭比娃娃", | 711 | "original_query": "芭比娃娃", |
| 712 | + "query_normalized": "芭比娃娃", | ||
| 713 | + "rewritten_query": "芭比娃娃", | ||
| 711 | "detected_language": "zh", | 714 | "detected_language": "zh", |
| 712 | "translations": { | 715 | "translations": { |
| 713 | "en": "barbie doll" | 716 | "en": "barbie doll" |
| 714 | - } | 717 | + }, |
| 718 | + "domain": "default", | ||
| 719 | + "has_vector": true | ||
| 715 | }, | 720 | }, |
| 716 | "suggestions": [], | 721 | "suggestions": [], |
| 717 | "related_searches": [], | 722 | "related_searches": [], |
| @@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345" | @@ -734,9 +739,23 @@ curl "http://localhost:6002/search/12345" | ||
| 734 | | `total` | integer | 匹配的总文档数 | | 739 | | `total` | integer | 匹配的总文档数 | |
| 735 | | `max_score` | float | 最高相关性分数 | | 740 | | `max_score` | float | 最高相关性分数 | |
| 736 | | `facets` | array | 分面统计结果 | | 741 | | `facets` | array | 分面统计结果 | |
| 737 | -| `query_info` | object | 查询处理信息 | | 742 | +| `query_info` | object | 查询处理信息,见 [4.2.1 query_info 说明](#421-query_info-说明) | |
| 738 | | `took_ms` | integer | 搜索耗时(毫秒) | | 743 | | `took_ms` | integer | 搜索耗时(毫秒) | |
| 739 | 744 | ||
| 745 | +#### 4.2.1 query_info 说明 | ||
| 746 | + | ||
| 747 | +`query_info` 包含本次搜索的查询解析与处理结果: | ||
| 748 | + | ||
| 749 | +| 子字段 | 类型 | 说明 | | ||
| 750 | +|--------|------|------| | ||
| 751 | +| `original_query` | string | 用户原始查询 | | ||
| 752 | +| `query_normalized` | string | 归一化后的查询(去空白、大小写等预处理,用于后续解析与改写) | | ||
| 753 | +| `rewritten_query` | string | 重写后的查询(同义词/词典扩展等) | | ||
| 754 | +| `detected_language` | string | 检测到的查询语言(如 `zh`、`en`) | | ||
| 755 | +| `translations` | object | 翻译结果,键为语言代码,值为翻译文本 | | ||
| 756 | +| `domain` | string | 查询域(如 `default`、`title`、`brand` 等) | | ||
| 757 | +| `has_vector` | boolean | 是否生成了查询向量(用于语义检索) | | ||
| 758 | + | ||
| 740 | ### 4.3 SpuResult字段说明 | 759 | ### 4.3 SpuResult字段说明 |
| 741 | 760 | ||
| 742 | | 字段 | 类型 | 说明 | | 761 | | 字段 | 类型 | 说明 | |
docs/搜索推荐数据.md
| @@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query、 | @@ -291,7 +291,7 @@ Shopify 的 Search & Discovery/行为报表里,搜索常看的就是 **query、 | ||
| 291 | - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…) | 291 | - `dws_search_kpi_daily`(tenant_id, date, page_type/device/geo/new_vs_returning…) |
| 292 | - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase | 292 | - search_pv, search_uv, sessions_with_click, sessions_with_atc, sessions_with_purchase |
| 293 | - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency | 293 | - click_rate_session, purchase_rate_session, zero_result_rate, no_click_rate, p95_latency |
| 294 | -- `dws_query_daily`(tenant_id, date, normalized_query) | 294 | +- `dws_query_daily`(tenant_id, date, query_normalized) |
| 295 | - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt | 295 | - searches, exposure_uv, click_uv, purchase_uv, zero_cnt, no_click_cnt |
| 296 | - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version) | 296 | - `dws_rec_kpi_daily`(tenant_id, date, placement, algo_id/model_version) |
| 297 | - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression) | 297 | - impressions, clicks, ctr, atc, purchases, revenue, rpi(revenue per impression) |
example_usage.py
| @@ -32,7 +32,7 @@ def example_basic_usage(): | @@ -32,7 +32,7 @@ def example_basic_usage(): | ||
| 32 | # 存储查询分析结果 | 32 | # 存储查询分析结果 |
| 33 | context.store_query_analysis( | 33 | context.store_query_analysis( |
| 34 | original_query="红色连衣裙", | 34 | original_query="红色连衣裙", |
| 35 | - normalized_query="红色 连衣裙", | 35 | + query_normalized="红色 连衣裙", |
| 36 | rewritten_query="红色 女 连衣裙", | 36 | rewritten_query="红色 女 连衣裙", |
| 37 | detected_language="zh", | 37 | detected_language="zh", |
| 38 | translations={"en": "red dress"} | 38 | translations={"en": "red dress"} |
frontend/static/js/app.js
| @@ -747,7 +747,7 @@ function displayDebugInfo(data) { | @@ -747,7 +747,7 @@ function displayDebugInfo(data) { | ||
| 747 | if (debugInfo.query_analysis) { | 747 | if (debugInfo.query_analysis) { |
| 748 | html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>'; | 748 | html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>'; |
| 749 | html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; | 749 | html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; |
| 750 | - html += `<div>normalized_query: ${escapeHtml(debugInfo.query_analysis.normalized_query || 'N/A')}</div>`; | 750 | + html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`; |
| 751 | html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; | 751 | html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; |
| 752 | html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`; | 752 | html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`; |
| 753 | html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; | 753 | html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; |
query/query_parser.py
| @@ -26,7 +26,7 @@ class ParsedQuery: | @@ -26,7 +26,7 @@ class ParsedQuery: | ||
| 26 | def __init__( | 26 | def __init__( |
| 27 | self, | 27 | self, |
| 28 | original_query: str, | 28 | original_query: str, |
| 29 | - normalized_query: str, | 29 | + query_normalized: str, |
| 30 | rewritten_query: Optional[str] = None, | 30 | rewritten_query: Optional[str] = None, |
| 31 | detected_language: Optional[str] = None, | 31 | detected_language: Optional[str] = None, |
| 32 | translations: Dict[str, str] = None, | 32 | translations: Dict[str, str] = None, |
| @@ -38,8 +38,8 @@ class ParsedQuery: | @@ -38,8 +38,8 @@ class ParsedQuery: | ||
| 38 | is_long_query: bool = False | 38 | is_long_query: bool = False |
| 39 | ): | 39 | ): |
| 40 | self.original_query = original_query | 40 | self.original_query = original_query |
| 41 | - self.normalized_query = normalized_query | ||
| 42 | - self.rewritten_query = rewritten_query or normalized_query | 41 | + self.query_normalized = query_normalized |
| 42 | + self.rewritten_query = rewritten_query or query_normalized | ||
| 43 | self.detected_language = detected_language | 43 | self.detected_language = detected_language |
| 44 | self.translations = translations or {} | 44 | self.translations = translations or {} |
| 45 | self.query_vector = query_vector | 45 | self.query_vector = query_vector |
| @@ -54,7 +54,7 @@ class ParsedQuery: | @@ -54,7 +54,7 @@ class ParsedQuery: | ||
| 54 | """Convert to dictionary representation.""" | 54 | """Convert to dictionary representation.""" |
| 55 | result = { | 55 | result = { |
| 56 | "original_query": self.original_query, | 56 | "original_query": self.original_query, |
| 57 | - "normalized_query": self.normalized_query, | 57 | + "query_normalized": self.query_normalized, |
| 58 | "rewritten_query": self.rewritten_query, | 58 | "rewritten_query": self.rewritten_query, |
| 59 | "detected_language": self.detected_language, | 59 | "detected_language": self.detected_language, |
| 60 | "translations": self.translations, | 60 | "translations": self.translations, |
| @@ -195,7 +195,7 @@ class QueryParser: | @@ -195,7 +195,7 @@ class QueryParser: | ||
| 195 | normalized = self.normalizer.normalize(query) | 195 | normalized = self.normalizer.normalize(query) |
| 196 | log_debug(f"Normalization completed | '{query}' -> '{normalized}'") | 196 | log_debug(f"Normalization completed | '{query}' -> '{normalized}'") |
| 197 | if context: | 197 | if context: |
| 198 | - context.store_intermediate_result('normalized_query', normalized) | 198 | + context.store_intermediate_result('query_normalized', normalized) |
| 199 | 199 | ||
| 200 | # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike") | 200 | # Extract domain if present (e.g., "brand:Nike" -> domain="brand", query="Nike") |
| 201 | domain, query_text = self.normalizer.extract_domain_query(normalized) | 201 | domain, query_text = self.normalizer.extract_domain_query(normalized) |
| @@ -402,7 +402,7 @@ class QueryParser: | @@ -402,7 +402,7 @@ class QueryParser: | ||
| 402 | # Build result | 402 | # Build result |
| 403 | result = ParsedQuery( | 403 | result = ParsedQuery( |
| 404 | original_query=query, | 404 | original_query=query, |
| 405 | - normalized_query=normalized, | 405 | + query_normalized=normalized, |
| 406 | rewritten_query=rewritten, | 406 | rewritten_query=rewritten, |
| 407 | detected_language=detected_lang, | 407 | detected_language=detected_lang, |
| 408 | translations=translations, | 408 | translations=translations, |
search/searcher.py
| @@ -212,7 +212,7 @@ class Searcher: | @@ -212,7 +212,7 @@ class Searcher: | ||
| 212 | # Store query analysis results in context | 212 | # Store query analysis results in context |
| 213 | context.store_query_analysis( | 213 | context.store_query_analysis( |
| 214 | original_query=parsed_query.original_query, | 214 | original_query=parsed_query.original_query, |
| 215 | - normalized_query=parsed_query.normalized_query, | 215 | + query_normalized=parsed_query.query_normalized, |
| 216 | rewritten_query=parsed_query.rewritten_query, | 216 | rewritten_query=parsed_query.rewritten_query, |
| 217 | detected_language=parsed_query.detected_language, | 217 | detected_language=parsed_query.detected_language, |
| 218 | translations=parsed_query.translations, | 218 | translations=parsed_query.translations, |
| @@ -280,7 +280,7 @@ class Searcher: | @@ -280,7 +280,7 @@ class Searcher: | ||
| 280 | # No longer need to add tenant_id to filters since each tenant has its own index | 280 | # No longer need to add tenant_id to filters since each tenant has its own index |
| 281 | 281 | ||
| 282 | es_query = self.query_builder.build_query( | 282 | es_query = self.query_builder.build_query( |
| 283 | - query_text=parsed_query.rewritten_query or parsed_query.normalized_query, | 283 | + query_text=parsed_query.rewritten_query or parsed_query.query_normalized, |
| 284 | query_vector=parsed_query.query_vector if enable_embedding else None, | 284 | query_vector=parsed_query.query_vector if enable_embedding else None, |
| 285 | query_node=query_node, | 285 | query_node=query_node, |
| 286 | filters=filters, | 286 | filters=filters, |
| @@ -428,7 +428,7 @@ class Searcher: | @@ -428,7 +428,7 @@ class Searcher: | ||
| 428 | debug_info = { | 428 | debug_info = { |
| 429 | "query_analysis": { | 429 | "query_analysis": { |
| 430 | "original_query": context.query_analysis.original_query, | 430 | "original_query": context.query_analysis.original_query, |
| 431 | - "normalized_query": context.query_analysis.normalized_query, | 431 | + "query_normalized": context.query_analysis.query_normalized, |
| 432 | "rewritten_query": context.query_analysis.rewritten_query, | 432 | "rewritten_query": context.query_analysis.rewritten_query, |
| 433 | "detected_language": context.query_analysis.detected_language, | 433 | "detected_language": context.query_analysis.detected_language, |
| 434 | "translations": context.query_analysis.translations, | 434 | "translations": context.query_analysis.translations, |