Compare View
Commits (4)
-
query config/ranking config优化
-
sku_filter_dimension=color 或 sku_filter_dimension=option1 / option2 / option3,以上两种方式都可以
-
后端请求模型变更(api/models.py) SearchRequest.sku_filter_dimension 从 Optional[str] 改为 Optional[List[str]]。 语义:列表表示一个或多个“维度标签”,例如: 单维度:["color"]、["option1"] 多维度:["color", "size"]、["option1", "option2"] 描述更新为:对维度组合进行分组,每个组合只保留一个 SKU。 结果格式化与去重逻辑(api/result_formatter.py) ResultFormatter.format_search_results(..., sku_filter_dimension: Optional[List[str]] = None),调用处已同步更新。 单维度旧逻辑升级为多维度逻辑: 新方法:_filter_skus_by_dimensions(skus, dimensions, option1_name, option2_name, option3_name, specifications)。 维度解析规则(按顺序处理,并去重): 若维度是 option1 / option2 / option3 → 对应 option1_value / option2_value / option3_value。 否则,将维度字符串转小写后,分别与 option1_name / option2_name / option3_name(同样转小写)对比,相等则映射到对应的 option*_value。 未能映射到任何字段的维度会被忽略。 对每个 SKU: 按解析出的字段列表(例如 ["option1_value", "option2_value"])取值,组成 key,如 ("red", "L");None 用空串 ""。 按 key 分组,每个 key 只保留遇到的第一个 SKU。 若维度列表为空,则不做过滤,返回原始 skus;若所有维度都无法解析到任何字段,则返回空列表(不返回任何子 SKU)。 Searcher 参数类型同步(search/searcher.py) Searcher.search(...) 中 sku_filter_dimension 参数类型从 Optional[str] 改为 Optional[List[str]]。 传给 ResultFormatter.format_search_results 时,直接传该列表。 前端参数格式调整(frontend/static/js/app.js) 输入框 #skuFilterDimension 依旧是一个文本框,但解析方式改为: 函数 getSkuFilterDimension(): 读取文本,如:"color" 或 "color,size" 或 "option1, color"。 用逗号 , 拆分,trim() 后过滤空串,返回字符串数组,例如: "color" → ["color"] "color,size" → ["color", "size"] 若最终数组为空,则返回 null。 搜索请求体中仍使用字段名 sku_filter_dimension,但现在值是 string[] 或 null: body: JSON.stringify({ // ... 
sku_filter_dimension: skuFilterDimension, // 例如 ["color", "size"] debug: state.debug }) 文档更新(docs/搜索API对接指南.md) 请求体示例中的类型由: "sku_filter_dimension": "string" 改为: "sku_filter_dimension": ["string"] 参数表中: 从 string 改为 array[string],说明为“维度列表,按组合分组,每个组合保留一个 SKU”。 功能说明章节“SKU筛选维度 (sku_filter_dimension)”已调整为 列表语义 + 组合去重,并补充了示例: 单维度: { "query": "芭比娃娃", "sku_filter_dimension": ["color"] } 多维度组合: { "query": "芭比娃娃", "sku_filter_dimension": ["color", "size"] } 使用方式总结 单维度去重(保持旧行为的等价写法) 旧:"sku_filter_dimension": "color" 新:"sku_filter_dimension": ["color"] 多维度组合去重(你新提的需求) 例如希望“每个 SPU 下,同一颜色+尺码组合只保留一个 SKU”: { "query": "芭比娃娃", "sku_filter_dimension": ["color", "size"] }
Showing
21 changed files
Show diff stats
api/app.py
| @@ -41,15 +41,16 @@ limiter = Limiter(key_func=get_remote_address) | @@ -41,15 +41,16 @@ limiter = Limiter(key_func=get_remote_address) | ||
| 41 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 41 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 42 | 42 | ||
| 43 | from config.env_config import ES_CONFIG | 43 | from config.env_config import ES_CONFIG |
| 44 | +from config import ConfigLoader | ||
| 44 | from utils import ESClient | 45 | from utils import ESClient |
| 45 | from search import Searcher | 46 | from search import Searcher |
| 46 | -from search.query_config import DEFAULT_INDEX_NAME | ||
| 47 | from query import QueryParser | 47 | from query import QueryParser |
| 48 | 48 | ||
| 49 | # Global instances | 49 | # Global instances |
| 50 | _es_client: Optional[ESClient] = None | 50 | _es_client: Optional[ESClient] = None |
| 51 | _searcher: Optional[Searcher] = None | 51 | _searcher: Optional[Searcher] = None |
| 52 | _query_parser: Optional[QueryParser] = None | 52 | _query_parser: Optional[QueryParser] = None |
| 53 | +_config = None | ||
| 53 | 54 | ||
| 54 | 55 | ||
| 55 | def init_service(es_host: str = "http://localhost:9200"): | 56 | def init_service(es_host: str = "http://localhost:9200"): |
| @@ -59,11 +60,17 @@ def init_service(es_host: str = "http://localhost:9200"): | @@ -59,11 +60,17 @@ def init_service(es_host: str = "http://localhost:9200"): | ||
| 59 | Args: | 60 | Args: |
| 60 | es_host: Elasticsearch host URL | 61 | es_host: Elasticsearch host URL |
| 61 | """ | 62 | """ |
| 62 | - global _es_client, _searcher, _query_parser | 63 | + global _es_client, _searcher, _query_parser, _config |
| 63 | 64 | ||
| 64 | start_time = time.time() | 65 | start_time = time.time() |
| 65 | logger.info("Initializing search service (multi-tenant)") | 66 | logger.info("Initializing search service (multi-tenant)") |
| 66 | 67 | ||
| 68 | + # Load configuration | ||
| 69 | + logger.info("Loading configuration...") | ||
| 70 | + config_loader = ConfigLoader("config/config.yaml") | ||
| 71 | + _config = config_loader.load_config() | ||
| 72 | + logger.info("Configuration loaded") | ||
| 73 | + | ||
| 67 | # Get ES credentials | 74 | # Get ES credentials |
| 68 | es_username = os.getenv('ES_USERNAME') or ES_CONFIG.get('username') | 75 | es_username = os.getenv('ES_USERNAME') or ES_CONFIG.get('username') |
| 69 | es_password = os.getenv('ES_PASSWORD') or ES_CONFIG.get('password') | 76 | es_password = os.getenv('ES_PASSWORD') or ES_CONFIG.get('password') |
| @@ -81,13 +88,13 @@ def init_service(es_host: str = "http://localhost:9200"): | @@ -81,13 +88,13 @@ def init_service(es_host: str = "http://localhost:9200"): | ||
| 81 | 88 | ||
| 82 | # Initialize components | 89 | # Initialize components |
| 83 | logger.info("Initializing query parser...") | 90 | logger.info("Initializing query parser...") |
| 84 | - _query_parser = QueryParser() | 91 | + _query_parser = QueryParser(_config) |
| 85 | 92 | ||
| 86 | logger.info("Initializing searcher...") | 93 | logger.info("Initializing searcher...") |
| 87 | - _searcher = Searcher(_es_client, _query_parser, index_name=DEFAULT_INDEX_NAME) | 94 | + _searcher = Searcher(_es_client, _config, _query_parser) |
| 88 | 95 | ||
| 89 | elapsed = time.time() - start_time | 96 | elapsed = time.time() - start_time |
| 90 | - logger.info(f"Search service ready! (took {elapsed:.2f}s) | Index: {DEFAULT_INDEX_NAME}") | 97 | + logger.info(f"Search service ready! (took {elapsed:.2f}s) | Index: {_config.es_index_name}") |
| 91 | 98 | ||
| 92 | 99 | ||
| 93 | 100 | ||
| @@ -113,6 +120,13 @@ def get_query_parser() -> QueryParser: | @@ -113,6 +120,13 @@ def get_query_parser() -> QueryParser: | ||
| 113 | return _query_parser | 120 | return _query_parser |
| 114 | 121 | ||
| 115 | 122 | ||
| 123 | +def get_config(): | ||
| 124 | + """Get global config instance.""" | ||
| 125 | + if _config is None: | ||
| 126 | + raise RuntimeError("Service not initialized") | ||
| 127 | + return _config | ||
| 128 | + | ||
| 129 | + | ||
| 116 | # Create FastAPI app with enhanced configuration | 130 | # Create FastAPI app with enhanced configuration |
| 117 | app = FastAPI( | 131 | app = FastAPI( |
| 118 | title="E-Commerce Search API", | 132 | title="E-Commerce Search API", |
api/models.py
| @@ -146,9 +146,14 @@ class SearchRequest(BaseModel): | @@ -146,9 +146,14 @@ class SearchRequest(BaseModel): | ||
| 146 | debug: bool = Field(False, description="是否返回调试信息") | 146 | debug: bool = Field(False, description="是否返回调试信息") |
| 147 | 147 | ||
| 148 | # SKU筛选参数 | 148 | # SKU筛选参数 |
| 149 | - sku_filter_dimension: Optional[str] = Field( | 149 | + sku_filter_dimension: Optional[List[str]] = Field( |
| 150 | None, | 150 | None, |
| 151 | - description="子SKU筛选维度(店铺配置)。指定后,每个SPU下的SKU将按该维度分组,每组选择第一个SKU返回。例如:'color'表示按颜色分组,每种颜色选一款。支持的值:'option1'、'option2'、'option3'或specifications中的name(如'color'、'size')" | 151 | + description=( |
| 152 | + "子SKU筛选维度(店铺配置),为字符串列表。" | ||
| 153 | + "指定后,每个SPU下的SKU将按这些维度的组合进行分组,每个维度组合只保留一个SKU返回。" | ||
| 154 | + "例如:['color'] 表示按颜色分组,每种颜色选一款;['color', 'size'] 表示按颜色+尺码组合分组。" | ||
| 155 | + "支持的值:'option1'、'option2'、'option3' 或选项名称(如 'color'、'size',将通过 option1_name/2_name/3_name 匹配)。" | ||
| 156 | + ) | ||
| 152 | ) | 157 | ) |
| 153 | 158 | ||
| 154 | # 个性化参数(预留) | 159 | # 个性化参数(预留) |
api/result_formatter.py
| @@ -14,7 +14,7 @@ class ResultFormatter: | @@ -14,7 +14,7 @@ class ResultFormatter: | ||
| 14 | es_hits: List[Dict[str, Any]], | 14 | es_hits: List[Dict[str, Any]], |
| 15 | max_score: float = 1.0, | 15 | max_score: float = 1.0, |
| 16 | language: str = "zh", | 16 | language: str = "zh", |
| 17 | - sku_filter_dimension: Optional[str] = None | 17 | + sku_filter_dimension: Optional[List[str]] = None |
| 18 | ) -> List[SpuResult]: | 18 | ) -> List[SpuResult]: |
| 19 | """ | 19 | """ |
| 20 | Convert ES hits to SpuResult list. | 20 | Convert ES hits to SpuResult list. |
| @@ -85,10 +85,10 @@ class ResultFormatter: | @@ -85,10 +85,10 @@ class ResultFormatter: | ||
| 85 | ) | 85 | ) |
| 86 | skus.append(sku) | 86 | skus.append(sku) |
| 87 | 87 | ||
| 88 | - # Apply SKU filtering if dimension is specified | 88 | + # Apply SKU filtering if dimension list is specified |
| 89 | if sku_filter_dimension and skus: | 89 | if sku_filter_dimension and skus: |
| 90 | - skus = ResultFormatter._filter_skus_by_dimension( | ||
| 91 | - skus, | 90 | + skus = ResultFormatter._filter_skus_by_dimensions( |
| 91 | + skus, | ||
| 92 | sku_filter_dimension, | 92 | sku_filter_dimension, |
| 93 | source.get('option1_name'), | 93 | source.get('option1_name'), |
| 94 | source.get('option2_name'), | 94 | source.get('option2_name'), |
| @@ -138,22 +138,22 @@ class ResultFormatter: | @@ -138,22 +138,22 @@ class ResultFormatter: | ||
| 138 | return results | 138 | return results |
| 139 | 139 | ||
| 140 | @staticmethod | 140 | @staticmethod |
| 141 | - def _filter_skus_by_dimension( | 141 | + def _filter_skus_by_dimensions( |
| 142 | skus: List[SkuResult], | 142 | skus: List[SkuResult], |
| 143 | - dimension: str, | 143 | + dimensions: List[str], |
| 144 | option1_name: Optional[str] = None, | 144 | option1_name: Optional[str] = None, |
| 145 | option2_name: Optional[str] = None, | 145 | option2_name: Optional[str] = None, |
| 146 | option3_name: Optional[str] = None, | 146 | option3_name: Optional[str] = None, |
| 147 | specifications: Optional[List[Dict[str, Any]]] = None | 147 | specifications: Optional[List[Dict[str, Any]]] = None |
| 148 | ) -> List[SkuResult]: | 148 | ) -> List[SkuResult]: |
| 149 | """ | 149 | """ |
| 150 | - Filter SKUs by dimension, keeping only one SKU per dimension value. | 150 | + Filter SKUs by one or more dimensions, keeping only one SKU per dimension value combination. |
| 151 | 151 | ||
| 152 | Args: | 152 | Args: |
| 153 | skus: List of SKU results to filter | 153 | skus: List of SKU results to filter |
| 154 | - dimension: Filter dimension, can be: | 154 | + dimensions: Filter dimensions, each dimension can be: |
| 155 | - 'option1', 'option2', 'option3': Direct option field | 155 | - 'option1', 'option2', 'option3': Direct option field |
| 156 | - - A specification name (e.g., 'color', 'size'): Match by option name | 156 | + - A specification/option name (e.g., 'color', 'size'): Match by option name |
| 157 | option1_name: Name of option1 (e.g., 'color') | 157 | option1_name: Name of option1 (e.g., 'color') |
| 158 | option2_name: Name of option2 (e.g., 'size') | 158 | option2_name: Name of option2 (e.g., 'size') |
| 159 | option3_name: Name of option3 | 159 | option3_name: Name of option3 |
| @@ -162,54 +162,59 @@ class ResultFormatter: | @@ -162,54 +162,59 @@ class ResultFormatter: | ||
| 162 | Returns: | 162 | Returns: |
| 163 | Filtered list of SKUs (one per dimension value) | 163 | Filtered list of SKUs (one per dimension value) |
| 164 | """ | 164 | """ |
| 165 | - if not skus: | 165 | + if not skus or not dimensions: |
| 166 | return skus | 166 | return skus |
| 167 | - | ||
| 168 | - # Determine which field to use for filtering | ||
| 169 | - filter_field = None | ||
| 170 | - | ||
| 171 | - # Direct option field (option1, option2, option3) | ||
| 172 | - if dimension.lower() == 'option1': | ||
| 173 | - filter_field = 'option1_value' | ||
| 174 | - elif dimension.lower() == 'option2': | ||
| 175 | - filter_field = 'option2_value' | ||
| 176 | - elif dimension.lower() == 'option3': | ||
| 177 | - filter_field = 'option3_value' | ||
| 178 | - else: | ||
| 179 | - # Try to match by option name | ||
| 180 | - dimension_lower = dimension.lower() | ||
| 181 | - if option1_name and option1_name.lower() == dimension_lower: | ||
| 182 | - filter_field = 'option1_value' | ||
| 183 | - elif option2_name and option2_name.lower() == dimension_lower: | ||
| 184 | - filter_field = 'option2_value' | ||
| 185 | - elif option3_name and option3_name.lower() == dimension_lower: | ||
| 186 | - filter_field = 'option3_value' | ||
| 187 | - | ||
| 188 | - # If no matching field found, return all SKUs (no filtering) | ||
| 189 | - if not filter_field: | ||
| 190 | - return skus | ||
| 191 | - | ||
| 192 | - # Group SKUs by dimension value and select first one from each group | ||
| 193 | - dimension_groups: Dict[str, SkuResult] = {} | ||
| 194 | - | 167 | + |
| 168 | + # Resolve each dimension to an underlying SKU field (option1_value / option2_value / option3_value) | ||
| 169 | + filter_fields: List[str] = [] | ||
| 170 | + | ||
| 171 | + for dim in dimensions: | ||
| 172 | + if not dim: | ||
| 173 | + continue | ||
| 174 | + dim_lower = dim.lower() | ||
| 175 | + | ||
| 176 | + field_name: Optional[str] = None | ||
| 177 | + # Direct option field (option1, option2, option3) | ||
| 178 | + if dim_lower == 'option1': | ||
| 179 | + field_name = 'option1_value' | ||
| 180 | + elif dim_lower == 'option2': | ||
| 181 | + field_name = 'option2_value' | ||
| 182 | + elif dim_lower == 'option3': | ||
| 183 | + field_name = 'option3_value' | ||
| 184 | + else: | ||
| 185 | + # Try to match by option name | ||
| 186 | + if option1_name and option1_name.lower() == dim_lower: | ||
| 187 | + field_name = 'option1_value' | ||
| 188 | + elif option2_name and option2_name.lower() == dim_lower: | ||
| 189 | + field_name = 'option2_value' | ||
| 190 | + elif option3_name and option3_name.lower() == dim_lower: | ||
| 191 | + field_name = 'option3_value' | ||
| 192 | + | ||
| 193 | + if field_name and field_name not in filter_fields: | ||
| 194 | + filter_fields.append(field_name) | ||
| 195 | + | ||
| 196 | + # If no matching field found for all dimensions, do not return any child SKUs | ||
| 197 | + if not filter_fields: | ||
| 198 | + return [] | ||
| 199 | + | ||
| 200 | + # Group SKUs by dimension value combination and select first one from each group | ||
| 201 | + dimension_groups: Dict[tuple, SkuResult] = {} | ||
| 202 | + | ||
| 195 | for sku in skus: | 203 | for sku in skus: |
| 196 | - # Get dimension value from the determined field | ||
| 197 | - dimension_value = None | ||
| 198 | - if filter_field == 'option1_value': | ||
| 199 | - dimension_value = sku.option1_value | ||
| 200 | - elif filter_field == 'option2_value': | ||
| 201 | - dimension_value = sku.option2_value | ||
| 202 | - elif filter_field == 'option3_value': | ||
| 203 | - dimension_value = sku.option3_value | ||
| 204 | - | ||
| 205 | - # Use empty string as key for None values | ||
| 206 | - key = str(dimension_value) if dimension_value is not None else '' | ||
| 207 | - | ||
| 208 | - # Keep first SKU for each dimension value | 204 | + # Build key as combination of all dimension values |
| 205 | + key_values: List[str] = [] | ||
| 206 | + for field in filter_fields: | ||
| 207 | + dimension_value = getattr(sku, field, None) | ||
| 208 | + # Use empty string as key part for None values | ||
| 209 | + key_values.append(str(dimension_value) if dimension_value is not None else '') | ||
| 210 | + | ||
| 211 | + key = tuple(key_values) | ||
| 212 | + | ||
| 213 | + # Keep first SKU for each dimension combination | ||
| 209 | if key not in dimension_groups: | 214 | if key not in dimension_groups: |
| 210 | dimension_groups[key] = sku | 215 | dimension_groups[key] = sku |
| 211 | - | ||
| 212 | - # Return filtered SKUs (one per dimension value) | 216 | + |
| 217 | + # Return filtered SKUs (one per dimension combination) | ||
| 213 | return list(dimension_groups.values()) | 218 | return list(dimension_groups.values()) |
| 214 | 219 | ||
| 215 | @staticmethod | 220 | @staticmethod |
api/routes/search.py
| @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: | @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: | ||
| 24 | # Try to get request ID from headers | 24 | # Try to get request ID from headers |
| 25 | reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] | 25 | reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] |
| 26 | 26 | ||
| 27 | - # Try to get user ID from headers or default to anonymous | ||
| 28 | - uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous' | 27 | + # Try to get user ID from headers; if not found, use "-1" for correlation |
| 28 | + uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1" | ||
| 29 | 29 | ||
| 30 | return reqid, uid | 30 | return reqid, uid |
| 31 | 31 | ||
| @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): | @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 70 | set_current_request_context(context) | 70 | set_current_request_context(context) |
| 71 | 71 | ||
| 72 | try: | 72 | try: |
| 73 | - # Log request start | 73 | + # Log request start (English logs, with key search parameters) |
| 74 | + client_ip = http_request.client.host if http_request.client else "unknown" | ||
| 75 | + user_agent = http_request.headers.get("User-Agent", "unknown")[:200] | ||
| 74 | context.logger.info( | 76 | context.logger.info( |
| 75 | - f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | " | ||
| 76 | - f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}", | 77 | + "Received search request | " |
| 78 | + f"Tenant: {tenant_id} | " | ||
| 79 | + f"Query: {request.query} | " | ||
| 80 | + f"IP: {client_ip} | " | ||
| 81 | + f"User agent: {user_agent} | " | ||
| 82 | + f"size: {request.size} | from: {request.from_} | " | ||
| 83 | + f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | " | ||
| 84 | + f"min_score: {request.min_score} | " | ||
| 85 | + f"language: {request.language} | " | ||
| 86 | + f"debug: {request.debug} | " | ||
| 87 | + f"sku_filter_dimension: {request.sku_filter_dimension} | " | ||
| 88 | + f"filters: {request.filters} | " | ||
| 89 | + f"range_filters: {request.range_filters} | " | ||
| 90 | + f"facets: {request.facets}", | ||
| 77 | extra={'reqid': context.reqid, 'uid': context.uid} | 91 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 78 | ) | 92 | ) |
| 79 | 93 | ||
| @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): | @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 121 | if context: | 135 | if context: |
| 122 | context.set_error(e) | 136 | context.set_error(e) |
| 123 | context.logger.error( | 137 | context.logger.error( |
| 124 | - f"搜索请求失败 | 错误: {str(e)}", | 138 | + f"Search request failed | error: {str(e)}", |
| 125 | extra={'reqid': context.reqid, 'uid': context.uid} | 139 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 126 | ) | 140 | ) |
| 127 | raise HTTPException(status_code=500, detail=str(e)) | 141 | raise HTTPException(status_code=500, detail=str(e)) |
| @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 164 | set_current_request_context(context) | 178 | set_current_request_context(context) |
| 165 | 179 | ||
| 166 | try: | 180 | try: |
| 167 | - # Log request start | 181 | + # Log request start for image search (English) |
| 182 | + client_ip = http_request.client.host if http_request.client else "unknown" | ||
| 168 | context.logger.info( | 183 | context.logger.info( |
| 169 | - f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | " | ||
| 170 | - f"IP: {http_request.client.host if http_request.client else 'unknown'}", | 184 | + "Received image search request | " |
| 185 | + f"Tenant: {tenant_id} | " | ||
| 186 | + f"Image URL: {request.image_url} | " | ||
| 187 | + f"IP: {client_ip}", | ||
| 171 | extra={'reqid': context.reqid, 'uid': context.uid} | 188 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 172 | ) | 189 | ) |
| 173 | 190 | ||
| @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 202 | if context: | 219 | if context: |
| 203 | context.set_error(e) | 220 | context.set_error(e) |
| 204 | context.logger.error( | 221 | context.logger.error( |
| 205 | - f"图片搜索请求参数错误 | 错误: {str(e)}", | 222 | + f"Image search request parameter error | error: {str(e)}", |
| 206 | extra={'reqid': context.reqid, 'uid': context.uid} | 223 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 207 | ) | 224 | ) |
| 208 | raise HTTPException(status_code=400, detail=str(e)) | 225 | raise HTTPException(status_code=400, detail=str(e)) |
| @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 210 | if context: | 227 | if context: |
| 211 | context.set_error(e) | 228 | context.set_error(e) |
| 212 | context.logger.error( | 229 | context.logger.error( |
| 213 | - f"图片搜索请求失败 | 错误: {str(e)}", | 230 | + f"Image search request failed | error: {str(e)}", |
| 214 | extra={'reqid': context.reqid, 'uid': context.uid} | 231 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 215 | ) | 232 | ) |
| 216 | raise HTTPException(status_code=500, detail=str(e)) | 233 | raise HTTPException(status_code=500, detail=str(e)) |
config/__init__.py
| @@ -23,6 +23,10 @@ from .config_loader import ( | @@ -23,6 +23,10 @@ from .config_loader import ( | ||
| 23 | RerankConfig, | 23 | RerankConfig, |
| 24 | ConfigurationError | 24 | ConfigurationError |
| 25 | ) | 25 | ) |
| 26 | +from .utils import ( | ||
| 27 | + get_match_fields_for_index, | ||
| 28 | + get_domain_fields | ||
| 29 | +) | ||
| 26 | 30 | ||
| 27 | __all__ = [ | 31 | __all__ = [ |
| 28 | # Field types | 32 | # Field types |
| @@ -46,4 +50,6 @@ __all__ = [ | @@ -46,4 +50,6 @@ __all__ = [ | ||
| 46 | 'FunctionScoreConfig', | 50 | 'FunctionScoreConfig', |
| 47 | 'RerankConfig', | 51 | 'RerankConfig', |
| 48 | 'ConfigurationError', | 52 | 'ConfigurationError', |
| 53 | + 'get_match_fields_for_index', | ||
| 54 | + 'get_domain_fields', | ||
| 49 | ] | 55 | ] |
config/config.yaml
| @@ -412,6 +412,11 @@ query_config: | @@ -412,6 +412,11 @@ query_config: | ||
| 412 | text_embedding_field: "title_embedding" # Field name for text embeddings | 412 | text_embedding_field: "title_embedding" # Field name for text embeddings |
| 413 | image_embedding_field: null # Field name for image embeddings (if not set, will auto-detect) | 413 | image_embedding_field: null # Field name for image embeddings (if not set, will auto-detect) |
| 414 | 414 | ||
| 415 | + # Embedding disable thresholds (disable vector search for short queries) | ||
| 416 | + embedding_disable_thresholds: | ||
| 417 | + chinese_char_limit: 4 # Disable embedding for Chinese queries with <= 4 characters | ||
| 418 | + english_word_limit: 3 # Disable embedding for English queries with <= 3 words | ||
| 419 | + | ||
| 415 | # Translation API (DeepL) | 420 | # Translation API (DeepL) |
| 416 | translation_service: "deepl" | 421 | translation_service: "deepl" |
| 417 | translation_api_key: null # Set via environment variable | 422 | translation_api_key: null # Set via environment variable |
config/config_loader.py
| @@ -58,6 +58,10 @@ class QueryConfig: | @@ -58,6 +58,10 @@ class QueryConfig: | ||
| 58 | text_embedding_field: Optional[str] = None # Field name for text embeddings (e.g., "title_embedding") | 58 | text_embedding_field: Optional[str] = None # Field name for text embeddings (e.g., "title_embedding") |
| 59 | image_embedding_field: Optional[str] = None # Field name for image embeddings (e.g., "image_embedding") | 59 | image_embedding_field: Optional[str] = None # Field name for image embeddings (e.g., "image_embedding") |
| 60 | 60 | ||
| 61 | + # Embedding disable thresholds (disable vector search for short queries) | ||
| 62 | + embedding_disable_chinese_char_limit: int = 4 # Disable embedding for Chinese queries with <= this many characters | ||
| 63 | + embedding_disable_english_word_limit: int = 3 # Disable embedding for English queries with <= this many words | ||
| 64 | + | ||
| 61 | # ES source fields configuration - fields to return in search results | 65 | # ES source fields configuration - fields to return in search results |
| 62 | # If None, auto-collect from field configs (fields with return_in_source=True) | 66 | # If None, auto-collect from field configs (fields with return_in_source=True) |
| 63 | # If empty list, return all fields. Otherwise, only return specified fields. | 67 | # If empty list, return all fields. Otherwise, only return specified fields. |
| @@ -165,15 +169,18 @@ class ConfigLoader: | @@ -165,15 +169,18 @@ class ConfigLoader: | ||
| 165 | 169 | ||
| 166 | return rewrite_dict | 170 | return rewrite_dict |
| 167 | 171 | ||
| 168 | - def load_config(self) -> SearchConfig: | 172 | + def load_config(self, validate: bool = True) -> SearchConfig: |
| 169 | """ | 173 | """ |
| 170 | Load unified configuration from YAML file. | 174 | Load unified configuration from YAML file. |
| 171 | 175 | ||
| 176 | + Args: | ||
| 177 | + validate: Whether to validate configuration after loading (default: True) | ||
| 178 | + | ||
| 172 | Returns: | 179 | Returns: |
| 173 | SearchConfig object | 180 | SearchConfig object |
| 174 | 181 | ||
| 175 | Raises: | 182 | Raises: |
| 176 | - ConfigurationError: If config file not found or invalid | 183 | + ConfigurationError: If config file not found, invalid, or validation fails |
| 177 | """ | 184 | """ |
| 178 | if not self.config_file.exists(): | 185 | if not self.config_file.exists(): |
| 179 | raise ConfigurationError(f"Configuration file not found: {self.config_file}") | 186 | raise ConfigurationError(f"Configuration file not found: {self.config_file}") |
| @@ -184,7 +191,16 @@ class ConfigLoader: | @@ -184,7 +191,16 @@ class ConfigLoader: | ||
| 184 | except yaml.YAMLError as e: | 191 | except yaml.YAMLError as e: |
| 185 | raise ConfigurationError(f"Invalid YAML in {self.config_file}: {e}") | 192 | raise ConfigurationError(f"Invalid YAML in {self.config_file}: {e}") |
| 186 | 193 | ||
| 187 | - return self._parse_config(config_data) | 194 | + config = self._parse_config(config_data) |
| 195 | + | ||
| 196 | + # Auto-validate configuration | ||
| 197 | + if validate: | ||
| 198 | + errors = self.validate_config(config) | ||
| 199 | + if errors: | ||
| 200 | + error_msg = "Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors) | ||
| 201 | + raise ConfigurationError(error_msg) | ||
| 202 | + | ||
| 203 | + return config | ||
| 188 | 204 | ||
| 189 | def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig: | 205 | def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig: |
| 190 | """Parse configuration dictionary into SearchConfig object.""" | 206 | """Parse configuration dictionary into SearchConfig object.""" |
| @@ -214,43 +230,48 @@ class ConfigLoader: | @@ -214,43 +230,48 @@ class ConfigLoader: | ||
| 214 | if field.return_in_source | 230 | if field.return_in_source |
| 215 | ] | 231 | ] |
| 216 | 232 | ||
| 233 | + # Parse embedding disable thresholds | ||
| 234 | + embedding_thresholds = query_config_data.get("embedding_disable_thresholds", {}) | ||
| 235 | + | ||
| 217 | query_config = QueryConfig( | 236 | query_config = QueryConfig( |
| 218 | - supported_languages=query_config_data.get("supported_languages", ["zh", "en"]), | ||
| 219 | - default_language=query_config_data.get("default_language", "zh"), | 237 | + supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], |
| 238 | + default_language=query_config_data.get("default_language") or "zh", | ||
| 220 | enable_translation=query_config_data.get("enable_translation", True), | 239 | enable_translation=query_config_data.get("enable_translation", True), |
| 221 | enable_text_embedding=query_config_data.get("enable_text_embedding", True), | 240 | enable_text_embedding=query_config_data.get("enable_text_embedding", True), |
| 222 | enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), | 241 | enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), |
| 223 | rewrite_dictionary=rewrite_dictionary, | 242 | rewrite_dictionary=rewrite_dictionary, |
| 224 | translation_api_key=query_config_data.get("translation_api_key"), | 243 | translation_api_key=query_config_data.get("translation_api_key"), |
| 225 | - translation_service=query_config_data.get("translation_service", "deepl"), | 244 | + translation_service=query_config_data.get("translation_service") or "deepl", |
| 226 | translation_glossary_id=query_config_data.get("translation_glossary_id"), | 245 | translation_glossary_id=query_config_data.get("translation_glossary_id"), |
| 227 | - translation_context=query_config_data.get("translation_context", "e-commerce product search"), | 246 | + translation_context=query_config_data.get("translation_context") or "e-commerce product search", |
| 228 | text_embedding_field=query_config_data.get("text_embedding_field"), | 247 | text_embedding_field=query_config_data.get("text_embedding_field"), |
| 229 | image_embedding_field=query_config_data.get("image_embedding_field"), | 248 | image_embedding_field=query_config_data.get("image_embedding_field"), |
| 249 | + embedding_disable_chinese_char_limit=embedding_thresholds.get("chinese_char_limit", 4), | ||
| 250 | + embedding_disable_english_word_limit=embedding_thresholds.get("english_word_limit", 3), | ||
| 230 | source_fields=source_fields | 251 | source_fields=source_fields |
| 231 | ) | 252 | ) |
| 232 | 253 | ||
| 233 | # Parse ranking config | 254 | # Parse ranking config |
| 234 | ranking_data = config_data.get("ranking", {}) | 255 | ranking_data = config_data.get("ranking", {}) |
| 235 | ranking = RankingConfig( | 256 | ranking = RankingConfig( |
| 236 | - expression=ranking_data.get("expression", "bm25() + 0.2*text_embedding_relevance()"), | ||
| 237 | - description=ranking_data.get("description", "Default BM25 + text embedding ranking") | 257 | + expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()", |
| 258 | + description=ranking_data.get("description") or "Default BM25 + text embedding ranking" | ||
| 238 | ) | 259 | ) |
| 239 | 260 | ||
| 240 | # Parse Function Score configuration | 261 | # Parse Function Score configuration |
| 241 | fs_data = config_data.get("function_score", {}) | 262 | fs_data = config_data.get("function_score", {}) |
| 242 | function_score = FunctionScoreConfig( | 263 | function_score = FunctionScoreConfig( |
| 243 | - score_mode=fs_data.get("score_mode", "sum"), | ||
| 244 | - boost_mode=fs_data.get("boost_mode", "multiply"), | ||
| 245 | - functions=fs_data.get("functions", []) | 264 | + score_mode=fs_data.get("score_mode") or "sum", |
| 265 | + boost_mode=fs_data.get("boost_mode") or "multiply", | ||
| 266 | + functions=fs_data.get("functions") or [] | ||
| 246 | ) | 267 | ) |
| 247 | 268 | ||
| 248 | # Parse Rerank configuration | 269 | # Parse Rerank configuration |
| 249 | rerank_data = config_data.get("rerank", {}) | 270 | rerank_data = config_data.get("rerank", {}) |
| 250 | rerank = RerankConfig( | 271 | rerank = RerankConfig( |
| 251 | enabled=rerank_data.get("enabled", False), | 272 | enabled=rerank_data.get("enabled", False), |
| 252 | - expression=rerank_data.get("expression", ""), | ||
| 253 | - description=rerank_data.get("description", "") | 273 | + expression=rerank_data.get("expression") or "", |
| 274 | + description=rerank_data.get("description") or "" | ||
| 254 | ) | 275 | ) |
| 255 | 276 | ||
| 256 | # Parse SPU config | 277 | # Parse SPU config |
| @@ -447,21 +468,43 @@ class ConfigLoader: | @@ -447,21 +468,43 @@ class ConfigLoader: | ||
| 447 | output_path = Path(output_path) | 468 | output_path = Path(output_path) |
| 448 | 469 | ||
| 449 | # Convert config back to dictionary format | 470 | # Convert config back to dictionary format |
| 471 | + query_config_dict = { | ||
| 472 | + "supported_languages": config.query_config.supported_languages, | ||
| 473 | + "default_language": config.query_config.default_language, | ||
| 474 | + "enable_translation": config.query_config.enable_translation, | ||
| 475 | + "enable_text_embedding": config.query_config.enable_text_embedding, | ||
| 476 | + "enable_query_rewrite": config.query_config.enable_query_rewrite, | ||
| 477 | + "translation_service": config.query_config.translation_service, | ||
| 478 | + } | ||
| 479 | + | ||
| 480 | + # Add optional fields only if they are set | ||
| 481 | + if config.query_config.translation_api_key: | ||
| 482 | + query_config_dict["translation_api_key"] = config.query_config.translation_api_key | ||
| 483 | + if config.query_config.translation_glossary_id: | ||
| 484 | + query_config_dict["translation_glossary_id"] = config.query_config.translation_glossary_id | ||
| 485 | + if config.query_config.translation_context: | ||
| 486 | + query_config_dict["translation_context"] = config.query_config.translation_context | ||
| 487 | + if config.query_config.text_embedding_field: | ||
| 488 | + query_config_dict["text_embedding_field"] = config.query_config.text_embedding_field | ||
| 489 | + if config.query_config.image_embedding_field: | ||
| 490 | + query_config_dict["image_embedding_field"] = config.query_config.image_embedding_field | ||
| 491 | + if config.query_config.source_fields: | ||
| 492 | + query_config_dict["source_fields"] = config.query_config.source_fields | ||
| 493 | + | ||
| 494 | + # Add embedding disable thresholds only when they differ from 4 (Chinese chars) / 3 (English words) | ||
| 495 | + if (config.query_config.embedding_disable_chinese_char_limit != 4 or | ||
| 496 | + config.query_config.embedding_disable_english_word_limit != 3): | ||
| 497 | + query_config_dict["embedding_disable_thresholds"] = { | ||
| 498 | + "chinese_char_limit": config.query_config.embedding_disable_chinese_char_limit, | ||
| 499 | + "english_word_limit": config.query_config.embedding_disable_english_word_limit | ||
| 500 | + } | ||
| 501 | + | ||
| 450 | config_dict = { | 502 | config_dict = { |
| 451 | "es_index_name": config.es_index_name, | 503 | "es_index_name": config.es_index_name, |
| 452 | "es_settings": config.es_settings, | 504 | "es_settings": config.es_settings, |
| 453 | "fields": [self._field_to_dict(field) for field in config.fields], | 505 | "fields": [self._field_to_dict(field) for field in config.fields], |
| 454 | "indexes": [self._index_to_dict(index) for index in config.indexes], | 506 | "indexes": [self._index_to_dict(index) for index in config.indexes], |
| 455 | - "query_config": { | ||
| 456 | - "supported_languages": config.query_config.supported_languages, | ||
| 457 | - "default_language": config.query_config.default_language, | ||
| 458 | - "enable_translation": config.query_config.enable_translation, | ||
| 459 | - "enable_text_embedding": config.query_config.enable_text_embedding, | ||
| 460 | - "enable_query_rewrite": config.query_config.enable_query_rewrite, | ||
| 461 | - # rewrite_dictionary is stored in separate file, not in config | ||
| 462 | - "translation_api_key": config.query_config.translation_api_key, | ||
| 463 | - "translation_service": config.query_config.translation_service, | ||
| 464 | - }, | 507 | + "query_config": query_config_dict, |
| 465 | "ranking": { | 508 | "ranking": { |
| 466 | "expression": config.ranking.expression, | 509 | "expression": config.ranking.expression, |
| 467 | "description": config.ranking.description | 510 | "description": config.ranking.description |
| @@ -505,7 +548,7 @@ class ConfigLoader: | @@ -505,7 +548,7 @@ class ConfigLoader: | ||
| 505 | f.write(f"{key}\t{value}\n") | 548 | f.write(f"{key}\t{value}\n") |
| 506 | 549 | ||
| 507 | def _field_to_dict(self, field: FieldConfig) -> Dict[str, Any]: | 550 | def _field_to_dict(self, field: FieldConfig) -> Dict[str, Any]: |
| 508 | - """Convert FieldConfig to dictionary.""" | 551 | + """Convert FieldConfig to dictionary, preserving all fields.""" |
| 509 | result = { | 552 | result = { |
| 510 | "name": field.name, | 553 | "name": field.name, |
| 511 | "type": field.field_type.value, | 554 | "type": field.field_type.value, |
| @@ -513,36 +556,49 @@ class ConfigLoader: | @@ -513,36 +556,49 @@ class ConfigLoader: | ||
| 513 | "boost": field.boost, | 556 | "boost": field.boost, |
| 514 | "store": field.store, | 557 | "store": field.store, |
| 515 | "index": field.index, | 558 | "index": field.index, |
| 559 | + "return_in_source": field.return_in_source, | ||
| 516 | } | 560 | } |
| 517 | 561 | ||
| 562 | + # Add optional fields only if they differ from defaults or are set | ||
| 518 | if field.analyzer: | 563 | if field.analyzer: |
| 519 | result["analyzer"] = field.analyzer.value | 564 | result["analyzer"] = field.analyzer.value |
| 520 | if field.search_analyzer: | 565 | if field.search_analyzer: |
| 521 | result["search_analyzer"] = field.search_analyzer.value | 566 | result["search_analyzer"] = field.search_analyzer.value |
| 522 | if field.multi_language: | 567 | if field.multi_language: |
| 523 | result["multi_language"] = field.multi_language | 568 | result["multi_language"] = field.multi_language |
| 524 | - result["languages"] = field.languages | 569 | + if field.languages: |
| 570 | + result["languages"] = field.languages | ||
| 525 | if field.embedding_dims != 1024: | 571 | if field.embedding_dims != 1024: |
| 526 | result["embedding_dims"] = field.embedding_dims | 572 | result["embedding_dims"] = field.embedding_dims |
| 527 | if field.embedding_similarity != "dot_product": | 573 | if field.embedding_similarity != "dot_product": |
| 528 | result["embedding_similarity"] = field.embedding_similarity | 574 | result["embedding_similarity"] = field.embedding_similarity |
| 529 | if field.nested: | 575 | if field.nested: |
| 530 | result["nested"] = field.nested | 576 | result["nested"] = field.nested |
| 531 | - result["nested_properties"] = field.nested_properties | 577 | + if field.nested_properties: |
| 578 | + result["nested_properties"] = field.nested_properties | ||
| 579 | + if field.keyword_subfield: | ||
| 580 | + result["keyword_subfield"] = field.keyword_subfield | ||
| 581 | + if field.keyword_ignore_above != 256: | ||
| 582 | + result["keyword_ignore_above"] = field.keyword_ignore_above | ||
| 583 | + if field.keyword_normalizer: | ||
| 584 | + result["keyword_normalizer"] = field.keyword_normalizer | ||
| 532 | 585 | ||
| 533 | return result | 586 | return result |
| 534 | 587 | ||
| 535 | def _index_to_dict(self, index: IndexConfig) -> Dict[str, Any]: | 588 | def _index_to_dict(self, index: IndexConfig) -> Dict[str, Any]: |
| 536 | - """Convert IndexConfig to dictionary.""" | 589 | + """Convert IndexConfig to dictionary, preserving all fields.""" |
| 537 | result = { | 590 | result = { |
| 538 | "name": index.name, | 591 | "name": index.name, |
| 539 | "label": index.label, | 592 | "label": index.label, |
| 540 | "fields": index.fields, | 593 | "fields": index.fields, |
| 541 | "analyzer": index.analyzer.value, | 594 | "analyzer": index.analyzer.value, |
| 542 | - "boost": index.boost, | ||
| 543 | - "example": index.example | ||
| 544 | } | 595 | } |
| 545 | - | 596 | + |
| 597 | + # Add optional fields only if they differ from defaults or are set | ||
| 598 | + if index.boost != 1.0: | ||
| 599 | + result["boost"] = index.boost | ||
| 600 | + if index.example: | ||
| 601 | + result["example"] = index.example | ||
| 546 | if index.language_field_mapping: | 602 | if index.language_field_mapping: |
| 547 | result["language_field_mapping"] = index.language_field_mapping | 603 | result["language_field_mapping"] = index.language_field_mapping |
| 548 | 604 |
| @@ -0,0 +1,70 @@ | @@ -0,0 +1,70 @@ | ||
| 1 | +""" | ||
| 2 | +Configuration utility functions. | ||
| 3 | + | ||
| 4 | +Helper functions for working with SearchConfig objects. | ||
| 5 | +""" | ||
| 6 | + | ||
| 7 | +from typing import Dict, List | ||
| 8 | +from .config_loader import SearchConfig | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +def get_match_fields_for_index(config: SearchConfig, index_name: str = "default") -> List[str]: | ||
| 12 | + """ | ||
| 13 | + Generate match fields list with boost from IndexConfig and FieldConfig. | ||
| 14 | + | ||
| 15 | + Args: | ||
| 16 | + config: SearchConfig instance | ||
| 17 | + index_name: Name of the index domain (default: "default") | ||
| 18 | + | ||
| 19 | + Returns: | ||
| 20 | + List of field names with boost, e.g., ["title_zh^3.0", "brief_zh^1.5"] | ||
| 21 | + """ | ||
| 22 | + # Find the index config | ||
| 23 | + index_config = None | ||
| 24 | + for idx in config.indexes: | ||
| 25 | + if idx.name == index_name: | ||
| 26 | + index_config = idx | ||
| 27 | + break | ||
| 28 | + | ||
| 29 | + if not index_config: | ||
| 30 | + return [] | ||
| 31 | + | ||
| 32 | + # Create a field name to FieldConfig mapping | ||
| 33 | + field_map = {field.name: field for field in config.fields} | ||
| 34 | + | ||
| 35 | + # Generate match fields with boost | ||
| 36 | + match_fields = [] | ||
| 37 | + for field_name in index_config.fields: | ||
| 38 | + field_config = field_map.get(field_name) | ||
| 39 | + if field_config: | ||
| 40 | + # Combine index boost and field boost | ||
| 41 | + total_boost = index_config.boost * field_config.boost | ||
| 42 | + if total_boost != 1.0: | ||
| 43 | + match_fields.append(f"{field_name}^{total_boost}") | ||
| 44 | + else: | ||
| 45 | + match_fields.append(field_name) | ||
| 46 | + else: | ||
| 47 | + # Field not found in config, use index boost only | ||
| 48 | + if index_config.boost != 1.0: | ||
| 49 | + match_fields.append(f"{field_name}^{index_config.boost}") | ||
| 50 | + else: | ||
| 51 | + match_fields.append(field_name) | ||
| 52 | + | ||
| 53 | + return match_fields | ||
| 54 | + | ||
| 55 | + | ||
| 56 | +def get_domain_fields(config: SearchConfig) -> Dict[str, List[str]]: | ||
| 57 | + """ | ||
| 58 | + Generate domain-specific match fields from all index configs. | ||
| 59 | + | ||
| 60 | + Args: | ||
| 61 | + config: SearchConfig instance | ||
| 62 | + | ||
| 63 | + Returns: | ||
| 64 | + Dictionary mapping domain name to list of match fields | ||
| 65 | + """ | ||
| 66 | + domain_fields = {} | ||
| 67 | + for index_config in config.indexes: | ||
| 68 | + domain_fields[index_config.name] = get_match_fields_for_index(config, index_config.name) | ||
| 69 | + return domain_fields | ||
| 70 | + |
context/request_context.py
| @@ -59,9 +59,10 @@ class RequestContext: | @@ -59,9 +59,10 @@ class RequestContext: | ||
| 59 | """ | 59 | """ |
| 60 | 60 | ||
| 61 | def __init__(self, reqid: str = None, uid: str = None): | 61 | def __init__(self, reqid: str = None, uid: str = None): |
| 62 | - # 生成唯一请求ID | 62 | + # 生成唯一请求ID;如果外部未提供,则自动生成 |
| 63 | + # 如果无法获取到 uid,则使用 "-1" 作为占位,用于日志关联 | ||
| 63 | self.reqid = reqid or str(uuid.uuid4())[:8] | 64 | self.reqid = reqid or str(uuid.uuid4())[:8] |
| 64 | - self.uid = uid or 'anonymous' | 65 | + self.uid = uid or "-1" |
| 65 | 66 | ||
| 66 | # 查询分析结果 | 67 | # 查询分析结果 |
| 67 | self.query_analysis = QueryAnalysisResult() | 68 | self.query_analysis = QueryAnalysisResult() |
| @@ -111,7 +112,10 @@ class RequestContext: | @@ -111,7 +112,10 @@ class RequestContext: | ||
| 111 | """ | 112 | """ |
| 112 | start_time = time.time() | 113 | start_time = time.time() |
| 113 | self.performance_metrics.stage_start_times[stage.value] = start_time | 114 | self.performance_metrics.stage_start_times[stage.value] = start_time |
| 114 | - self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) | 115 | + self.logger.debug( |
| 116 | + f"Start stage | {stage.value}", | ||
| 117 | + extra={'reqid': self.reqid, 'uid': self.uid} | ||
| 118 | + ) | ||
| 115 | return start_time | 119 | return start_time |
| 116 | 120 | ||
| 117 | def end_stage(self, stage: RequestContextStage) -> float: | 121 | def end_stage(self, stage: RequestContextStage) -> float: |
| @@ -125,7 +129,10 @@ class RequestContext: | @@ -125,7 +129,10 @@ class RequestContext: | ||
| 125 | 阶段耗时(毫秒) | 129 | 阶段耗时(毫秒) |
| 126 | """ | 130 | """ |
| 127 | if stage.value not in self.performance_metrics.stage_start_times: | 131 | if stage.value not in self.performance_metrics.stage_start_times: |
| 128 | - self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) | 132 | + self.logger.warning( |
| 133 | + f"Stage not started | {stage.value}", | ||
| 134 | + extra={'reqid': self.reqid, 'uid': self.uid} | ||
| 135 | + ) | ||
| 129 | return 0.0 | 136 | return 0.0 |
| 130 | 137 | ||
| 131 | start_time = self.performance_metrics.stage_start_times[stage.value] | 138 | start_time = self.performance_metrics.stage_start_times[stage.value] |
| @@ -133,7 +140,7 @@ class RequestContext: | @@ -133,7 +140,7 @@ class RequestContext: | ||
| 133 | self.performance_metrics.stage_timings[stage.value] = duration_ms | 140 | self.performance_metrics.stage_timings[stage.value] = duration_ms |
| 134 | 141 | ||
| 135 | self.logger.debug( | 142 | self.logger.debug( |
| 136 | - f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms", | 143 | + f"End stage | {stage.value} | duration: {duration_ms:.2f}ms", |
| 137 | extra={'reqid': self.reqid, 'uid': self.uid} | 144 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 138 | ) | 145 | ) |
| 139 | return duration_ms | 146 | return duration_ms |
| @@ -162,7 +169,7 @@ class RequestContext: | @@ -162,7 +169,7 @@ class RequestContext: | ||
| 162 | setattr(self.query_analysis, key, value) | 169 | setattr(self.query_analysis, key, value) |
| 163 | else: | 170 | else: |
| 164 | self.logger.warning( | 171 | self.logger.warning( |
| 165 | - f"未知的查询分析字段 | {key}", | 172 | + f"Unknown query analysis field | {key}", |
| 166 | extra={'reqid': self.reqid, 'uid': self.uid} | 173 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 167 | ) | 174 | ) |
| 168 | 175 | ||
| @@ -175,7 +182,10 @@ class RequestContext: | @@ -175,7 +182,10 @@ class RequestContext: | ||
| 175 | value: 结果值 | 182 | value: 结果值 |
| 176 | """ | 183 | """ |
| 177 | self.intermediate_results[key] = value | 184 | self.intermediate_results[key] = value |
| 178 | - self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid}) | 185 | + self.logger.debug( |
| 186 | + f"Store intermediate result | {key}", | ||
| 187 | + extra={'reqid': self.reqid, 'uid': self.uid} | ||
| 188 | + ) | ||
| 179 | 189 | ||
| 180 | def get_intermediate_result(self, key: str, default: Any = None) -> Any: | 190 | def get_intermediate_result(self, key: str, default: Any = None) -> Any: |
| 181 | """ | 191 | """ |
| @@ -213,7 +223,7 @@ class RequestContext: | @@ -213,7 +223,7 @@ class RequestContext: | ||
| 213 | 'details': {} | 223 | 'details': {} |
| 214 | } | 224 | } |
| 215 | self.logger.error( | 225 | self.logger.error( |
| 216 | - f"设置错误信息 | {type(error).__name__}: {str(error)}", | 226 | + f"Set error info | {type(error).__name__}: {str(error)}", |
| 217 | extra={'reqid': self.reqid, 'uid': self.uid} | 227 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 218 | ) | 228 | ) |
| 219 | 229 | ||
| @@ -286,13 +296,13 @@ class RequestContext: | @@ -286,13 +296,13 @@ class RequestContext: | ||
| 286 | 296 | ||
| 287 | # 构建详细的日志消息 | 297 | # 构建详细的日志消息 |
| 288 | msg_parts = [ | 298 | msg_parts = [ |
| 289 | - f"搜索请求性能摘要 | reqid: {self.reqid}", | ||
| 290 | - f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms" | 299 | + f"Search request performance summary | reqid: {self.reqid}", |
| 300 | + f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms" | ||
| 291 | ] | 301 | ] |
| 292 | 302 | ||
| 293 | # 添加各阶段耗时 | 303 | # 添加各阶段耗时 |
| 294 | if summary['performance']['stage_timings_ms']: | 304 | if summary['performance']['stage_timings_ms']: |
| 295 | - msg_parts.append("阶段耗时:") | 305 | + msg_parts.append("Stage durations:") |
| 296 | for stage, duration in summary['performance']['stage_timings_ms'].items(): | 306 | for stage, duration in summary['performance']['stage_timings_ms'].items(): |
| 297 | percentage = summary['performance']['stage_percentages'].get(stage, 0) | 307 | percentage = summary['performance']['stage_percentages'].get(stage, 0) |
| 298 | msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") | 308 | msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") |
| @@ -300,25 +310,26 @@ class RequestContext: | @@ -300,25 +310,26 @@ class RequestContext: | ||
| 300 | # 添加查询信息 | 310 | # 添加查询信息 |
| 301 | if summary['query_analysis']['original_query']: | 311 | if summary['query_analysis']['original_query']: |
| 302 | msg_parts.append( | 312 | msg_parts.append( |
| 303 | - f"查询: '{summary['query_analysis']['original_query']}' " | 313 | + "Query: " |
| 314 | + f"'{summary['query_analysis']['original_query']}' " | ||
| 304 | f"-> '{summary['query_analysis']['rewritten_query']}' " | 315 | f"-> '{summary['query_analysis']['rewritten_query']}' " |
| 305 | f"({summary['query_analysis']['detected_language']})" | 316 | f"({summary['query_analysis']['detected_language']})" |
| 306 | ) | 317 | ) |
| 307 | 318 | ||
| 308 | # 添加结果统计 | 319 | # 添加结果统计 |
| 309 | msg_parts.append( | 320 | msg_parts.append( |
| 310 | - f"结果: {summary['results']['total_hits']} hits " | ||
| 311 | - f"ES查询: {summary['results']['es_query_size']} chars" | 321 | + f"Results: {summary['results']['total_hits']} hits " |
| 322 | + f"ES query size: {summary['results']['es_query_size']} chars" | ||
| 312 | ) | 323 | ) |
| 313 | 324 | ||
| 314 | # 添加错误信息(如果有) | 325 | # 添加错误信息(如果有) |
| 315 | if summary['request_info']['has_error']: | 326 | if summary['request_info']['has_error']: |
| 316 | error_info = self.metadata['error_info'] | 327 | error_info = self.metadata['error_info'] |
| 317 | - msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}") | 328 | + msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}") |
| 318 | 329 | ||
| 319 | # 添加警告信息(如果有) | 330 | # 添加警告信息(如果有) |
| 320 | if summary['request_info']['warnings_count'] > 0: | 331 | if summary['request_info']['warnings_count'] > 0: |
| 321 | - msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个") | 332 | + msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}") |
| 322 | 333 | ||
| 323 | log_message = " | ".join(msg_parts) | 334 | log_message = " | ".join(msg_parts) |
| 324 | 335 |
docs/常用查询 - ES.md
| @@ -17,4 +17,19 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -17,4 +17,19 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 17 | ] | 17 | ] |
| 18 | } | 18 | } |
| 19 | } | 19 | } |
| 20 | - }' | ||
| 21 | \ No newline at end of file | 20 | \ No newline at end of file |
| 21 | + }' | ||
| 22 | + | ||
| 23 | + | ||
| 24 | + | ||
| 25 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 26 | + "size": 5, | ||
| 27 | + "query": { | ||
| 28 | + "bool": { | ||
| 29 | + "filter": [ | ||
| 30 | + { "term": { "spu_id": "74123" } } | ||
| 31 | + ] | ||
| 32 | + } | ||
| 33 | + } | ||
| 34 | + }' | ||
| 35 | + | ||
| 36 | + |
docs/搜索API对接指南.md
| @@ -104,7 +104,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -104,7 +104,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 104 | "sort_by": "string", | 104 | "sort_by": "string", |
| 105 | "sort_order": "desc", | 105 | "sort_order": "desc", |
| 106 | "min_score": 0.0, | 106 | "min_score": 0.0, |
| 107 | - "sku_filter_dimension": "string", | 107 | + "sku_filter_dimension": ["string"], |
| 108 | "debug": false, | 108 | "debug": false, |
| 109 | "user_id": "string", | 109 | "user_id": "string", |
| 110 | "session_id": "string" | 110 | "session_id": "string" |
| @@ -127,7 +127,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -127,7 +127,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 127 | | `sort_by` | string | N | null | 排序字段名(如 `min_price`, `max_price`) | | 127 | | `sort_by` | string | N | null | 排序字段名(如 `min_price`, `max_price`) | |
| 128 | | `sort_order` | string | N | "desc" | 排序方向:`asc`(升序)或 `desc`(降序) | | 128 | | `sort_order` | string | N | "desc" | 排序方向:`asc`(升序)或 `desc`(降序) | |
| 129 | | `min_score` | float | N | null | 最小相关性分数阈值 | | 129 | | `min_score` | float | N | null | 最小相关性分数阈值 | |
| 130 | -| `sku_filter_dimension` | string | N | null | 子SKU筛选维度(店铺配置)。指定后,每个SPU下的SKU将按该维度分组,每组选择第一个SKU返回。支持的值:`option1`、`option2`、`option3` 或 specifications 中的 name(如 `color`、`size`)。详见下文说明 | | 130 | +| `sku_filter_dimension` | array[string] | N | null | 子SKU筛选维度列表(店铺配置)。指定后,每个SPU下的SKU将按这些维度的组合进行分组,每个组合只返回第一个SKU。支持的值:`option1`、`option2`、`option3` 或选项名称(如 `color`、`size`)。详见下文说明 | |
| 131 | | `debug` | boolean | N | false | 是否返回调试信息 | | 131 | | `debug` | boolean | N | false | 是否返回调试信息 | |
| 132 | | `user_id` | string | N | null | 用户ID(用于个性化,预留) | | 132 | | `user_id` | string | N | null | 用户ID(用于个性化,预留) | |
| 133 | | `session_id` | string | N | null | 会话ID(用于分析,预留) | | 133 | | `session_id` | string | N | null | 会话ID(用于分析,预留) | |
| @@ -349,7 +349,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -349,7 +349,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 349 | ### SKU筛选维度 (sku_filter_dimension) | 349 | ### SKU筛选维度 (sku_filter_dimension) |
| 350 | 350 | ||
| 351 | **功能说明**: | 351 | **功能说明**: |
| 352 | -`sku_filter_dimension` 用于控制每个SPU下返回的SKU数量。当指定此参数后,系统会按指定维度对SKU进行分组,每个分组只返回第一个SKU(从简实现,选择该维度下的第一款)。 | 352 | +`sku_filter_dimension` 用于控制每个SPU下返回的SKU数量,为字符串列表。当指定此参数后,系统会按指定维度**组合**对SKU进行分组,每个维度组合只返回第一个SKU(从简实现,选择该组合下的第一款)。 |
| 353 | 353 | ||
| 354 | **使用场景**: | 354 | **使用场景**: |
| 355 | - 店铺配置了SKU筛选维度(如 `color`),希望每个SPU下每种颜色只显示一个SKU | 355 | - 店铺配置了SKU筛选维度(如 `color`),希望每个SPU下每种颜色只显示一个SKU |
| @@ -360,8 +360,8 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -360,8 +360,8 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 360 | 1. **直接选项字段**: `option1`、`option2`、`option3` | 360 | 1. **直接选项字段**: `option1`、`option2`、`option3` |
| 361 | - 直接使用对应的 `option1_value`、`option2_value`、`option3_value` 字段进行分组 | 361 | - 直接使用对应的 `option1_value`、`option2_value`、`option3_value` 字段进行分组 |
| 362 | 362 | ||
| 363 | -2. **规格名称**: 通过 `option1_name`、`option2_name`、`option3_name` 匹配 | ||
| 364 | - - 例如:如果 `option1_name` 为 `"color"`,则可以使用 `sku_filter_dimension: "color"` 来按颜色分组 | 363 | +2. **规格/选项名称**: 通过 `option1_name`、`option2_name`、`option3_name` 匹配 |
| 364 | + - 例如:如果 `option1_name` 为 `"color"`,则可以使用 `sku_filter_dimension: ["color"]` 来按颜色分组 | ||
| 365 | 365 | ||
| 366 | **示例**: | 366 | **示例**: |
| 367 | 367 | ||
| @@ -369,7 +369,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -369,7 +369,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 369 | ```json | 369 | ```json |
| 370 | { | 370 | { |
| 371 | "query": "芭比娃娃", | 371 | "query": "芭比娃娃", |
| 372 | - "sku_filter_dimension": "color" | 372 | + "sku_filter_dimension": ["color"] |
| 373 | } | 373 | } |
| 374 | ``` | 374 | ``` |
| 375 | 375 | ||
| @@ -377,7 +377,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -377,7 +377,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 377 | ```json | 377 | ```json |
| 378 | { | 378 | { |
| 379 | "query": "芭比娃娃", | 379 | "query": "芭比娃娃", |
| 380 | - "sku_filter_dimension": "option1" | 380 | + "sku_filter_dimension": ["option1"] |
| 381 | } | 381 | } |
| 382 | ``` | 382 | ``` |
| 383 | 383 | ||
| @@ -385,7 +385,15 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -385,7 +385,15 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 385 | ```json | 385 | ```json |
| 386 | { | 386 | { |
| 387 | "query": "芭比娃娃", | 387 | "query": "芭比娃娃", |
| 388 | - "sku_filter_dimension": "option2" | 388 | + "sku_filter_dimension": ["option2"] |
| 389 | +} | ||
| 390 | +``` | ||
| 391 | + | ||
| 392 | +**按颜色 + 尺寸组合筛选(假设 option1_name = "color", option2_name = "size")**: | ||
| 393 | +```json | ||
| 394 | +{ | ||
| 395 | + "query": "芭比娃娃", | ||
| 396 | + "sku_filter_dimension": ["color", "size"] | ||
| 389 | } | 397 | } |
| 390 | ``` | 398 | ``` |
| 391 | 399 |
frontend/index.html
| @@ -25,6 +25,10 @@ | @@ -25,6 +25,10 @@ | ||
| 25 | <label for="tenantInput">tenant ID:</label> | 25 | <label for="tenantInput">tenant ID:</label> |
| 26 | <input type="text" id="tenantInput" placeholder="请输入租户ID" value="1"> | 26 | <input type="text" id="tenantInput" placeholder="请输入租户ID" value="1"> |
| 27 | </div> | 27 | </div> |
| 28 | + <div class="tenant-input-wrapper"> | ||
| 29 | + <label for="skuFilterDimension">sku_filter_dimension:</label> | ||
| 30 | + <input type="text" id="skuFilterDimension" placeholder="SKU筛选维度" value="color"> | ||
| 31 | + </div> | ||
| 28 | <input type="text" id="searchInput" placeholder="输入搜索关键词... (支持中文、英文、俄文)" | 32 | <input type="text" id="searchInput" placeholder="输入搜索关键词... (支持中文、英文、俄文)" |
| 29 | onkeypress="handleKeyPress(event)"> | 33 | onkeypress="handleKeyPress(event)"> |
| 30 | <button onclick="performSearch()" class="search-btn">Search</button> | 34 | <button onclick="performSearch()" class="search-btn">Search</button> |
| @@ -100,9 +104,10 @@ | @@ -100,9 +104,10 @@ | ||
| 100 | 104 | ||
| 101 | <div class="sort-right"> | 105 | <div class="sort-right"> |
| 102 | <select id="resultSize" onchange="performSearch()"> | 106 | <select id="resultSize" onchange="performSearch()"> |
| 103 | - <option value="10">10 per page</option> | ||
| 104 | - <option value="20" selected>20 per page</option> | ||
| 105 | - <option value="50">50 per page</option> | 107 | + <option value="20">20 per page</option> |
| 108 | + <option value="50" selected>50 per page</option> | ||
| 109 | + <option value="100">100 per page</option> | ||
| 110 | + <option value="200">200 per page</option> | ||
| 106 | </select> | 111 | </select> |
| 107 | </div> | 112 | </div> |
| 108 | </div> | 113 | </div> |
| @@ -130,6 +135,6 @@ | @@ -130,6 +135,6 @@ | ||
| 130 | <p>SearchEngine © 2025 | API: <span id="apiUrl">Loading...</span></p> | 135 | <p>SearchEngine © 2025 | API: <span id="apiUrl">Loading...</span></p> |
| 131 | </footer> | 136 | </footer> |
| 132 | 137 | ||
| 133 | - <script src="/static/js/app.js?v=3.2"></script> | 138 | + <script src="/static/js/app.js?v=3.4"></script> |
| 134 | </body> | 139 | </body> |
| 135 | </html> | 140 | </html> |
frontend/static/css/style.css
| @@ -84,7 +84,8 @@ body { | @@ -84,7 +84,8 @@ body { | ||
| 84 | white-space: nowrap; | 84 | white-space: nowrap; |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | -#tenantInput { | 87 | +#tenantInput, |
| 88 | +#skuFilterDimension { | ||
| 88 | width: 120px; | 89 | width: 120px; |
| 89 | padding: 10px 15px; | 90 | padding: 10px 15px; |
| 90 | font-size: 14px; | 91 | font-size: 14px; |
| @@ -93,7 +94,8 @@ body { | @@ -93,7 +94,8 @@ body { | ||
| 93 | outline: none; | 94 | outline: none; |
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | -#tenantInput:focus { | 97 | +#tenantInput:focus, |
| 98 | +#skuFilterDimension:focus { | ||
| 97 | border-color: #e74c3c; | 99 | border-color: #e74c3c; |
| 98 | } | 100 | } |
| 99 | 101 |
frontend/static/js/app.js
| @@ -14,6 +14,21 @@ function getTenantId() { | @@ -14,6 +14,21 @@ function getTenantId() { | ||
| 14 | return '1'; // Default fallback | 14 | return '1'; // Default fallback |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | +// Get sku_filter_dimension (as list) from input | ||
| 18 | +function getSkuFilterDimension() { | ||
| 19 | + const skuFilterInput = document.getElementById('skuFilterDimension'); | ||
| 20 | + if (skuFilterInput) { | ||
| 21 | + const value = skuFilterInput.value.trim(); | ||
| 22 | + if (!value.length) { | ||
| 23 | + return null; | ||
| 24 | + } | ||
| 25 | + // 支持用逗号分隔多个维度,例如:color,size 或 option1,color | ||
| 26 | + const parts = value.split(',').map(v => v.trim()).filter(v => v.length > 0); | ||
| 27 | + return parts.length > 0 ? parts : null; | ||
| 28 | + } | ||
| 29 | + return null; | ||
| 30 | +} | ||
| 31 | + | ||
| 17 | // State Management | 32 | // State Management |
| 18 | let state = { | 33 | let state = { |
| 19 | query: '', | 34 | query: '', |
| @@ -51,6 +66,7 @@ function toggleFilters() { | @@ -51,6 +66,7 @@ function toggleFilters() { | ||
| 51 | async function performSearch(page = 1) { | 66 | async function performSearch(page = 1) { |
| 52 | const query = document.getElementById('searchInput').value.trim(); | 67 | const query = document.getElementById('searchInput').value.trim(); |
| 53 | const tenantId = getTenantId(); | 68 | const tenantId = getTenantId(); |
| 69 | + const skuFilterDimension = getSkuFilterDimension(); | ||
| 54 | 70 | ||
| 55 | if (!query) { | 71 | if (!query) { |
| 56 | alert('Please enter search keywords'); | 72 | alert('Please enter search keywords'); |
| @@ -96,6 +112,7 @@ async function performSearch(page = 1) { | @@ -96,6 +112,7 @@ async function performSearch(page = 1) { | ||
| 96 | facets: facets, | 112 | facets: facets, |
| 97 | sort_by: state.sortBy || null, | 113 | sort_by: state.sortBy || null, |
| 98 | sort_order: state.sortOrder, | 114 | sort_order: state.sortOrder, |
| 115 | + sku_filter_dimension: skuFilterDimension, | ||
| 99 | debug: state.debug | 116 | debug: state.debug |
| 100 | }) | 117 | }) |
| 101 | }); | 118 | }); |
main.py
| @@ -93,7 +93,7 @@ def cmd_search(args): | @@ -93,7 +93,7 @@ def cmd_search(args): | ||
| 93 | 93 | ||
| 94 | from query import QueryParser | 94 | from query import QueryParser |
| 95 | query_parser = QueryParser(config) | 95 | query_parser = QueryParser(config) |
| 96 | - searcher = Searcher(config, es_client, query_parser) | 96 | + searcher = Searcher(es_client, config, query_parser) |
| 97 | 97 | ||
| 98 | # Execute search | 98 | # Execute search |
| 99 | print(f"Searching for: '{args.query}' (tenant: {args.tenant_id})") | 99 | print(f"Searching for: '{args.query}' (tenant: {args.tenant_id})") |
query/query_parser.py
| @@ -9,13 +9,7 @@ import numpy as np | @@ -9,13 +9,7 @@ import numpy as np | ||
| 9 | import logging | 9 | import logging |
| 10 | 10 | ||
| 11 | from embeddings import BgeEncoder | 11 | from embeddings import BgeEncoder |
| 12 | -from search.query_config import ( | ||
| 13 | - ENABLE_TEXT_EMBEDDING, | ||
| 14 | - ENABLE_TRANSLATION, | ||
| 15 | - REWRITE_DICTIONARY, | ||
| 16 | - TRANSLATION_API_KEY, | ||
| 17 | - TRANSLATION_SERVICE | ||
| 18 | -) | 12 | +from config import SearchConfig |
| 19 | from .language_detector import LanguageDetector | 13 | from .language_detector import LanguageDetector |
| 20 | from .translator import Translator | 14 | from .translator import Translator |
| 21 | from .query_rewriter import QueryRewriter, QueryNormalizer | 15 | from .query_rewriter import QueryRewriter, QueryNormalizer |
| @@ -70,6 +64,7 @@ class QueryParser: | @@ -70,6 +64,7 @@ class QueryParser: | ||
| 70 | 64 | ||
| 71 | def __init__( | 65 | def __init__( |
| 72 | self, | 66 | self, |
| 67 | + config: SearchConfig, | ||
| 73 | text_encoder: Optional[BgeEncoder] = None, | 68 | text_encoder: Optional[BgeEncoder] = None, |
| 74 | translator: Optional[Translator] = None | 69 | translator: Optional[Translator] = None |
| 75 | ): | 70 | ): |
| @@ -77,21 +72,23 @@ class QueryParser: | @@ -77,21 +72,23 @@ class QueryParser: | ||
| 77 | Initialize query parser. | 72 | Initialize query parser. |
| 78 | 73 | ||
| 79 | Args: | 74 | Args: |
| 75 | + config: SearchConfig instance | ||
| 80 | text_encoder: Text embedding encoder (lazy loaded if not provided) | 76 | text_encoder: Text embedding encoder (lazy loaded if not provided) |
| 81 | translator: Translator instance (lazy loaded if not provided) | 77 | translator: Translator instance (lazy loaded if not provided) |
| 82 | """ | 78 | """ |
| 79 | + self.config = config | ||
| 83 | self._text_encoder = text_encoder | 80 | self._text_encoder = text_encoder |
| 84 | self._translator = translator | 81 | self._translator = translator |
| 85 | 82 | ||
| 86 | # Initialize components | 83 | # Initialize components |
| 87 | self.normalizer = QueryNormalizer() | 84 | self.normalizer = QueryNormalizer() |
| 88 | self.language_detector = LanguageDetector() | 85 | self.language_detector = LanguageDetector() |
| 89 | - self.rewriter = QueryRewriter(REWRITE_DICTIONARY) | 86 | + self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary) |
| 90 | 87 | ||
| 91 | @property | 88 | @property |
| 92 | def text_encoder(self) -> BgeEncoder: | 89 | def text_encoder(self) -> BgeEncoder: |
| 93 | """Lazy load text encoder.""" | 90 | """Lazy load text encoder.""" |
| 94 | - if self._text_encoder is None and ENABLE_TEXT_EMBEDDING: | 91 | + if self._text_encoder is None and self.config.query_config.enable_text_embedding: |
| 95 | logger.info("Initializing text encoder (lazy load)...") | 92 | logger.info("Initializing text encoder (lazy load)...") |
| 96 | self._text_encoder = BgeEncoder() | 93 | self._text_encoder = BgeEncoder() |
| 97 | return self._text_encoder | 94 | return self._text_encoder |
| @@ -99,13 +96,13 @@ class QueryParser: | @@ -99,13 +96,13 @@ class QueryParser: | ||
| 99 | @property | 96 | @property |
| 100 | def translator(self) -> Translator: | 97 | def translator(self) -> Translator: |
| 101 | """Lazy load translator.""" | 98 | """Lazy load translator.""" |
| 102 | - if self._translator is None and ENABLE_TRANSLATION: | 99 | + if self._translator is None and self.config.query_config.enable_translation: |
| 103 | logger.info("Initializing translator (lazy load)...") | 100 | logger.info("Initializing translator (lazy load)...") |
| 104 | self._translator = Translator( | 101 | self._translator = Translator( |
| 105 | - api_key=TRANSLATION_API_KEY, | 102 | + api_key=self.config.query_config.translation_api_key, |
| 106 | use_cache=True, | 103 | use_cache=True, |
| 107 | - glossary_id=None, # Can be added to query_config if needed | ||
| 108 | - translation_context='e-commerce product search' | 104 | + glossary_id=self.config.query_config.translation_glossary_id, |
| 105 | + translation_context=self.config.query_config.translation_context | ||
| 109 | ) | 106 | ) |
| 110 | return self._translator | 107 | return self._translator |
| 111 | 108 | ||
| @@ -156,7 +153,7 @@ class QueryParser: | @@ -156,7 +153,7 @@ class QueryParser: | ||
| 156 | 153 | ||
| 157 | # Stage 2: Query rewriting | 154 | # Stage 2: Query rewriting |
| 158 | rewritten = None | 155 | rewritten = None |
| 159 | - if REWRITE_DICTIONARY: # Enable rewrite if dictionary exists | 156 | + if self.config.query_config.rewrite_dictionary: # Enable rewrite if dictionary exists |
| 160 | rewritten = self.rewriter.rewrite(query_text) | 157 | rewritten = self.rewriter.rewrite(query_text) |
| 161 | if rewritten != query_text: | 158 | if rewritten != query_text: |
| 162 | log_info(f"查询重写 | '{query_text}' -> '{rewritten}'") | 159 | log_info(f"查询重写 | '{query_text}' -> '{rewritten}'") |
| @@ -173,7 +170,7 @@ class QueryParser: | @@ -173,7 +170,7 @@ class QueryParser: | ||
| 173 | 170 | ||
| 174 | # Stage 4: Translation | 171 | # Stage 4: Translation |
| 175 | translations = {} | 172 | translations = {} |
| 176 | - if ENABLE_TRANSLATION: | 173 | + if self.config.query_config.enable_translation: |
| 177 | try: | 174 | try: |
| 178 | # Determine target languages for translation | 175 | # Determine target languages for translation |
| 179 | # Simplified: always translate to Chinese and English | 176 | # Simplified: always translate to Chinese and English |
| @@ -210,19 +207,47 @@ class QueryParser: | @@ -210,19 +207,47 @@ class QueryParser: | ||
| 210 | # Stage 5: Text embedding | 207 | # Stage 5: Text embedding |
| 211 | query_vector = None | 208 | query_vector = None |
| 212 | if (generate_vector and | 209 | if (generate_vector and |
| 213 | - ENABLE_TEXT_EMBEDDING and | 210 | + self.config.query_config.enable_text_embedding and |
| 214 | domain == "default"): # Only generate vector for default domain | 211 | domain == "default"): # Only generate vector for default domain |
| 215 | - try: | ||
| 216 | - log_debug("开始生成查询向量") | ||
| 217 | - query_vector = self.text_encoder.encode([query_text])[0] | ||
| 218 | - log_debug(f"查询向量生成完成 | 形状: {query_vector.shape}") | ||
| 219 | - if context: | ||
| 220 | - context.store_intermediate_result('query_vector_shape', query_vector.shape) | ||
| 221 | - except Exception as e: | ||
| 222 | - error_msg = f"查询向量生成失败 | 错误: {str(e)}" | ||
| 223 | - log_info(error_msg) | ||
| 224 | - if context: | ||
| 225 | - context.add_warning(error_msg) | 212 | + # Get thresholds from config |
| 213 | + chinese_limit = self.config.query_config.embedding_disable_chinese_char_limit | ||
| 214 | + english_limit = self.config.query_config.embedding_disable_english_word_limit | ||
| 215 | + | ||
| 216 | + # Check if embedding should be disabled for short queries | ||
| 217 | + should_disable_embedding = False | ||
| 218 | + disable_reason = None | ||
| 219 | + | ||
| 220 | + if detected_lang == 'zh': | ||
| 221 | + # For Chinese: disable embedding if character count <= threshold | ||
| 222 | + char_count = len(query_text.strip()) | ||
| 223 | + if char_count <= chinese_limit: | ||
| 224 | + should_disable_embedding = True | ||
| 225 | + disable_reason = f"中文查询字数({char_count}) <= {chinese_limit},禁用向量搜索" | ||
| 226 | + log_info(disable_reason) | ||
| 227 | + if context: | ||
| 228 | + context.store_intermediate_result('embedding_disabled_reason', disable_reason) | ||
| 229 | + else: | ||
| 230 | + # For English: disable embedding if word count <= threshold | ||
| 231 | + word_count = len(query_text.strip().split()) | ||
| 232 | + if word_count <= english_limit: | ||
| 233 | + should_disable_embedding = True | ||
| 234 | + disable_reason = f"英文查询单词数({word_count}) <= {english_limit},禁用向量搜索" | ||
| 235 | + log_info(disable_reason) | ||
| 236 | + if context: | ||
| 237 | + context.store_intermediate_result('embedding_disabled_reason', disable_reason) | ||
| 238 | + | ||
| 239 | + if not should_disable_embedding: | ||
| 240 | + try: | ||
| 241 | + log_debug("开始生成查询向量") | ||
| 242 | + query_vector = self.text_encoder.encode([query_text])[0] | ||
| 243 | + log_debug(f"查询向量生成完成 | 形状: {query_vector.shape}") | ||
| 244 | + if context: | ||
| 245 | + context.store_intermediate_result('query_vector_shape', query_vector.shape) | ||
| 246 | + except Exception as e: | ||
| 247 | + error_msg = f"查询向量生成失败 | 错误: {str(e)}" | ||
| 248 | + log_info(error_msg) | ||
| 249 | + if context: | ||
| 250 | + context.add_warning(error_msg) | ||
| 226 | 251 | ||
| 227 | # Build result | 252 | # Build result |
| 228 | result = ParsedQuery( | 253 | result = ParsedQuery( |
search/es_query_builder.py
| @@ -11,7 +11,7 @@ Simplified architecture: | @@ -11,7 +11,7 @@ Simplified architecture: | ||
| 11 | from typing import Dict, Any, List, Optional, Union | 11 | from typing import Dict, Any, List, Optional, Union |
| 12 | import numpy as np | 12 | import numpy as np |
| 13 | from .boolean_parser import QueryNode | 13 | from .boolean_parser import QueryNode |
| 14 | -from .query_config import FUNCTION_SCORE_CONFIG | 14 | +from config import FunctionScoreConfig |
| 15 | 15 | ||
| 16 | 16 | ||
| 17 | class ESQueryBuilder: | 17 | class ESQueryBuilder: |
| @@ -23,7 +23,8 @@ class ESQueryBuilder: | @@ -23,7 +23,8 @@ class ESQueryBuilder: | ||
| 23 | match_fields: List[str], | 23 | match_fields: List[str], |
| 24 | text_embedding_field: Optional[str] = None, | 24 | text_embedding_field: Optional[str] = None, |
| 25 | image_embedding_field: Optional[str] = None, | 25 | image_embedding_field: Optional[str] = None, |
| 26 | - source_fields: Optional[List[str]] = None | 26 | + source_fields: Optional[List[str]] = None, |
| 27 | + function_score_config: Optional[FunctionScoreConfig] = None | ||
| 27 | ): | 28 | ): |
| 28 | """ | 29 | """ |
| 29 | Initialize query builder. | 30 | Initialize query builder. |
| @@ -34,12 +35,14 @@ class ESQueryBuilder: | @@ -34,12 +35,14 @@ class ESQueryBuilder: | ||
| 34 | text_embedding_field: Field name for text embeddings | 35 | text_embedding_field: Field name for text embeddings |
| 35 | image_embedding_field: Field name for image embeddings | 36 | image_embedding_field: Field name for image embeddings |
| 36 | source_fields: Fields to return in search results (_source includes) | 37 | source_fields: Fields to return in search results (_source includes) |
| 38 | + function_score_config: Function score configuration | ||
| 37 | """ | 39 | """ |
| 38 | self.index_name = index_name | 40 | self.index_name = index_name |
| 39 | self.match_fields = match_fields | 41 | self.match_fields = match_fields |
| 40 | self.text_embedding_field = text_embedding_field | 42 | self.text_embedding_field = text_embedding_field |
| 41 | self.image_embedding_field = image_embedding_field | 43 | self.image_embedding_field = image_embedding_field |
| 42 | self.source_fields = source_fields | 44 | self.source_fields = source_fields |
| 45 | + self.function_score_config = function_score_config | ||
| 43 | 46 | ||
| 44 | def build_query( | 47 | def build_query( |
| 45 | self, | 48 | self, |
| @@ -182,12 +185,15 @@ class ESQueryBuilder: | @@ -182,12 +185,15 @@ class ESQueryBuilder: | ||
| 182 | return query | 185 | return query |
| 183 | 186 | ||
| 184 | # Build function_score query | 187 | # Build function_score query |
| 188 | + score_mode = self.function_score_config.score_mode if self.function_score_config else "sum" | ||
| 189 | + boost_mode = self.function_score_config.boost_mode if self.function_score_config else "multiply" | ||
| 190 | + | ||
| 185 | function_score_query = { | 191 | function_score_query = { |
| 186 | "function_score": { | 192 | "function_score": { |
| 187 | "query": query, | 193 | "query": query, |
| 188 | "functions": functions, | 194 | "functions": functions, |
| 189 | - "score_mode": FUNCTION_SCORE_CONFIG.get("score_mode", "sum"), | ||
| 190 | - "boost_mode": FUNCTION_SCORE_CONFIG.get("boost_mode", "multiply") | 195 | + "score_mode": score_mode, |
| 196 | + "boost_mode": boost_mode | ||
| 191 | } | 197 | } |
| 192 | } | 198 | } |
| 193 | 199 | ||
| @@ -201,7 +207,10 @@ class ESQueryBuilder: | @@ -201,7 +207,10 @@ class ESQueryBuilder: | ||
| 201 | List of function score functions | 207 | List of function score functions |
| 202 | """ | 208 | """ |
| 203 | functions = [] | 209 | functions = [] |
| 204 | - config_functions = FUNCTION_SCORE_CONFIG.get("functions", []) | 210 | + if not self.function_score_config: |
| 211 | + return functions | ||
| 212 | + | ||
| 213 | + config_functions = self.function_score_config.functions or [] | ||
| 205 | 214 | ||
| 206 | for func_config in config_functions: | 215 | for func_config in config_functions: |
| 207 | func_type = func_config.get("type") | 216 | func_type = func_config.get("type") |
search/searcher.py
| @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. | @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. | ||
| 5 | """ | 5 | """ |
| 6 | 6 | ||
| 7 | from typing import Dict, Any, List, Optional, Union | 7 | from typing import Dict, Any, List, Optional, Union |
| 8 | -import time | 8 | +import time, json |
| 9 | import logging | 9 | import logging |
| 10 | 10 | ||
| 11 | from utils.es_client import ESClient | 11 | from utils.es_client import ESClient |
| @@ -14,16 +14,8 @@ from embeddings import CLIPImageEncoder | @@ -14,16 +14,8 @@ from embeddings import CLIPImageEncoder | ||
| 14 | from .boolean_parser import BooleanParser, QueryNode | 14 | from .boolean_parser import BooleanParser, QueryNode |
| 15 | from .es_query_builder import ESQueryBuilder | 15 | from .es_query_builder import ESQueryBuilder |
| 16 | from .rerank_engine import RerankEngine | 16 | from .rerank_engine import RerankEngine |
| 17 | -from .query_config import ( | ||
| 18 | - DEFAULT_INDEX_NAME, | ||
| 19 | - DEFAULT_MATCH_FIELDS, | ||
| 20 | - TEXT_EMBEDDING_FIELD, | ||
| 21 | - IMAGE_EMBEDDING_FIELD, | ||
| 22 | - SOURCE_FIELDS, | ||
| 23 | - ENABLE_TRANSLATION, | ||
| 24 | - ENABLE_TEXT_EMBEDDING, | ||
| 25 | - RANKING_EXPRESSION | ||
| 26 | -) | 17 | +from config import SearchConfig |
| 18 | +from config.utils import get_match_fields_for_index | ||
| 27 | from context.request_context import RequestContext, RequestContextStage, create_request_context | 19 | from context.request_context import RequestContext, RequestContextStage, create_request_context |
| 28 | from api.models import FacetResult, FacetValue | 20 | from api.models import FacetResult, FacetValue |
| 29 | from api.result_formatter import ResultFormatter | 21 | from api.result_formatter import ResultFormatter |
| @@ -87,37 +79,40 @@ class Searcher: | @@ -87,37 +79,40 @@ class Searcher: | ||
| 87 | def __init__( | 79 | def __init__( |
| 88 | self, | 80 | self, |
| 89 | es_client: ESClient, | 81 | es_client: ESClient, |
| 90 | - query_parser: Optional[QueryParser] = None, | ||
| 91 | - index_name: str = DEFAULT_INDEX_NAME | 82 | + config: SearchConfig, |
| 83 | + query_parser: Optional[QueryParser] = None | ||
| 92 | ): | 84 | ): |
| 93 | """ | 85 | """ |
| 94 | Initialize searcher. | 86 | Initialize searcher. |
| 95 | 87 | ||
| 96 | Args: | 88 | Args: |
| 97 | es_client: Elasticsearch client | 89 | es_client: Elasticsearch client |
| 90 | + config: SearchConfig instance | ||
| 98 | query_parser: Query parser (created if not provided) | 91 | query_parser: Query parser (created if not provided) |
| 99 | - index_name: ES index name (default: search_products) | ||
| 100 | """ | 92 | """ |
| 101 | self.es_client = es_client | 93 | self.es_client = es_client |
| 102 | - self.index_name = index_name | ||
| 103 | - self.query_parser = query_parser or QueryParser() | 94 | + self.config = config |
| 95 | + self.index_name = config.es_index_name | ||
| 96 | + self.query_parser = query_parser or QueryParser(config) | ||
| 104 | 97 | ||
| 105 | # Initialize components | 98 | # Initialize components |
| 106 | self.boolean_parser = BooleanParser() | 99 | self.boolean_parser = BooleanParser() |
| 107 | - self.rerank_engine = RerankEngine(RANKING_EXPRESSION, enabled=False) | 100 | + self.rerank_engine = RerankEngine(config.ranking.expression, enabled=False) |
| 108 | 101 | ||
| 109 | - # Use constants from query_config | ||
| 110 | - self.match_fields = DEFAULT_MATCH_FIELDS | ||
| 111 | - self.text_embedding_field = TEXT_EMBEDDING_FIELD | ||
| 112 | - self.image_embedding_field = IMAGE_EMBEDDING_FIELD | 102 | + # Get match fields from config |
| 103 | + self.match_fields = get_match_fields_for_index(config, "default") | ||
| 104 | + self.text_embedding_field = config.query_config.text_embedding_field or "title_embedding" | ||
| 105 | + self.image_embedding_field = config.query_config.image_embedding_field or "image_embedding" | ||
| 106 | + self.source_fields = config.query_config.source_fields or [] | ||
| 113 | 107 | ||
| 114 | # Query builder - simplified single-layer architecture | 108 | # Query builder - simplified single-layer architecture |
| 115 | self.query_builder = ESQueryBuilder( | 109 | self.query_builder = ESQueryBuilder( |
| 116 | - index_name=index_name, | 110 | + index_name=self.index_name, |
| 117 | match_fields=self.match_fields, | 111 | match_fields=self.match_fields, |
| 118 | text_embedding_field=self.text_embedding_field, | 112 | text_embedding_field=self.text_embedding_field, |
| 119 | image_embedding_field=self.image_embedding_field, | 113 | image_embedding_field=self.image_embedding_field, |
| 120 | - source_fields=SOURCE_FIELDS | 114 | + source_fields=self.source_fields, |
| 115 | + function_score_config=self.config.function_score | ||
| 121 | ) | 116 | ) |
| 122 | 117 | ||
| 123 | def search( | 118 | def search( |
| @@ -135,7 +130,7 @@ class Searcher: | @@ -135,7 +130,7 @@ class Searcher: | ||
| 135 | sort_order: Optional[str] = "desc", | 130 | sort_order: Optional[str] = "desc", |
| 136 | debug: bool = False, | 131 | debug: bool = False, |
| 137 | language: str = "zh", | 132 | language: str = "zh", |
| 138 | - sku_filter_dimension: Optional[str] = None, | 133 | + sku_filter_dimension: Optional[List[str]] = None, |
| 139 | ) -> SearchResult: | 134 | ) -> SearchResult: |
| 140 | """ | 135 | """ |
| 141 | Execute search query (外部友好格式). | 136 | Execute search query (外部友好格式). |
| @@ -162,8 +157,8 @@ class Searcher: | @@ -162,8 +157,8 @@ class Searcher: | ||
| 162 | context = create_request_context() | 157 | context = create_request_context() |
| 163 | 158 | ||
| 164 | # Always use config defaults (these are backend configuration, not user parameters) | 159 | # Always use config defaults (these are backend configuration, not user parameters) |
| 165 | - enable_translation = ENABLE_TRANSLATION | ||
| 166 | - enable_embedding = ENABLE_TEXT_EMBEDDING | 160 | + enable_translation = self.config.query_config.enable_translation |
| 161 | + enable_embedding = self.config.query_config.enable_text_embedding | ||
| 167 | enable_rerank = False # Temporarily disabled | 162 | enable_rerank = False # Temporarily disabled |
| 168 | 163 | ||
| 169 | # Start timing | 164 | # Start timing |
| @@ -305,14 +300,14 @@ class Searcher: | @@ -305,14 +300,14 @@ class Searcher: | ||
| 305 | context.store_intermediate_result('es_query', es_query) | 300 | context.store_intermediate_result('es_query', es_query) |
| 306 | context.store_intermediate_result('es_body_for_search', body_for_es) | 301 | context.store_intermediate_result('es_body_for_search', body_for_es) |
| 307 | 302 | ||
| 303 | + # Serialize ES query as a compact JSON string (no spaces or newlines) | ||
| 304 | + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) | ||
| 305 | + | ||
| 308 | context.logger.info( | 306 | context.logger.info( |
| 309 | - f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | " | ||
| 310 | - f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | " | ||
| 311 | - f"分面: {'是' if facets else '否'}", | ||
| 312 | - extra={'reqid': context.reqid, 'uid': context.uid} | ||
| 313 | - ) | ||
| 314 | - context.logger.debug( | ||
| 315 | - f"ES查询详情: {es_query}", | 307 | + f"ES query built | size: {len(es_query_compact)} chars | " |
| 308 | + f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " | ||
| 309 | + f"facets: {'yes' if facets else 'no'} | " | ||
| 310 | + f"query: {es_query_compact}", | ||
| 316 | extra={'reqid': context.reqid, 'uid': context.uid} | 311 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 317 | ) | 312 | ) |
| 318 | except Exception as e: | 313 | except Exception as e: |
| @@ -508,9 +503,9 @@ class Searcher: | @@ -508,9 +503,9 @@ class Searcher: | ||
| 508 | } | 503 | } |
| 509 | 504 | ||
| 510 | # Add _source filtering if source_fields are configured | 505 | # Add _source filtering if source_fields are configured |
| 511 | - if SOURCE_FIELDS: | 506 | + if self.source_fields: |
| 512 | es_query["_source"] = { | 507 | es_query["_source"] = { |
| 513 | - "includes": SOURCE_FIELDS | 508 | + "includes": self.source_fields |
| 514 | } | 509 | } |
| 515 | 510 | ||
| 516 | if filters or range_filters: | 511 | if filters or range_filters: |
tests/conftest.py
| @@ -137,8 +137,8 @@ def mock_es_client() -> Mock: | @@ -137,8 +137,8 @@ def mock_es_client() -> Mock: | ||
| 137 | def test_searcher(sample_search_config, mock_es_client) -> Searcher: | 137 | def test_searcher(sample_search_config, mock_es_client) -> Searcher: |
| 138 | """测试用Searcher实例""" | 138 | """测试用Searcher实例""" |
| 139 | return Searcher( | 139 | return Searcher( |
| 140 | - config=sample_search_config, | ||
| 141 | - es_client=mock_es_client | 140 | + es_client=mock_es_client, |
| 141 | + config=sample_search_config | ||
| 142 | ) | 142 | ) |
| 143 | 143 | ||
| 144 | 144 |
utils/logger.py
| @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): | @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): | ||
| 38 | # Add request context if available | 38 | # Add request context if available |
| 39 | reqid = getattr(record, 'reqid', None) | 39 | reqid = getattr(record, 'reqid', None) |
| 40 | uid = getattr(record, 'uid', None) | 40 | uid = getattr(record, 'uid', None) |
| 41 | - if reqid or uid: | 41 | + if reqid is not None or uid is not None: |
| 42 | + # Normalize missing values to "-1" for easier correlation | ||
| 42 | log_entry['request_context'] = { | 43 | log_entry['request_context'] = { |
| 43 | - 'reqid': reqid, | ||
| 44 | - 'uid': uid | 44 | + 'reqid': reqid if reqid is not None else "-1", |
| 45 | + 'uid': uid if uid is not None else "-1" | ||
| 45 | } | 46 | } |
| 46 | 47 | ||
| 47 | # Add extra data if available | 48 | # Add extra data if available |
| @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): | @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): | ||
| 98 | from context.request_context import get_current_request_context | 99 | from context.request_context import get_current_request_context |
| 99 | context = get_current_request_context() | 100 | context = get_current_request_context() |
| 100 | if context: | 101 | if context: |
| 101 | - record.reqid = context.reqid | ||
| 102 | - record.uid = context.uid | 102 | + # Ensure every request-scoped log record carries reqid/uid. |
| 103 | + # If they are missing in the context, fall back to "-1". | ||
| 104 | + record.reqid = getattr(context, "reqid", None) or "-1" | ||
| 105 | + record.uid = getattr(context, "uid", None) or "-1" | ||
| 103 | except (ImportError, AttributeError): | 106 | except (ImportError, AttributeError): |
| 104 | pass | 107 | pass |
| 105 | return True | 108 | return True |
| 106 | 109 | ||
| 107 | 110 | ||
| 111 | +class ContextAwareConsoleFormatter(logging.Formatter): | ||
| 112 | + """ | ||
| 113 | + Console formatter that injects reqid/uid into the log line. | ||
| 114 | + | ||
| 115 | + For non-request logs (no context), reqid/uid will be "-1". | ||
| 116 | + """ | ||
| 117 | + | ||
| 118 | + def format(self, record: logging.LogRecord) -> str: | ||
| 119 | + # Provide safe defaults so format string never fails | ||
| 120 | + if not hasattr(record, "reqid"): | ||
| 121 | + record.reqid = "-1" | ||
| 122 | + if not hasattr(record, "uid"): | ||
| 123 | + record.uid = "-1" | ||
| 124 | + return super().format(record) | ||
| 125 | + | ||
| 126 | + | ||
| 108 | def setup_logging( | 127 | def setup_logging( |
| 109 | log_level: str = "INFO", | 128 | log_level: str = "INFO", |
| 110 | log_dir: str = "logs", | 129 | log_dir: str = "logs", |
| @@ -137,8 +156,8 @@ def setup_logging( | @@ -137,8 +156,8 @@ def setup_logging( | ||
| 137 | 156 | ||
| 138 | # Create formatters | 157 | # Create formatters |
| 139 | structured_formatter = StructuredFormatter() | 158 | structured_formatter = StructuredFormatter() |
| 140 | - console_formatter = logging.Formatter( | ||
| 141 | - '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s' | 159 | + console_formatter = ContextAwareConsoleFormatter( |
| 160 | + '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s' | ||
| 142 | ) | 161 | ) |
| 143 | 162 | ||
| 144 | # Add console handler | 163 | # Add console handler |