Commit 99bea633a7b77a90f83fcefab602ceb822ccc52b
1 parent: e2539fd3
add logs
Showing 4 changed files with 89 additions and 42 deletions
Show diff stats
api/routes/search.py
| @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: | @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: | ||
| 24 | # Try to get request ID from headers | 24 | # Try to get request ID from headers |
| 25 | reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] | 25 | reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] |
| 26 | 26 | ||
| 27 | - # Try to get user ID from headers or default to anonymous | ||
| 28 | - uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous' | 27 | + # Try to get user ID from headers; if not found, use "-1" for correlation |
| 28 | + uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1" | ||
| 29 | 29 | ||
| 30 | return reqid, uid | 30 | return reqid, uid |
| 31 | 31 | ||
| @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): | @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 70 | set_current_request_context(context) | 70 | set_current_request_context(context) |
| 71 | 71 | ||
| 72 | try: | 72 | try: |
| 73 | - # Log request start | 73 | + # Log request start (English logs, with key search parameters) |
| 74 | + client_ip = http_request.client.host if http_request.client else "unknown" | ||
| 75 | + user_agent = http_request.headers.get("User-Agent", "unknown")[:200] | ||
| 74 | context.logger.info( | 76 | context.logger.info( |
| 75 | - f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | " | ||
| 76 | - f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}", | 77 | + "Received search request | " |
| 78 | + f"Tenant: {tenant_id} | " | ||
| 79 | + f"Query: {request.query} | " | ||
| 80 | + f"IP: {client_ip} | " | ||
| 81 | + f"User agent: {user_agent} | " | ||
| 82 | + f"size: {request.size} | from: {request.from_} | " | ||
| 83 | + f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | " | ||
| 84 | + f"min_score: {request.min_score} | " | ||
| 85 | + f"language: {request.language} | " | ||
| 86 | + f"debug: {request.debug} | " | ||
| 87 | + f"sku_filter_dimension: {request.sku_filter_dimension} | " | ||
| 88 | + f"filters: {request.filters} | " | ||
| 89 | + f"range_filters: {request.range_filters} | " | ||
| 90 | + f"facets: {request.facets}", | ||
| 77 | extra={'reqid': context.reqid, 'uid': context.uid} | 91 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 78 | ) | 92 | ) |
| 79 | 93 | ||
| @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): | @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 121 | if context: | 135 | if context: |
| 122 | context.set_error(e) | 136 | context.set_error(e) |
| 123 | context.logger.error( | 137 | context.logger.error( |
| 124 | - f"搜索请求失败 | 错误: {str(e)}", | 138 | + f"Search request failed | error: {str(e)}", |
| 125 | extra={'reqid': context.reqid, 'uid': context.uid} | 139 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 126 | ) | 140 | ) |
| 127 | raise HTTPException(status_code=500, detail=str(e)) | 141 | raise HTTPException(status_code=500, detail=str(e)) |
| @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 164 | set_current_request_context(context) | 178 | set_current_request_context(context) |
| 165 | 179 | ||
| 166 | try: | 180 | try: |
| 167 | - # Log request start | 181 | + # Log request start for image search (English) |
| 182 | + client_ip = http_request.client.host if http_request.client else "unknown" | ||
| 168 | context.logger.info( | 183 | context.logger.info( |
| 169 | - f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | " | ||
| 170 | - f"IP: {http_request.client.host if http_request.client else 'unknown'}", | 184 | + "Received image search request | " |
| 185 | + f"Tenant: {tenant_id} | " | ||
| 186 | + f"Image URL: {request.image_url} | " | ||
| 187 | + f"IP: {client_ip}", | ||
| 171 | extra={'reqid': context.reqid, 'uid': context.uid} | 188 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 172 | ) | 189 | ) |
| 173 | 190 | ||
| @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 202 | if context: | 219 | if context: |
| 203 | context.set_error(e) | 220 | context.set_error(e) |
| 204 | context.logger.error( | 221 | context.logger.error( |
| 205 | - f"图片搜索请求参数错误 | 错误: {str(e)}", | 222 | + f"Image search request parameter error | error: {str(e)}", |
| 206 | extra={'reqid': context.reqid, 'uid': context.uid} | 223 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 207 | ) | 224 | ) |
| 208 | raise HTTPException(status_code=400, detail=str(e)) | 225 | raise HTTPException(status_code=400, detail=str(e)) |
| @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 210 | if context: | 227 | if context: |
| 211 | context.set_error(e) | 228 | context.set_error(e) |
| 212 | context.logger.error( | 229 | context.logger.error( |
| 213 | - f"图片搜索请求失败 | 错误: {str(e)}", | 230 | + f"Image search request failed | error: {str(e)}", |
| 214 | extra={'reqid': context.reqid, 'uid': context.uid} | 231 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 215 | ) | 232 | ) |
| 216 | raise HTTPException(status_code=500, detail=str(e)) | 233 | raise HTTPException(status_code=500, detail=str(e)) |
context/request_context.py
| @@ -59,9 +59,10 @@ class RequestContext: | @@ -59,9 +59,10 @@ class RequestContext: | ||
| 59 | """ | 59 | """ |
| 60 | 60 | ||
| 61 | def __init__(self, reqid: str = None, uid: str = None): | 61 | def __init__(self, reqid: str = None, uid: str = None): |
| 62 | - # 生成唯一请求ID | 62 | + # 生成唯一请求ID;如果外部未提供,则自动生成 |
| 63 | + # 如果无法获取到 uid,则使用 "-1" 作为占位,用于日志关联 | ||
| 63 | self.reqid = reqid or str(uuid.uuid4())[:8] | 64 | self.reqid = reqid or str(uuid.uuid4())[:8] |
| 64 | - self.uid = uid or 'anonymous' | 65 | + self.uid = uid or "-1" |
| 65 | 66 | ||
| 66 | # 查询分析结果 | 67 | # 查询分析结果 |
| 67 | self.query_analysis = QueryAnalysisResult() | 68 | self.query_analysis = QueryAnalysisResult() |
| @@ -111,7 +112,10 @@ class RequestContext: | @@ -111,7 +112,10 @@ class RequestContext: | ||
| 111 | """ | 112 | """ |
| 112 | start_time = time.time() | 113 | start_time = time.time() |
| 113 | self.performance_metrics.stage_start_times[stage.value] = start_time | 114 | self.performance_metrics.stage_start_times[stage.value] = start_time |
| 114 | - self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) | 115 | + self.logger.debug( |
| 116 | + f"Start stage | {stage.value}", | ||
| 117 | + extra={'reqid': self.reqid, 'uid': self.uid} | ||
| 118 | + ) | ||
| 115 | return start_time | 119 | return start_time |
| 116 | 120 | ||
| 117 | def end_stage(self, stage: RequestContextStage) -> float: | 121 | def end_stage(self, stage: RequestContextStage) -> float: |
| @@ -125,7 +129,10 @@ class RequestContext: | @@ -125,7 +129,10 @@ class RequestContext: | ||
| 125 | 阶段耗时(毫秒) | 129 | 阶段耗时(毫秒) |
| 126 | """ | 130 | """ |
| 127 | if stage.value not in self.performance_metrics.stage_start_times: | 131 | if stage.value not in self.performance_metrics.stage_start_times: |
| 128 | - self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) | 132 | + self.logger.warning( |
| 133 | + f"Stage not started | {stage.value}", | ||
| 134 | + extra={'reqid': self.reqid, 'uid': self.uid} | ||
| 135 | + ) | ||
| 129 | return 0.0 | 136 | return 0.0 |
| 130 | 137 | ||
| 131 | start_time = self.performance_metrics.stage_start_times[stage.value] | 138 | start_time = self.performance_metrics.stage_start_times[stage.value] |
| @@ -133,7 +140,7 @@ class RequestContext: | @@ -133,7 +140,7 @@ class RequestContext: | ||
| 133 | self.performance_metrics.stage_timings[stage.value] = duration_ms | 140 | self.performance_metrics.stage_timings[stage.value] = duration_ms |
| 134 | 141 | ||
| 135 | self.logger.debug( | 142 | self.logger.debug( |
| 136 | - f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms", | 143 | + f"End stage | {stage.value} | duration: {duration_ms:.2f}ms", |
| 137 | extra={'reqid': self.reqid, 'uid': self.uid} | 144 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 138 | ) | 145 | ) |
| 139 | return duration_ms | 146 | return duration_ms |
| @@ -162,7 +169,7 @@ class RequestContext: | @@ -162,7 +169,7 @@ class RequestContext: | ||
| 162 | setattr(self.query_analysis, key, value) | 169 | setattr(self.query_analysis, key, value) |
| 163 | else: | 170 | else: |
| 164 | self.logger.warning( | 171 | self.logger.warning( |
| 165 | - f"未知的查询分析字段 | {key}", | 172 | + f"Unknown query analysis field | {key}", |
| 166 | extra={'reqid': self.reqid, 'uid': self.uid} | 173 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 167 | ) | 174 | ) |
| 168 | 175 | ||
| @@ -175,7 +182,10 @@ class RequestContext: | @@ -175,7 +182,10 @@ class RequestContext: | ||
| 175 | value: 结果值 | 182 | value: 结果值 |
| 176 | """ | 183 | """ |
| 177 | self.intermediate_results[key] = value | 184 | self.intermediate_results[key] = value |
| 178 | - self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid}) | 185 | + self.logger.debug( |
| 186 | + f"Store intermediate result | {key}", | ||
| 187 | + extra={'reqid': self.reqid, 'uid': self.uid} | ||
| 188 | + ) | ||
| 179 | 189 | ||
| 180 | def get_intermediate_result(self, key: str, default: Any = None) -> Any: | 190 | def get_intermediate_result(self, key: str, default: Any = None) -> Any: |
| 181 | """ | 191 | """ |
| @@ -213,7 +223,7 @@ class RequestContext: | @@ -213,7 +223,7 @@ class RequestContext: | ||
| 213 | 'details': {} | 223 | 'details': {} |
| 214 | } | 224 | } |
| 215 | self.logger.error( | 225 | self.logger.error( |
| 216 | - f"设置错误信息 | {type(error).__name__}: {str(error)}", | 226 | + f"Set error info | {type(error).__name__}: {str(error)}", |
| 217 | extra={'reqid': self.reqid, 'uid': self.uid} | 227 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 218 | ) | 228 | ) |
| 219 | 229 | ||
| @@ -286,13 +296,13 @@ class RequestContext: | @@ -286,13 +296,13 @@ class RequestContext: | ||
| 286 | 296 | ||
| 287 | # 构建详细的日志消息 | 297 | # 构建详细的日志消息 |
| 288 | msg_parts = [ | 298 | msg_parts = [ |
| 289 | - f"搜索请求性能摘要 | reqid: {self.reqid}", | ||
| 290 | - f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms" | 299 | + f"Search request performance summary | reqid: {self.reqid}", |
| 300 | + f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms" | ||
| 291 | ] | 301 | ] |
| 292 | 302 | ||
| 293 | # 添加各阶段耗时 | 303 | # 添加各阶段耗时 |
| 294 | if summary['performance']['stage_timings_ms']: | 304 | if summary['performance']['stage_timings_ms']: |
| 295 | - msg_parts.append("阶段耗时:") | 305 | + msg_parts.append("Stage durations:") |
| 296 | for stage, duration in summary['performance']['stage_timings_ms'].items(): | 306 | for stage, duration in summary['performance']['stage_timings_ms'].items(): |
| 297 | percentage = summary['performance']['stage_percentages'].get(stage, 0) | 307 | percentage = summary['performance']['stage_percentages'].get(stage, 0) |
| 298 | msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") | 308 | msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") |
| @@ -300,25 +310,26 @@ class RequestContext: | @@ -300,25 +310,26 @@ class RequestContext: | ||
| 300 | # 添加查询信息 | 310 | # 添加查询信息 |
| 301 | if summary['query_analysis']['original_query']: | 311 | if summary['query_analysis']['original_query']: |
| 302 | msg_parts.append( | 312 | msg_parts.append( |
| 303 | - f"查询: '{summary['query_analysis']['original_query']}' " | 313 | + "Query: " |
| 314 | + f"'{summary['query_analysis']['original_query']}' " | ||
| 304 | f"-> '{summary['query_analysis']['rewritten_query']}' " | 315 | f"-> '{summary['query_analysis']['rewritten_query']}' " |
| 305 | f"({summary['query_analysis']['detected_language']})" | 316 | f"({summary['query_analysis']['detected_language']})" |
| 306 | ) | 317 | ) |
| 307 | 318 | ||
| 308 | # 添加结果统计 | 319 | # 添加结果统计 |
| 309 | msg_parts.append( | 320 | msg_parts.append( |
| 310 | - f"结果: {summary['results']['total_hits']} hits " | ||
| 311 | - f"ES查询: {summary['results']['es_query_size']} chars" | 321 | + f"Results: {summary['results']['total_hits']} hits " |
| 322 | + f"ES query size: {summary['results']['es_query_size']} chars" | ||
| 312 | ) | 323 | ) |
| 313 | 324 | ||
| 314 | # 添加错误信息(如果有) | 325 | # 添加错误信息(如果有) |
| 315 | if summary['request_info']['has_error']: | 326 | if summary['request_info']['has_error']: |
| 316 | error_info = self.metadata['error_info'] | 327 | error_info = self.metadata['error_info'] |
| 317 | - msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}") | 328 | + msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}") |
| 318 | 329 | ||
| 319 | # 添加警告信息(如果有) | 330 | # 添加警告信息(如果有) |
| 320 | if summary['request_info']['warnings_count'] > 0: | 331 | if summary['request_info']['warnings_count'] > 0: |
| 321 | - msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个") | 332 | + msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}") |
| 322 | 333 | ||
| 323 | log_message = " | ".join(msg_parts) | 334 | log_message = " | ".join(msg_parts) |
| 324 | 335 |
search/searcher.py
| @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. | @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. | ||
| 5 | """ | 5 | """ |
| 6 | 6 | ||
| 7 | from typing import Dict, Any, List, Optional, Union | 7 | from typing import Dict, Any, List, Optional, Union |
| 8 | -import time | 8 | +import time, json |
| 9 | import logging | 9 | import logging |
| 10 | 10 | ||
| 11 | from utils.es_client import ESClient | 11 | from utils.es_client import ESClient |
| @@ -305,14 +305,14 @@ class Searcher: | @@ -305,14 +305,14 @@ class Searcher: | ||
| 305 | context.store_intermediate_result('es_query', es_query) | 305 | context.store_intermediate_result('es_query', es_query) |
| 306 | context.store_intermediate_result('es_body_for_search', body_for_es) | 306 | context.store_intermediate_result('es_body_for_search', body_for_es) |
| 307 | 307 | ||
| 308 | + # Serialize ES query as a compact JSON string (no spaces or newlines) | ||
| 309 | + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) | ||
| 310 | + | ||
| 308 | context.logger.info( | 311 | context.logger.info( |
| 309 | - f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | " | ||
| 310 | - f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | " | ||
| 311 | - f"分面: {'是' if facets else '否'}", | ||
| 312 | - extra={'reqid': context.reqid, 'uid': context.uid} | ||
| 313 | - ) | ||
| 314 | - context.logger.debug( | ||
| 315 | - f"ES查询详情: {es_query}", | 312 | + f"ES query built | size: {len(es_query_compact)} chars | " |
| 313 | + f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " | ||
| 314 | + f"facets: {'yes' if facets else 'no'} | " | ||
| 315 | + f"query: {es_query_compact}", | ||
| 316 | extra={'reqid': context.reqid, 'uid': context.uid} | 316 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 317 | ) | 317 | ) |
| 318 | except Exception as e: | 318 | except Exception as e: |
utils/logger.py
| @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): | @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): | ||
| 38 | # Add request context if available | 38 | # Add request context if available |
| 39 | reqid = getattr(record, 'reqid', None) | 39 | reqid = getattr(record, 'reqid', None) |
| 40 | uid = getattr(record, 'uid', None) | 40 | uid = getattr(record, 'uid', None) |
| 41 | - if reqid or uid: | 41 | + if reqid is not None or uid is not None: |
| 42 | + # Normalize missing values to "-1" for easier correlation | ||
| 42 | log_entry['request_context'] = { | 43 | log_entry['request_context'] = { |
| 43 | - 'reqid': reqid, | ||
| 44 | - 'uid': uid | 44 | + 'reqid': reqid if reqid is not None else "-1", |
| 45 | + 'uid': uid if uid is not None else "-1" | ||
| 45 | } | 46 | } |
| 46 | 47 | ||
| 47 | # Add extra data if available | 48 | # Add extra data if available |
| @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): | @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): | ||
| 98 | from context.request_context import get_current_request_context | 99 | from context.request_context import get_current_request_context |
| 99 | context = get_current_request_context() | 100 | context = get_current_request_context() |
| 100 | if context: | 101 | if context: |
| 101 | - record.reqid = context.reqid | ||
| 102 | - record.uid = context.uid | 102 | + # Ensure every request-scoped log record carries reqid/uid. |
| 103 | + # If they are missing in the context, fall back to "-1". | ||
| 104 | + record.reqid = getattr(context, "reqid", None) or "-1" | ||
| 105 | + record.uid = getattr(context, "uid", None) or "-1" | ||
| 103 | except (ImportError, AttributeError): | 106 | except (ImportError, AttributeError): |
| 104 | pass | 107 | pass |
| 105 | return True | 108 | return True |
| 106 | 109 | ||
| 107 | 110 | ||
| 111 | +class ContextAwareConsoleFormatter(logging.Formatter): | ||
| 112 | + """ | ||
| 113 | + Console formatter that injects reqid/uid into the log line. | ||
| 114 | + | ||
| 115 | + For non-request logs (no context), reqid/uid will be "-1". | ||
| 116 | + """ | ||
| 117 | + | ||
| 118 | + def format(self, record: logging.LogRecord) -> str: | ||
| 119 | + # Provide safe defaults so format string never fails | ||
| 120 | + if not hasattr(record, "reqid"): | ||
| 121 | + record.reqid = "-1" | ||
| 122 | + if not hasattr(record, "uid"): | ||
| 123 | + record.uid = "-1" | ||
| 124 | + return super().format(record) | ||
| 125 | + | ||
| 126 | + | ||
| 108 | def setup_logging( | 127 | def setup_logging( |
| 109 | log_level: str = "INFO", | 128 | log_level: str = "INFO", |
| 110 | log_dir: str = "logs", | 129 | log_dir: str = "logs", |
| @@ -137,8 +156,8 @@ def setup_logging( | @@ -137,8 +156,8 @@ def setup_logging( | ||
| 137 | 156 | ||
| 138 | # Create formatters | 157 | # Create formatters |
| 139 | structured_formatter = StructuredFormatter() | 158 | structured_formatter = StructuredFormatter() |
| 140 | - console_formatter = logging.Formatter( | ||
| 141 | - '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s' | 159 | + console_formatter = ContextAwareConsoleFormatter( |
| 160 | + '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s' | ||
| 142 | ) | 161 | ) |
| 143 | 162 | ||
| 144 | # Add console handler | 163 | # Add console handler |