Commit 99bea633a7b77a90f83fcefab602ceb822ccc52b
1 parent
e2539fd3
add logs
Showing
4 changed files
with
89 additions
and
42 deletions
Show diff stats
api/routes/search.py
| ... | ... | @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: |
| 24 | 24 | # Try to get request ID from headers |
| 25 | 25 | reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] |
| 26 | 26 | |
| 27 | - # Try to get user ID from headers or default to anonymous | |
| 28 | - uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous' | |
| 27 | + # Try to get user ID from headers; if not found, use "-1" for correlation | |
| 28 | + uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1" | |
| 29 | 29 | |
| 30 | 30 | return reqid, uid |
| 31 | 31 | |
| ... | ... | @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): |
| 70 | 70 | set_current_request_context(context) |
| 71 | 71 | |
| 72 | 72 | try: |
| 73 | - # Log request start | |
| 73 | + # Log request start (English logs, with key search parameters) | |
| 74 | + client_ip = http_request.client.host if http_request.client else "unknown" | |
| 75 | + user_agent = http_request.headers.get("User-Agent", "unknown")[:200] | |
| 74 | 76 | context.logger.info( |
| 75 | - f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | " | |
| 76 | - f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}", | |
| 77 | + "Received search request | " | |
| 78 | + f"Tenant: {tenant_id} | " | |
| 79 | + f"Query: {request.query} | " | |
| 80 | + f"IP: {client_ip} | " | |
| 81 | + f"User agent: {user_agent} | " | |
| 82 | + f"size: {request.size} | from: {request.from_} | " | |
| 83 | + f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | " | |
| 84 | + f"min_score: {request.min_score} | " | |
| 85 | + f"language: {request.language} | " | |
| 86 | + f"debug: {request.debug} | " | |
| 87 | + f"sku_filter_dimension: {request.sku_filter_dimension} | " | |
| 88 | + f"filters: {request.filters} | " | |
| 89 | + f"range_filters: {request.range_filters} | " | |
| 90 | + f"facets: {request.facets}", | |
| 77 | 91 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 78 | 92 | ) |
| 79 | 93 | |
| ... | ... | @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): |
| 121 | 135 | if context: |
| 122 | 136 | context.set_error(e) |
| 123 | 137 | context.logger.error( |
| 124 | - f"搜索请求失败 | 错误: {str(e)}", | |
| 138 | + f"Search request failed | error: {str(e)}", | |
| 125 | 139 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 126 | 140 | ) |
| 127 | 141 | raise HTTPException(status_code=500, detail=str(e)) |
| ... | ... | @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): |
| 164 | 178 | set_current_request_context(context) |
| 165 | 179 | |
| 166 | 180 | try: |
| 167 | - # Log request start | |
| 181 | + # Log request start for image search (English) | |
| 182 | + client_ip = http_request.client.host if http_request.client else "unknown" | |
| 168 | 183 | context.logger.info( |
| 169 | - f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | " | |
| 170 | - f"IP: {http_request.client.host if http_request.client else 'unknown'}", | |
| 184 | + "Received image search request | " | |
| 185 | + f"Tenant: {tenant_id} | " | |
| 186 | + f"Image URL: {request.image_url} | " | |
| 187 | + f"IP: {client_ip}", | |
| 171 | 188 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 172 | 189 | ) |
| 173 | 190 | |
| ... | ... | @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): |
| 202 | 219 | if context: |
| 203 | 220 | context.set_error(e) |
| 204 | 221 | context.logger.error( |
| 205 | - f"图片搜索请求参数错误 | 错误: {str(e)}", | |
| 222 | + f"Image search request parameter error | error: {str(e)}", | |
| 206 | 223 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 207 | 224 | ) |
| 208 | 225 | raise HTTPException(status_code=400, detail=str(e)) |
| ... | ... | @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): |
| 210 | 227 | if context: |
| 211 | 228 | context.set_error(e) |
| 212 | 229 | context.logger.error( |
| 213 | - f"图片搜索请求失败 | 错误: {str(e)}", | |
| 230 | + f"Image search request failed | error: {str(e)}", | |
| 214 | 231 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 215 | 232 | ) |
| 216 | 233 | raise HTTPException(status_code=500, detail=str(e)) | ... | ... |
context/request_context.py
| ... | ... | @@ -59,9 +59,10 @@ class RequestContext: |
| 59 | 59 | """ |
| 60 | 60 | |
| 61 | 61 | def __init__(self, reqid: str = None, uid: str = None): |
| 62 | - # 生成唯一请求ID | |
| 62 | + # 生成唯一请求ID;如果外部未提供,则自动生成 | |
| 63 | + # 如果无法获取到 uid,则使用 "-1" 作为占位,用于日志关联 | |
| 63 | 64 | self.reqid = reqid or str(uuid.uuid4())[:8] |
| 64 | - self.uid = uid or 'anonymous' | |
| 65 | + self.uid = uid or "-1" | |
| 65 | 66 | |
| 66 | 67 | # 查询分析结果 |
| 67 | 68 | self.query_analysis = QueryAnalysisResult() |
| ... | ... | @@ -111,7 +112,10 @@ class RequestContext: |
| 111 | 112 | """ |
| 112 | 113 | start_time = time.time() |
| 113 | 114 | self.performance_metrics.stage_start_times[stage.value] = start_time |
| 114 | - self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) | |
| 115 | + self.logger.debug( | |
| 116 | + f"Start stage | {stage.value}", | |
| 117 | + extra={'reqid': self.reqid, 'uid': self.uid} | |
| 118 | + ) | |
| 115 | 119 | return start_time |
| 116 | 120 | |
| 117 | 121 | def end_stage(self, stage: RequestContextStage) -> float: |
| ... | ... | @@ -125,7 +129,10 @@ class RequestContext: |
| 125 | 129 | 阶段耗时(毫秒) |
| 126 | 130 | """ |
| 127 | 131 | if stage.value not in self.performance_metrics.stage_start_times: |
| 128 | - self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) | |
| 132 | + self.logger.warning( | |
| 133 | + f"Stage not started | {stage.value}", | |
| 134 | + extra={'reqid': self.reqid, 'uid': self.uid} | |
| 135 | + ) | |
| 129 | 136 | return 0.0 |
| 130 | 137 | |
| 131 | 138 | start_time = self.performance_metrics.stage_start_times[stage.value] |
| ... | ... | @@ -133,7 +140,7 @@ class RequestContext: |
| 133 | 140 | self.performance_metrics.stage_timings[stage.value] = duration_ms |
| 134 | 141 | |
| 135 | 142 | self.logger.debug( |
| 136 | - f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms", | |
| 143 | + f"End stage | {stage.value} | duration: {duration_ms:.2f}ms", | |
| 137 | 144 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 138 | 145 | ) |
| 139 | 146 | return duration_ms |
| ... | ... | @@ -162,7 +169,7 @@ class RequestContext: |
| 162 | 169 | setattr(self.query_analysis, key, value) |
| 163 | 170 | else: |
| 164 | 171 | self.logger.warning( |
| 165 | - f"未知的查询分析字段 | {key}", | |
| 172 | + f"Unknown query analysis field | {key}", | |
| 166 | 173 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 167 | 174 | ) |
| 168 | 175 | |
| ... | ... | @@ -175,7 +182,10 @@ class RequestContext: |
| 175 | 182 | value: 结果值 |
| 176 | 183 | """ |
| 177 | 184 | self.intermediate_results[key] = value |
| 178 | - self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid}) | |
| 185 | + self.logger.debug( | |
| 186 | + f"Store intermediate result | {key}", | |
| 187 | + extra={'reqid': self.reqid, 'uid': self.uid} | |
| 188 | + ) | |
| 179 | 189 | |
| 180 | 190 | def get_intermediate_result(self, key: str, default: Any = None) -> Any: |
| 181 | 191 | """ |
| ... | ... | @@ -213,7 +223,7 @@ class RequestContext: |
| 213 | 223 | 'details': {} |
| 214 | 224 | } |
| 215 | 225 | self.logger.error( |
| 216 | - f"设置错误信息 | {type(error).__name__}: {str(error)}", | |
| 226 | + f"Set error info | {type(error).__name__}: {str(error)}", | |
| 217 | 227 | extra={'reqid': self.reqid, 'uid': self.uid} |
| 218 | 228 | ) |
| 219 | 229 | |
| ... | ... | @@ -286,13 +296,13 @@ class RequestContext: |
| 286 | 296 | |
| 287 | 297 | # 构建详细的日志消息 |
| 288 | 298 | msg_parts = [ |
| 289 | - f"搜索请求性能摘要 | reqid: {self.reqid}", | |
| 290 | - f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms" | |
| 299 | + f"Search request performance summary | reqid: {self.reqid}", | |
| 300 | + f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms" | |
| 291 | 301 | ] |
| 292 | 302 | |
| 293 | 303 | # 添加各阶段耗时 |
| 294 | 304 | if summary['performance']['stage_timings_ms']: |
| 295 | - msg_parts.append("阶段耗时:") | |
| 305 | + msg_parts.append("Stage durations:") | |
| 296 | 306 | for stage, duration in summary['performance']['stage_timings_ms'].items(): |
| 297 | 307 | percentage = summary['performance']['stage_percentages'].get(stage, 0) |
| 298 | 308 | msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") |
| ... | ... | @@ -300,25 +310,26 @@ class RequestContext: |
| 300 | 310 | # 添加查询信息 |
| 301 | 311 | if summary['query_analysis']['original_query']: |
| 302 | 312 | msg_parts.append( |
| 303 | - f"查询: '{summary['query_analysis']['original_query']}' " | |
| 313 | + "Query: " | |
| 314 | + f"'{summary['query_analysis']['original_query']}' " | |
| 304 | 315 | f"-> '{summary['query_analysis']['rewritten_query']}' " |
| 305 | 316 | f"({summary['query_analysis']['detected_language']})" |
| 306 | 317 | ) |
| 307 | 318 | |
| 308 | 319 | # 添加结果统计 |
| 309 | 320 | msg_parts.append( |
| 310 | - f"结果: {summary['results']['total_hits']} hits " | |
| 311 | - f"ES查询: {summary['results']['es_query_size']} chars" | |
| 321 | + f"Results: {summary['results']['total_hits']} hits " | |
| 322 | + f"ES query size: {summary['results']['es_query_size']} chars" | |
| 312 | 323 | ) |
| 313 | 324 | |
| 314 | 325 | # 添加错误信息(如果有) |
| 315 | 326 | if summary['request_info']['has_error']: |
| 316 | 327 | error_info = self.metadata['error_info'] |
| 317 | - msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}") | |
| 328 | + msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}") | |
| 318 | 329 | |
| 319 | 330 | # 添加警告信息(如果有) |
| 320 | 331 | if summary['request_info']['warnings_count'] > 0: |
| 321 | - msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个") | |
| 332 | + msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}") | |
| 322 | 333 | |
| 323 | 334 | log_message = " | ".join(msg_parts) |
| 324 | 335 | ... | ... |
search/searcher.py
| ... | ... | @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | 7 | from typing import Dict, Any, List, Optional, Union |
| 8 | -import time | |
| 8 | +import time, json | |
| 9 | 9 | import logging |
| 10 | 10 | |
| 11 | 11 | from utils.es_client import ESClient |
| ... | ... | @@ -305,14 +305,14 @@ class Searcher: |
| 305 | 305 | context.store_intermediate_result('es_query', es_query) |
| 306 | 306 | context.store_intermediate_result('es_body_for_search', body_for_es) |
| 307 | 307 | |
| 308 | + # Serialize ES query as a compact JSON string (no spaces or newlines) | |
| 309 | + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) | |
| 310 | + | |
| 308 | 311 | context.logger.info( |
| 309 | - f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | " | |
| 310 | - f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | " | |
| 311 | - f"分面: {'是' if facets else '否'}", | |
| 312 | - extra={'reqid': context.reqid, 'uid': context.uid} | |
| 313 | - ) | |
| 314 | - context.logger.debug( | |
| 315 | - f"ES查询详情: {es_query}", | |
| 312 | + f"ES query built | size: {len(es_query_compact)} chars | " | |
| 313 | + f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " | |
| 314 | + f"facets: {'yes' if facets else 'no'} | " | |
| 315 | + f"query: {es_query_compact}", | |
| 316 | 316 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 317 | 317 | ) |
| 318 | 318 | except Exception as e: | ... | ... |
utils/logger.py
| ... | ... | @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): |
| 38 | 38 | # Add request context if available |
| 39 | 39 | reqid = getattr(record, 'reqid', None) |
| 40 | 40 | uid = getattr(record, 'uid', None) |
| 41 | - if reqid or uid: | |
| 41 | + if reqid is not None or uid is not None: | |
| 42 | + # Normalize missing values to "-1" for easier correlation | |
| 42 | 43 | log_entry['request_context'] = { |
| 43 | - 'reqid': reqid, | |
| 44 | - 'uid': uid | |
| 44 | + 'reqid': reqid if reqid is not None else "-1", | |
| 45 | + 'uid': uid if uid is not None else "-1" | |
| 45 | 46 | } |
| 46 | 47 | |
| 47 | 48 | # Add extra data if available |
| ... | ... | @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): |
| 98 | 99 | from context.request_context import get_current_request_context |
| 99 | 100 | context = get_current_request_context() |
| 100 | 101 | if context: |
| 101 | - record.reqid = context.reqid | |
| 102 | - record.uid = context.uid | |
| 102 | + # Ensure every request-scoped log record carries reqid/uid. | |
| 103 | + # If they are missing in the context, fall back to "-1". | |
| 104 | + record.reqid = getattr(context, "reqid", None) or "-1" | |
| 105 | + record.uid = getattr(context, "uid", None) or "-1" | |
| 103 | 106 | except (ImportError, AttributeError): |
| 104 | 107 | pass |
| 105 | 108 | return True |
| 106 | 109 | |
| 107 | 110 | |
| 111 | +class ContextAwareConsoleFormatter(logging.Formatter): | |
| 112 | + """ | |
| 113 | + Console formatter that injects reqid/uid into the log line. | |
| 114 | + | |
| 115 | + For non-request logs (no context), reqid/uid will be "-1". | |
| 116 | + """ | |
| 117 | + | |
| 118 | + def format(self, record: logging.LogRecord) -> str: | |
| 119 | + # Provide safe defaults so format string never fails | |
| 120 | + if not hasattr(record, "reqid"): | |
| 121 | + record.reqid = "-1" | |
| 122 | + if not hasattr(record, "uid"): | |
| 123 | + record.uid = "-1" | |
| 124 | + return super().format(record) | |
| 125 | + | |
| 126 | + | |
| 108 | 127 | def setup_logging( |
| 109 | 128 | log_level: str = "INFO", |
| 110 | 129 | log_dir: str = "logs", |
| ... | ... | @@ -137,8 +156,8 @@ def setup_logging( |
| 137 | 156 | |
| 138 | 157 | # Create formatters |
| 139 | 158 | structured_formatter = StructuredFormatter() |
| 140 | - console_formatter = logging.Formatter( | |
| 141 | - '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s' | |
| 159 | + console_formatter = ContextAwareConsoleFormatter( | |
| 160 | + '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s' | |
| 142 | 161 | ) |
| 143 | 162 | |
| 144 | 163 | # Add console handler | ... | ... |