diff --git a/api/routes/search.py b/api/routes/search.py index 3d28122..50e586f 100644 --- a/api/routes/search.py +++ b/api/routes/search.py @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: # Try to get request ID from headers reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] - # Try to get user ID from headers or default to anonymous - uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous' + # Try to get user ID from headers; if not found, use "-1" for correlation + uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1" return reqid, uid @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): set_current_request_context(context) try: - # Log request start + # Log request start (English logs, with key search parameters) + client_ip = http_request.client.host if http_request.client else "unknown" + user_agent = http_request.headers.get("User-Agent", "unknown")[:200] context.logger.info( - f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | " - f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}", + "Received search request | " + f"Tenant: {tenant_id} | " + f"Query: {request.query} | " + f"IP: {client_ip} | " + f"User agent: {user_agent} | " + f"size: {request.size} | from: {request.from_} | " + f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | " + f"min_score: {request.min_score} | " + f"language: {request.language} | " + f"debug: {request.debug} | " + f"sku_filter_dimension: {request.sku_filter_dimension} | " + f"filters: {request.filters} | " + f"range_filters: {request.range_filters} | " + f"facets: {request.facets}", extra={'reqid': context.reqid, 'uid': context.uid} ) @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): if context: context.set_error(e) context.logger.error( - f"搜索请求失败 | 错误: {str(e)}", + f"Search request failed | error: {str(e)}", extra={'reqid': context.reqid, 'uid': context.uid} ) raise HTTPException(status_code=500, detail=str(e)) @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): set_current_request_context(context) try: - # Log request start + # Log request start for image search (English) + client_ip = http_request.client.host if http_request.client else "unknown" context.logger.info( - f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | " - f"IP: {http_request.client.host if http_request.client else 'unknown'}", + "Received image search request | " + f"Tenant: {tenant_id} | " + f"Image URL: {request.image_url} | " + f"IP: {client_ip}", extra={'reqid': context.reqid, 'uid': context.uid} ) @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): if context: context.set_error(e) context.logger.error( - f"图片搜索请求参数错误 | 错误: {str(e)}", + f"Image search request parameter error | error: {str(e)}", extra={'reqid': context.reqid, 'uid': context.uid} ) raise HTTPException(status_code=400, detail=str(e)) @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): if context: context.set_error(e) context.logger.error( - f"图片搜索请求失败 | 错误: {str(e)}", + f"Image search request failed | error: {str(e)}", extra={'reqid': context.reqid, 'uid': context.uid} ) raise HTTPException(status_code=500, detail=str(e)) diff --git a/context/request_context.py b/context/request_context.py index bb24309..b37e82b 100644 --- a/context/request_context.py +++ b/context/request_context.py @@ -59,9 +59,10 @@ class RequestContext: """ def __init__(self, reqid: str = None, uid: str = None): - # 生成唯一请求ID + # 生成唯一请求ID;如果外部未提供,则自动生成 + # 如果无法获取到 uid,则使用 "-1" 作为占位,用于日志关联 self.reqid = reqid or str(uuid.uuid4())[:8] - self.uid = uid or 'anonymous' + self.uid = uid or "-1" # 查询分析结果 self.query_analysis = QueryAnalysisResult() @@ -111,7 +112,10 @@ class RequestContext: """ start_time = time.time() self.performance_metrics.stage_start_times[stage.value] = start_time - self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) + self.logger.debug( + f"Start stage | {stage.value}", + extra={'reqid': self.reqid, 'uid': self.uid} + ) return start_time def end_stage(self, stage: RequestContextStage) -> float: @@ -125,7 +129,10 @@ class RequestContext: 阶段耗时(毫秒) """ if stage.value not in self.performance_metrics.stage_start_times: - self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) + self.logger.warning( + f"Stage not started | {stage.value}", + extra={'reqid': self.reqid, 'uid': self.uid} + ) return 0.0 start_time = self.performance_metrics.stage_start_times[stage.value] @@ -133,7 +140,7 @@ class RequestContext: self.performance_metrics.stage_timings[stage.value] = duration_ms self.logger.debug( - f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms", + f"End stage | {stage.value} | duration: {duration_ms:.2f}ms", extra={'reqid': self.reqid, 'uid': self.uid} ) return duration_ms @@ -162,7 +169,7 @@ class RequestContext: setattr(self.query_analysis, key, value) else: self.logger.warning( - f"未知的查询分析字段 | {key}", + f"Unknown query analysis field | {key}", extra={'reqid': self.reqid, 'uid': self.uid} ) @@ -175,7 +182,10 @@ class RequestContext: value: 结果值 """ self.intermediate_results[key] = value - self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid}) + self.logger.debug( + f"Store intermediate result | {key}", + extra={'reqid': self.reqid, 'uid': self.uid} + ) def get_intermediate_result(self, key: str, default: Any = None) -> Any: """ @@ -213,7 +223,7 @@ class RequestContext: 'details': {} } self.logger.error( - f"设置错误信息 | {type(error).__name__}: {str(error)}", + f"Set error info | {type(error).__name__}: {str(error)}", extra={'reqid': self.reqid, 'uid': self.uid} ) @@ -286,13 +296,13 @@ class RequestContext: # 构建详细的日志消息 msg_parts = [ - f"搜索请求性能摘要 | reqid: {self.reqid}", - f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms" + f"Search request performance summary | reqid: {self.reqid}", + f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms" ] # 添加各阶段耗时 if summary['performance']['stage_timings_ms']: - msg_parts.append("阶段耗时:") + msg_parts.append("Stage durations:") for stage, duration in summary['performance']['stage_timings_ms'].items(): percentage = summary['performance']['stage_percentages'].get(stage, 0) msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") @@ -300,25 +310,26 @@ class RequestContext: # 添加查询信息 if summary['query_analysis']['original_query']: msg_parts.append( - f"查询: '{summary['query_analysis']['original_query']}' " + "Query: " + f"'{summary['query_analysis']['original_query']}' " f"-> '{summary['query_analysis']['rewritten_query']}' " f"({summary['query_analysis']['detected_language']})" ) # 添加结果统计 msg_parts.append( - f"结果: {summary['results']['total_hits']} hits " - f"ES查询: {summary['results']['es_query_size']} chars" + f"Results: {summary['results']['total_hits']} hits " + f"ES query size: {summary['results']['es_query_size']} chars" ) # 添加错误信息(如果有) if summary['request_info']['has_error']: error_info = self.metadata['error_info'] - msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}") + msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}") # 添加警告信息(如果有) if summary['request_info']['warnings_count'] > 0: - msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个") + msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}") log_message = " | ".join(msg_parts) diff --git a/search/searcher.py b/search/searcher.py index dd79fb5..c81afea 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. """ from typing import Dict, Any, List, Optional, Union -import time +import time, json import logging from utils.es_client import ESClient @@ -305,14 +305,14 @@ class Searcher: context.store_intermediate_result('es_query', es_query) context.store_intermediate_result('es_body_for_search', body_for_es) + # Serialize ES query as a compact JSON string (no spaces or newlines) + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) + context.logger.info( - f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | " - f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | " - f"分面: {'是' if facets else '否'}", - extra={'reqid': context.reqid, 'uid': context.uid} - ) - context.logger.debug( - f"ES查询详情: {es_query}", + f"ES query built | size: {len(es_query_compact)} chars | " + f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " + f"facets: {'yes' if facets else 'no'} | " + f"query: {es_query_compact}", extra={'reqid': context.reqid, 'uid': context.uid} ) except Exception as e: diff --git a/utils/logger.py b/utils/logger.py index 509a693..718947d 100644 --- a/utils/logger.py +++ b/utils/logger.py @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): # Add request context if available reqid = getattr(record, 'reqid', None) uid = getattr(record, 'uid', None) - if reqid or uid: + if reqid is not None or uid is not None: + # Normalize missing values to "-1" for easier correlation log_entry['request_context'] = { - 'reqid': reqid, - 'uid': uid + 'reqid': reqid if reqid is not None else "-1", + 'uid': uid if uid is not None else "-1" } # Add extra data if available @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): from context.request_context import get_current_request_context context = get_current_request_context() if context: - record.reqid = context.reqid - record.uid = context.uid + # Ensure every request-scoped log record carries reqid/uid. + # If they are missing in the context, fall back to "-1". + record.reqid = getattr(context, "reqid", None) or "-1" + record.uid = getattr(context, "uid", None) or "-1" except (ImportError, AttributeError): pass return True +class ContextAwareConsoleFormatter(logging.Formatter): + """ + Console formatter that injects reqid/uid into the log line. + + For non-request logs (no context), reqid/uid will be "-1". + """ + + def format(self, record: logging.LogRecord) -> str: + # Provide safe defaults so format string never fails + if not hasattr(record, "reqid"): + record.reqid = "-1" + if not hasattr(record, "uid"): + record.uid = "-1" + return super().format(record) + + def setup_logging( log_level: str = "INFO", log_dir: str = "logs", @@ -137,8 +156,8 @@ def setup_logging( # Create formatters structured_formatter = StructuredFormatter() - console_formatter = logging.Formatter( - '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s' + console_formatter = ContextAwareConsoleFormatter( + '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s' ) # Add console handler -- libgit2 0.21.2