Commit 99bea633a7b77a90f83fcefab602ceb822ccc52b

Authored by tangwang
1 parent e2539fd3

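add logs: switch request/context log messages to English, log search parameters and the compact ES query, use "-1" as the placeholder uid, and include reqid/uid in console log output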

api/routes/search.py
@@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]: @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]:
24 # Try to get request ID from headers 24 # Try to get request ID from headers
25 reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8] 25 reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8]
26 26
27 - # Try to get user ID from headers or default to anonymous  
28 - uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous' 27 + # Try to get user ID from headers; if not found, use "-1" for correlation
  28 + uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1"
29 29
30 return reqid, uid 30 return reqid, uid
31 31
@@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request): @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request):
70 set_current_request_context(context) 70 set_current_request_context(context)
71 71
72 try: 72 try:
73 - # Log request start 73 + # Log request start (English logs, with key search parameters)
  74 + client_ip = http_request.client.host if http_request.client else "unknown"
  75 + user_agent = http_request.headers.get("User-Agent", "unknown")[:200]
74 context.logger.info( 76 context.logger.info(
75 - f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | "  
76 - f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}", 77 + "Received search request | "
  78 + f"Tenant: {tenant_id} | "
  79 + f"Query: {request.query} | "
  80 + f"IP: {client_ip} | "
  81 + f"User agent: {user_agent} | "
  82 + f"size: {request.size} | from: {request.from_} | "
  83 + f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | "
  84 + f"min_score: {request.min_score} | "
  85 + f"language: {request.language} | "
  86 + f"debug: {request.debug} | "
  87 + f"sku_filter_dimension: {request.sku_filter_dimension} | "
  88 + f"filters: {request.filters} | "
  89 + f"range_filters: {request.range_filters} | "
  90 + f"facets: {request.facets}",
77 extra={'reqid': context.reqid, 'uid': context.uid} 91 extra={'reqid': context.reqid, 'uid': context.uid}
78 ) 92 )
79 93
@@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request): @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request):
121 if context: 135 if context:
122 context.set_error(e) 136 context.set_error(e)
123 context.logger.error( 137 context.logger.error(
124 - f"搜索请求失败 | 错误: {str(e)}", 138 + f"Search request failed | error: {str(e)}",
125 extra={'reqid': context.reqid, 'uid': context.uid} 139 extra={'reqid': context.reqid, 'uid': context.uid}
126 ) 140 )
127 raise HTTPException(status_code=500, detail=str(e)) 141 raise HTTPException(status_code=500, detail=str(e))
@@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
164 set_current_request_context(context) 178 set_current_request_context(context)
165 179
166 try: 180 try:
167 - # Log request start 181 + # Log request start for image search (English)
  182 + client_ip = http_request.client.host if http_request.client else "unknown"
168 context.logger.info( 183 context.logger.info(
169 - f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | "  
170 - f"IP: {http_request.client.host if http_request.client else 'unknown'}", 184 + "Received image search request | "
  185 + f"Tenant: {tenant_id} | "
  186 + f"Image URL: {request.image_url} | "
  187 + f"IP: {client_ip}",
171 extra={'reqid': context.reqid, 'uid': context.uid} 188 extra={'reqid': context.reqid, 'uid': context.uid}
172 ) 189 )
173 190
@@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
202 if context: 219 if context:
203 context.set_error(e) 220 context.set_error(e)
204 context.logger.error( 221 context.logger.error(
205 - f"图片搜索请求参数错误 | 错误: {str(e)}", 222 + f"Image search request parameter error | error: {str(e)}",
206 extra={'reqid': context.reqid, 'uid': context.uid} 223 extra={'reqid': context.reqid, 'uid': context.uid}
207 ) 224 )
208 raise HTTPException(status_code=400, detail=str(e)) 225 raise HTTPException(status_code=400, detail=str(e))
@@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
210 if context: 227 if context:
211 context.set_error(e) 228 context.set_error(e)
212 context.logger.error( 229 context.logger.error(
213 - f"图片搜索请求失败 | 错误: {str(e)}", 230 + f"Image search request failed | error: {str(e)}",
214 extra={'reqid': context.reqid, 'uid': context.uid} 231 extra={'reqid': context.reqid, 'uid': context.uid}
215 ) 232 )
216 raise HTTPException(status_code=500, detail=str(e)) 233 raise HTTPException(status_code=500, detail=str(e))
context/request_context.py
@@ -59,9 +59,10 @@ class RequestContext: @@ -59,9 +59,10 @@ class RequestContext:
59 """ 59 """
60 60
61 def __init__(self, reqid: str = None, uid: str = None): 61 def __init__(self, reqid: str = None, uid: str = None):
62 - # 生成唯一请求ID 62 + # 生成唯一请求ID;如果外部未提供,则自动生成
  63 + # 如果无法获取到 uid,则使用 "-1" 作为占位,用于日志关联
63 self.reqid = reqid or str(uuid.uuid4())[:8] 64 self.reqid = reqid or str(uuid.uuid4())[:8]
64 - self.uid = uid or 'anonymous' 65 + self.uid = uid or "-1"
65 66
66 # 查询分析结果 67 # 查询分析结果
67 self.query_analysis = QueryAnalysisResult() 68 self.query_analysis = QueryAnalysisResult()
@@ -111,7 +112,10 @@ class RequestContext: @@ -111,7 +112,10 @@ class RequestContext:
111 """ 112 """
112 start_time = time.time() 113 start_time = time.time()
113 self.performance_metrics.stage_start_times[stage.value] = start_time 114 self.performance_metrics.stage_start_times[stage.value] = start_time
114 - self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) 115 + self.logger.debug(
  116 + f"Start stage | {stage.value}",
  117 + extra={'reqid': self.reqid, 'uid': self.uid}
  118 + )
115 return start_time 119 return start_time
116 120
117 def end_stage(self, stage: RequestContextStage) -> float: 121 def end_stage(self, stage: RequestContextStage) -> float:
@@ -125,7 +129,10 @@ class RequestContext: @@ -125,7 +129,10 @@ class RequestContext:
125 阶段耗时(毫秒) 129 阶段耗时(毫秒)
126 """ 130 """
127 if stage.value not in self.performance_metrics.stage_start_times: 131 if stage.value not in self.performance_metrics.stage_start_times:
128 - self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid}) 132 + self.logger.warning(
  133 + f"Stage not started | {stage.value}",
  134 + extra={'reqid': self.reqid, 'uid': self.uid}
  135 + )
129 return 0.0 136 return 0.0
130 137
131 start_time = self.performance_metrics.stage_start_times[stage.value] 138 start_time = self.performance_metrics.stage_start_times[stage.value]
@@ -133,7 +140,7 @@ class RequestContext: @@ -133,7 +140,7 @@ class RequestContext:
133 self.performance_metrics.stage_timings[stage.value] = duration_ms 140 self.performance_metrics.stage_timings[stage.value] = duration_ms
134 141
135 self.logger.debug( 142 self.logger.debug(
136 - f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms", 143 + f"End stage | {stage.value} | duration: {duration_ms:.2f}ms",
137 extra={'reqid': self.reqid, 'uid': self.uid} 144 extra={'reqid': self.reqid, 'uid': self.uid}
138 ) 145 )
139 return duration_ms 146 return duration_ms
@@ -162,7 +169,7 @@ class RequestContext: @@ -162,7 +169,7 @@ class RequestContext:
162 setattr(self.query_analysis, key, value) 169 setattr(self.query_analysis, key, value)
163 else: 170 else:
164 self.logger.warning( 171 self.logger.warning(
165 - f"未知的查询分析字段 | {key}", 172 + f"Unknown query analysis field | {key}",
166 extra={'reqid': self.reqid, 'uid': self.uid} 173 extra={'reqid': self.reqid, 'uid': self.uid}
167 ) 174 )
168 175
@@ -175,7 +182,10 @@ class RequestContext: @@ -175,7 +182,10 @@ class RequestContext:
175 value: 结果值 182 value: 结果值
176 """ 183 """
177 self.intermediate_results[key] = value 184 self.intermediate_results[key] = value
178 - self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid}) 185 + self.logger.debug(
  186 + f"Store intermediate result | {key}",
  187 + extra={'reqid': self.reqid, 'uid': self.uid}
  188 + )
179 189
180 def get_intermediate_result(self, key: str, default: Any = None) -> Any: 190 def get_intermediate_result(self, key: str, default: Any = None) -> Any:
181 """ 191 """
@@ -213,7 +223,7 @@ class RequestContext: @@ -213,7 +223,7 @@ class RequestContext:
213 'details': {} 223 'details': {}
214 } 224 }
215 self.logger.error( 225 self.logger.error(
216 - f"设置错误信息 | {type(error).__name__}: {str(error)}", 226 + f"Set error info | {type(error).__name__}: {str(error)}",
217 extra={'reqid': self.reqid, 'uid': self.uid} 227 extra={'reqid': self.reqid, 'uid': self.uid}
218 ) 228 )
219 229
@@ -286,13 +296,13 @@ class RequestContext: @@ -286,13 +296,13 @@ class RequestContext:
286 296
287 # 构建详细的日志消息 297 # 构建详细的日志消息
288 msg_parts = [ 298 msg_parts = [
289 - f"搜索请求性能摘要 | reqid: {self.reqid}",  
290 - f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms" 299 + f"Search request performance summary | reqid: {self.reqid}",
  300 + f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms"
291 ] 301 ]
292 302
293 # 添加各阶段耗时 303 # 添加各阶段耗时
294 if summary['performance']['stage_timings_ms']: 304 if summary['performance']['stage_timings_ms']:
295 - msg_parts.append("阶段耗时:") 305 + msg_parts.append("Stage durations:")
296 for stage, duration in summary['performance']['stage_timings_ms'].items(): 306 for stage, duration in summary['performance']['stage_timings_ms'].items():
297 percentage = summary['performance']['stage_percentages'].get(stage, 0) 307 percentage = summary['performance']['stage_percentages'].get(stage, 0)
298 msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)") 308 msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)")
@@ -300,25 +310,26 @@ class RequestContext: @@ -300,25 +310,26 @@ class RequestContext:
300 # 添加查询信息 310 # 添加查询信息
301 if summary['query_analysis']['original_query']: 311 if summary['query_analysis']['original_query']:
302 msg_parts.append( 312 msg_parts.append(
303 - f"查询: '{summary['query_analysis']['original_query']}' " 313 + "Query: "
  314 + f"'{summary['query_analysis']['original_query']}' "
304 f"-> '{summary['query_analysis']['rewritten_query']}' " 315 f"-> '{summary['query_analysis']['rewritten_query']}' "
305 f"({summary['query_analysis']['detected_language']})" 316 f"({summary['query_analysis']['detected_language']})"
306 ) 317 )
307 318
308 # 添加结果统计 319 # 添加结果统计
309 msg_parts.append( 320 msg_parts.append(
310 - f"结果: {summary['results']['total_hits']} hits "  
311 - f"ES查询: {summary['results']['es_query_size']} chars" 321 + f"Results: {summary['results']['total_hits']} hits "
  322 + f"ES query size: {summary['results']['es_query_size']} chars"
312 ) 323 )
313 324
314 # 添加错误信息(如果有) 325 # 添加错误信息(如果有)
315 if summary['request_info']['has_error']: 326 if summary['request_info']['has_error']:
316 error_info = self.metadata['error_info'] 327 error_info = self.metadata['error_info']
317 - msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}") 328 + msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}")
318 329
319 # 添加警告信息(如果有) 330 # 添加警告信息(如果有)
320 if summary['request_info']['warnings_count'] > 0: 331 if summary['request_info']['warnings_count'] > 0:
321 - msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个") 332 + msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}")
322 333
323 log_message = " | ".join(msg_parts) 334 log_message = " | ".join(msg_parts)
324 335
search/searcher.py
@@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting.
5 """ 5 """
6 6
7 from typing import Dict, Any, List, Optional, Union 7 from typing import Dict, Any, List, Optional, Union
8 -import time 8 +import time, json
9 import logging 9 import logging
10 10
11 from utils.es_client import ESClient 11 from utils.es_client import ESClient
@@ -305,14 +305,14 @@ class Searcher: @@ -305,14 +305,14 @@ class Searcher:
305 context.store_intermediate_result('es_query', es_query) 305 context.store_intermediate_result('es_query', es_query)
306 context.store_intermediate_result('es_body_for_search', body_for_es) 306 context.store_intermediate_result('es_body_for_search', body_for_es)
307 307
  308 + # Serialize ES query as a compact JSON string (no spaces or newlines)
  309 + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':'))
  310 +
308 context.logger.info( 311 context.logger.info(
309 - f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | "  
310 - f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | "  
311 - f"分面: {'是' if facets else '否'}",  
312 - extra={'reqid': context.reqid, 'uid': context.uid}  
313 - )  
314 - context.logger.debug(  
315 - f"ES查询详情: {es_query}", 312 + f"ES query built | size: {len(es_query_compact)} chars | "
  313 + f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | "
  314 + f"facets: {'yes' if facets else 'no'} | "
  315 + f"query: {es_query_compact}",
316 extra={'reqid': context.reqid, 'uid': context.uid} 316 extra={'reqid': context.reqid, 'uid': context.uid}
317 ) 317 )
318 except Exception as e: 318 except Exception as e:
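(file header missing in this extract — the hunks below modify the logging setup module that defines StructuredFormatter, RequestContextFilter and setup_logging, not search/searcher.py; exact path not shown)
@@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter): @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter):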
38 # Add request context if available 38 # Add request context if available
39 reqid = getattr(record, 'reqid', None) 39 reqid = getattr(record, 'reqid', None)
40 uid = getattr(record, 'uid', None) 40 uid = getattr(record, 'uid', None)
41 - if reqid or uid: 41 + if reqid is not None or uid is not None:
  42 + # Normalize missing values to "-1" for easier correlation
42 log_entry['request_context'] = { 43 log_entry['request_context'] = {
43 - 'reqid': reqid,  
44 - 'uid': uid 44 + 'reqid': reqid if reqid is not None else "-1",
  45 + 'uid': uid if uid is not None else "-1"
45 } 46 }
46 47
47 # Add extra data if available 48 # Add extra data if available
@@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter): @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter):
98 from context.request_context import get_current_request_context 99 from context.request_context import get_current_request_context
99 context = get_current_request_context() 100 context = get_current_request_context()
100 if context: 101 if context:
101 - record.reqid = context.reqid  
102 - record.uid = context.uid 102 + # Ensure every request-scoped log record carries reqid/uid.
  103 + # If they are missing in the context, fall back to "-1".
  104 + record.reqid = getattr(context, "reqid", None) or "-1"
  105 + record.uid = getattr(context, "uid", None) or "-1"
103 except (ImportError, AttributeError): 106 except (ImportError, AttributeError):
104 pass 107 pass
105 return True 108 return True
106 109
107 110
  111 +class ContextAwareConsoleFormatter(logging.Formatter):
  112 + """
  113 + Console formatter that injects reqid/uid into the log line.
  114 +
  115 + For non-request logs (no context), reqid/uid will be "-1".
  116 + """
  117 +
  118 + def format(self, record: logging.LogRecord) -> str:
  119 + # Provide safe defaults so format string never fails
  120 + if not hasattr(record, "reqid"):
  121 + record.reqid = "-1"
  122 + if not hasattr(record, "uid"):
  123 + record.uid = "-1"
  124 + return super().format(record)
  125 +
  126 +
108 def setup_logging( 127 def setup_logging(
109 log_level: str = "INFO", 128 log_level: str = "INFO",
110 log_dir: str = "logs", 129 log_dir: str = "logs",
@@ -137,8 +156,8 @@ def setup_logging( @@ -137,8 +156,8 @@ def setup_logging(
137 156
138 # Create formatters 157 # Create formatters
139 structured_formatter = StructuredFormatter() 158 structured_formatter = StructuredFormatter()
140 - console_formatter = logging.Formatter(  
141 - '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s' 159 + console_formatter = ContextAwareConsoleFormatter(
  160 + '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s'
142 ) 161 )
143 162
144 # Add console handler 163 # Add console handler