Commit 99bea633a7b77a90f83fcefab602ceb822ccc52b

Authored by tangwang
1 parent e2539fd3

add logs

api/routes/search.py
... ... @@ -24,8 +24,8 @@ def extract_request_info(request: Request) -> tuple[str, str]:
24 24 # Try to get request ID from headers
25 25 reqid = request.headers.get('X-Request-ID') or str(uuid.uuid4())[:8]
26 26  
27   - # Try to get user ID from headers or default to anonymous
28   - uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or 'anonymous'
  27 + # Try to get user ID from headers; if not found, use "-1" for correlation
  28 + uid = request.headers.get('X-User-ID') or request.headers.get('User-ID') or "-1"
29 29  
30 30 return reqid, uid
31 31  
... ... @@ -70,10 +70,24 @@ async def search(request: SearchRequest, http_request: Request):
70 70 set_current_request_context(context)
71 71  
72 72 try:
73   - # Log request start
  73 + # Log request start (English logs, with key search parameters)
  74 + client_ip = http_request.client.host if http_request.client else "unknown"
  75 + user_agent = http_request.headers.get("User-Agent", "unknown")[:200]
74 76 context.logger.info(
75   - f"收到搜索请求 | Tenant: {tenant_id} | IP: {http_request.client.host if http_request.client else 'unknown'} | "
76   - f"用户代理: {http_request.headers.get('User-Agent', 'unknown')[:100]}",
  77 + "Received search request | "
  78 + f"Tenant: {tenant_id} | "
  79 + f"Query: {request.query} | "
  80 + f"IP: {client_ip} | "
  81 + f"User agent: {user_agent} | "
  82 + f"size: {request.size} | from: {request.from_} | "
  83 + f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | "
  84 + f"min_score: {request.min_score} | "
  85 + f"language: {request.language} | "
  86 + f"debug: {request.debug} | "
  87 + f"sku_filter_dimension: {request.sku_filter_dimension} | "
  88 + f"filters: {request.filters} | "
  89 + f"range_filters: {request.range_filters} | "
  90 + f"facets: {request.facets}",
77 91 extra={'reqid': context.reqid, 'uid': context.uid}
78 92 )
79 93  
... ... @@ -121,7 +135,7 @@ async def search(request: SearchRequest, http_request: Request):
121 135 if context:
122 136 context.set_error(e)
123 137 context.logger.error(
124   - f"搜索请求失败 | 错误: {str(e)}",
  138 + f"Search request failed | error: {str(e)}",
125 139 extra={'reqid': context.reqid, 'uid': context.uid}
126 140 )
127 141 raise HTTPException(status_code=500, detail=str(e))
... ... @@ -164,10 +178,13 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
164 178 set_current_request_context(context)
165 179  
166 180 try:
167   - # Log request start
  181 + # Log request start for image search (English)
  182 + client_ip = http_request.client.host if http_request.client else "unknown"
168 183 context.logger.info(
169   - f"收到图片搜索请求 | Tenant: {tenant_id} | 图片URL: {request.image_url} | "
170   - f"IP: {http_request.client.host if http_request.client else 'unknown'}",
  184 + "Received image search request | "
  185 + f"Tenant: {tenant_id} | "
  186 + f"Image URL: {request.image_url} | "
  187 + f"IP: {client_ip}",
171 188 extra={'reqid': context.reqid, 'uid': context.uid}
172 189 )
173 190  
... ... @@ -202,7 +219,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
202 219 if context:
203 220 context.set_error(e)
204 221 context.logger.error(
205   - f"图片搜索请求参数错误 | 错误: {str(e)}",
  222 + f"Image search request parameter error | error: {str(e)}",
206 223 extra={'reqid': context.reqid, 'uid': context.uid}
207 224 )
208 225 raise HTTPException(status_code=400, detail=str(e))
... ... @@ -210,7 +227,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
210 227 if context:
211 228 context.set_error(e)
212 229 context.logger.error(
213   - f"图片搜索请求失败 | 错误: {str(e)}",
  230 + f"Image search request failed | error: {str(e)}",
214 231 extra={'reqid': context.reqid, 'uid': context.uid}
215 232 )
216 233 raise HTTPException(status_code=500, detail=str(e))
... ...
context/request_context.py
... ... @@ -59,9 +59,10 @@ class RequestContext:
59 59 """
60 60  
61 61 def __init__(self, reqid: str = None, uid: str = None):
62   - # 生成唯一请求ID
  62 + # 生成唯一请求ID;如果外部未提供,则自动生成
  63 + # 如果无法获取到 uid,则使用 "-1" 作为占位,用于日志关联
63 64 self.reqid = reqid or str(uuid.uuid4())[:8]
64   - self.uid = uid or 'anonymous'
  65 + self.uid = uid or "-1"
65 66  
66 67 # 查询分析结果
67 68 self.query_analysis = QueryAnalysisResult()
... ... @@ -111,7 +112,10 @@ class RequestContext:
111 112 """
112 113 start_time = time.time()
113 114 self.performance_metrics.stage_start_times[stage.value] = start_time
114   - self.logger.debug(f"开始阶段 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid})
  115 + self.logger.debug(
  116 + f"Start stage | {stage.value}",
  117 + extra={'reqid': self.reqid, 'uid': self.uid}
  118 + )
115 119 return start_time
116 120  
117 121 def end_stage(self, stage: RequestContextStage) -> float:
... ... @@ -125,7 +129,10 @@ class RequestContext:
125 129 阶段耗时(毫秒)
126 130 """
127 131 if stage.value not in self.performance_metrics.stage_start_times:
128   - self.logger.warning(f"阶段未开始计时 | {stage.value}", extra={'reqid': self.reqid, 'uid': self.uid})
  132 + self.logger.warning(
  133 + f"Stage not started | {stage.value}",
  134 + extra={'reqid': self.reqid, 'uid': self.uid}
  135 + )
129 136 return 0.0
130 137  
131 138 start_time = self.performance_metrics.stage_start_times[stage.value]
... ... @@ -133,7 +140,7 @@ class RequestContext:
133 140 self.performance_metrics.stage_timings[stage.value] = duration_ms
134 141  
135 142 self.logger.debug(
136   - f"结束阶段 | {stage.value} | 耗时: {duration_ms:.2f}ms",
  143 + f"End stage | {stage.value} | duration: {duration_ms:.2f}ms",
137 144 extra={'reqid': self.reqid, 'uid': self.uid}
138 145 )
139 146 return duration_ms
... ... @@ -162,7 +169,7 @@ class RequestContext:
162 169 setattr(self.query_analysis, key, value)
163 170 else:
164 171 self.logger.warning(
165   - f"未知的查询分析字段 | {key}",
  172 + f"Unknown query analysis field | {key}",
166 173 extra={'reqid': self.reqid, 'uid': self.uid}
167 174 )
168 175  
... ... @@ -175,7 +182,10 @@ class RequestContext:
175 182 value: 结果值
176 183 """
177 184 self.intermediate_results[key] = value
178   - self.logger.debug(f"存储中间结果 | {key}", extra={'reqid': self.reqid, 'uid': self.uid})
  185 + self.logger.debug(
  186 + f"Store intermediate result | {key}",
  187 + extra={'reqid': self.reqid, 'uid': self.uid}
  188 + )
179 189  
180 190 def get_intermediate_result(self, key: str, default: Any = None) -> Any:
181 191 """
... ... @@ -213,7 +223,7 @@ class RequestContext:
213 223 'details': {}
214 224 }
215 225 self.logger.error(
216   - f"设置错误信息 | {type(error).__name__}: {str(error)}",
  226 + f"Set error info | {type(error).__name__}: {str(error)}",
217 227 extra={'reqid': self.reqid, 'uid': self.uid}
218 228 )
219 229  
... ... @@ -286,13 +296,13 @@ class RequestContext:
286 296  
287 297 # 构建详细的日志消息
288 298 msg_parts = [
289   - f"搜索请求性能摘要 | reqid: {self.reqid}",
290   - f"总耗时: {summary['performance']['total_duration_ms']:.2f}ms"
  299 + f"Search request performance summary | reqid: {self.reqid}",
  300 + f"Total duration: {summary['performance']['total_duration_ms']:.2f}ms"
291 301 ]
292 302  
293 303 # 添加各阶段耗时
294 304 if summary['performance']['stage_timings_ms']:
295   - msg_parts.append("阶段耗时:")
  305 + msg_parts.append("Stage durations:")
296 306 for stage, duration in summary['performance']['stage_timings_ms'].items():
297 307 percentage = summary['performance']['stage_percentages'].get(stage, 0)
298 308 msg_parts.append(f" - {stage}: {duration:.2f}ms ({percentage}%)")
... ... @@ -300,25 +310,26 @@ class RequestContext:
300 310 # 添加查询信息
301 311 if summary['query_analysis']['original_query']:
302 312 msg_parts.append(
303   - f"查询: '{summary['query_analysis']['original_query']}' "
  313 + "Query: "
  314 + f"'{summary['query_analysis']['original_query']}' "
304 315 f"-> '{summary['query_analysis']['rewritten_query']}' "
305 316 f"({summary['query_analysis']['detected_language']})"
306 317 )
307 318  
308 319 # 添加结果统计
309 320 msg_parts.append(
310   - f"结果: {summary['results']['total_hits']} hits "
311   - f"ES查询: {summary['results']['es_query_size']} chars"
  321 + f"Results: {summary['results']['total_hits']} hits "
  322 + f"ES query size: {summary['results']['es_query_size']} chars"
312 323 )
313 324  
314 325 # 添加错误信息(如果有)
315 326 if summary['request_info']['has_error']:
316 327 error_info = self.metadata['error_info']
317   - msg_parts.append(f"错误: {error_info['type']}: {error_info['message']}")
  328 + msg_parts.append(f"Error: {error_info['type']}: {error_info['message']}")
318 329  
319 330 # 添加警告信息(如果有)
320 331 if summary['request_info']['warnings_count'] > 0:
321   - msg_parts.append(f"警告: {summary['request_info']['warnings_count']} 个")
  332 + msg_parts.append(f"Warnings: {summary['request_info']['warnings_count']}")
322 333  
323 334 log_message = " | ".join(msg_parts)
324 335  
... ...
search/searcher.py
... ... @@ -5,7 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting.
5 5 """
6 6  
7 7 from typing import Dict, Any, List, Optional, Union
8   -import time
  8 +import time, json
9 9 import logging
10 10  
11 11 from utils.es_client import ESClient
... ... @@ -305,14 +305,14 @@ class Searcher:
305 305 context.store_intermediate_result('es_query', es_query)
306 306 context.store_intermediate_result('es_body_for_search', body_for_es)
307 307  
  308 + # Serialize ES query as a compact JSON string (no spaces or newlines)
  309 + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':'))
  310 +
308 311 context.logger.info(
309   - f"ES查询构建完成 | 大小: {len(str(es_query))}字符 | "
310   - f"KNN: {'是' if enable_embedding and parsed_query.query_vector is not None else '否'} | "
311   - f"分面: {'是' if facets else '否'}",
312   - extra={'reqid': context.reqid, 'uid': context.uid}
313   - )
314   - context.logger.debug(
315   - f"ES查询详情: {es_query}",
  312 + f"ES query built | size: {len(es_query_compact)} chars | "
  313 + f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | "
  314 + f"facets: {'yes' if facets else 'no'} | "
  315 + f"query: {es_query_compact}",
316 316 extra={'reqid': context.reqid, 'uid': context.uid}
317 317 )
318 318 except Exception as e:
... ...
utils/logger.py
... ... @@ -38,10 +38,11 @@ class StructuredFormatter(logging.Formatter):
38 38 # Add request context if available
39 39 reqid = getattr(record, 'reqid', None)
40 40 uid = getattr(record, 'uid', None)
41   - if reqid or uid:
  41 + if reqid is not None or uid is not None:
  42 + # Normalize missing values to "-1" for easier correlation
42 43 log_entry['request_context'] = {
43   - 'reqid': reqid,
44   - 'uid': uid
  44 + 'reqid': reqid if reqid is not None else "-1",
  45 + 'uid': uid if uid is not None else "-1"
45 46 }
46 47  
47 48 # Add extra data if available
... ... @@ -98,13 +99,31 @@ class RequestContextFilter(logging.Filter):
98 99 from context.request_context import get_current_request_context
99 100 context = get_current_request_context()
100 101 if context:
101   - record.reqid = context.reqid
102   - record.uid = context.uid
  102 + # Ensure every request-scoped log record carries reqid/uid.
  103 + # If they are missing in the context, fall back to "-1".
  104 + record.reqid = getattr(context, "reqid", None) or "-1"
  105 + record.uid = getattr(context, "uid", None) or "-1"
103 106 except (ImportError, AttributeError):
104 107 pass
105 108 return True
106 109  
107 110  
  111 +class ContextAwareConsoleFormatter(logging.Formatter):
  112 + """
  113 + Console formatter that injects reqid/uid into the log line.
  114 +
  115 + For non-request logs (no context), reqid/uid will be "-1".
  116 + """
  117 +
  118 + def format(self, record: logging.LogRecord) -> str:
  119 + # Provide safe defaults so format string never fails
  120 + if not hasattr(record, "reqid"):
  121 + record.reqid = "-1"
  122 + if not hasattr(record, "uid"):
  123 + record.uid = "-1"
  124 + return super().format(record)
  125 +
  126 +
108 127 def setup_logging(
109 128 log_level: str = "INFO",
110 129 log_dir: str = "logs",
... ... @@ -137,8 +156,8 @@ def setup_logging(
137 156  
138 157 # Create formatters
139 158 structured_formatter = StructuredFormatter()
140   - console_formatter = logging.Formatter(
141   - '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s'
  159 + console_formatter = ContextAwareConsoleFormatter(
  160 + '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s'
142 161 )
143 162  
144 163 # Add console handler
... ...