Blame view

utils/logger.py 7.89 KB
16c42787   tangwang   feat: implement r...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
  """
  Search Engine structured logging utilities
  
  Provides request-scoped logging with automatic context injection,
  structured JSON output, and daily log rotation.
  """
  
  import logging
  import logging.handlers
  import json
  import sys
  import os
  from datetime import datetime
  from typing import Any, Dict, Optional
  from pathlib import Path
  
4650fcec   tangwang   日志优化、日志串联(uid rqid)
17
18
  from request_log_context import LOG_LINE_FORMAT, RequestLogContextFilter
  
16c42787   tangwang   feat: implement r...
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
  
  class StructuredFormatter(logging.Formatter):
      """Structured JSON formatter with request context support"""
  
      def __init__(self):
          super().__init__()
  
      def format(self, record: logging.LogRecord) -> str:
          """Format log record as structured JSON"""
  
          # Build base log entry
          log_entry = {
              'timestamp': datetime.fromtimestamp(record.created).isoformat(),
              'level': record.levelname,
              'logger': record.name,
              'message': record.getMessage(),
              'module': record.module,
              'function': record.funcName,
              'line': record.lineno
          }
  
          # Add request context if available
          reqid = getattr(record, 'reqid', None)
          uid = getattr(record, 'uid', None)
99bea633   tangwang   add logs
43
44
          if reqid is not None or uid is not None:
              # Normalize missing values to "-1" for easier correlation
16c42787   tangwang   feat: implement r...
45
              log_entry['request_context'] = {
99bea633   tangwang   add logs
46
47
                  'reqid': reqid if reqid is not None else "-1",
                  'uid': uid if uid is not None else "-1"
16c42787   tangwang   feat: implement r...
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
              }
  
          # Add extra data if available
          extra_data = getattr(record, 'extra_data', None)
          if extra_data:
              log_entry['data'] = extra_data
  
          # Add exception info if present
          if record.exc_info:
              log_entry['exception'] = self.formatException(record.exc_info)
  
          # Add stack trace if available
          if record.stack_info:
              log_entry['stack_trace'] = self.formatStack(record.stack_info)
  
          return json.dumps(log_entry, ensure_ascii=False, separators=(',', ':'))
  
  
  def _log_with_context(logger: logging.Logger, level: int, msg: str, **kwargs):
      """Helper function to log with context parameters"""
      # Filter out our custom parameters that shouldn't go to the record
      context_kwargs = {}
      for key in ['reqid', 'uid', 'extra_data']:
          if key in kwargs:
              context_kwargs[key] = kwargs.pop(key)
  
      # Add context parameters to the record
      if context_kwargs:
          old_factory = logging.getLogRecordFactory()
  
          def record_factory(*args, **factory_kwargs):
              record = old_factory(*args, **factory_kwargs)
              for key, value in context_kwargs.items():
                  setattr(record, key, value)
              return record
  
          logging.setLogRecordFactory(record_factory)
  
      try:
          logger.log(level, msg, **kwargs)
      finally:
          # Restore original factory
          if context_kwargs:
              logging.setLogRecordFactory(old_factory)
  
  
99bea633   tangwang   add logs
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  class ContextAwareConsoleFormatter(logging.Formatter):
      """
      Console formatter that injects reqid/uid into the log line.
  
      For non-request logs (no context), reqid/uid will be "-1".
      """
  
      def format(self, record: logging.LogRecord) -> str:
          # Provide safe defaults so format string never fails
          if not hasattr(record, "reqid"):
              record.reqid = "-1"
          if not hasattr(record, "uid"):
              record.uid = "-1"
          return super().format(record)
  
  
16c42787   tangwang   feat: implement r...
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
  def setup_logging(
      log_level: str = "INFO",
      log_dir: str = "logs",
      enable_console: bool = True,
      enable_file: bool = True
  ) -> None:
      """
      Setup structured logging for the Search Engine application
  
      Args:
          log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
          log_dir: Directory for log files
          enable_console: Enable console output
          enable_file: Enable file output with daily rotation
      """
  
      # Convert string log level
      numeric_level = getattr(logging, log_level.upper(), logging.INFO)
  
      # Create log directory
      log_path = Path(log_dir)
      log_path.mkdir(parents=True, exist_ok=True)
  
      # Create root logger
      root_logger = logging.getLogger()
      root_logger.setLevel(numeric_level)
  
      # Clear existing handlers
      root_logger.handlers.clear()
  
      # Create formatters
      structured_formatter = StructuredFormatter()
4650fcec   tangwang   日志优化、日志串联(uid rqid)
142
      console_formatter = ContextAwareConsoleFormatter(LOG_LINE_FORMAT)
16c42787   tangwang   feat: implement r...
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
  
      # Add console handler
      if enable_console:
          console_handler = logging.StreamHandler(sys.stdout)
          console_handler.setLevel(numeric_level)
          console_handler.setFormatter(console_formatter)
          console_handler.addFilter(RequestContextFilter())
          root_logger.addHandler(console_handler)
  
      # Add file handler with daily rotation
      if enable_file:
          # Daily rotating file handler
          file_handler = logging.handlers.TimedRotatingFileHandler(
              filename=log_path / "search_engine.log",
              when='midnight',
              interval=1,
              backupCount=30,  # Keep 30 days of logs
              encoding='utf-8'
          )
          file_handler.setLevel(numeric_level)
          file_handler.setFormatter(structured_formatter)
          file_handler.addFilter(RequestContextFilter())
          root_logger.addHandler(file_handler)
  
          # Separate error log file
          error_handler = logging.handlers.TimedRotatingFileHandler(
              filename=log_path / "errors.log",
              when='midnight',
              interval=1,
              backupCount=30,
              encoding='utf-8'
          )
          error_handler.setLevel(logging.ERROR)
          error_handler.setFormatter(structured_formatter)
          error_handler.addFilter(RequestContextFilter())
          root_logger.addHandler(error_handler)
  
      # Configure specific loggers
      configure_specific_loggers()
  
  
  def configure_specific_loggers():
      """Configure logging levels for specific components"""
  
      # Set specific logger levels
      loggers_config = {
          'urllib3.connectionpool': logging.WARNING,  # Suppress HTTP connection logs
          'elasticsearch': logging.WARNING,  # Suppress ES client debug logs
          'requests.packages.urllib3': logging.WARNING,
          'transformers': logging.WARNING,  # Suppress transformer model logs
          'tokenizers': logging.WARNING,
      }
  
      for logger_name, level in loggers_config.items():
          logging.getLogger(logger_name).setLevel(level)
  
  
  def get_logger(name: str) -> logging.Logger:
      """
      Get a structured logger with request context support
  
      Args:
          name: Logger name (usually __name__)
  
      Returns:
          Configured logger instance
      """
      return logging.getLogger(name)
  
  
  # Convenience functions for different log levels
  def log_debug(message: str, **kwargs) -> None:
      """Log debug message with optional context data"""
      logger = logging.getLogger()
      logger.debug(message, extra=kwargs)
  
  
  def log_info(message: str, **kwargs) -> None:
      """Log info message with optional context data"""
      logger = logging.getLogger()
      logger.info(message, extra=kwargs)
  
  
  def log_warning(message: str, **kwargs) -> None:
      """Log warning message with optional context data"""
      logger = logging.getLogger()
      logger.warning(message, extra=kwargs)
  
  
  def log_error(message: str, **kwargs) -> None:
      """Log error message with optional context data"""
      logger = logging.getLogger()
      logger.error(message, extra=kwargs)
  
  
  def log_critical(message: str, **kwargs) -> None:
      """Log critical message with optional context data"""
      logger = logging.getLogger()
      logger.critical(message, extra=kwargs)
  
  
  # Initialize logging on module import
  def _init_logging():
      """Initialize logging with default configuration"""
      if not logging.getLogger().handlers:
          setup_logging(
              log_level=os.getenv('LOG_LEVEL', 'INFO'),
              log_dir=os.getenv('LOG_DIR', 'logs'),
              enable_console=True,
              enable_file=True
          )
  
  
  # Auto-initialize when module is imported
  _init_logging()