Blame view

utils/logger.py 8.73 KB
16c42787   tangwang   feat: implement r...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  """
  Search Engine structured logging utilities
  
  Provides request-scoped logging with automatic context injection,
  structured JSON output, and daily log rotation.
  """
  
  import logging
  import logging.handlers
  import json
  import sys
  import os
  from datetime import datetime
  from typing import Any, Dict, Optional
  from pathlib import Path
  
  
  class StructuredFormatter(logging.Formatter):
      """Format LogRecords as single-line JSON documents.

      Each record yields a compact JSON object with timestamp / level /
      logger / message / module / function / line.  When the record
      carries ``reqid`` / ``uid`` attributes (injected by
      ``RequestContextFilter`` or passed via ``extra``), a
      ``request_context`` object is added; a missing half is normalized
      to "-1" so correlation queries always find both keys.  An
      ``extra_data`` attribute, exception info, and stack info are
      serialized when present.
      """

      # NOTE: the original defined a no-op __init__ that only called
      # super().__init__(); it is redundant and has been removed.

      def format(self, record: logging.LogRecord) -> str:
          """Return *record* serialized as a compact JSON string."""

          # Base entry.  NOTE(review): timestamp is naive local time
          # (matches original behavior); pass tz= to fromtimestamp if
          # UTC output is required.
          log_entry: Dict[str, Any] = {
              'timestamp': datetime.fromtimestamp(record.created).isoformat(),
              'level': record.levelname,
              'logger': record.name,
              'message': record.getMessage(),
              'module': record.module,
              'function': record.funcName,
              'line': record.lineno,
          }

          # Request context: emitted whenever either half is present;
          # "-1" stands in for the missing half for easier correlation.
          reqid = getattr(record, 'reqid', None)
          uid = getattr(record, 'uid', None)
          if reqid is not None or uid is not None:
              log_entry['request_context'] = {
                  'reqid': reqid if reqid is not None else "-1",
                  'uid': uid if uid is not None else "-1",
              }

          # Arbitrary structured payload attached via the extra_data attribute.
          extra_data = getattr(record, 'extra_data', None)
          if extra_data:
              log_entry['data'] = extra_data

          # Exception traceback, if the record was logged with exc_info.
          if record.exc_info:
              log_entry['exception'] = self.formatException(record.exc_info)

          # Caller stack, if the record was logged with stack_info=True.
          if record.stack_info:
              log_entry['stack_trace'] = self.formatStack(record.stack_info)

          return json.dumps(log_entry, ensure_ascii=False, separators=(',', ':'))
  
  
  def _log_with_context(logger: logging.Logger, level: int, msg: str, **kwargs):
      """Helper function to log with context parameters"""
      # Filter out our custom parameters that shouldn't go to the record
      context_kwargs = {}
      for key in ['reqid', 'uid', 'extra_data']:
          if key in kwargs:
              context_kwargs[key] = kwargs.pop(key)
  
      # Add context parameters to the record
      if context_kwargs:
          old_factory = logging.getLogRecordFactory()
  
          def record_factory(*args, **factory_kwargs):
              record = old_factory(*args, **factory_kwargs)
              for key, value in context_kwargs.items():
                  setattr(record, key, value)
              return record
  
          logging.setLogRecordFactory(record_factory)
  
      try:
          logger.log(level, msg, **kwargs)
      finally:
          # Restore original factory
          if context_kwargs:
              logging.setLogRecordFactory(old_factory)
  
  
  class RequestContextFilter(logging.Filter):
      """Filter that automatically injects request context from thread-local storage"""

      def filter(self, record: logging.LogRecord) -> bool:
          """Copy reqid/uid from the current request context onto *record*.

          Always returns True — this filter never drops records, it only
          enriches them.  Records logged outside a request (startup code,
          background jobs) are left untouched.
          """
          try:
              # Imported lazily to avoid a circular import at module load.
              from context.request_context import get_current_request_context
          except ImportError:
              return True
          try:
              ctx = get_current_request_context()
              if ctx:
                  # Every request-scoped record carries both identifiers;
                  # absent/falsy values fall back to "-1".
                  record.reqid = getattr(ctx, "reqid", None) or "-1"
                  record.uid = getattr(ctx, "uid", None) or "-1"
          except AttributeError:
              pass
          return True
  
  
99bea633   tangwang   add logs
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
  class ContextAwareConsoleFormatter(logging.Formatter):
      """
      Console formatter that injects reqid/uid into the log line.

      For non-request logs (no context), reqid/uid will be "-1".
      """

      def format(self, record: logging.LogRecord) -> str:
          # Backfill defaults on the record itself so a format string
          # containing %(reqid)s / %(uid)s can never fail on plain
          # records.  LogRecord stores all attributes in __dict__, so
          # setdefault matches the original hasattr checks exactly.
          record.__dict__.setdefault("reqid", "-1")
          record.__dict__.setdefault("uid", "-1")
          return super().format(record)
  
  
16c42787   tangwang   feat: implement r...
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
  def setup_logging(
      log_level: str = "INFO",
      log_dir: str = "logs",
      enable_console: bool = True,
      enable_file: bool = True
  ) -> None:
      """
      Setup structured logging for the Search Engine application

      Console output is human-readable (with reqid/uid columns); file
      output is structured JSON with daily rotation at midnight.
      Re-running this function reconfigures logging from scratch.

      Args:
          log_level: Logging level name (DEBUG, INFO, WARNING, ERROR,
              CRITICAL); unknown names fall back to INFO
          log_dir: Directory for log files (created if missing)
          enable_console: Enable console output on stdout
          enable_file: Enable file output with daily rotation
      """

      # Convert string log level; unknown names fall back to INFO.
      numeric_level = getattr(logging, log_level.upper(), logging.INFO)

      # Create log directory
      log_path = Path(log_dir)
      log_path.mkdir(parents=True, exist_ok=True)

      # Create root logger
      root_logger = logging.getLogger()
      root_logger.setLevel(numeric_level)

      # Close existing handlers before dropping them so repeated calls
      # do not leak open file descriptors — handlers.clear() alone never
      # closes the underlying streams/files.
      for handler in root_logger.handlers[:]:
          try:
              handler.close()
          except Exception:
              pass  # best-effort: cleanup must never break reconfiguration
          root_logger.removeHandler(handler)

      # Create formatters
      structured_formatter = StructuredFormatter()
      console_formatter = ContextAwareConsoleFormatter(
          '%(asctime)s | reqid:%(reqid)s | uid:%(uid)s | %(levelname)-8s | %(name)-15s | %(message)s'
      )

      # Add console handler
      if enable_console:
          console_handler = logging.StreamHandler(sys.stdout)
          console_handler.setLevel(numeric_level)
          console_handler.setFormatter(console_formatter)
          console_handler.addFilter(RequestContextFilter())
          root_logger.addHandler(console_handler)

      # Add file handlers with daily rotation
      if enable_file:
          # Main log: every record at the configured level, JSON lines.
          file_handler = logging.handlers.TimedRotatingFileHandler(
              filename=log_path / "search_engine.log",
              when='midnight',
              interval=1,
              backupCount=30,  # Keep 30 days of logs
              encoding='utf-8'
          )
          file_handler.setLevel(numeric_level)
          file_handler.setFormatter(structured_formatter)
          file_handler.addFilter(RequestContextFilter())
          root_logger.addHandler(file_handler)

          # Separate error-only log file for quick triage.
          error_handler = logging.handlers.TimedRotatingFileHandler(
              filename=log_path / "errors.log",
              when='midnight',
              interval=1,
              backupCount=30,
              encoding='utf-8'
          )
          error_handler.setLevel(logging.ERROR)
          error_handler.setFormatter(structured_formatter)
          error_handler.addFilter(RequestContextFilter())
          root_logger.addHandler(error_handler)

      # Quiet down chatty third-party loggers.
      configure_specific_loggers()
  
  
  def configure_specific_loggers():
      """Quiet noisy third-party loggers by raising them to WARNING."""

      # Suppress chatty debug/info output from HTTP and ML libraries.
      for noisy_logger in (
          'urllib3.connectionpool',     # HTTP connection pool chatter
          'elasticsearch',              # ES client debug logs
          'requests.packages.urllib3',
          'transformers',               # transformer model logs
          'tokenizers',
      ):
          logging.getLogger(noisy_logger).setLevel(logging.WARNING)
  
  
  def get_logger(name: str) -> logging.Logger:
      """
      Get a structured logger with request context support

      Thin wrapper over ``logging.getLogger`` — the structured behavior
      comes from the root handlers installed by ``setup_logging``.

      Args:
          name: Logger name (usually __name__)

      Returns:
          Configured logger instance
      """
      return logging.getLogger(name)
  
  
  # Convenience functions for different log levels
  def log_debug(message: str, **kwargs) -> None:
      """Log a DEBUG message on the root logger.

      Keyword arguments become attributes of the emitted LogRecord (via
      ``extra``), e.g. reqid/uid/extra_data consumed by the formatters.
      """
      logging.getLogger().debug(message, extra=kwargs)
  
  
  def log_info(message: str, **kwargs) -> None:
      """Log an INFO message on the root logger.

      Keyword arguments become attributes of the emitted LogRecord (via
      ``extra``), e.g. reqid/uid/extra_data consumed by the formatters.
      """
      logging.getLogger().info(message, extra=kwargs)
  
  
  def log_warning(message: str, **kwargs) -> None:
      """Log a WARNING message on the root logger.

      Keyword arguments become attributes of the emitted LogRecord (via
      ``extra``), e.g. reqid/uid/extra_data consumed by the formatters.
      """
      logging.getLogger().warning(message, extra=kwargs)
  
  
  def log_error(message: str, **kwargs) -> None:
      """Log an ERROR message on the root logger.

      Keyword arguments become attributes of the emitted LogRecord (via
      ``extra``), e.g. reqid/uid/extra_data consumed by the formatters.
      """
      logging.getLogger().error(message, extra=kwargs)
  
  
  def log_critical(message: str, **kwargs) -> None:
      """Log a CRITICAL message on the root logger.

      Keyword arguments become attributes of the emitted LogRecord (via
      ``extra``), e.g. reqid/uid/extra_data consumed by the formatters.
      """
      logging.getLogger().critical(message, extra=kwargs)
  
  
  # Initialize logging on module import
  def _init_logging():
      """Initialize logging with default configuration.

      Runs only when no handlers are installed yet, so applications that
      configure logging before importing this module are left alone.
      Defaults are taken from the LOG_LEVEL / LOG_DIR environment
      variables.
      """
      root = logging.getLogger()
      if root.handlers:
          return
      setup_logging(
          log_level=os.getenv('LOG_LEVEL', 'INFO'),
          log_dir=os.getenv('LOG_DIR', 'logs'),
          enable_console=True,
          enable_file=True
      )
  
  
  # Auto-initialize when module is imported
  _init_logging()