""" Search Engine structured logging utilities Provides request-scoped logging with automatic context injection, structured JSON output, and daily log rotation. """ import logging import logging.handlers import json import sys import os from datetime import datetime from typing import Any, Dict, Optional from pathlib import Path class StructuredFormatter(logging.Formatter): """Structured JSON formatter with request context support""" def __init__(self): super().__init__() def format(self, record: logging.LogRecord) -> str: """Format log record as structured JSON""" # Build base log entry log_entry = { 'timestamp': datetime.fromtimestamp(record.created).isoformat(), 'level': record.levelname, 'logger': record.name, 'message': record.getMessage(), 'module': record.module, 'function': record.funcName, 'line': record.lineno } # Add request context if available reqid = getattr(record, 'reqid', None) uid = getattr(record, 'uid', None) if reqid or uid: log_entry['request_context'] = { 'reqid': reqid, 'uid': uid } # Add extra data if available extra_data = getattr(record, 'extra_data', None) if extra_data: log_entry['data'] = extra_data # Add exception info if present if record.exc_info: log_entry['exception'] = self.formatException(record.exc_info) # Add stack trace if available if record.stack_info: log_entry['stack_trace'] = self.formatStack(record.stack_info) return json.dumps(log_entry, ensure_ascii=False, separators=(',', ':')) def _log_with_context(logger: logging.Logger, level: int, msg: str, **kwargs): """Helper function to log with context parameters""" # Filter out our custom parameters that shouldn't go to the record context_kwargs = {} for key in ['reqid', 'uid', 'extra_data']: if key in kwargs: context_kwargs[key] = kwargs.pop(key) # Add context parameters to the record if context_kwargs: old_factory = logging.getLogRecordFactory() def record_factory(*args, **factory_kwargs): record = old_factory(*args, **factory_kwargs) for key, value in context_kwargs.items(): setattr(record, key, value) return record logging.setLogRecordFactory(record_factory) try: logger.log(level, msg, **kwargs) finally: # Restore original factory if context_kwargs: logging.setLogRecordFactory(old_factory) class RequestContextFilter(logging.Filter): """Filter that automatically injects request context from thread-local storage""" def filter(self, record: logging.LogRecord) -> bool: """Inject request context from thread-local storage""" try: # Import here to avoid circular imports from context.request_context import get_current_request_context context = get_current_request_context() if context: record.reqid = context.reqid record.uid = context.uid except (ImportError, AttributeError): pass return True def setup_logging( log_level: str = "INFO", log_dir: str = "logs", enable_console: bool = True, enable_file: bool = True ) -> None: """ Setup structured logging for the Search Engine application Args: log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) log_dir: Directory for log files enable_console: Enable console output enable_file: Enable file output with daily rotation """ # Convert string log level numeric_level = getattr(logging, log_level.upper(), logging.INFO) # Create log directory log_path = Path(log_dir) log_path.mkdir(parents=True, exist_ok=True) # Create root logger root_logger = logging.getLogger() root_logger.setLevel(numeric_level) # Clear existing handlers root_logger.handlers.clear() # Create formatters structured_formatter = StructuredFormatter() console_formatter = logging.Formatter( '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s' ) # Add console handler if enable_console: console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(numeric_level) console_handler.setFormatter(console_formatter) console_handler.addFilter(RequestContextFilter()) root_logger.addHandler(console_handler) # Add file handler with daily rotation if enable_file: # Daily rotating file handler file_handler = logging.handlers.TimedRotatingFileHandler( filename=log_path / "search_engine.log", when='midnight', interval=1, backupCount=30, # Keep 30 days of logs encoding='utf-8' ) file_handler.setLevel(numeric_level) file_handler.setFormatter(structured_formatter) file_handler.addFilter(RequestContextFilter()) root_logger.addHandler(file_handler) # Separate error log file error_handler = logging.handlers.TimedRotatingFileHandler( filename=log_path / "errors.log", when='midnight', interval=1, backupCount=30, encoding='utf-8' ) error_handler.setLevel(logging.ERROR) error_handler.setFormatter(structured_formatter) error_handler.addFilter(RequestContextFilter()) root_logger.addHandler(error_handler) # Configure specific loggers configure_specific_loggers() def configure_specific_loggers(): """Configure logging levels for specific components""" # Set specific logger levels loggers_config = { 'urllib3.connectionpool': logging.WARNING, # Suppress HTTP connection logs 'elasticsearch': logging.WARNING, # Suppress ES client debug logs 'requests.packages.urllib3': logging.WARNING, 'transformers': logging.WARNING, # Suppress transformer model logs 'tokenizers': logging.WARNING, } for logger_name, level in loggers_config.items(): logging.getLogger(logger_name).setLevel(level) def get_logger(name: str) -> logging.Logger: """ Get a structured logger with request context support Args: name: Logger name (usually __name__) Returns: Configured logger instance """ return logging.getLogger(name) # Convenience functions for different log levels def log_debug(message: str, **kwargs) -> None: """Log debug message with optional context data""" logger = logging.getLogger() logger.debug(message, extra=kwargs) def log_info(message: str, **kwargs) -> None: """Log info message with optional context data""" logger = logging.getLogger() logger.info(message, extra=kwargs) def log_warning(message: str, **kwargs) -> None: """Log warning message with optional context data""" logger = logging.getLogger() logger.warning(message, extra=kwargs) def log_error(message: str, **kwargs) -> None: """Log error message with optional context data""" logger = logging.getLogger() logger.error(message, extra=kwargs) def log_critical(message: str, **kwargs) -> None: """Log critical message with optional context data""" logger = logging.getLogger() logger.critical(message, extra=kwargs) # Initialize logging on module import def _init_logging(): """Initialize logging with default configuration""" if not logging.getLogger().handlers: setup_logging( log_level=os.getenv('LOG_LEVEL', 'INFO'), log_dir=os.getenv('LOG_DIR', 'logs'), enable_console=True, enable_file=True ) # Auto-initialize when module is imported _init_logging()