"""
Search Engine structured logging utilities

Provides request-scoped logging with automatic context injection,
structured JSON output, and daily log rotation.
"""

import logging
import logging.handlers
import json
import sys
import os
from datetime import datetime
from typing import Any, Dict, Optional
from pathlib import Path


class StructuredFormatter(logging.Formatter):
    """Structured JSON formatter with request context support"""

    def format(self, record: logging.LogRecord) -> str:
        """Format log record as structured JSON"""

        # Build base log entry
        log_entry = {
            # Local time with UTC offset so timestamps stay unambiguous
            'timestamp': datetime.fromtimestamp(record.created).astimezone().isoformat(),
            'level': record.levelname,
            'logger': record.name,
            'message': record.getMessage(),
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno
        }

        # Add request context if available
        reqid = getattr(record, 'reqid', None)
        uid = getattr(record, 'uid', None)
        if reqid or uid:
            log_entry['request_context'] = {
                'reqid': reqid,
                'uid': uid
            }

        # Add extra data if available
        extra_data = getattr(record, 'extra_data', None)
        if extra_data:
            log_entry['data'] = extra_data

        # Add exception info if present
        if record.exc_info:
            log_entry['exception'] = self.formatException(record.exc_info)

        # Add stack trace if available
        if record.stack_info:
            log_entry['stack_trace'] = self.formatStack(record.stack_info)

        return json.dumps(log_entry, ensure_ascii=False, separators=(',', ':'))
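
# Example of the JSON shape this formatter emits, with illustrative values
# (the exact timestamp, module, and line depend on the call site):
#
#   {"timestamp":"2024-01-01T12:00:00+00:00","level":"INFO",
#    "logger":"search.api","message":"query served","module":"api",
#    "function":"search","line":42,
#    "request_context":{"reqid":"abc123","uid":"u42"},
#    "data":{"latency_ms":12}}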


def _log_with_context(logger: logging.Logger, level: int, msg: str, **kwargs):
    """Helper function to log with context parameters (reqid, uid, extra_data)"""
    # Separate our custom context keys from the standard logging kwargs
    # (exc_info, stack_info, stacklevel, ...)
    context_kwargs = {key: kwargs.pop(key)
                      for key in ('reqid', 'uid', 'extra_data')
                      if key in kwargs}

    # Passing the context via `extra` sets the values as attributes directly
    # on the LogRecord, where StructuredFormatter picks them up. Unlike
    # temporarily swapping the global log record factory, this is safe under
    # concurrent requests.
    logger.log(level, msg, extra=context_kwargs, **kwargs)
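
# Example (hypothetical names and values): attach request context to a
# single call through the helper:
#
#   _log_with_context(logging.getLogger(__name__), logging.WARNING,
#                     "slow query", reqid="abc123", uid="u42",
#                     extra_data={'latency_ms': 1870})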


class RequestContextFilter(logging.Filter):
    """Filter that automatically injects request context from thread-local storage"""

    def filter(self, record: logging.LogRecord) -> bool:
        """Inject request context from thread-local storage"""
        try:
            # Import here to avoid circular imports
            from context.request_context import get_current_request_context
            context = get_current_request_context()
            if context:
                record.reqid = context.reqid
                record.uid = context.uid
        except (ImportError, AttributeError):
            pass
        return True
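
# RequestContextFilter assumes `context.request_context` exposes a
# per-request object with `reqid` and `uid` attributes. A minimal sketch of
# that contract (hypothetical; the real module lives elsewhere in the
# project):
#
#   import contextvars
#
#   _request_ctx = contextvars.ContextVar('request_context', default=None)
#
#   def get_current_request_context():
#       return _request_ctx.get()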


def setup_logging(
    log_level: str = "INFO",
    log_dir: str = "logs",
    enable_console: bool = True,
    enable_file: bool = True
) -> None:
    """
    Set up structured logging for the Search Engine application

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        log_dir: Directory for log files
        enable_console: Enable console output
        enable_file: Enable file output with daily rotation
    """

    # Convert string log level
    numeric_level = getattr(logging, log_level.upper(), logging.INFO)

    # Create log directory
    log_path = Path(log_dir)
    log_path.mkdir(parents=True, exist_ok=True)

    # Create root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Clear existing handlers
    root_logger.handlers.clear()

    # Create formatters
    structured_formatter = StructuredFormatter()
    console_formatter = logging.Formatter(
        '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s'
    )

    # Add console handler
    if enable_console:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(numeric_level)
        console_handler.setFormatter(console_formatter)
        console_handler.addFilter(RequestContextFilter())
        root_logger.addHandler(console_handler)

    # Add file handler with daily rotation
    if enable_file:
        # Daily rotating file handler
        file_handler = logging.handlers.TimedRotatingFileHandler(
            filename=log_path / "search_engine.log",
            when='midnight',
            interval=1,
            backupCount=30,  # Keep 30 days of logs
            encoding='utf-8'
        )
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(structured_formatter)
        file_handler.addFilter(RequestContextFilter())
        root_logger.addHandler(file_handler)

        # Separate error log file
        error_handler = logging.handlers.TimedRotatingFileHandler(
            filename=log_path / "errors.log",
            when='midnight',
            interval=1,
            backupCount=30,
            encoding='utf-8'
        )
        error_handler.setLevel(logging.ERROR)
        error_handler.setFormatter(structured_formatter)
        error_handler.addFilter(RequestContextFilter())
        root_logger.addHandler(error_handler)

    # Configure specific loggers
    configure_specific_loggers()
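
# Example (illustrative values): explicit configuration overriding the
# env-driven defaults applied at import time by _init_logging() below:
#
#   setup_logging(log_level="DEBUG", log_dir="/var/log/search_engine",
#                 enable_console=False)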


def configure_specific_loggers():
    """Configure logging levels for specific components"""

    # Set specific logger levels
    loggers_config = {
        'urllib3.connectionpool': logging.WARNING,  # Suppress HTTP connection logs
        'elasticsearch': logging.WARNING,  # Suppress ES client debug logs
        'requests.packages.urllib3': logging.WARNING,
        'transformers': logging.WARNING,  # Suppress transformer model logs
        'tokenizers': logging.WARNING,
    }

    for logger_name, level in loggers_config.items():
        logging.getLogger(logger_name).setLevel(level)


def get_logger(name: str) -> logging.Logger:
    """
    Get a structured logger with request context support

    Args:
        name: Logger name (usually __name__)

    Returns:
        Configured logger instance
    """
    return logging.getLogger(name)
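
# Typical module-level usage (illustrative):
#
#   logger = get_logger(__name__)
#   logger.info("index rebuilt", extra={'extra_data': {'documents': 10000}})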


# Convenience functions for different log levels. Each accepts the optional
# context keywords reqid, uid, and extra_data, which StructuredFormatter
# renders into the JSON output; other standard logging kwargs (e.g. exc_info)
# are passed through unchanged.
def log_debug(message: str, **kwargs) -> None:
    """Log debug message with optional context data"""
    _log_with_context(logging.getLogger(), logging.DEBUG, message, **kwargs)


def log_info(message: str, **kwargs) -> None:
    """Log info message with optional context data"""
    _log_with_context(logging.getLogger(), logging.INFO, message, **kwargs)


def log_warning(message: str, **kwargs) -> None:
    """Log warning message with optional context data"""
    _log_with_context(logging.getLogger(), logging.WARNING, message, **kwargs)


def log_error(message: str, **kwargs) -> None:
    """Log error message with optional context data"""
    _log_with_context(logging.getLogger(), logging.ERROR, message, **kwargs)


def log_critical(message: str, **kwargs) -> None:
    """Log critical message with optional context data"""
    _log_with_context(logging.getLogger(), logging.CRITICAL, message, **kwargs)
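
# Example (hypothetical values): convenience logging with request context:
#
#   log_error("index write failed", reqid="abc123",
#             extra_data={'shard': 3, 'retries': 2})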


# Initialize logging on module import
def _init_logging():
    """Initialize logging with default configuration"""
    if not logging.getLogger().handlers:
        setup_logging(
            log_level=os.getenv('LOG_LEVEL', 'INFO'),
            log_dir=os.getenv('LOG_DIR', 'logs'),
            enable_console=True,
            enable_file=True
        )


# Auto-initialize when module is imported
_init_logging()
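

if __name__ == "__main__":
    # Smoke test (illustrative; names and values are made up): emits one
    # structured record through the handlers installed at import time.
    _log_with_context(get_logger("search_engine.demo"), logging.INFO,
                      "logging initialized", reqid="demo-req",
                      uid="demo-user", extra_data={'smoke_test': True})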