# app.py 10.1 KB
"""
Main FastAPI application for the search service.

Usage:
    uvicorn api.app:app --host 0.0.0.0 --port 6002 --reload
"""

import os
import sys
import logging
import time
from collections import defaultdict, deque
from typing import Optional
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded
import argparse

# Configure root logging at INFO level. Each record goes to two handlers:
# a StreamHandler and a FileHandler that appends to a fixed path under /tmp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/search_engine_api.log', mode='a')
    ]
)
# Module-level logger used by the handlers and endpoints in this file.
logger = logging.getLogger(__name__)

# Rate limiter keyed by whatever get_remote_address() returns for a request
# (presumably the client IP -- confirm against the slowapi documentation).
limiter = Limiter(key_func=get_remote_address)

# Put the directory two levels above this file at the front of sys.path.
# This runs before the project imports that follow (config, utils, search,
# query); presumably those packages live in that directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from config import ConfigLoader, CustomerConfig
from utils import ESClient
from search import Searcher
from query import QueryParser

# Process-wide service singletons. They start as None, are assigned by
# init_service(), and are read through the get_*() accessors below, which
# raise RuntimeError while the corresponding value is still None.
_config: Optional[CustomerConfig] = None
_es_client: Optional[ESClient] = None
_searcher: Optional[Searcher] = None
_query_parser: Optional[QueryParser] = None


def _resolve_es_credentials() -> tuple:
    """Return an ``(username, password)`` pair for Elasticsearch.

    ``ES_USERNAME`` / ``ES_PASSWORD`` from the environment take precedence.
    Any value still missing is looked up through
    ``config.env_config.get_es_config()``. That lookup is best-effort: a
    failure is logged and the missing value(s) stay ``None``.
    """
    username = os.getenv('ES_USERNAME')
    password = os.getenv('ES_PASSWORD')
    if username and password:
        return username, password

    try:
        from config.env_config import get_es_config
        es_config = get_es_config()
        username = username or es_config.get('username')
        password = password or es_config.get('password')
    except ImportError as exc:
        # The env_config module is treated as optional.
        logger.debug("config.env_config is not available: %s", exc)
    except Exception as exc:
        # Keep the original best-effort behaviour, but leave a trace.
        logger.warning(
            "Could not load Elasticsearch credentials from config.env_config: %s", exc
        )
    return username, password


def init_service(es_host: str = "http://localhost:9200"):
    """
    Initialize search service with unified configuration.

    Loads and validates ``config/config.yaml``, connects to Elasticsearch
    (with basic credentials when both a username and a password are found),
    then builds the query parser and the searcher. The module-level
    singletons are assigned only after every step has succeeded, so a failed
    initialization never leaves the service partially initialized.

    Args:
        es_host: Elasticsearch host URL

    Raises:
        ValueError: if configuration validation reports errors.
        ConnectionError: if the Elasticsearch ping fails.
    """
    global _config, _es_client, _searcher, _query_parser

    logger.info("Initializing search service (multi-tenant)")

    # Load and validate the unified configuration
    config_loader = ConfigLoader("config/config.yaml")
    config = config_loader.load_config()

    errors = config_loader.validate_config(config)
    if errors:
        raise ValueError(f"Configuration validation failed: {errors}")

    logger.info("Configuration loaded: %s", config.es_index_name)

    # Use authentication only when both credentials are available
    es_username, es_password = _resolve_es_credentials()
    if es_username and es_password:
        logger.info("Connecting to Elasticsearch with authentication: %s", es_username)
        es_client = ESClient(hosts=[es_host], username=es_username, password=es_password)
    else:
        logger.info("Connecting to Elasticsearch without authentication")
        es_client = ESClient(hosts=[es_host])

    if not es_client.ping():
        raise ConnectionError(f"Failed to connect to Elasticsearch at {es_host}")

    logger.info("Connected to Elasticsearch: %s", es_host)

    query_parser = QueryParser(config)
    logger.info("Query parser initialized")

    searcher = Searcher(config, es_client, query_parser)
    logger.info("Searcher initialized")

    # Publish everything at once, only now that nothing above has raised.
    _config = config
    _es_client = es_client
    _query_parser = query_parser
    _searcher = searcher

    logger.info("Search service ready!")


def get_config() -> CustomerConfig:
    """Return the loaded configuration; raise RuntimeError before initialization."""
    loaded = _config
    if loaded is not None:
        return loaded
    raise RuntimeError("Service not initialized")


def get_es_client() -> ESClient:
    """Return the shared Elasticsearch client; raise RuntimeError before initialization."""
    client = _es_client
    if client is not None:
        return client
    raise RuntimeError("Service not initialized")


def get_searcher() -> Searcher:
    """Return the shared searcher; raise RuntimeError before initialization."""
    searcher = _searcher
    if searcher is not None:
        return searcher
    raise RuntimeError("Service not initialized")


def get_query_parser() -> QueryParser:
    """Return the shared query parser; raise RuntimeError before initialization."""
    parser = _query_parser
    if parser is not None:
        return parser
    raise RuntimeError("Service not initialized")


# Create the FastAPI application. The interactive docs, ReDoc and the OpenAPI
# schema are served at /docs, /redoc and /openapi.json respectively.
app = FastAPI(
    title="E-Commerce Search API",
    description="Configurable search engine for cross-border e-commerce",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json"
)

# Wire up rate limiting: store the limiter on the application state and
# register slowapi's handler for RateLimitExceeded. No middleware is added
# here; limits are declared per endpoint with @limiter.limit(...).
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Add trusted host middleware. With the wildcard below every Host header is
# accepted, so as written this does not restrict anything.
app.add_middleware(
    TrustedHostMiddleware,
    allowed_hosts=["*"]  # Allow all hosts for development, restrict in production
)

# Stamp a fixed set of browser-hardening headers onto every HTTP response
@app.middleware("http")
async def add_security_headers(request: Request, call_next):
    """Pass the request down the stack, then set security headers on the response."""
    hardening_headers = (
        ("X-Content-Type-Options", "nosniff"),
        ("X-Frame-Options", "DENY"),
        ("X-XSS-Protection", "1; mode=block"),
        ("Referrer-Policy", "strict-origin-when-cross-origin"),
    )
    response = await call_next(request)
    for header_name, header_value in hardening_headers:
        response.headers[header_name] = header_value
    return response

# Add CORS middleware. The origin and header lists below are wildcards, so as
# written this does not restrict which origins may call the API.
# NOTE(review): FastAPI's CORS documentation says wildcard values should not
# be combined with allow_credentials=True -- confirm this combination is
# intended and list explicit origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Restrict in production to specific domains
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Total-Count"]
)


@app.on_event("startup")
async def startup_event():
    """Initialize the search service when the application starts.

    Reads the Elasticsearch URL from the ``ES_HOST`` environment variable
    (default ``http://localhost:9200``) and calls ``init_service``. A failure
    is logged together with its traceback but is not re-raised, so the
    application still starts.
    """
    es_host = os.getenv("ES_HOST", "http://localhost:9200")

    logger.info("Starting E-Commerce Search API (Multi-Tenant)")
    logger.info(f"Elasticsearch Host: {es_host}")

    try:
        init_service(es_host=es_host)
    except Exception as e:
        # logger.exception keeps the ERROR level and message but also records
        # the traceback, which is needed to diagnose a failed startup.
        logger.exception("Failed to initialize service: %s", e)
        logger.warning("Service will start but may not function correctly")
    else:
        logger.info("Service initialized successfully")


@app.on_event("shutdown")
async def shutdown_event():
    """Record that the application is shutting down; no resources are released here."""
    farewell = "Shutting down E-Commerce Search API"
    logger.info(farewell)


@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log an unhandled exception with its traceback and answer with a generic 500.

    The response body never includes exception details; it carries a fixed
    error/detail pair and the current Unix timestamp.
    """
    peer = request.client
    origin = peer.host if peer else "unknown"
    logger.error(f"Unhandled exception from {origin}: {exc}", exc_info=True)

    body = {
        "error": "Internal server error",
        "detail": "An unexpected error occurred. Please try again later.",
        "timestamp": int(time.time())
    }
    return JSONResponse(status_code=500, content=body)


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Log an HTTPException and return it as a JSON error payload.

    The response uses the exception's status code and carries ``error``
    (the exception detail), ``status_code`` and a Unix ``timestamp``. Any
    headers attached to the exception (for example ``WWW-Authenticate`` or
    ``Retry-After``) are forwarded so they are not lost.
    """
    client_ip = request.client.host if request.client else "unknown"
    logger.warning(
        "HTTP exception from %s: %s - %s", client_ip, exc.status_code, exc.detail
    )

    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": exc.detail,
            "status_code": exc.status_code,
            "timestamp": int(time.time())
        },
        # None when the exception carries no headers; JSONResponse accepts that.
        headers=getattr(exc, "headers", None),
    )


@app.get("/api")
@limiter.limit("60/minute")
async def root(request: Request):
    """Return basic service information; limited to 60 requests per minute."""
    if request.client:
        caller = request.client.host
    else:
        caller = "unknown"
    logger.info(f"Root endpoint accessed from {caller}")

    service_info = {
        "service": "E-Commerce Search API",
        "version": "1.0.0",
        "status": "running",
        "timestamp": int(time.time())
    }
    return service_info


@app.get("/health")
@limiter.limit("120/minute")
async def health_check(request: Request):
    """Health check endpoint.

    Reports ``healthy`` only when the configuration, the Elasticsearch client
    and the searcher have all been initialized; otherwise answers 503 with
    the reason. This reflects initialization state only: no live request is
    sent to Elasticsearch here.
    """
    try:
        # Each accessor raises RuntimeError while its singleton is unset.
        # The searcher is checked too because the response reports on it.
        get_config()
        get_es_client()
        get_searcher()
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return JSONResponse(
            status_code=503,
            content={
                "status": "unhealthy",
                "error": str(e),
                "timestamp": int(time.time())
            }
        )

    return {
        "status": "healthy",
        "services": {
            "config": "initialized",
            "elasticsearch": "connected",
            "searcher": "initialized"
        },
        "timestamp": int(time.time())
    }


# Include routers. The relative import only works when this module is loaded
# as part of a package (e.g. as ``api.app``). It is placed here rather than at
# the top of the file -- presumably because the route modules import the
# accessors defined above; confirm before moving it.
from .routes import search, admin

app.include_router(search.router)
app.include_router(admin.router)

# Serve the frontend: index.html at "/" and assets under "/static". The
# frontend directory is expected next to this file's parent package.
frontend_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "frontend")
if os.path.exists(frontend_path):
    # Serve frontend HTML at root
    @app.get("/")
    async def serve_frontend():
        """Serve the frontend HTML, or basic service info if index.html is missing."""
        index_path = os.path.join(frontend_path, "index.html")
        if os.path.exists(index_path):
            return FileResponse(index_path)
        return {"service": "E-Commerce Search API", "version": "1.0.0", "status": "running"}

    # Mount static files (CSS, JS, images) only when the directory exists, so
    # a frontend without a static/ subdirectory cannot break application
    # import.
    static_path = os.path.join(frontend_path, "static")
    if os.path.isdir(static_path):
        app.mount("/static", StaticFiles(directory=static_path), name="static")
        logger.info(f"Frontend static files mounted from: {frontend_path}")
    else:
        logger.warning(f"Frontend static directory not found: {static_path}")
else:
    logger.warning(f"Frontend directory not found: {frontend_path}")


if __name__ == "__main__":
    # Script entry point: parse CLI options, export the Elasticsearch host for
    # the startup hook, and run the app under uvicorn.
    import uvicorn

    parser = argparse.ArgumentParser(description='Start search API service (multi-tenant)')
    parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
    parser.add_argument('--port', type=int, default=6002, help='Port to bind to')
    # Default to an ES_HOST already present in the environment, so running
    # without --es-host no longer overwrites it with localhost.
    parser.add_argument(
        '--es-host',
        default=os.getenv('ES_HOST', 'http://localhost:9200'),
        help='Elasticsearch host (default: $ES_HOST or http://localhost:9200)'
    )
    parser.add_argument('--reload', action='store_true', help='Enable auto-reload')
    args = parser.parse_args()

    # startup_event() reads ES_HOST, so pass the chosen host through the
    # environment.
    os.environ['ES_HOST'] = args.es_host

    # Run server
    uvicorn.run(
        "api.app:app",
        host=args.host,
        port=args.port,
        reload=args.reload
    )