"""
Main FastAPI application for the search service.

Usage:
    uvicorn api.app:app --host 0.0.0.0 --port 6002 --reload
"""

import os
import sys
import logging
import time
import argparse
import pathlib
import uvicorn
from collections import defaultdict, deque
from typing import Optional

from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded

# Configure logging: everything goes to the console and to logs/api.log.
# The log directory is relative to the process working directory.
log_dir = pathlib.Path('logs')
log_dir.mkdir(exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(log_dir / 'api.log', mode='a', encoding='utf-8')
    ]
)
logger = logging.getLogger(__name__)

# Initialize rate limiter (requests are bucketed by remote address)
limiter = Limiter(key_func=get_remote_address)

# Add parent directory to path so the project packages below can be
# imported. This must run BEFORE the project imports.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from config.env_config import ES_CONFIG, DB_CONFIG
from config import ConfigLoader
from utils import ESClient
from utils.db_connector import create_db_connection
from search import Searcher
from query import QueryParser
from indexer.incremental_service import IncrementalIndexerService

# Global instances, populated by init_service()
_es_client: Optional[ESClient] = None
_searcher: Optional[Searcher] = None
_query_parser: Optional[QueryParser] = None
_config = None
_incremental_service: Optional[IncrementalIndexerService] = None
_bulk_indexing_service = None


def init_service(es_host: str = "http://localhost:9200"):
    """
    Initialize search service with unified configuration.

    Loads ``config/config.yaml``, connects to Elasticsearch, builds the
    query parser and searcher, and - when the DB_* environment variables
    are all present - the incremental and bulk indexing services. Results
    are stored in the module-level globals read by the ``get_*`` accessors.

    Args:
        es_host: Elasticsearch host URL

    Raises:
        ConnectionError: If Elasticsearch does not answer the ping.
    """
    global _es_client, _searcher, _query_parser, _config
    global _incremental_service, _bulk_indexing_service

    start_time = time.time()
    logger.info("Initializing search service (multi-tenant)")

    # Load configuration
    logger.info("Loading configuration...")
    config_loader = ConfigLoader("config/config.yaml")
    _config = config_loader.load_config()
    logger.info("Configuration loaded")

    # Get ES credentials (environment variables win over ES_CONFIG)
    es_username = os.getenv('ES_USERNAME') or ES_CONFIG.get('username')
    es_password = os.getenv('ES_PASSWORD') or ES_CONFIG.get('password')

    # Connect to Elasticsearch
    logger.info("Connecting to Elasticsearch at %s...", es_host)
    if es_username and es_password:
        _es_client = ESClient(hosts=[es_host], username=es_username, password=es_password)
    else:
        _es_client = ESClient(hosts=[es_host])

    if not _es_client.ping():
        raise ConnectionError(f"Failed to connect to Elasticsearch at {es_host}")
    logger.info("Elasticsearch connected")

    # Initialize components
    logger.info("Initializing query parser...")
    _query_parser = QueryParser(_config)

    logger.info("Initializing searcher...")
    _searcher = Searcher(_es_client, _config, _query_parser)

    # Initialize indexing services (if DB config is available).
    # Best-effort: any failure here only disables the indexing endpoints.
    try:
        # Imported lazily inside the try so an import failure of the bulk
        # indexer is reported as a warning below. create_db_connection and
        # IncrementalIndexerService are already imported at module level.
        from indexer.bulk_indexing_service import BulkIndexingService

        db_host = os.getenv('DB_HOST')
        db_port = int(os.getenv('DB_PORT', 3306))
        db_database = os.getenv('DB_DATABASE')
        db_username = os.getenv('DB_USERNAME')
        db_password = os.getenv('DB_PASSWORD')

        if all([db_host, db_database, db_username, db_password]):
            logger.info("Initializing database connection for indexing services...")
            db_engine = create_db_connection(
                host=db_host,
                port=db_port,
                database=db_database,
                username=db_username,
                password=db_password
            )

            # Initialize incremental service
            _incremental_service = IncrementalIndexerService(db_engine)
            logger.info("Incremental indexer service initialized")

            # Initialize bulk indexing service
            _bulk_indexing_service = BulkIndexingService(db_engine, _es_client)
            logger.info("Bulk indexing service initialized")
        else:
            logger.warning("Database config incomplete, indexing services will not be available")
            logger.warning("Required: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD")
    except Exception as e:
        logger.warning("Failed to initialize indexing services: %s", e)
        logger.warning("Indexing endpoints will not be available")
        # Reset both so a half-initialized pair is never exposed.
        _incremental_service = None
        _bulk_indexing_service = None

    elapsed = time.time() - start_time
    logger.info(
        "Search service ready! (took %.2fs) | Index: %s",
        elapsed,
        _config.es_index_name,
    )


def get_es_client() -> ESClient:
    """Get Elasticsearch client.

    Raises:
        RuntimeError: If init_service() has not set the client yet.
    """
    if _es_client is None:
        raise RuntimeError("Service not initialized")
    return _es_client


def get_searcher() -> Searcher:
    """Get searcher instance.

    Raises:
        RuntimeError: If init_service() has not set the searcher yet.
    """
    if _searcher is None:
        raise RuntimeError("Service not initialized")
    return _searcher


def get_query_parser() -> QueryParser:
    """Get query parser instance.

    Raises:
        RuntimeError: If init_service() has not set the parser yet.
    """
    if _query_parser is None:
        raise RuntimeError("Service not initialized")
    return _query_parser


def get_config():
    """Get global config instance.

    Raises:
        RuntimeError: If init_service() has not loaded the config yet.
    """
    if _config is None:
        raise RuntimeError("Service not initialized")
    return _config


def get_incremental_service() -> Optional[IncrementalIndexerService]:
    """Get incremental indexer service instance (None when unavailable)."""
    return _incremental_service


def get_bulk_indexing_service():
    """Get bulk indexing service instance (None when unavailable)."""
    return _bulk_indexing_service


# Create FastAPI app with enhanced configuration
app = FastAPI(
    title="E-Commerce Search API",
    description="Configurable search engine for cross-border e-commerce",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json"
)

# Add rate limiting middleware
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Add trusted host middleware.
# Currently accepts every Host header; restrict in production.
app.add_middleware(
    TrustedHostMiddleware,
    allowed_hosts=["*"]  # Allow all hosts for development, restrict in production
)


# Add security headers middleware
@app.middleware("http")
async def add_security_headers(request: Request, call_next):
    """Attach a fixed set of security headers to every response."""
    response = await call_next(request)
    response.headers["X-Content-Type-Options"] = "nosniff"
    response.headers["X-Frame-Options"] = "DENY"
    response.headers["X-XSS-Protection"] = "1; mode=block"
    response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
    return response


# Add CORS middleware.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# permissive combination - confirm against the Starlette CORSMiddleware docs
# and pin allow_origins to specific domains before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Restrict in production to specific domains
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Total-Count"]
)


@app.on_event("startup")
async def startup_event():
    """Initialize service on startup.

    Initialization failures are logged but deliberately not re-raised, so
    the process still starts; /health then reports the service unhealthy.
    """
    es_host = os.getenv("ES_HOST", "http://localhost:9200")
    logger.info("Starting E-Commerce Search API (Multi-Tenant)")
    logger.info("Elasticsearch Host: %s", es_host)

    try:
        init_service(es_host=es_host)
        logger.info("Service initialized successfully")
    except Exception as e:
        # logger.exception == logger.error(..., exc_info=True)
        logger.exception("Failed to initialize service: %s", e)
        logger.warning("Service will start but may not function correctly")


@app.on_event("shutdown")
async def shutdown_event():
    """Cleanup on shutdown."""
    logger.info("Shutting down E-Commerce Search API")


@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Global exception handler with detailed logging.

    Logs the exception with its traceback and returns a generic 500 body
    that does not leak internal details to the client.
    """
    client_ip = request.client.host if request.client else "unknown"
    # Pass the exception object itself so the traceback is logged even if
    # this handler is not invoked from inside an active ``except`` block.
    logger.error("Unhandled exception from %s: %s", client_ip, exc, exc_info=exc)

    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "detail": "An unexpected error occurred. Please try again later.",
            "timestamp": int(time.time())
        }
    )


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """HTTP exception handler.

    Returns the exception's status code and detail as JSON and forwards any
    headers attached to the exception (e.g. ``WWW-Authenticate``).

    NOTE(review): registered for fastapi.HTTPException only; errors raised
    by the framework itself (such as routing 404s) may not pass through
    this handler - confirm if a uniform error body is required.
    """
    client_ip = request.client.host if request.client else "unknown"
    logger.warning(
        "HTTP exception from %s: %s - %s",
        client_ip,
        exc.status_code,
        exc.detail,
    )

    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": exc.detail,
            "status_code": exc.status_code,
            "timestamp": int(time.time())
        },
        # Previously dropped; headers set on the exception must reach the client.
        headers=getattr(exc, "headers", None)
    )


@app.get("/api")
@limiter.limit("60/minute")
async def root(request: Request):
    """Root endpoint with rate limiting."""
    client_ip = request.client.host if request.client else "unknown"
    logger.info("Root endpoint accessed from %s", client_ip)

    return {
        "service": "E-Commerce Search API",
        "version": "1.0.0",
        "status": "running",
        "timestamp": int(time.time())
    }


@app.get("/health")
@limiter.limit("120/minute")
async def health_check(request: Request):
    """Health check endpoint.

    Reports healthy when the Elasticsearch client and the searcher globals
    have been initialized; it does not issue a live ping to Elasticsearch.
    Returns HTTP 503 with the error text otherwise.
    """
    try:
        # Check if services are initialized (each accessor raises if not)
        get_es_client()
        get_searcher()

        return {
            "status": "healthy",
            "services": {
                "config": "initialized",
                "elasticsearch": "connected",
                "searcher": "initialized"
            },
            "timestamp": int(time.time())
        }
    except Exception as e:
        logger.error("Health check failed: %s", e)
        return JSONResponse(
            status_code=503,
            content={
                "status": "unhealthy",
                "error": str(e),
                "timestamp": int(time.time())
            }
        )


# Include routers
from .routes import search, admin, indexer

app.include_router(search.router)
app.include_router(admin.router)
app.include_router(indexer.router)

# Mount static files and serve frontend
frontend_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "frontend")
if os.path.exists(frontend_path):
    # Serve frontend HTML at root
    @app.get("/")
    async def serve_frontend():
        """Serve the frontend HTML, or a JSON status if index.html is missing."""
        index_path = os.path.join(frontend_path, "index.html")
        if os.path.exists(index_path):
            return FileResponse(index_path)
        return {"service": "E-Commerce Search API", "version": "1.0.0", "status": "running"}

    # Mount static files (CSS, JS, images). Guarded so that a frontend
    # directory without a "static" subdirectory does not abort the import.
    static_path = os.path.join(frontend_path, "static")
    if os.path.isdir(static_path):
        app.mount("/static", StaticFiles(directory=static_path), name="static")
        logger.info(f"Frontend static files mounted from: {frontend_path}")
    else:
        logger.warning("Frontend static directory not found: %s", static_path)
else:
    logger.warning(f"Frontend directory not found: {frontend_path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Start search API service (multi-tenant)')
    parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
    parser.add_argument('--port', type=int, default=6002, help='Port to bind to')
    parser.add_argument(
        '--es-host',
        # Default to an already-exported ES_HOST so that running without
        # --es-host does not overwrite the operator's environment setting.
        default=os.getenv('ES_HOST', 'http://localhost:9200'),
        help='Elasticsearch host'
    )
    parser.add_argument('--reload', action='store_true', help='Enable auto-reload')
    args = parser.parse_args()

    # Set environment variable (read back by startup_event in the server)
    os.environ['ES_HOST'] = args.es_host

    # Run server
    uvicorn.run(
        "api.app:app",
        host=args.host,
        port=args.port,
        reload=args.reload
    )