frontend_server.py 5.46 KB
#!/usr/bin/env python3
"""
Simple HTTP server for SearchEngine frontend.
"""

import http.server
import logging
import os
import socketserver
import sys
import threading
import time
from collections import defaultdict, deque

# Make the sibling "frontend" directory the working directory; the startup
# banner reports os.getcwd() as the directory files are served from.
frontend_dir = os.path.join(os.path.dirname(__file__), '../frontend')
os.chdir(frontend_dir)

# Listening port: the PORT environment variable when set, otherwise 6003.
PORT = int(os.environ.get('PORT', 6003))

# Root logger emits ERROR and above only, keeping scanner noise out of the log.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.ERROR,
)

class RateLimitingMixin:
    """Mixin providing a per-IP sliding-window request rate limiter.

    All state lives on the class, so it is shared by every instance of a
    class that mixes this in. Access to that state is serialised with a lock
    because handlers may run concurrently in separate threads.

    NOTE: entries for IPs that stop sending requests are never evicted from
    ``request_counts``.
    """

    # ip -> timestamps (time.monotonic()) of accepted requests, oldest first
    request_counts = defaultdict(deque)
    rate_limit = 100  # maximum accepted requests per window, per IP
    window = 60  # window length in seconds
    # Guards request_counts and the deques stored in it.
    _lock = threading.Lock()

    @classmethod
    def is_rate_limited(cls, ip):
        """Record a request from *ip* and report whether it must be rejected.

        Args:
            ip: Client address used as the bucket key.

        Returns:
            True if *ip* already has ``rate_limit`` accepted requests inside
            the last ``window`` seconds (the request is not recorded);
            False otherwise (the request is recorded).
        """
        with cls._lock:
            # Monotonic clock: interval maths must not be affected by
            # wall-clock adjustments.
            now = time.monotonic()
            cutoff = now - cls.window
            timestamps = cls.request_counts[ip]

            # Drop timestamps that have fallen out of the window.
            while timestamps and timestamps[0] < cutoff:
                timestamps.popleft()

            # ">=" so that at most rate_limit requests are accepted per window.
            if len(timestamps) >= cls.rate_limit:
                return True

            timestamps.append(now)
            return False

class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
    """Static-file handler with route aliases, CORS headers and rate limiting.

    Adds the following on top of ``SimpleHTTPRequestHandler``:

    * ``/base`` and ``/base/`` are served from ``/base.html``; ``/`` and an
      empty path are served from ``/index.html`` (query strings preserved).
    * Each request is counted against the per-IP limit of
      ``RateLimitingMixin``; requests over the limit receive ``429``.
    * CORS and basic security headers are added to every response.
    * Connection-level errors and malformed-request log lines, which are
      typical of port scanners, are suppressed.
    """

    # Request path (without query string) -> file path actually served.
    _ROUTE_ALIASES = {
        '/base': '/base.html',
        '/base/': '/base.html',
        '/': '/index.html',
        '': '/index.html',
    }

    # Substrings identifying log lines produced by malformed requests.
    _LOG_NOISE_PATTERNS = (
        "code 400",
        "Bad request",
        "Bad request version",
        "Bad HTTP/0.9 request type",
        "Bad request syntax",
    )

    def _apply_route_alias(self):
        """Rewrite ``self.path`` in place if it names an aliased route."""
        route, separator, query = self.path.partition('?')
        target = self._ROUTE_ALIASES.get(route)
        if target is not None:
            # Keep the "?" separator so the query string stays a query string
            # instead of being glued onto the file name.
            self.path = target + separator + query

    def do_GET(self):
        """Handle GET requests, applying the route aliases first."""
        self._apply_route_alias()
        super().do_GET()

    def do_HEAD(self):
        """Handle HEAD requests with the same route aliases as GET."""
        self._apply_route_alias()
        super().do_HEAD()

    def setup(self):
        """Set up the connection; setup failures are logged, not raised."""
        try:
            super().setup()
        except Exception as exc:
            # Deliberately best effort: scanners often drop the connection
            # before setup completes.
            logging.debug("Connection setup error: %s", exc)

    def parse_request(self):
        """Parse the request, then enforce the per-IP rate limit.

        The limit is enforced here rather than before the request is read
        because ``send_error()`` relies on attributes (``requestline``,
        ``command``, ``request_version``) that only exist once a request
        line has been parsed.

        Returns:
            True if the request should be dispatched to a ``do_*`` method.
            False if it was malformed or rate limited; in both cases an
            error response has already been sent.
        """
        client_ip = self.client_address[0]
        # Count the request before parsing so malformed requests also use up
        # the client's budget.
        limited = self.is_rate_limited(client_ip)

        if not super().parse_request():
            return False

        if limited:
            logging.warning("Rate limiting IP: %s", client_ip)
            self.send_error(429, "Too Many Requests")
            return False
        return True

    def handle_one_request(self):
        """Handle a single request, absorbing connection-level errors."""
        try:
            super().handle_one_request()
        except (ConnectionResetError, BrokenPipeError, UnicodeDecodeError):
            # Client disconnected prematurely or sent non-HTTP binary data;
            # both are common with scanners. Stop using this connection.
            self.close_connection = True
        except Exception as e:
            # Log unexpected errors but keep the server running.
            logging.debug("Request handling error: %s", e)
            self.close_connection = True

    def log_message(self, format, *args):
        """Log a request line unless it looks like scanner noise.

        Args:
            format: %-style format string supplied by the base handler.
            *args: Values interpolated into ``format``.
        """
        message = format % args
        if any(pattern in message for pattern in self._LOG_NOISE_PATTERNS):
            return
        # Skip very short messages and ones starting with a space.
        if len(message) > 10 and not message.startswith(" "):
            super().log_message(format, *args)

    def end_headers(self):
        """Append CORS and security headers, then finish the header block."""
        # CORS headers
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
        # Security headers
        self.send_header('X-Content-Type-Options', 'nosniff')
        self.send_header('X-Frame-Options', 'DENY')
        self.send_header('X-XSS-Protection', '1; mode=block')
        super().end_headers()

    def do_OPTIONS(self):
        """Answer OPTIONS (CORS preflight) requests with an empty 200."""
        try:
            self.send_response(200)
            self.end_headers()
        except Exception as exc:
            # Best effort: the client may already have gone away.
            logging.debug("OPTIONS handling error: %s", exc)

class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server built from ``ThreadingMixIn`` and ``TCPServer``."""

    # Request-handling threads are created as daemon threads.
    daemon_threads = True
    # Allow the listening address to be reused.
    allow_reuse_address = True

def main():
    """Run the frontend server until interrupted.

    Returns:
        Process exit code: 0 after a clean stop, 1 if the server could not
        bind to the port or failed while serving.
    """
    import errno

    # Bind the real server directly instead of probing the port with a
    # throwaway socket first: the probe did not use the server's
    # address-reuse setting and left a gap between the check and the bind.
    try:
        httpd = ThreadedTCPServer(("", PORT), MyHTTPRequestHandler)
    except OSError as exc:
        if exc.errno == errno.EADDRINUSE:
            print(f"ERROR: Port {PORT} is already in use.")
            print("Please stop the existing server or use a different port.")
            print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
        else:
            # e.g. permission denied for a privileged port
            print(f"ERROR: Could not bind to port {PORT}: {exc}")
        return 1

    # The context manager closes the listening socket on every exit path.
    with httpd:
        print(f"Frontend server started at http://localhost:{PORT}")
        print(f"Serving files from: {os.getcwd()}")
        print("\nPress Ctrl+C to stop the server")

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # serve_forever() has already returned in this thread, so there
            # is no running loop left for shutdown() to stop.
            print("\nShutting down server...")
        except Exception as e:
            print(f"Server error: {e}")
            return 1

    print("Server stopped")
    return 0


if __name__ == '__main__':
    sys.exit(main())