# frontend_server.py
#!/usr/bin/env python3
"""
Simple HTTP server for saas-search frontend.
"""

import http.server
import json
import logging
import os
import socketserver
import sys
import threading
import time
import urllib.error
import urllib.request
from collections import defaultdict, deque
from pathlib import Path

from dotenv import load_dotenv

# Load .env file
# The project root is taken to be parents[2] of this file's resolved path,
# i.e. the grandparent of the directory containing this script.
project_root = Path(__file__).resolve().parents[2]
load_dotenv(project_root / '.env')

# Get API_BASE_URL from environment (not injected by default, so that a stale
# .env cannot override the same-origin policy).
# window.API_BASE_URL is only injected when FRONTEND_INJECT_API_BASE_URL=1 is
# set explicitly. An empty API_BASE_URL is normalised to None.
API_BASE_URL = os.getenv('API_BASE_URL') or None
INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'
# Backend proxy target for same-origin API forwarding
# (any trailing '/' is stripped so request paths can be appended directly).
BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')

# Change to frontend directory
# (the request handler below resolves files relative to the working directory).
frontend_dir = os.path.join(project_root, 'frontend')
os.chdir(frontend_dir)

# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.
PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))

# Configure logging to suppress scanner noise
# (level ERROR means logging.warning/debug calls in this module are dropped).
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')

class RateLimitingMixin:
    """Mixin providing a per-IP sliding-window request limiter.

    All state lives on the class, so every handler instance (and every
    subclass that does not override these attributes) shares one set of
    counters.
    """

    # Maps client IP -> time.monotonic() timestamps of the requests that were
    # accepted within the current window, oldest first.
    request_counts = defaultdict(deque)
    rate_limit = 100  # maximum accepted requests per IP within one window
    window = 60  # window length in seconds
    # Serialises access to request_counts: handlers run in separate threads,
    # and the "peek at [0], then popleft()" sequence is not atomic.
    _lock = threading.Lock()

    @classmethod
    def is_rate_limited(cls, ip):
        """Record a request from *ip* and report whether it must be rejected.

        Args:
            ip: Client address used as the bucket key.

        Returns:
            True if *ip* already has ``rate_limit`` accepted requests inside
            the last ``window`` seconds (the request is then NOT recorded);
            False otherwise, in which case the request is recorded.
        """
        # A monotonic clock keeps the window correct across wall-clock jumps.
        now = time.monotonic()
        cutoff = now - cls.window

        with cls._lock:
            timestamps = cls.request_counts[ip]

            # Drop requests that have fallen out of the window.
            while timestamps and timestamps[0] < cutoff:
                timestamps.popleft()

            # ">=" so that exactly rate_limit requests are allowed per window;
            # ">" would let rate_limit + 1 through.
            if len(timestamps) >= cls.rate_limit:
                return True

            timestamps.append(now)
            return False

class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
    """Serve the frontend's static files and proxy API calls to the backend.

    Requests whose path starts with one of ``_PROXY_PREFIXES`` are forwarded
    to ``BACKEND_PROXY_URL``; everything else is served from the handler's
    directory. CORS and basic security headers are appended to every
    response, and each client IP is rate limited via ``RateLimitingMixin``.
    """

    # Path prefixes forwarded to the backend instead of being served from disk.
    _PROXY_PREFIXES = ('/search/', '/admin/', '/indexer/')
    # Request headers that are not copied onto the outgoing backend request.
    _SKIPPED_REQUEST_HEADERS = frozenset(('host', 'content-length', 'connection'))
    # Backend response headers that are not copied back to the client.
    _SKIPPED_RESPONSE_HEADERS = frozenset(('transfer-encoding', 'connection', 'content-length'))
    # Substrings identifying log lines produced by malformed (scanner) requests.
    _LOG_NOISE_PATTERNS = (
        "code 400",
        "Bad request",
        "Bad request version",
        "Bad HTTP/0.9 request type",
        "Bad request syntax",
    )

    def _is_proxy_path(self, path: str) -> bool:
        """Return True for API paths that should be forwarded to backend service."""
        return path.startswith(self._PROXY_PREFIXES)

    def _proxy_to_backend(self):
        """Forward the current request to ``BACKEND_PROXY_URL`` and relay the reply.

        The backend's status, headers and body are passed through, including
        HTTP error responses. Any other failure while talking to the backend
        is logged and answered with a 502 JSON body.

        Errors while writing to the *client* are deliberately not handled
        here; they propagate to ``handle_one_request``.
        """
        target_url = f"{BACKEND_PROXY_URL}{self.path}"
        method = self.command.upper()

        try:
            content_length = int(self.headers.get('Content-Length', '0'))
        except ValueError:
            content_length = 0
        body = self.rfile.read(content_length) if content_length > 0 else None

        forward_headers = {
            key: value
            for key, value in self.headers.items()
            if key.lower() not in self._SKIPPED_REQUEST_HEADERS
        }
        req = urllib.request.Request(
            target_url,
            data=body,
            headers=forward_headers,
            method=method,
        )

        # Only the backend round-trip is inside the try block, so a client
        # that disconnects mid-response is not misreported as a backend
        # failure (and no second response is attempted on a dead socket).
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                status = resp.getcode()
                headers = resp.getheaders()
                payload = resp.read()
        except urllib.error.HTTPError as e:
            # Non-2xx backend answers are relayed as-is.
            status = e.code
            headers = list(e.headers.items()) if e.headers else []
            payload = e.read() if hasattr(e, 'read') else b''
        except Exception as e:
            logging.error("Backend proxy error for %s %s: %s", method, self.path, e)
            status = 502
            headers = [('Content-Type', 'application/json; charset=utf-8')]
            payload = b'{"error":"Bad Gateway: backend proxy failed"}'

        self._relay_response(status, headers, payload)

    def _relay_response(self, status, headers, payload):
        """Write a fully buffered response (status, header pairs, body) to the client."""
        # NOTE(review): end_headers() also appends this handler's CORS and
        # security headers; if the backend sets Access-Control-Allow-Origin
        # itself the client will receive it twice - confirm whether backend
        # CORS headers should be filtered here.
        self.send_response(status)
        for header, value in headers:
            if header.lower() in self._SKIPPED_RESPONSE_HEADERS:
                continue
            self.send_header(header, value)
        # The body is fully buffered, so its length is known. 1xx/204/304
        # responses carry no body and must not advertise one.
        if status >= 200 and status not in (204, 304):
            self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def do_GET(self):
        """Handle GET: proxy API paths, serve HTML with config, else static files."""
        path, sep, query = self.path.partition('?')

        # Proxy API paths to backend first
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return

        # Route / to index.html, keeping the query string (with its '?').
        if path in ('/', ''):
            path = '/index.html'
            self.path = path + sep + query

        # Decide on the query-less path so "/page.html?v=1" is still
        # recognised as an HTML page.
        if path.endswith('.html'):
            self._serve_html_with_config()
        else:
            super().do_GET()

    def _serve_html_with_config(self):
        """Serve the requested HTML file, optionally injecting ``window.API_BASE_URL``.

        Responds 404 if the file does not exist and 500 if it cannot be read.
        """
        html = None
        try:
            # translate_path() drops the query string, URL-decodes the path and
            # discards '..' components, so the result stays inside the served
            # directory.
            file_path = self.translate_path(self.path)
            if os.path.isfile(file_path):
                with open(file_path, 'r', encoding='utf-8') as f:
                    html = f.read()
        except Exception as e:
            logging.error("Error serving HTML: %s", e)
            self.send_error(500)
            return

        if html is None:
            self.send_error(404)
            return

        # API_BASE_URL is not injected by default, so that a legacy .env value
        # (e.g. http://xx:6002) cannot override same-origin calls. It is only
        # injected when FRONTEND_INJECT_API_BASE_URL=1 and API_BASE_URL is set.
        if INJECT_API_BASE_URL and API_BASE_URL:
            # json.dumps yields a correctly quoted JS string literal; "</" is
            # escaped so the value cannot terminate the <script> element.
            js_value = json.dumps(API_BASE_URL).replace('</', '<\\/')
            config_script = f'<script>window.API_BASE_URL={js_value};</script>\n    '
            html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)

        encoded = html.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)

    def do_POST(self):
        """Handle POST requests. Proxy API requests to backend; reject others with 405."""
        path = self.path.partition('?')[0]
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return
        self.send_error(405, "Method Not Allowed")

    def setup(self):
        """Set up the connection streams, tolerating failures (best effort)."""
        try:
            super().setup()
        except Exception as e:
            # Deliberately non-fatal: setup errors are expected from scanners.
            logging.debug("Handler setup error: %s", e)

    def parse_request(self):
        """Parse the request, then apply per-IP rate limiting.

        The limit is enforced here rather than before parsing because
        send_error() needs the attributes that parsing sets (request line,
        command, version); calling it earlier raises AttributeError and the
        client never receives the 429.

        Returns:
            False if parsing failed or the client is rate limited (an error
            response has been sent in both cases), True otherwise.
        """
        parsed = super().parse_request()
        client_ip = self.client_address[0]
        if not self.is_rate_limited(client_ip):
            return parsed

        logging.warning("Rate limiting IP: %s", client_ip)
        if parsed:
            # If parsing failed the base class has already sent an error.
            self.send_error(429, "Too Many Requests")
        return False

    def handle_one_request(self):
        """Handle a single request, swallowing errors typical of scanners."""
        try:
            super().handle_one_request()
        except (ConnectionResetError, BrokenPipeError):
            # Client disconnected prematurely - common with scanners
            pass
        except UnicodeDecodeError:
            # Binary data received - not HTTP
            pass
        except Exception as e:
            # Log unexpected errors but don't crash
            logging.debug("Request handling error: %s", e)

    def log_message(self, format, *args):
        """Log a request line unless it is scanner noise or too short to be useful."""
        message = format % args
        # Filter out scanner noise
        if any(pattern in message for pattern in self._LOG_NOISE_PATTERNS):
            return
        # Only log legitimate requests
        if message and not message.startswith(" ") and len(message) > 10:
            super().log_message(format, *args)

    def end_headers(self):
        """Append CORS and security headers to every response, then finish the headers."""
        # Add CORS headers
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
        # Add security headers
        self.send_header('X-Content-Type-Options', 'nosniff')
        self.send_header('X-Frame-Options', 'DENY')
        self.send_header('X-XSS-Protection', '1; mode=block')
        super().end_headers()

    def do_OPTIONS(self):
        """Answer OPTIONS (CORS preflight): 204 for proxied API paths, 200 otherwise."""
        try:
            path = self.path.partition('?')[0]
            self.send_response(204 if self._is_proxy_path(path) else 200)
            self.end_headers()
        except Exception as e:
            # Best effort: a failed preflight reply must not take the handler down.
            logging.debug("OPTIONS handling error: %s", e)

class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that services each connection on its own thread."""

    # Handler threads are daemonic, so they never block interpreter exit.
    daemon_threads = True
    # Allow rebinding the listening address right after a restart.
    allow_reuse_address = True

if __name__ == '__main__':
    import errno

    # Bind the real server directly instead of probing the port with a
    # throwaway socket first. The probe did not set SO_REUSEADDR, so it could
    # report "in use" (e.g. lingering TIME_WAIT connections after a restart)
    # where the server itself, which sets allow_reuse_address, would bind
    # fine. The probe was also racy: the port could be taken between the
    # check and the real bind.
    try:
        # Create threaded server for better concurrency
        httpd = ThreadedTCPServer(("", PORT), MyHTTPRequestHandler)
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            print(f"ERROR: Port {PORT} is already in use.")
            print("Please stop the existing server or use a different port.")
            print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
        else:
            # e.g. permission denied on a privileged port
            print(f"ERROR: Could not start server on port {PORT}: {e}")
        sys.exit(1)

    # The context manager closes the listening socket on every exit path.
    with httpd:
        print(f"Frontend server started at http://localhost:{PORT}")
        print(f"Serving files from: {os.getcwd()}")
        print("\nPress Ctrl+C to stop the server")

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # serve_forever() has already returned at this point, so there is
            # no running loop for shutdown() to stop; leaving the with-block
            # is all the cleanup that is needed.
            print("\nShutting down server...")
            print("Server stopped")
            sys.exit(0)
        except Exception as e:
            print(f"Server error: {e}")
            sys.exit(1)