Blame view

scripts/frontend_server.py 5.46 KB
115047ee   tangwang   为一个租户灌入测试数据;实例的启动...
1
2
3
4
5
6
7
8
9
  #!/usr/bin/env python3
  """
  Simple HTTP server for SearchEngine frontend.
  """
  
  import http.server
  import socketserver
  import os
  import sys
bb3c5ef8   tangwang   灌入数据流程跑通
10
11
12
  import logging
  import time
  from collections import defaultdict, deque
115047ee   tangwang   为一个租户灌入测试数据;实例的启动...
13
14
15
16
17
  
  # Serve static files relative to the sibling "frontend" directory of this
  # script: SimpleHTTPRequestHandler resolves paths against the process CWD.
  frontend_dir = os.path.join(
      os.path.dirname(__file__),
      '../frontend',
  )
  os.chdir(frontend_dir)
  
1852e3e3   tangwang   添加Base配置演示流程和数据库配置
18
19
  # Listening port: read from the PORT environment variable, 6003 when unset
  PORT = int(os.environ.get('PORT', 6003))
115047ee   tangwang   为一个租户灌入测试数据;实例的启动...
20
  
bb3c5ef8   tangwang   灌入数据流程跑通
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
  # Root logger emits ERROR and above only, which keeps scanner noise
  # (logged at lower levels by this script) out of the output.
  logging.basicConfig(
      format='%(asctime)s - %(levelname)s - %(message)s',
      level=logging.ERROR,
  )
  
  class RateLimitingMixin:
      """Mixin providing a per-IP sliding-window request rate limiter.

      State lives on the class, so every handler instance shares the same
      counters. For each IP a deque of request timestamps (seconds, from
      ``time.time()``) is kept, oldest first.

      NOTE(review): the counters are mutated without a lock; this file serves
      requests from a threading server, so confirm that is acceptable.
      """
      # ip -> timestamps of the requests accepted within the current window
      request_counts = defaultdict(deque)
      rate_limit = 100  # maximum accepted requests per window
      window = 60  # window length in seconds

      @classmethod
      def is_rate_limited(cls, ip):
          """Record a request from *ip* and report whether it must be rejected.

          Args:
              ip: Client address used as the counter key.

          Returns:
              True when *ip* already has ``rate_limit`` accepted requests in
              the last ``window`` seconds (the request is not recorded);
              False otherwise (the request is recorded).
          """
          now = time.time()
          timestamps = cls.request_counts[ip]

          # Drop timestamps that have fallen out of the sliding window
          cutoff = now - cls.window
          while timestamps and timestamps[0] < cutoff:
              timestamps.popleft()

          # ">=" so that exactly rate_limit requests are accepted per window;
          # a strict ">" would let one extra request through.
          if len(timestamps) >= cls.rate_limit:
              return True

          timestamps.append(now)
          return False
  
  class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
      """Static-file request handler with CORS headers, rate limiting and quiet error handling.

      Extends ``SimpleHTTPRequestHandler`` with:
        * friendly routes: ``/base`` -> ``base.html`` and ``/`` -> ``index.html``;
        * per-IP rate limiting via ``RateLimitingMixin.is_rate_limited``;
        * CORS and basic security headers on every response;
        * suppression of errors and log lines caused by malformed requests.
      """

      def do_GET(self):
          """Serve a GET request, mapping friendly routes to their HTML files.

          ``/base`` and ``/base/`` are served from ``/base.html``; ``/`` (or an
          empty path) from ``/index.html``. Any query string is preserved.
          """
          # Split off the query string; ``sep`` is '?' when one is present
          # and '' otherwise, so re-joining keeps the URL well formed.
          path, sep, query = self.path.partition('?')

          if path in ('/base', '/base/'):
              self.path = '/base.html' + sep + query
          elif path in ('/', ''):
              self.path = '/index.html' + sep + query

          # Let the parent serve the (possibly rewritten) path
          super().do_GET()

      def setup(self):
          """Set up the connection, tolerating failures caused by scanners."""
          try:
              super().setup()
          except Exception as e:
              # Best effort: do not crash on setup errors, but leave a trace
              logging.debug("Connection setup error: %s", e)

      def handle_one_request(self):
          """Handle a single request, applying rate limiting and absorbing errors.

          Rate-limited clients receive a 429 response. Disconnects, non-HTTP
          binary input and any other exception are absorbed so that one bad
          connection cannot take the handler down.
          """
          try:
              # Check rate limiting
              client_ip = self.client_address[0]
              if self.is_rate_limited(client_ip):
                  logging.warning("Rate limiting IP: %s", client_ip)
                  # The request line has not been read yet, so the attributes
                  # that send_error()/log_request() rely on are still unset.
                  # Fill them in first (the base class does the same for its
                  # own early 414 reply); otherwise send_error() raises
                  # AttributeError and the 429 is never delivered.
                  self.requestline = ''
                  self.request_version = ''
                  self.command = ''
                  # The request was not consumed, so the connection cannot be reused
                  self.close_connection = True
                  self.send_error(429, "Too Many Requests")
                  return

              super().handle_one_request()
          except (ConnectionResetError, BrokenPipeError):
              # Client disconnected prematurely - common with scanners
              pass
          except UnicodeDecodeError:
              # Binary data received - not HTTP
              pass
          except Exception as e:
              # Log unexpected errors but don't crash
              logging.debug("Request handling error: %s", e)

      def log_message(self, format, *args):
          """Log a request line unless it is scanner noise.

          Messages that mention a bad request, start with a space, or are
          10 characters or shorter are dropped; everything else is forwarded
          to the parent implementation.
          """
          message = format % args
          # Filter out scanner noise
          noise_patterns = (
              "code 400",
              "Bad request",
              "Bad request version",
              "Bad HTTP/0.9 request type",
              "Bad request syntax",
          )
          if any(pattern in message for pattern in noise_patterns):
              return
          # Only log legitimate requests
          if message and not message.startswith(" ") and len(message) > 10:
              super().log_message(format, *args)

      def end_headers(self):
          """Append CORS and security headers, then finish the header block."""
          # Add CORS headers
          self.send_header('Access-Control-Allow-Origin', '*')
          self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
          self.send_header('Access-Control-Allow-Headers', 'Content-Type')
          # Add security headers
          self.send_header('X-Content-Type-Options', 'nosniff')
          self.send_header('X-Frame-Options', 'DENY')
          self.send_header('X-XSS-Protection', '1; mode=block')
          super().end_headers()

      def do_OPTIONS(self):
          """Answer OPTIONS requests with 200 and the standard headers."""
          try:
              self.send_response(200)
              self.end_headers()
          except Exception as e:
              # Best effort: the client may already be gone
              logging.debug("OPTIONS handling error: %s", e)
  
  class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
      """TCP server combining ``ThreadingMixIn`` with ``TCPServer``."""
      # Request threads are daemon threads
      daemon_threads = True
      # Allow the listening address to be reused
      allow_reuse_address = True
115047ee   tangwang   为一个租户灌入测试数据;实例的启动...
131
132
  
  if __name__ == '__main__':
      # Bind the real server directly rather than probing the port with a
      # throwaway socket first. A separate probe is racy, and because it does
      # not set SO_REUSEADDR it can report "in use" in situations where the
      # server itself (allow_reuse_address = True) would bind successfully.
      try:
          # Create threaded server for better concurrency
          httpd = ThreadedTCPServer(("", PORT), MyHTTPRequestHandler)
      except OSError:
          print(f"ERROR: Port {PORT} is already in use.")
          print("Please stop the existing server or use a different port.")
          print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
          sys.exit(1)

      # The context manager closes the listening socket on every exit path
      with httpd:
          print(f"Frontend server started at http://localhost:{PORT}")
          print(f"Serving files from: {os.getcwd()}")
          print("\nPress Ctrl+C to stop the server")

          try:
              httpd.serve_forever()
          except KeyboardInterrupt:
              print("\nShutting down server...")
              httpd.shutdown()
              print("Server stopped")
              sys.exit(0)
          except Exception as e:
              print(f"Server error: {e}")
              sys.exit(1)