Blame view

main.py 7.08 KB
be52af70   tangwang   first commit
1
2
3
4
5
  #!/usr/bin/env python3
  """
  Main entry point for SearchEngine operations.
  
  Provides a unified CLI for common operations:
d6606d7a   tangwang   清理旧代码,具体如下:
6
7
  - serve: Start API service (search + admin routes)
  - serve-indexer: Start dedicated Indexer API service
be52af70   tangwang   first commit
8
9
10
11
12
13
14
  - search: Test search from command line
  - build-suggestions: Build tenant suggestion index (full rebuild)
  """
  
  import sys
  import os
  import argparse
  import json
a77693fe   tangwang   调整配置目录结构
15
  import uvicorn
be52af70   tangwang   first commit
16
17
18
19
  
  # Put the directory containing this script at the front of sys.path
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
  
a77693fe   tangwang   调整配置目录结构
20
  from config import ConfigLoader
ded6f29e   tangwang   补充suggestion模块
21
  from config.env_config import DB_CONFIG
a77693fe   tangwang   调整配置目录结构
22
  from utils import ESClient
a77693fe   tangwang   调整配置目录结构
23
  from search import Searcher
ded6f29e   tangwang   补充suggestion模块
24
25
  from suggestion import SuggestionIndexBuilder
  from utils.db_connector import create_db_connection
a77693fe   tangwang   调整配置目录结构
26
  
be52af70   tangwang   first commit
27
  
be52af70   tangwang   first commit
28
29
  def cmd_serve(args):
      """Launch the multi-tenant API service under uvicorn.

      Exports ``args.es_host`` as the ``ES_HOST`` environment variable,
      prints a short startup banner, then hands control to ``uvicorn.run``
      serving the ``api.app:app`` application.

      Args:
          args: Parsed CLI namespace providing ``host``, ``port``,
              ``es_host`` and ``reload``.
      """
      # Exported before uvicorn loads the app; presumably the app reads
      # ES_HOST from the environment at startup -- TODO confirm.
      os.environ['ES_HOST'] = args.es_host

      print("Starting API service (multi-tenant)...")
      print(f"  Host: {args.host}:{args.port} (search + indexer routes)")
      print(f"  Elasticsearch: {args.es_host}")

      server_options = {
          "host": args.host,
          "port": args.port,
          "reload": args.reload,
      }
      uvicorn.run("api.app:app", **server_options)
  
  
bb9c626c   tangwang   搜索服务(6002)不再初始化/挂...
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
  def cmd_serve_indexer(args):
      """Launch the dedicated Indexer API service under uvicorn.

      Exports ``args.es_host`` as the ``ES_HOST`` environment variable,
      prints a short startup banner, then hands control to ``uvicorn.run``
      serving the ``api.indexer_app:app`` application.

      Args:
          args: Parsed CLI namespace providing ``host``, ``port``,
              ``es_host`` and ``reload``.
      """
      # Exported before uvicorn loads the app; presumably the app reads
      # ES_HOST from the environment at startup -- TODO confirm.
      os.environ['ES_HOST'] = args.es_host

      print("Starting Indexer API service...")
      print(f"  Host: {args.host}:{args.port} (indexer only)")
      print(f"  Elasticsearch: {args.es_host}")

      server_options = {
          "host": args.host,
          "port": args.port,
          "reload": args.reload,
      }
      uvicorn.run("api.indexer_app:app", **server_options)
  
be52af70   tangwang   first commit
59
60
  def cmd_search(args):
      """Run one search from the command line and print the results.

      Loads ``config/config.yaml``, connects to Elasticsearch at
      ``args.es_host``, executes ``args.query`` for ``args.tenant_id`` and
      prints a summary followed by either the full result as JSON (when
      ``args.json`` is set) or a numbered listing of the hits.

      Args:
          args: Parsed CLI namespace providing ``query``, ``tenant_id``,
              ``es_host``, ``size`` and ``json``.

      Returns:
          int: 0 after results were printed, 1 if Elasticsearch did not
          answer the ping.
      """
      # NOTE(review): the --no-translation / --no-embedding flags defined on
      # the search sub-parser are never read here -- confirm whether they
      # should be forwarded to the searcher.

      # Load config
      config_loader = ConfigLoader("config/config.yaml")
      config = config_loader.load_config()

      # Initialize ES and searcher
      es_client = ESClient(hosts=[args.es_host])
      if not es_client.ping():
          print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
          return 1

      # Imported here rather than at module level, as in the original code.
      from query import QueryParser
      query_parser = QueryParser(config)
      searcher = Searcher(es_client, config, query_parser)

      # Execute search
      print(f"Searching for: '{args.query}' (tenant: {args.tenant_id})")
      result = searcher.search(
          query=args.query,
          tenant_id=args.tenant_id,
          size=args.size
      )

      # Display results
      print(f"\nFound {result.total} results in {result.took_ms}ms")
      print(f"Max score: {result.max_score}")

      if args.json:
          print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False))
          return 0

      print(f"\nTop {len(result.hits)} results:")
      for i, hit in enumerate(result.hits, 1):
          source = hit['_source']
          score = hit['_score']
          # The hits look like raw Elasticsearch hits, whose _score can be
          # null (e.g. when results are sorted by a field). Formatting None
          # with ':.4f' would raise TypeError, so fall back to 'N/A'.
          score_text = 'N/A' if score is None else f"{score:.4f}"
          print(f"\n{i}. Score: {score_text}")
          print(f"   ID: {source.get('skuId', 'N/A')}")
          print(f"   Name: {source.get('name', 'N/A')}")
          print(f"   Category: {source.get('categoryName', 'N/A')}")
          print(f"   Brand: {source.get('brandName', 'N/A')}")

      return 0
  
  
ded6f29e   tangwang   补充suggestion模块
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
  def cmd_build_suggestions(args):
      """Rebuild the suggestion index for one tenant and print the outcome.

      Pings Elasticsearch at ``args.es_host``, opens a database connection
      from ``DB_CONFIG``, runs ``SuggestionIndexBuilder.rebuild_tenant_index``
      and prints its return value as indented JSON.

      Args:
          args: Parsed CLI namespace providing ``es_host``, ``tenant_id``,
              ``days``, ``recreate``, ``batch_size`` and ``min_query_len``.

      Returns:
          int: 0 after the rebuild result was printed, 1 if Elasticsearch
          did not answer the ping.
      """
      es_client = ESClient(hosts=[args.es_host])
      es_reachable = es_client.ping()
      if not es_reachable:
          print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
          return 1

      # Pull exactly the connection settings the connector is called with.
      db_settings = {
          key: DB_CONFIG[key]
          for key in ("host", "port", "database", "username", "password")
      }
      db_engine = create_db_connection(**db_settings)

      builder = SuggestionIndexBuilder(es_client=es_client, db_engine=db_engine)
      rebuild_options = {
          "tenant_id": args.tenant_id,
          "days": args.days,
          "recreate": args.recreate,
          "batch_size": args.batch_size,
          "min_query_len": args.min_query_len,
      }
      result = builder.rebuild_tenant_index(**rebuild_options)

      print(json.dumps(result, indent=2, ensure_ascii=False))
      return 0
  
  
be52af70   tangwang   first commit
129
130
131
132
133
134
135
136
137
  def main(argv=None):
      """Main CLI entry point.

      Builds the argument parser with the ``serve``, ``serve-indexer``,
      ``search`` and ``build-suggestions`` sub-commands, parses the command
      line and dispatches to the matching ``cmd_*`` handler.

      Args:
          argv: Optional list of command-line arguments, without the program
              name. When ``None`` (the default) argparse reads
              ``sys.argv[1:]``, which is the original behaviour.

      Returns:
          1 when no command was given (help is printed) or the command is
          unknown; otherwise whatever the selected handler returns.

      Raises:
          SystemExit: Raised by argparse for invalid arguments or ``--help``.
      """
      default_es_host = 'http://localhost:9200'

      parser = argparse.ArgumentParser(
          description='SearchEngine - E-Commerce Search SaaS',
          formatter_class=argparse.RawDescriptionHelpFormatter
      )

      subparsers = parser.add_subparsers(dest='command', help='Command to execute')

      # Serve command
      serve_parser = subparsers.add_parser('serve', help='Start API service (multi-tenant)')
      serve_parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
      serve_parser.add_argument('--port', type=int, default=6002, help='Port to bind to')
      serve_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      serve_parser.add_argument('--reload', action='store_true', help='Enable auto-reload')

      # Serve-indexer command
      serve_indexer_parser = subparsers.add_parser(
          'serve-indexer',
          help='Start dedicated Indexer API service (indexer routes only)'
      )
      serve_indexer_parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
      serve_indexer_parser.add_argument('--port', type=int, default=6004, help='Port to bind to')
      serve_indexer_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      serve_indexer_parser.add_argument('--reload', action='store_true', help='Enable auto-reload')

      # Search command
      search_parser = subparsers.add_parser('search', help='Test search from command line')
      search_parser.add_argument('query', help='Search query')
      search_parser.add_argument('--tenant-id', required=True, help='Tenant ID (required)')
      search_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      search_parser.add_argument('--size', type=int, default=10, help='Number of results')
      search_parser.add_argument('--no-translation', action='store_true', help='Disable translation')
      search_parser.add_argument('--no-embedding', action='store_true', help='Disable embeddings')
      search_parser.add_argument('--json', action='store_true', help='Output JSON')

      # Suggestion build command
      suggest_build_parser = subparsers.add_parser(
          'build-suggestions',
          help='Build tenant suggestion index (full rebuild)'
      )
      suggest_build_parser.add_argument('--tenant-id', required=True, help='Tenant ID')
      suggest_build_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      suggest_build_parser.add_argument('--days', type=int, default=30, help='Query log lookback days')
      suggest_build_parser.add_argument('--batch-size', type=int, default=500, help='Product scan batch size')
      suggest_build_parser.add_argument('--min-query-len', type=int, default=1, help='Minimum query length')
      suggest_build_parser.add_argument(
          '--recreate',
          action='store_true',
          help='Delete and recreate suggestion index before build'
      )

      # argv=None makes argparse fall back to sys.argv[1:].
      args = parser.parse_args(argv)

      # The sub-command is optional at the argparse level, so an empty
      # command line lands here instead of raising a usage error.
      if not args.command:
          parser.print_help()
          return 1

      # Execute command: map each sub-command name to its handler.
      handlers = {
          'serve': cmd_serve,
          'serve-indexer': cmd_serve_indexer,
          'search': cmd_search,
          'build-suggestions': cmd_build_suggestions,
      }
      handler = handlers.get(args.command)
      if handler is None:
          print(f"Unknown command: {args.command}")
          return 1
      return handler(args)
  
  
  # When run as a script, use main()'s return value as the process exit status.
  if __name__ == "__main__":
      sys.exit(main())