Blame view

main.py 9.66 KB
be52af70   tangwang   first commit
1
2
  #!/usr/bin/env python3
  """
a7920e17   tangwang   项目名称和部署路径修改
3
  Main entry point for saas-search operations.
be52af70   tangwang   first commit
4
5
  
  Provides a unified CLI for common operations:
d6606d7a   tangwang   清理旧代码,具体如下:
6
7
  - serve: Start API service (search + admin routes)
  - serve-indexer: Start dedicated Indexer API service
be52af70   tangwang   first commit
8
9
10
11
12
13
14
  - search: Test search from command line
  - build-suggestions: Build tenant suggestion index (full/incremental)
  """
  
  import sys
  import os
  import argparse
  import json
be52af70   tangwang   first commit
15
16
17
18
  
  # Put the directory that contains this script (not its parent) at the front
  # of the import path before the project imports below run.
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
  
86d8358b   tangwang   config optimize
19
  from config import get_app_config
a77693fe   tangwang   调整配置目录结构
20
  from utils import ESClient
a77693fe   tangwang   调整配置目录结构
21
  from search import Searcher
ded6f29e   tangwang   补充suggestion模块
22
23
  from suggestion import SuggestionIndexBuilder
  from utils.db_connector import create_db_connection
ed948666   tangwang   tidy
24
  from context.request_context import create_request_context
a77693fe   tangwang   调整配置目录结构
25
  
be52af70   tangwang   first commit
26
  
be52af70   tangwang   first commit
27
28
  def cmd_serve(args):
      """Launch the API service (search + admin routes) under uvicorn.

      Publishes the chosen Elasticsearch host through the ``ES_HOST``
      environment variable, prints a short startup banner and then calls
      ``uvicorn.run`` on ``api.app:app``.

      Args:
          args: Parsed CLI namespace; ``host``, ``port``, ``es_host`` and
              ``reload`` are read.

      Returns:
          None (there is no explicit return value).
      """
      # Function-scope import: uvicorn is only imported when this command runs.
      import uvicorn

      es_host = args.es_host
      # NOTE(review): presumably the application reads ES_HOST from the
      # environment when it starts - confirm against api.app.
      os.environ['ES_HOST'] = es_host

      banner = (
          "Starting API service (multi-tenant)...",
          f"  Host: {args.host}:{args.port} (search + admin routes)",
          f"  Elasticsearch: {es_host}",
      )
      for line in banner:
          print(line)

      server_options = {
          "host": args.host,
          "port": args.port,
          "reload": args.reload,
      }
      uvicorn.run("api.app:app", **server_options)
  
  
bb9c626c   tangwang   搜索服务(6002)不再初始化/挂...
44
45
  def cmd_serve_indexer(args):
      """Launch the dedicated Indexer API service (no search endpoints).

      Publishes the chosen Elasticsearch host through the ``ES_HOST``
      environment variable, prints a short startup banner and then calls
      ``uvicorn.run`` on ``api.indexer_app:app``.

      Args:
          args: Parsed CLI namespace; ``host``, ``port``, ``es_host`` and
              ``reload`` are read.

      Returns:
          None (there is no explicit return value).
      """
      # Function-scope import: uvicorn is only imported when this command runs.
      import uvicorn

      es_host = args.es_host
      # NOTE(review): presumably the application reads ES_HOST from the
      # environment when it starts - confirm against api.indexer_app.
      os.environ['ES_HOST'] = es_host

      banner = (
          "Starting Indexer API service...",
          f"  Host: {args.host}:{args.port} (indexer only)",
          f"  Elasticsearch: {es_host}",
      )
      for line in banner:
          print(line)

      server_options = {
          "host": args.host,
          "port": args.port,
          "reload": args.reload,
      }
      uvicorn.run("api.indexer_app:app", **server_options)
  
be52af70   tangwang   first commit
60
61
  def cmd_search(args):
      """Run one search from the command line and print the outcome.

      Args:
          args: Parsed CLI namespace; ``query``, ``tenant_id``, ``size``,
              ``es_host`` and ``json`` are read.

      Returns:
          int: ``0`` after the results were printed, ``1`` when
          Elasticsearch does not answer the ping.

      NOTE(review): the ``search`` sub-parser also accepts
      ``--no-translation`` and ``--no-embedding``, but neither flag is
      consulted here; wire them into the searcher or remove them.
      """
      # Load config
      app_config = get_app_config()
      config = app_config.search

      # Pass credentials when both are configured, the same way
      # cmd_build_suggestions builds its client, so this command also works
      # against a cluster that requires authentication.
      es_cfg = app_config.infrastructure.elasticsearch
      es_kwargs = {"hosts": [args.es_host]}
      if es_cfg.username and es_cfg.password:
          es_kwargs.update(username=es_cfg.username, password=es_cfg.password)

      # Initialize ES and searcher
      es_client = ESClient(**es_kwargs)
      if not es_client.ping():
          print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
          return 1

      from query import QueryParser
      query_parser = QueryParser(config)
      searcher = Searcher(es_client, config, query_parser)

      # Execute search
      print(f"Searching for: '{args.query}' (tenant: {args.tenant_id})")
      result = searcher.search(
          query=args.query,
          tenant_id=args.tenant_id,
          size=args.size,
          context=create_request_context(),
      )

      # Display results
      print(f"\nFound {result.total} results in {result.took_ms}ms")
      print(f"Max score: {result.max_score}")

      if args.json:
          print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False))
      else:
          print(f"\nTop {len(result.hits)} results:")
          for i, hit in enumerate(result.hits, 1):
              source = hit['_source']
              score = hit['_score']
              # Guard against a hit without a score so the float format
              # below cannot raise TypeError.
              score_text = "N/A" if score is None else f"{score:.4f}"
              print(f"\n{i}. Score: {score_text}")
              print(f"   ID: {source.get('skuId', 'N/A')}")
              print(f"   Name: {source.get('name', 'N/A')}")
              print(f"   Category: {source.get('categoryName', 'N/A')}")
              print(f"   Brand: {source.get('brandName', 'N/A')}")

      return 0
  
  
ded6f29e   tangwang   补充suggestion模块
104
  def cmd_build_suggestions(args):
      """Build or incrementally update the suggestion index of one tenant.

      Connects to Elasticsearch and to the database described by the
      application config, runs either a full rebuild or an incremental
      update through ``SuggestionIndexBuilder`` and prints the builder's
      result as JSON.

      Args:
          args: Parsed CLI namespace of the ``build-suggestions`` command.

      Returns:
          int: ``0`` after the result was printed, ``1`` when Elasticsearch
          does not answer the ping or the database settings are incomplete.
      """
      # Elasticsearch client; credentials are passed only when both a
      # username and a password are configured.
      es_settings = get_app_config().infrastructure.elasticsearch
      user, secret = es_settings.username, es_settings.password
      if not (user and secret):
          es_client = ESClient(hosts=[args.es_host])
      else:
          es_client = ESClient(hosts=[args.es_host], username=user, password=secret)

      reachable = es_client.ping()
      if not reachable:
          print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
          return 1

      # Database settings are taken from the application config object.
      db_settings = get_app_config().infrastructure.database
      connection_params = {
          "host": db_settings.host,
          "port": db_settings.port,
          "database": db_settings.database,
          "username": db_settings.username,
          "password": db_settings.password,
      }
      # The port is not part of the completeness check, although the error
      # message below lists it.
      required = ("host", "database", "username", "password")
      if any(not connection_params[key] for key in required):
          print("ERROR: DB_HOST/DB_PORT/DB_DATABASE/DB_USERNAME/DB_PASSWORD must be set in environment")
          return 1

      db_engine = create_db_connection(**connection_params)
      builder = SuggestionIndexBuilder(es_client=es_client, db_engine=db_engine)

      # Any mode other than "full" takes the incremental path.
      if args.mode != "full":
          result = builder.incremental_update_tenant_index(
              tenant_id=args.tenant_id,
              min_query_len=args.min_query_len,
              fallback_days=args.incremental_fallback_days,
              overlap_minutes=args.overlap_minutes,
              bootstrap_if_missing=args.bootstrap_if_missing,
              bootstrap_days=args.bootstrap_days,
              batch_size=args.batch_size,
          )
      else:
          result = builder.rebuild_tenant_index(
              tenant_id=args.tenant_id,
              days=args.days,
              batch_size=args.batch_size,
              min_query_len=args.min_query_len,
              publish_alias=args.publish_alias,
              keep_versions=args.keep_versions,
          )

      report = json.dumps(result, indent=2, ensure_ascii=False)
      print(report)
      return 0
  
  
be52af70   tangwang   first commit
160
161
162
  def main():
      """Main CLI entry point.

      Builds the argument parser with the ``serve``, ``serve-indexer``,
      ``search`` and ``build-suggestions`` sub-commands, parses the command
      line and calls the matching ``cmd_*`` handler.

      Returns:
          The selected handler's return value, or ``1`` when no sub-command
          was given (the help text is printed in that case).
      """
      # Look the default Elasticsearch host up once; all four sub-parsers
      # share the same default.
      default_es_host = get_app_config().infrastructure.elasticsearch.host

      parser = argparse.ArgumentParser(
          description='saas-search - E-Commerce Search SaaS',
          formatter_class=argparse.RawDescriptionHelpFormatter
      )

      subparsers = parser.add_subparsers(dest='command', help='Command to execute')

      # Serve command
      serve_parser = subparsers.add_parser('serve', help='Start API service (multi-tenant)')
      serve_parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
      serve_parser.add_argument('--port', type=int, default=6002, help='Port to bind to')
      serve_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      serve_parser.add_argument('--reload', action='store_true', help='Enable auto-reload')

      # Serve-indexer command
      serve_indexer_parser = subparsers.add_parser(
          'serve-indexer',
          help='Start dedicated Indexer API service (indexer routes only)'
      )
      serve_indexer_parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
      serve_indexer_parser.add_argument('--port', type=int, default=6004, help='Port to bind to')
      serve_indexer_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      serve_indexer_parser.add_argument('--reload', action='store_true', help='Enable auto-reload')

      # Search command
      search_parser = subparsers.add_parser('search', help='Test search from command line')
      search_parser.add_argument('query', help='Search query')
      search_parser.add_argument('--tenant-id', required=True, help='Tenant ID (required)')
      search_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      search_parser.add_argument('--size', type=int, default=10, help='Number of results')
      search_parser.add_argument('--no-translation', action='store_true', help='Disable translation')
      search_parser.add_argument('--no-embedding', action='store_true', help='Disable embeddings')
      search_parser.add_argument('--json', action='store_true', help='Output JSON')

      # Suggestion build command
      suggest_build_parser = subparsers.add_parser(
          'build-suggestions',
          help='Build tenant suggestion index (full/incremental)'
      )
      suggest_build_parser.add_argument('--tenant-id', required=True, help='Tenant ID')
      suggest_build_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
      suggest_build_parser.add_argument(
          '--mode',
          choices=['full', 'incremental'],
          default='full',
          help='Build mode: full rebuild or incremental update',
      )
      suggest_build_parser.add_argument('--days', type=int, default=360, help='Query log lookback days')
      suggest_build_parser.add_argument('--batch-size', type=int, default=500, help='Product scan batch size')
      suggest_build_parser.add_argument('--min-query-len', type=int, default=1, help='Minimum query length')
      suggest_build_parser.add_argument(
          '--publish-alias',
          action=argparse.BooleanOptionalAction,
          default=True,
          help='For full mode: publish alias to new versioned index (default: true)',
      )
      suggest_build_parser.add_argument(
          '--keep-versions',
          type=int,
          default=2,
          help='For full mode: keep latest N versioned indices',
      )
      suggest_build_parser.add_argument(
          '--incremental-fallback-days',
          type=int,
          default=7,
          help='For incremental mode: default lookback days when no watermark',
      )
      suggest_build_parser.add_argument(
          '--overlap-minutes',
          type=int,
          default=30,
          help='For incremental mode: overlap window to avoid late-arrival misses',
      )
      suggest_build_parser.add_argument(
          '--bootstrap-if-missing',
          action=argparse.BooleanOptionalAction,
          default=True,
          help='For incremental mode: bootstrap with full build when active index is missing',
      )
      suggest_build_parser.add_argument(
          '--bootstrap-days',
          type=int,
          default=30,
          help='For incremental mode bootstrap full build: query log lookback days',
      )

      args = parser.parse_args()

      # The sub-command is optional for argparse, so a bare invocation ends
      # up here with no command set.
      if not args.command:
          parser.print_help()
          return 1

      # Execute command
      handlers = {
          'serve': cmd_serve,
          'serve-indexer': cmd_serve_indexer,
          'search': cmd_search,
          'build-suggestions': cmd_build_suggestions,
      }
      handler = handlers.get(args.command)
      if handler is None:
          # Defensive fallback for a command name without a handler.
          print(f"Unknown command: {args.command}")
          return 1
      return handler(args)
  
  
  # Run the CLI only when this file is executed as a script; the value
  # returned by main() is handed to sys.exit().
  if __name__ == "__main__":
      sys.exit(main())