be52af70
tangwang
first commit
|
1
2
|
#!/usr/bin/env python3
"""
|
a7920e17
tangwang
项目名称和部署路径修改
|
3
|
Main entry point for saas-search operations.
|
be52af70
tangwang
first commit
|
4
5
|
Provides a unified CLI for common operations:
|
d6606d7a
tangwang
清理旧代码,具体如下:
|
6
7
|
- serve: Start API service (search + admin routes)
- serve-indexer: Start dedicated Indexer API service
|
be52af70
tangwang
first commit
|
8
9
10
11
12
13
14
|
- search: Test search from command line
- build-suggestions: Build tenant suggestion index (full/incremental)
"""
import sys
import os
import argparse
import json
|
be52af70
tangwang
first commit
|
15
16
17
18
|
# Put this script's own directory on sys.path so the project packages imported below resolve
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
86d8358b
tangwang
config optimize
|
19
|
from config import get_app_config
|
a77693fe
tangwang
调整配置目录结构
|
20
|
from utils import ESClient
|
a77693fe
tangwang
调整配置目录结构
|
21
|
from search import Searcher
|
ded6f29e
tangwang
补充suggestion模块
|
22
23
|
from suggestion import SuggestionIndexBuilder
from utils.db_connector import create_db_connection
|
ed948666
tangwang
tidy
|
24
|
from context.request_context import create_request_context
|
a77693fe
tangwang
调整配置目录结构
|
25
|
|
be52af70
tangwang
first commit
|
26
|
|
be52af70
tangwang
first commit
|
27
28
|
def cmd_serve(args):
    """Launch the search + admin API application under uvicorn.

    Reads ``es_host``, ``host``, ``port`` and ``reload`` from ``args``.
    ``es_host`` is exported as the ``ES_HOST`` environment variable before
    the server starts. Returns ``None`` once ``uvicorn.run`` returns.
    """
    # uvicorn is imported lazily, only when a serve command actually runs.
    import uvicorn

    # Exported before uvicorn loads the app (presumably read there — confirm).
    os.environ['ES_HOST'] = args.es_host

    banner = (
        "Starting API service (multi-tenant)...",
        f" Host: {args.host}:{args.port} (search + admin routes)",
        f" Elasticsearch: {args.es_host}",
    )
    for line in banner:
        print(line)

    # The app is passed as an import string rather than an object.
    server_options = {
        "host": args.host,
        "port": args.port,
        "reload": args.reload,
    }
    uvicorn.run("api.app:app", **server_options)
|
bb9c626c
tangwang
搜索服务(6002)不再初始化/挂...
|
44
45
|
def cmd_serve_indexer(args):
    """Launch the dedicated Indexer API application (no search endpoints).

    Reads ``es_host``, ``host``, ``port`` and ``reload`` from ``args``.
    ``es_host`` is exported as the ``ES_HOST`` environment variable, a short
    banner is printed, and ``api.indexer_app:app`` is served with uvicorn.
    Returns ``None`` once ``uvicorn.run`` returns.
    """
    # uvicorn is imported lazily, only when a serve command actually runs.
    import uvicorn

    es_host = args.es_host
    os.environ['ES_HOST'] = es_host

    print("Starting Indexer API service...")
    listen_on = f"{args.host}:{args.port}"
    print(f" Host: {listen_on} (indexer only)")
    print(f" Elasticsearch: {es_host}")

    uvicorn.run(
        "api.indexer_app:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
    )
|
be52af70
tangwang
first commit
|
60
61
|
def cmd_search(args):
    """Run one search from the command line and print the outcome.

    Args:
        args: Parsed CLI namespace. Reads ``es_host``, ``query``,
            ``tenant_id``, ``size`` and ``json``.

    Returns:
        int: ``0`` after printing results, ``1`` when Elasticsearch does not
        answer the ping.
    """
    app_config = get_app_config()
    config = app_config.search

    # Pass the configured Elasticsearch credentials when both are present,
    # the same way cmd_build_suggestions does. Without them this command
    # cannot reach a cluster that requires authentication.
    es_cfg = app_config.infrastructure.elasticsearch
    es_kwargs = {"hosts": [args.es_host]}
    if es_cfg.username and es_cfg.password:
        es_kwargs["username"] = es_cfg.username
        es_kwargs["password"] = es_cfg.password
    es_client = ESClient(**es_kwargs)

    if not es_client.ping():
        print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
        return 1

    # Imported only once Elasticsearch is known to be reachable.
    from query import QueryParser
    query_parser = QueryParser(config)
    searcher = Searcher(es_client, config, query_parser)

    # NOTE(review): the parser also defines --no-translation and
    # --no-embedding for this command, but neither flag is read here.
    print(f"Searching for: '{args.query}' (tenant: {args.tenant_id})")
    result = searcher.search(
        query=args.query,
        tenant_id=args.tenant_id,
        size=args.size,
        context=create_request_context(),
    )

    # The summary lines are printed in both output modes.
    print(f"\nFound {result.total} results in {result.took_ms}ms")
    print(f"Max score: {result.max_score}")

    if args.json:
        print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False))
        return 0

    print(f"\nTop {len(result.hits)} results:")
    for i, hit in enumerate(result.hits, 1):
        source = hit['_source']
        score = hit['_score']
        print(f"\n{i}. Score: {score:.4f}")
        print(f" ID: {source.get('skuId', 'N/A')}")
        print(f" Name: {source.get('name', 'N/A')}")
        print(f" Category: {source.get('categoryName', 'N/A')}")
        print(f" Brand: {source.get('brandName', 'N/A')}")
    return 0
|
ded6f29e
tangwang
补充suggestion模块
|
104
|
def cmd_build_suggestions(args):
    """Build or incrementally update the suggestion index of one tenant.

    Elasticsearch and database settings are read from the application
    config; the Elasticsearch host comes from ``args.es_host``. With
    ``args.mode == "full"`` the tenant index is rebuilt; any other mode runs
    the incremental update. The builder's result is printed as JSON.

    Returns:
        int: ``0`` on success; ``1`` when Elasticsearch does not answer the
        ping or a required database setting is empty.
    """
    # Elasticsearch client: credentials are supplied only when both are set.
    es_settings = get_app_config().infrastructure.elasticsearch
    user = es_settings.username
    secret = es_settings.password
    client_kwargs = {"hosts": [args.es_host]}
    if user and secret:
        client_kwargs["username"] = user
        client_kwargs["password"] = secret
    es_client = ESClient(**client_kwargs)

    if not es_client.ping():
        print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
        return 1

    # Database connection parameters come from the application config.
    db_settings = get_app_config().infrastructure.database
    connection_params = {
        "host": db_settings.host,
        "port": db_settings.port,
        "database": db_settings.database,
        "username": db_settings.username,
        "password": db_settings.password,
    }
    # The port is not part of the completeness check.
    mandatory = ("host", "database", "username", "password")
    if any(not connection_params[key] for key in mandatory):
        print("ERROR: DB_HOST/DB_PORT/DB_DATABASE/DB_USERNAME/DB_PASSWORD must be set in environment")
        return 1

    db_engine = create_db_connection(**connection_params)
    builder = SuggestionIndexBuilder(es_client=es_client, db_engine=db_engine)

    if args.mode != "full":
        # Incremental update.
        result = builder.incremental_update_tenant_index(
            tenant_id=args.tenant_id,
            min_query_len=args.min_query_len,
            fallback_days=args.incremental_fallback_days,
            overlap_minutes=args.overlap_minutes,
            bootstrap_if_missing=args.bootstrap_if_missing,
            bootstrap_days=args.bootstrap_days,
            batch_size=args.batch_size,
        )
    else:
        # Full rebuild of the tenant index.
        result = builder.rebuild_tenant_index(
            tenant_id=args.tenant_id,
            days=args.days,
            batch_size=args.batch_size,
            min_query_len=args.min_query_len,
            publish_alias=args.publish_alias,
            keep_versions=args.keep_versions,
        )

    print(json.dumps(result, indent=2, ensure_ascii=False))
    return 0
|
be52af70
tangwang
first commit
|
160
161
162
|
def main(argv=None):
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing ``main()`` callers
            are unaffected; passing a list lets other code drive the CLI.

    Returns:
        The handler's return value, used as the process exit status by the
        ``__main__`` guard. ``1`` when no sub-command was given or the
        command name has no handler.
    """
    # Every sub-command uses the same default Elasticsearch host, so the
    # config lookup is done once.
    default_es_host = get_app_config().infrastructure.elasticsearch.host

    parser = argparse.ArgumentParser(
        description='saas-search - E-Commerce Search SaaS',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    subparsers = parser.add_subparsers(dest='command', help='Command to execute')

    # Serve command
    serve_parser = subparsers.add_parser('serve', help='Start API service (multi-tenant)')
    serve_parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
    serve_parser.add_argument('--port', type=int, default=6002, help='Port to bind to')
    serve_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
    serve_parser.add_argument('--reload', action='store_true', help='Enable auto-reload')

    # Serve-indexer command
    serve_indexer_parser = subparsers.add_parser(
        'serve-indexer',
        help='Start dedicated Indexer API service (indexer routes only)'
    )
    serve_indexer_parser.add_argument('--host', default='0.0.0.0', help='Host to bind to')
    serve_indexer_parser.add_argument('--port', type=int, default=6004, help='Port to bind to')
    serve_indexer_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
    serve_indexer_parser.add_argument('--reload', action='store_true', help='Enable auto-reload')

    # Search command
    search_parser = subparsers.add_parser('search', help='Test search from command line')
    search_parser.add_argument('query', help='Search query')
    search_parser.add_argument('--tenant-id', required=True, help='Tenant ID (required)')
    search_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
    search_parser.add_argument('--size', type=int, default=10, help='Number of results')
    search_parser.add_argument('--no-translation', action='store_true', help='Disable translation')
    search_parser.add_argument('--no-embedding', action='store_true', help='Disable embeddings')
    search_parser.add_argument('--json', action='store_true', help='Output JSON')

    # Suggestion build command
    suggest_build_parser = subparsers.add_parser(
        'build-suggestions',
        help='Build tenant suggestion index (full/incremental)'
    )
    suggest_build_parser.add_argument('--tenant-id', required=True, help='Tenant ID')
    suggest_build_parser.add_argument('--es-host', default=default_es_host, help='Elasticsearch host')
    suggest_build_parser.add_argument(
        '--mode',
        choices=['full', 'incremental'],
        default='full',
        help='Build mode: full rebuild or incremental update',
    )
    suggest_build_parser.add_argument('--days', type=int, default=360, help='Query log lookback days')
    suggest_build_parser.add_argument('--batch-size', type=int, default=500, help='Product scan batch size')
    suggest_build_parser.add_argument('--min-query-len', type=int, default=1, help='Minimum query length')
    suggest_build_parser.add_argument(
        '--publish-alias',
        action=argparse.BooleanOptionalAction,
        default=True,
        help='For full mode: publish alias to new versioned index (default: true)',
    )
    suggest_build_parser.add_argument(
        '--keep-versions',
        type=int,
        default=2,
        help='For full mode: keep latest N versioned indices',
    )
    suggest_build_parser.add_argument(
        '--incremental-fallback-days',
        type=int,
        default=7,
        help='For incremental mode: default lookback days when no watermark',
    )
    suggest_build_parser.add_argument(
        '--overlap-minutes',
        type=int,
        default=30,
        help='For incremental mode: overlap window to avoid late-arrival misses',
    )
    suggest_build_parser.add_argument(
        '--bootstrap-if-missing',
        action=argparse.BooleanOptionalAction,
        default=True,
        help='For incremental mode: bootstrap with full build when active index is missing',
    )
    suggest_build_parser.add_argument(
        '--bootstrap-days',
        type=int,
        default=30,
        help='For incremental mode bootstrap full build: query log lookback days',
    )

    args = parser.parse_args(argv)
    if not args.command:
        # No sub-command given: show usage and report failure.
        parser.print_help()
        return 1

    # Sub-command name -> handler function.
    handlers = {
        'serve': cmd_serve,
        'serve-indexer': cmd_serve_indexer,
        'search': cmd_search,
        'build-suggestions': cmd_build_suggestions,
    }
    handler = handlers.get(args.command)
    if handler is None:
        print(f"Unknown command: {args.command}")
        return 1
    return handler(args)
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|