be52af70
tangwang
first commit
|
1
2
3
4
|
"""
Search API routes.
"""
|
16c42787
tangwang
feat: implement r...
|
5
|
from fastapi import APIRouter, HTTPException, Query, Request
|
be52af70
tangwang
first commit
|
6
|
from typing import Optional
|
16c42787
tangwang
feat: implement r...
|
7
|
import uuid
|
28e57bb1
tangwang
日志体系优化
|
8
9
10
|
import hashlib
import json
import logging
|
be52af70
tangwang
first commit
|
11
12
13
14
15
|
from ..models import (
SearchRequest,
ImageSearchRequest,
SearchResponse,
|
6aa246be
tangwang
问题:Pydantic 应该能自动...
|
16
|
SearchSuggestResponse,
|
be52af70
tangwang
first commit
|
17
18
19
|
DocumentResponse,
ErrorResponse
)
|
16c42787
tangwang
feat: implement r...
|
20
|
from context.request_context import create_request_context, set_current_request_context, clear_current_request_context
|
be52af70
tangwang
first commit
|
21
22
|
router = APIRouter(prefix="/search", tags=["search"])
|
28e57bb1
tangwang
日志体系优化
|
23
24
25
26
27
28
29
30
31
|
backend_verbose_logger = logging.getLogger("backend.verbose")


def _log_backend_verbose(payload: dict) -> None:
    """Write *payload* to the ``backend.verbose`` logger as one compact JSON line.

    Nothing is serialized or logged when the logger has no handlers attached
    directly to it.

    Args:
        payload: JSON-serializable mapping describing the event.
    """
    if backend_verbose_logger.handlers:
        # Compact separators and raw (non-escaped) Unicode keep the line short.
        compact_line = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
        backend_verbose_logger.info(compact_line)
|
be52af70
tangwang
first commit
|
32
33
|
|
16c42787
tangwang
feat: implement r...
|
34
35
36
37
38
|
def extract_request_info(request: Request) -> tuple[str, str]:
    """Return ``(request_id, user_id)`` taken from the request headers.

    Args:
        request: Incoming HTTP request whose headers are inspected.

    Returns:
        A tuple ``(reqid, uid)``. ``reqid`` is the ``X-Request-ID`` header or,
        when that is missing/empty, the first 8 characters of a fresh UUID4.
        ``uid`` is ``X-User-ID``, then ``User-ID``, then the literal ``"-1"``.
    """
    headers = request.headers

    reqid = headers.get('X-Request-ID')
    if not reqid:
        # No caller-supplied ID: generate a short random one for correlation.
        reqid = str(uuid.uuid4())[:8]

    uid = headers.get('X-User-ID') or headers.get('User-ID')
    if not uid:
        # "-1" marks an unknown user while still allowing log correlation.
        uid = "-1"

    return reqid, uid
|
be52af70
tangwang
first commit
|
45
|
@router.post("/", response_model=SearchResponse)
async def search(request: SearchRequest, http_request: Request):
    """
    Execute a text search query (externally friendly response format).

    Supports:
    - Multi-language query processing
    - Unified text retrieval strategy (no boolean AST parsing)
    - Semantic search with embeddings
    - Custom ranking functions
    - Exact match filters and range filters
    - Faceted search

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    Args:
        request: Parsed search request body.
        http_request: Raw HTTP request; used for headers, client address and
            the query string.

    Returns:
        A SearchResponse built from the searcher result plus the request
        context's performance summary.

    Raises:
        HTTPException: 400 when tenant_id is missing; 500 when the search
            fails unexpectedly. An HTTPException raised while the request is
            being processed is propagated unchanged.
    """
    reqid, uid = extract_request_info(http_request)

    # Extract tenant_id (required)
    tenant_id = http_request.headers.get('X-Tenant-ID')
    if not tenant_id:
        # Try to get from query string
        from urllib.parse import parse_qs
        query_string = http_request.url.query
        if query_string:
            params = parse_qs(query_string)
            tenant_id = params.get('tenant_id', [None])[0]

    if not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'"
        )

    # Create the request context and make it current; it is cleared in `finally`.
    context = create_request_context(reqid=reqid, uid=uid)
    set_current_request_context(context)

    try:
        # Log request start (English logs, with key search parameters)
        client_ip = http_request.client.host if http_request.client else "unknown"
        # Cap the user agent so an oversized header cannot bloat the log line.
        user_agent = http_request.headers.get("User-Agent", "unknown")[:200]
        context.logger.info(
            "Received search request | "
            f"Tenant: {tenant_id} | "
            f"Query: {request.query} | "
            f"IP: {client_ip} | "
            f"User agent: {user_agent} | "
            f"size: {request.size} | from: {request.from_} | "
            f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | "
            f"min_score: {request.min_score} | "
            f"language: {request.language} | "
            f"debug: {request.debug} | "
            f"enable_rerank: {request.enable_rerank} | "
            f"rerank_query_template: {request.rerank_query_template} | "
            f"rerank_doc_template: {request.rerank_doc_template} | "
            f"sku_filter_dimension: {request.sku_filter_dimension} | "
            f"filters: {request.filters} | "
            f"range_filters: {request.range_filters} | "
            f"facets: {request.facets}",
            extra={'reqid': context.reqid, 'uid': context.uid}
        )

        # Get searcher from app state.
        # NOTE(review): imported inside the handler, presumably to avoid a
        # circular import with api.app — confirm before hoisting.
        from api.app import get_searcher
        searcher = get_searcher()

        # Execute search with context (using backend defaults from config)
        result = searcher.search(
            query=request.query,
            tenant_id=tenant_id,
            size=request.size,
            from_=request.from_,
            filters=request.filters,
            range_filters=request.range_filters,
            facets=request.facets,
            min_score=request.min_score,
            context=context,
            sort_by=request.sort_by,
            sort_order=request.sort_order,
            debug=request.debug,
            language=request.language,
            sku_filter_dimension=request.sku_filter_dimension,
            enable_rerank=request.enable_rerank,
            rerank_query_template=request.rerank_query_template,
            rerank_doc_template=request.rerank_doc_template,
        )

        # Include performance summary in response
        performance_summary = context.get_summary() if context else None

        # Convert to response model
        response = SearchResponse(
            results=result.results,
            total=result.total,
            max_score=result.max_score,
            took_ms=result.took_ms,
            facets=result.facets,
            query_info=result.query_info,
            suggestions=result.suggestions,
            related_searches=result.related_searches,
            performance_info=performance_summary,
            debug_info=result.debug_info
        )

        # Serialize once; the same string feeds the size metric and the digest.
        response_payload = response.model_dump(mode="json")
        response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":"))
        # Short digest lets identical responses be matched across log lines.
        response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16]
        # max_score may be None/0; normalise so the %.4f format never fails.
        max_score = float(response.max_score or 0.0)
        context.logger.info(
            "Search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s",
            response.total,
            max_score,
            response.took_ms,
            len(response_json),
            response_digest,
            extra={'reqid': context.reqid, 'uid': context.uid}
        )
        _log_backend_verbose({
            "event": "search_response",
            "reqid": context.reqid,
            "uid": context.uid,
            "tenant_id": tenant_id,
            "total_results": response.total,
            "max_score": max_score,
            "took_ms": response.took_ms,
            "payload_size_chars": len(response_json),
            "sha256_16": response_digest,
            "response": response_payload,
        })

        return response

    except HTTPException:
        # Deliberate HTTP errors keep their own status code instead of
        # being rewritten as a 500 by the generic handler below.
        raise
    except Exception as e:
        # Log error in context, including the traceback for diagnosis.
        if context:
            context.set_error(e)
            context.logger.error(
                f"Search request failed | error: {str(e)}",
                exc_info=True,
                extra={'reqid': context.reqid, 'uid': context.uid}
            )
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always clear the current request context, on success and on failure.
        clear_current_request_context()
|
be52af70
tangwang
first commit
|
192
193
194
|
@router.post("/image", response_model=SearchResponse)
async def search_by_image(request: ImageSearchRequest, http_request: Request):
    """
    Search by image similarity (externally friendly response format).

    Uses image embeddings to find visually similar products.
    Supports exact match filters and range filters.

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    Args:
        request: Parsed image search request body.
        http_request: Raw HTTP request; used for headers, client address and
            the query string.

    Returns:
        A SearchResponse built from the searcher result plus the request
        context's performance summary.

    Raises:
        HTTPException: 400 when tenant_id is missing or the searcher raises
            ValueError; 500 on any other unexpected failure. An HTTPException
            raised while the request is being processed is propagated unchanged.
    """
    reqid, uid = extract_request_info(http_request)

    # Extract tenant_id (required)
    tenant_id = http_request.headers.get('X-Tenant-ID')
    if not tenant_id:
        # Fall back to the ?tenant_id=... query parameter.
        from urllib.parse import parse_qs
        query_string = http_request.url.query
        if query_string:
            params = parse_qs(query_string)
            tenant_id = params.get('tenant_id', [None])[0]

    if not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'"
        )

    # Create the request context and make it current; it is cleared in `finally`.
    context = create_request_context(reqid=reqid, uid=uid)
    set_current_request_context(context)

    try:
        # Log request start for image search (English)
        client_ip = http_request.client.host if http_request.client else "unknown"
        context.logger.info(
            "Received image search request | "
            f"Tenant: {tenant_id} | "
            f"Image URL: {request.image_url} | "
            f"IP: {client_ip}",
            extra={'reqid': context.reqid, 'uid': context.uid}
        )

        # NOTE(review): imported inside the handler, presumably to avoid a
        # circular import with api.app — confirm before hoisting.
        from api.app import get_searcher
        searcher = get_searcher()

        # Execute image search
        result = searcher.search_by_image(
            image_url=request.image_url,
            tenant_id=tenant_id,
            size=request.size,
            filters=request.filters,
            range_filters=request.range_filters
        )

        # Include performance summary in response
        performance_summary = context.get_summary() if context else None

        response = SearchResponse(
            results=result.results,
            total=result.total,
            max_score=result.max_score,
            took_ms=result.took_ms,
            facets=result.facets,
            query_info=result.query_info,
            suggestions=result.suggestions,
            related_searches=result.related_searches,
            performance_info=performance_summary
        )

        # Serialize once; the same string feeds the size metric and the digest.
        response_payload = response.model_dump(mode="json")
        response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":"))
        # Short digest lets identical responses be matched across log lines.
        response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16]
        # max_score may be None/0; normalise so the %.4f format never fails.
        max_score = float(response.max_score or 0.0)
        context.logger.info(
            "Image search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s",
            response.total,
            max_score,
            response.took_ms,
            len(response_json),
            response_digest,
            extra={'reqid': context.reqid, 'uid': context.uid}
        )
        _log_backend_verbose({
            "event": "image_search_response",
            "reqid": context.reqid,
            "uid": context.uid,
            "tenant_id": tenant_id,
            "total_results": response.total,
            "max_score": max_score,
            "took_ms": response.took_ms,
            "payload_size_chars": len(response_json),
            "sha256_16": response_digest,
            "response": response_payload,
        })

        return response

    except HTTPException:
        # Deliberate HTTP errors keep their own status code instead of
        # being rewritten as a 500 by the generic handler below.
        raise
    except ValueError as e:
        # A ValueError is reported to the client as a bad request (400).
        if context:
            context.set_error(e)
            context.logger.error(
                f"Image search request parameter error | error: {str(e)}",
                exc_info=True,
                extra={'reqid': context.reqid, 'uid': context.uid}
            )
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Anything else is an internal failure; keep the traceback in the log.
        if context:
            context.set_error(e)
            context.logger.error(
                f"Image search request failed | error: {str(e)}",
                exc_info=True,
                extra={'reqid': context.reqid, 'uid': context.uid}
            )
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always clear the current request context, on success and on failure.
        clear_current_request_context()
|
be52af70
tangwang
first commit
|
313
314
|
|
6aa246be
tangwang
问题:Pydantic 应该能自动...
|
315
316
317
|
@router.get("/suggestions", response_model=SearchSuggestResponse)
async def search_suggestions(
    q: str = Query(..., min_length=1, description="搜索查询"),
    size: int = Query(10, ge=1, le=50, description="建议数量(1-50)"),
    language: str = Query("en", description="请求语言,如 zh/en/ar/ru"),
    debug: bool = Query(False, description="是否返回调试信息"),
    http_request: Request = None,
):
    """
    Get search suggestions (autocomplete, with multi-language support).

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    Args:
        q: Query text to complete (at least 1 character).
        size: Number of suggestions to return (1-50).
        language: Request language, e.g. zh/en/ar/ru.
        debug: Accepted for API compatibility; it does not currently change
            the response returned by this handler.
        http_request: Raw HTTP request used to read the tenant id.

    Returns:
        A SearchSuggestResponse built from the suggestion service result.

    Raises:
        HTTPException: 400 when tenant_id is missing; 500 when the suggestion
            service fails unexpectedly. An HTTPException raised while the
            request is being processed is propagated unchanged.
    """
    # Extract tenant_id (required)
    tenant_id = http_request.headers.get("X-Tenant-ID") if http_request else None
    if not tenant_id and http_request:
        # Fall back to the ?tenant_id=... query parameter.
        from urllib.parse import parse_qs
        query_string = http_request.url.query
        if query_string:
            params = parse_qs(query_string)
            tenant_id = params.get("tenant_id", [None])[0]

    if not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'",
        )

    try:
        # NOTE(review): imported inside the handler, presumably to avoid a
        # circular import with api.app — confirm before hoisting.
        from api.app import get_suggestion_service
        service = get_suggestion_service()
        result = service.search(
            tenant_id=tenant_id,
            query=q,
            language=language,
            size=size,
        )
        # The response is the same whether or not `debug` is set: the
        # response_model is kept stable and any debug info stays inside the
        # suggestions payload for now.
        return SearchSuggestResponse(
            query=result["query"],
            language=result.get("language"),
            resolved_language=result.get("resolved_language"),
            suggestions=result["suggestions"],
            took_ms=result["took_ms"],
        )
    except HTTPException:
        # Deliberate HTTP errors keep their own status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
6aa246be
tangwang
问题:Pydantic 应该能自动...
|
366
367
368
369
370
|
@router.get("/instant", response_model=SearchResponse)
async def instant_search(
    q: str = Query(..., min_length=2, description="搜索查询"),
    size: int = Query(5, ge=1, le=20, description="结果数量"),
):
    """
    Instant search (search-as-you-type).

    Intended behavior:
    - Search while the user is typing, without pressing a search button
    - Return simplified search results

    Note: this feature is not available yet; the endpoint always responds
    with HTTP 501.
    """
    # Fail explicitly while the endpoint is unfinished, rather than running
    # incomplete logic that would surface as an implicit runtime error.
    not_implemented_detail = (
        "/search/instant is not implemented yet. "
        "Use POST /search/ for production traffic."
    )
    raise HTTPException(status_code=501, detail=not_implemented_detail)
|
be52af70
tangwang
first commit
|
392
|
@router.get("/{doc_id}", response_model=DocumentResponse)
async def get_document(doc_id: str, http_request: Request):
    """
    Fetch one document by its ID for the requesting tenant.

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    Raises:
        HTTPException: 400 when tenant_id is missing, 404 when the searcher
            returns no document, 500 on any other failure.
    """
    try:
        # Resolve the tenant: header first, then the query string.
        tenant_id = http_request.headers.get('X-Tenant-ID')
        if not tenant_id:
            from urllib.parse import parse_qs
            raw_query = http_request.url.query
            if raw_query:
                tenant_id = parse_qs(raw_query).get('tenant_id', [None])[0]

        if not tenant_id:
            raise HTTPException(
                status_code=400,
                detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'"
            )

        from api.app import get_searcher
        document = get_searcher().get_document(tenant_id=tenant_id, doc_id=doc_id)

        if document is None:
            raise HTTPException(status_code=404, detail=f"Document {doc_id} not found for tenant {tenant_id}")

        return DocumentResponse(id=doc_id, source=document)
    except HTTPException:
        # The 400/404 raised above must reach the client unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|