be52af70
tangwang
first commit
|
1
2
3
4
|
"""
Search API routes.
"""
|
16c42787
tangwang
feat: implement r...
|
5
|
from fastapi import APIRouter, HTTPException, Query, Request
|
be52af70
tangwang
first commit
|
6
|
from typing import Optional
|
16c42787
tangwang
feat: implement r...
|
7
|
import uuid
|
be52af70
tangwang
first commit
|
8
9
10
11
12
|
from ..models import (
SearchRequest,
ImageSearchRequest,
SearchResponse,
|
6aa246be
tangwang
问题:Pydantic 应该能自动...
|
13
|
SearchSuggestResponse,
|
be52af70
tangwang
first commit
|
14
15
16
|
DocumentResponse,
ErrorResponse
)
|
16c42787
tangwang
feat: implement r...
|
17
|
from context.request_context import create_request_context, set_current_request_context, clear_current_request_context
|
be52af70
tangwang
first commit
|
18
19
20
21
|
router = APIRouter(prefix="/search", tags=["search"])
|
16c42787
tangwang
feat: implement r...
|
22
23
24
25
26
|
def extract_request_info(request: Request) -> tuple[str, str]:
    """Return the ``(request_id, user_id)`` pair used to correlate a request.

    The request ID is read from the ``X-Request-ID`` header; when that header
    is missing or empty, the first 8 characters of a fresh UUID4 are used.
    The user ID is read from ``X-User-ID`` and then ``User-ID``; when neither
    yields a value, the placeholder ``"-1"`` is returned.
    """
    request_id = request.headers.get('X-Request-ID')
    if not request_id:
        # No caller-supplied ID: fall back to a short random identifier.
        request_id = str(uuid.uuid4())[:8]

    user_id = request.headers.get('X-User-ID')
    if not user_id:
        user_id = request.headers.get('User-ID')
    if not user_id:
        # "-1" marks an unknown user so log lines still carry a uid field.
        user_id = "-1"

    return request_id, user_id
|
be52af70
tangwang
first commit
|
33
|
@router.post("/", response_model=SearchResponse)
async def search(request: SearchRequest, http_request: Request):
    """
    Execute a text search query (external-friendly format).

    Supports:
    - Multi-language query processing
    - Unified text retrieval strategy (no boolean AST parsing)
    - Semantic search with embeddings
    - Custom ranking functions
    - Exact match filters and range filters
    - Faceted search

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    Raises:
        HTTPException: 400 when no tenant_id is supplied; 500 when the search
            fails with an unexpected error. An HTTPException raised while the
            request is being processed is propagated unchanged.
    """
    reqid, uid = extract_request_info(http_request)

    # Extract tenant_id (required)
    tenant_id = http_request.headers.get('X-Tenant-ID')
    if not tenant_id:
        # Try to get from query string
        from urllib.parse import parse_qs
        query_string = http_request.url.query
        if query_string:
            params = parse_qs(query_string)
            tenant_id = params.get('tenant_id', [None])[0]

    if not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'"
        )

    # Create request context
    context = create_request_context(reqid=reqid, uid=uid)
    # Register the context as the current one; it is cleared in ``finally``
    set_current_request_context(context)

    try:
        # Log request start (English logs, with key search parameters)
        client_ip = http_request.client.host if http_request.client else "unknown"
        # Cap the User-Agent so an oversized header cannot bloat the log line
        user_agent = http_request.headers.get("User-Agent", "unknown")[:200]
        context.logger.info(
            "Received search request | "
            f"Tenant: {tenant_id} | "
            f"Query: {request.query} | "
            f"IP: {client_ip} | "
            f"User agent: {user_agent} | "
            f"size: {request.size} | from: {request.from_} | "
            f"sort_by: {request.sort_by} | sort_order: {request.sort_order} | "
            f"min_score: {request.min_score} | "
            f"language: {request.language} | "
            f"debug: {request.debug} | "
            f"enable_rerank: {request.enable_rerank} | "
            f"rerank_query_template: {request.rerank_query_template} | "
            f"rerank_doc_template: {request.rerank_doc_template} | "
            f"sku_filter_dimension: {request.sku_filter_dimension} | "
            f"filters: {request.filters} | "
            f"range_filters: {request.range_filters} | "
            f"facets: {request.facets}",
            extra={'reqid': context.reqid, 'uid': context.uid}
        )

        # Get searcher from app state
        from api.app import get_searcher
        searcher = get_searcher()

        # Execute search with context (using backend defaults from config)
        result = searcher.search(
            query=request.query,
            tenant_id=tenant_id,
            size=request.size,
            from_=request.from_,
            filters=request.filters,
            range_filters=request.range_filters,
            facets=request.facets,
            min_score=request.min_score,
            context=context,
            sort_by=request.sort_by,
            sort_order=request.sort_order,
            debug=request.debug,
            language=request.language,
            sku_filter_dimension=request.sku_filter_dimension,
            enable_rerank=request.enable_rerank,
            rerank_query_template=request.rerank_query_template,
            rerank_doc_template=request.rerank_doc_template,
        )

        # Include performance summary in response
        performance_summary = context.get_summary() if context else None

        # Convert to response model
        response = SearchResponse(
            results=result.results,
            total=result.total,
            max_score=result.max_score,
            took_ms=result.took_ms,
            facets=result.facets,
            query_info=result.query_info,
            suggestions=result.suggestions,
            related_searches=result.related_searches,
            performance_info=performance_summary,
            debug_info=result.debug_info
        )

        # NOTE(review): a None max_score would make the ``:.4f`` format spec
        # raise and fail an otherwise successful request; confirm whether
        # SearchResponse allows None here.
        if response.max_score is None:
            max_score_text = "None"
        else:
            max_score_text = f"{response.max_score:.4f}"

        # Log complete response JSON
        context.logger.info(
            "Search response | "
            f"Total results: {response.total} | "
            f"Max score: {max_score_text} | "
            f"Time: {response.took_ms}ms | "
            f"Response: {response.model_dump_json()}",
            extra={'reqid': context.reqid, 'uid': context.uid}
        )
        return response

    except HTTPException:
        # Keep the status code and detail of deliberate HTTP errors instead of
        # rewrapping them as a generic 500 (same policy as get_document).
        raise
    except Exception as e:
        # Log error in context
        if context:
            context.set_error(e)
            context.logger.error(
                f"Search request failed | error: {str(e)}",
                extra={'reqid': context.reqid, 'uid': context.uid}
            )
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always drop the current request context, on success and on failure
        clear_current_request_context()
|
be52af70
tangwang
first commit
|
163
164
165
|
@router.post("/image", response_model=SearchResponse)
async def search_by_image(request: ImageSearchRequest, http_request: Request):
    """
    Search by image similarity (external-friendly format).

    Uses image embeddings to find visually similar products.
    Supports exact match filters and range filters.

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    Raises:
        HTTPException: 400 when no tenant_id is supplied or when the search
            raises ValueError; 500 for any other unexpected error. An
            HTTPException raised while the request is being processed is
            propagated unchanged.
    """
    reqid, uid = extract_request_info(http_request)

    # Extract tenant_id (required)
    tenant_id = http_request.headers.get('X-Tenant-ID')
    if not tenant_id:
        # Fall back to the ``tenant_id`` query parameter
        from urllib.parse import parse_qs
        query_string = http_request.url.query
        if query_string:
            params = parse_qs(query_string)
            tenant_id = params.get('tenant_id', [None])[0]

    if not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'"
        )

    # Create request context
    context = create_request_context(reqid=reqid, uid=uid)
    # Register the context as the current one; it is cleared in ``finally``
    set_current_request_context(context)

    try:
        # Log request start for image search (English)
        client_ip = http_request.client.host if http_request.client else "unknown"
        context.logger.info(
            "Received image search request | "
            f"Tenant: {tenant_id} | "
            f"Image URL: {request.image_url} | "
            f"IP: {client_ip}",
            extra={'reqid': context.reqid, 'uid': context.uid}
        )

        from api.app import get_searcher
        searcher = get_searcher()

        # Execute image search
        result = searcher.search_by_image(
            image_url=request.image_url,
            tenant_id=tenant_id,
            size=request.size,
            filters=request.filters,
            range_filters=request.range_filters
        )

        # Include performance summary in response
        performance_summary = context.get_summary() if context else None

        response = SearchResponse(
            results=result.results,
            total=result.total,
            max_score=result.max_score,
            took_ms=result.took_ms,
            facets=result.facets,
            query_info=result.query_info,
            suggestions=result.suggestions,
            related_searches=result.related_searches,
            performance_info=performance_summary
        )

        # NOTE(review): a None max_score would make the ``:.4f`` format spec
        # raise and fail an otherwise successful request; confirm whether
        # SearchResponse allows None here.
        if response.max_score is None:
            max_score_text = "None"
        else:
            max_score_text = f"{response.max_score:.4f}"

        # Log complete response JSON
        context.logger.info(
            "Image search response | "
            f"Total results: {response.total} | "
            f"Max score: {max_score_text} | "
            f"Time: {response.took_ms}ms | "
            f"Response: {response.model_dump_json()}",
            extra={'reqid': context.reqid, 'uid': context.uid}
        )
        return response

    except HTTPException:
        # Keep the status code and detail of deliberate HTTP errors instead of
        # rewrapping them as a generic 500 (same policy as get_document).
        raise
    except ValueError as e:
        # ValueError from the search is reported to the client as a bad request
        if context:
            context.set_error(e)
            context.logger.error(
                f"Image search request parameter error | error: {str(e)}",
                extra={'reqid': context.reqid, 'uid': context.uid}
            )
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        if context:
            context.set_error(e)
            context.logger.error(
                f"Image search request failed | error: {str(e)}",
                extra={'reqid': context.reqid, 'uid': context.uid}
            )
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always drop the current request context, on success and on failure
        clear_current_request_context()
|
be52af70
tangwang
first commit
|
267
268
|
|
6aa246be
tangwang
问题:Pydantic 应该能自动...
|
269
270
271
|
@router.get("/suggestions", response_model=SearchSuggestResponse)
async def search_suggestions(
    q: str = Query(..., min_length=1, description="搜索查询"),
    size: int = Query(10, ge=1, le=50, description="建议数量(1-50)"),
    language: str = Query("en", description="请求语言,如 zh/en/ar/ru"),
    debug: bool = Query(False, description="是否返回调试信息"),
    http_request: Request = None,
):
    """
    Get search suggestions (autocomplete, with multi-language support).

    Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id).

    The ``debug`` flag is accepted but does not currently change the response:
    the response model is kept stable and any debug information stays inside
    the suggestions payload.

    Raises:
        HTTPException: 400 when no tenant_id is supplied; 500 when the
            suggestion service fails with an unexpected error. An
            HTTPException raised by the service layer is propagated unchanged.
    """
    # Extract tenant_id (required)
    tenant_id = http_request.headers.get("X-Tenant-ID") if http_request else None
    if not tenant_id and http_request:
        # Fall back to the ``tenant_id`` query parameter
        from urllib.parse import parse_qs
        query_string = http_request.url.query
        if query_string:
            params = parse_qs(query_string)
            tenant_id = params.get("tenant_id", [None])[0]

    if not tenant_id:
        raise HTTPException(
            status_code=400,
            detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'",
        )

    try:
        from api.app import get_suggestion_service
        service = get_suggestion_service()
        result = service.search(
            tenant_id=tenant_id,
            query=q,
            language=language,
            size=size,
        )
        # ``query``, ``suggestions`` and ``took_ms`` are required keys of the
        # service result; the language fields are optional.
        return SearchSuggestResponse(
            query=result["query"],
            language=result.get("language"),
            resolved_language=result.get("resolved_language"),
            suggestions=result["suggestions"],
            took_ms=result["took_ms"],
        )
    except HTTPException:
        # Keep the status code and detail of deliberate HTTP errors instead of
        # rewrapping them as a generic 500 (same policy as get_document).
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
6aa246be
tangwang
问题:Pydantic 应该能自动...
|
320
321
322
323
324
|
@router.get("/instant", response_model=SearchResponse)
async def instant_search(
    q: str = Query(..., min_length=2, description="搜索查询"),
    size: int = Query(5, ge=1, le=20, description="结果数量"),
):
    """
    Instant search.

    Intended behaviour:
    - Search while the user types, without pressing a search button
    - Return simplified search results

    Note: this feature is not available yet; the endpoint currently always
    responds with 501.
    """
    # Report explicitly that this endpoint is unimplemented, rather than
    # running incomplete logic that would fail with an implicit runtime error.
    not_implemented_detail = (
        "/search/instant is not implemented yet. "
        "Use POST /search/ for production traffic."
    )
    raise HTTPException(status_code=501, detail=not_implemented_detail)
|
be52af70
tangwang
first commit
|
346
|
@router.get("/{doc_id}", response_model=DocumentResponse)
async def get_document(doc_id: str, http_request: Request):
    """
    Fetch one document by its ID for the requesting tenant.

    The tenant is taken from the X-Tenant-ID header, falling back to the
    ``tenant_id`` query parameter.

    Raises:
        HTTPException: 400 when no tenant_id is supplied, 404 when the
            searcher returns no document, 500 for any other error.
    """
    try:
        # Resolve the tenant: header first, then query string
        tenant_id = http_request.headers.get('X-Tenant-ID')
        if not tenant_id:
            from urllib.parse import parse_qs
            raw_query = http_request.url.query
            if raw_query:
                tenant_values = parse_qs(raw_query).get('tenant_id', [None])
                tenant_id = tenant_values[0]

        if not tenant_id:
            raise HTTPException(
                status_code=400,
                detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'"
            )

        from api.app import get_searcher
        source = get_searcher().get_document(tenant_id=tenant_id, doc_id=doc_id)
        if source is None:
            raise HTTPException(status_code=404, detail=f"Document {doc_id} not found for tenant {tenant_id}")

        return DocumentResponse(id=doc_id, source=source)
    except HTTPException:
        # The 400/404 raised above must reach the client unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|