Commit 985752f51220c83297e5748b210cc55d1531243a

Authored by tangwang
1 parent f201859e

1. 前端调试功能

2. 翻译限速 对应处理(qwen-mt 限速)
api/routes/search.py
@@ -18,6 +18,7 @@ from ..models import ( @@ -18,6 +18,7 @@ from ..models import (
18 ErrorResponse 18 ErrorResponse
19 ) 19 )
20 from context.request_context import create_request_context, set_current_request_context, clear_current_request_context 20 from context.request_context import create_request_context, set_current_request_context, clear_current_request_context
  21 +from indexer.mapping_generator import get_tenant_index_name
21 22
22 router = APIRouter(prefix="/search", tags=["search"]) 23 router = APIRouter(prefix="/search", tags=["search"])
23 backend_verbose_logger = logging.getLogger("backend.verbose") 24 backend_verbose_logger = logging.getLogger("backend.verbose")
@@ -437,3 +438,57 @@ async def get_document(doc_id: str, http_request: Request): @@ -437,3 +438,57 @@ async def get_document(doc_id: str, http_request: Request):
437 raise 438 raise
438 except Exception as e: 439 except Exception as e:
439 raise HTTPException(status_code=500, detail=str(e)) 440 raise HTTPException(status_code=500, detail=str(e))
  441 +
  442 +
  443 +@router.get("/es-doc/{spu_id}")
  444 +async def get_es_raw_document(spu_id: str, http_request: Request):
  445 + """
  446 + Get raw Elasticsearch document(s) for a given SPU ID.
  447 +
  448 + This is intended for debugging in the test frontend:
  449 + it queries the tenant-specific ES index with a term filter on spu_id
  450 + and returns the raw ES search response.
  451 + """
  452 + # Extract tenant_id (required)
  453 + tenant_id = http_request.headers.get("X-Tenant-ID")
  454 + if not tenant_id:
  455 + from urllib.parse import parse_qs
  456 + query_string = http_request.url.query
  457 + if query_string:
  458 + params = parse_qs(query_string)
  459 + tenant_id = params.get("tenant_id", [None])[0]
  460 +
  461 + if not tenant_id:
  462 + raise HTTPException(
  463 + status_code=400,
  464 + detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'",
  465 + )
  466 +
  467 + try:
  468 + from api.app import get_searcher
  469 +
  470 + searcher = get_searcher()
  471 + es_client = searcher.es_client
  472 + index_name = get_tenant_index_name(tenant_id)
  473 +
  474 + body = {
  475 + "size": 5,
  476 + "query": {
  477 + "bool": {
  478 + "filter": [
  479 + {
  480 + "term": {
  481 + "spu_id": spu_id,
  482 + }
  483 + }
  484 + ]
  485 + }
  486 + },
  487 + }
  488 +
  489 + es_response = es_client.search(index_name=index_name, body=body, size=5, from_=0)
  490 + return es_response
  491 + except HTTPException:
  492 + raise
  493 + except Exception as e:
  494 + raise HTTPException(status_code=500, detail=str(e))
docs/翻译模块说明.md
@@ -15,6 +15,12 @@ DEEPL_AUTH_KEY=xxx @@ -15,6 +15,12 @@ DEEPL_AUTH_KEY=xxx
15 TRANSLATION_MODEL=qwen # 或 deepl 15 TRANSLATION_MODEL=qwen # 或 deepl
16 ``` 16 ```
17 17
  18 +> **重要限速说明(Qwen 机翻)**
  19 +> 当前默认的 Qwen 翻译后端使用 `qwen-mt-flash` 云端模型,**官方限速较低,约 RPM=60(每分钟约 60 请求)**。
  19 +> - 推荐通过 Redis 翻译缓存复用结果,避免对相同文本重复请求云端翻译接口
  21 +> - 高并发场景需要在调用端做限流 / 去抖,或改为离线批量翻译
  22 +> - 如需更高吞吐,可考虑 DeepL 或自建翻译服务
  23 +
18 ## Provider 配置 24 ## Provider 配置
19 25
20 Provider 与 URL 在 `config/config.yaml` 的 `services.translation`。详见 [QUICKSTART.md](./QUICKSTART.md) §3 与 [DEVELOPER_GUIDE.md](./DEVELOPER_GUIDE.md) §7.2。 26 Provider 与 URL 在 `config/config.yaml` 的 `services.translation`。详见 [QUICKSTART.md](./QUICKSTART.md) §3 与 [DEVELOPER_GUIDE.md](./DEVELOPER_GUIDE.md) §7.2。
frontend/static/css/style.css
@@ -322,9 +322,9 @@ body { @@ -322,9 +322,9 @@ body {
322 322
323 /* Product Grid */ 323 /* Product Grid */
324 .product-grid { 324 .product-grid {
325 - display: grid;  
326 - grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));  
327 - gap: 20px; 325 + display: flex;
  326 + flex-direction: column;
  327 + gap: 16px;
328 padding: 30px; 328 padding: 30px;
329 background: #f8f8f8; 329 background: #f8f8f8;
330 min-height: 400px; 330 min-height: 400px;
@@ -336,9 +336,51 @@ body { @@ -336,9 +336,51 @@ body {
336 border-radius: 8px; 336 border-radius: 8px;
337 padding: 15px; 337 padding: 15px;
338 transition: all 0.3s; 338 transition: all 0.3s;
339 - cursor: pointer; 339 + cursor: default;
  340 + display: flex;
  341 + flex-direction: row;
  342 + align-items: flex-start;
  343 + gap: 16px;
  344 +}
  345 +
  346 +.product-main {
340 display: flex; 347 display: flex;
341 flex-direction: column; 348 flex-direction: column;
  349 + width: 260px;
  350 + flex-shrink: 0;
  351 +}
  352 +
  353 +.product-debug {
  354 + flex: 1;
  355 + font-family: Menlo, Consolas, "Courier New", monospace;
  356 + font-size: 12px;
  357 + color: #555;
  358 + border-left: 1px dashed #eee;
  359 + padding-left: 12px;
  360 + max-height: 260px;
  361 + overflow: auto;
  362 +}
  363 +
  364 +.product-debug-title {
  365 + font-weight: 600;
  366 + margin-bottom: 6px;
  367 + color: #333;
  368 +}
  369 +
  370 +.product-debug-line {
  371 + margin-bottom: 2px;
  372 +}
  373 +
  374 +.product-debug-link {
  375 + display: inline-block;
  376 + margin-top: 6px;
  377 + font-size: 12px;
  378 + color: #e67e22;
  379 + text-decoration: none;
  380 +}
  381 +
  382 +.product-debug-link:hover {
  383 + text-decoration: underline;
342 } 384 }
343 385
344 .product-card:hover { 386 .product-card:hover {
@@ -347,7 +389,7 @@ body { @@ -347,7 +389,7 @@ body {
347 } 389 }
348 390
349 .product-image-wrapper { 391 .product-image-wrapper {
350 - width: 100%; 392 + width: 220px;
351 height: 180px; 393 height: 180px;
352 display: flex; 394 display: flex;
353 align-items: center; 395 align-items: center;
frontend/static/js/app.js
@@ -287,7 +287,8 @@ async function performSearch(page = 1) { @@ -287,7 +287,8 @@ async function performSearch(page = 1) {
287 sort_by: state.sortBy || null, 287 sort_by: state.sortBy || null,
288 sort_order: state.sortOrder, 288 sort_order: state.sortOrder,
289 sku_filter_dimension: skuFilterDimension, 289 sku_filter_dimension: skuFilterDimension,
290 - debug: state.debug 290 + // 测试前端始终开启后端调试信息
  291 + debug: true
291 }) 292 })
292 }); 293 });
293 294
@@ -336,7 +337,19 @@ function displayResults(data) { @@ -336,7 +337,19 @@ function displayResults(data) {
336 } 337 }
337 338
338 let html = ''; 339 let html = '';
339 - 340 +
  341 + // Build per-SPU debug lookup from debug_info.per_result (if present)
  342 + let perResultDebugBySpu = {};
  343 + if (state.debug && data.debug_info && Array.isArray(data.debug_info.per_result)) {
  344 + data.debug_info.per_result.forEach((item) => {
  345 + if (item && item.spu_id) {
  346 + perResultDebugBySpu[String(item.spu_id)] = item;
  347 + }
  348 + });
  349 + }
  350 +
  351 + const tenantId = getTenantId();
  352 +
340 data.results.forEach((result) => { 353 data.results.forEach((result) => {
341 const product = result; 354 const product = result;
342 const title = product.title || product.name || 'N/A'; 355 const title = product.title || product.name || 'N/A';
@@ -344,32 +357,71 @@ function displayResults(data) { @@ -344,32 +357,71 @@ function displayResults(data) {
344 const imageUrl = product.image_url || product.imageUrl || ''; 357 const imageUrl = product.image_url || product.imageUrl || '';
345 const category = product.category || product.categoryName || ''; 358 const category = product.category || product.categoryName || '';
346 const vendor = product.vendor || product.brandName || ''; 359 const vendor = product.vendor || product.brandName || '';
  360 + const spuId = product.spu_id || '';
  361 + const debug = spuId ? perResultDebugBySpu[String(spuId)] : null;
  362 +
  363 + let debugHtml = '';
  364 + if (debug) {
  365 + const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? '');
  366 + const esNorm = typeof debug.es_score_normalized === 'number'
  367 + ? debug.es_score_normalized.toFixed(4)
  368 + : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
  369 +
  370 + // Build multilingual title info
  371 + let titleLines = '';
  372 + if (debug.title_multilingual && typeof debug.title_multilingual === 'object') {
  373 + Object.entries(debug.title_multilingual).forEach(([lang, val]) => {
  374 + if (val) {
  375 + titleLines += `<div class="product-debug-line">title.${escapeHtml(String(lang))}: ${escapeHtml(String(val))}</div>`;
  376 + }
  377 + });
  378 + }
  379 +
  380 + const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
  381 +
  382 + debugHtml = `
  383 + <div class="product-debug">
  384 + <div class="product-debug-title">Ranking Debug</div>
  385 + <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div>
  386 + <div class="product-debug-line">es_id: ${escapeHtml(String(debug.es_id || ''))}</div>
  387 + <div class="product-debug-line">ES score: ${esScore}</div>
  388 + <div class="product-debug-line">ES normalized: ${esNorm}</div>
  389 + ${titleLines}
  390 + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">
  391 + 查看 ES 原始文档
  392 + </a>
  393 + </div>
  394 + `;
  395 + }
347 396
348 html += ` 397 html += `
349 <div class="product-card"> 398 <div class="product-card">
350 - <div class="product-image-wrapper">  
351 - ${imageUrl ? `  
352 - <img src="${escapeHtml(imageUrl)}"  
353 - alt="${escapeHtml(title)}"  
354 - class="product-image"  
355 - onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'">  
356 - ` : `  
357 - <div style="color: #ccc; font-size: 14px;">No Image</div>  
358 - `}  
359 - </div>  
360 -  
361 - <div class="product-price">  
362 - ${price !== 'N/A' ? `¥${price}` : 'N/A'}  
363 - </div>  
364 -  
365 - <div class="product-title">  
366 - ${escapeHtml(title)}  
367 - </div>  
368 -  
369 - <div class="product-meta">  
370 - ${category ? escapeHtml(category) : ''}  
371 - ${vendor ? ' | ' + escapeHtml(vendor) : ''} 399 + <div class="product-main">
  400 + <div class="product-image-wrapper">
  401 + ${imageUrl ? `
  402 + <img src="${escapeHtml(imageUrl)}"
  403 + alt="${escapeHtml(title)}"
  404 + class="product-image"
  405 + onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'">
  406 + ` : `
  407 + <div style="color: #ccc; font-size: 14px;">No Image</div>
  408 + `}
  409 + </div>
  410 +
  411 + <div class="product-price">
  412 + ${price !== 'N/A' ? `¥${price}` : 'N/A'}
  413 + </div>
  414 +
  415 + <div class="product-title">
  416 + ${escapeHtml(title)}
  417 + </div>
  418 +
  419 + <div class="product-meta">
  420 + ${category ? escapeHtml(category) : ''}
  421 + ${vendor ? ' | ' + escapeHtml(vendor) : ''}
  422 + </div>
372 </div> 423 </div>
  424 + ${debugHtml}
373 </div> 425 </div>
374 `; 426 `;
375 }); 427 });
providers/translation.py
@@ -1,170 +0,0 @@ @@ -1,170 +0,0 @@
1 -"""  
2 -Translation provider - direct (in-process) or HTTP service.  
3 -"""  
4 -  
5 -from __future__ import annotations  
6 -  
7 -import logging  
8 -from typing import Any, Dict, List, Optional, Union  
9 -  
10 -from concurrent.futures import Future, ThreadPoolExecutor  
11 -import requests  
12 -  
13 -from config.services_config import get_translation_config, get_translation_base_url  
14 -  
15 -logger = logging.getLogger(__name__)  
16 -  
17 -  
18 -class HttpTranslationProvider:  
19 - """Translation via HTTP service."""  
20 -  
21 - def __init__(  
22 - self,  
23 - base_url: str,  
24 - model: str = "qwen",  
25 - timeout_sec: float = 10.0,  
26 - translation_context: Optional[str] = None,  
27 - ):  
28 - self.base_url = (base_url or "").rstrip("/")  
29 - self.model = model or "qwen"  
30 - self.timeout_sec = float(timeout_sec or 10.0)  
31 - self.translation_context = translation_context or "e-commerce product search"  
32 - self.executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="http-translator")  
33 -  
34 - def _translate_once(  
35 - self,  
36 - text: str,  
37 - target_lang: str,  
38 - source_lang: Optional[str] = None,  
39 - ) -> Optional[str]:  
40 - if not text or not str(text).strip():  
41 - return text  
42 - try:  
43 - url = f"{self.base_url}/translate"  
44 - payload = {  
45 - "text": text,  
46 - "target_lang": target_lang,  
47 - "source_lang": source_lang or "auto",  
48 - "model": self.model,  
49 - }  
50 - response = requests.post(url, json=payload, timeout=self.timeout_sec)  
51 - if response.status_code != 200:  
52 - logger.warning(  
53 - "HTTP translator failed: status=%s body=%s",  
54 - response.status_code,  
55 - (response.text or "")[:200],  
56 - )  
57 - return None  
58 - data = response.json()  
59 - translated = data.get("translated_text")  
60 - return translated if translated is not None else None  
61 - except Exception as exc:  
62 - logger.warning("HTTP translator request failed: %s", exc, exc_info=True)  
63 - return None  
64 -  
65 - def translate(  
66 - self,  
67 - text: str,  
68 - target_lang: str,  
69 - source_lang: Optional[str] = None,  
70 - context: Optional[str] = None,  
71 - prompt: Optional[str] = None,  
72 - ) -> Optional[str]:  
73 - del context, prompt  
74 - result = self._translate_once(text=text, target_lang=target_lang, source_lang=source_lang)  
75 - return result if result is not None else text  
76 -  
77 - def translate_multi(  
78 - self,  
79 - text: str,  
80 - target_langs: List[str],  
81 - source_lang: Optional[str] = None,  
82 - context: Optional[str] = None,  
83 - async_mode: bool = True,  
84 - prompt: Optional[str] = None,  
85 - ) -> Dict[str, Optional[str]]:  
86 - del context, async_mode, prompt  
87 - out: Dict[str, Optional[str]] = {}  
88 - for lang in target_langs:  
89 - out[lang] = self.translate(text, lang, source_lang=source_lang)  
90 - return out  
91 -  
92 - def translate_multi_async(  
93 - self,  
94 - text: str,  
95 - target_langs: List[str],  
96 - source_lang: Optional[str] = None,  
97 - context: Optional[str] = None,  
98 - prompt: Optional[str] = None,  
99 - ) -> Dict[str, Union[str, Future]]:  
100 - del context, prompt  
101 - out: Dict[str, Union[str, Future]] = {}  
102 - for lang in target_langs:  
103 - out[lang] = self.executor.submit(self.translate, text, lang, source_lang)  
104 - return out  
105 -  
106 - def translate_for_indexing(  
107 - self,  
108 - text: str,  
109 - shop_language: str,  
110 - source_lang: Optional[str] = None,  
111 - context: Optional[str] = None,  
112 - prompt: Optional[str] = None,  
113 - index_languages: Optional[List[str]] = None,  
114 - ) -> Dict[str, Optional[str]]:  
115 - del context, prompt  
116 - langs = index_languages if index_languages else ["en", "zh"]  
117 - source = source_lang or shop_language or "auto"  
118 - out: Dict[str, Optional[str]] = {}  
119 - for lang in langs:  
120 - if lang == shop_language:  
121 - out[lang] = text  
122 - else:  
123 - out[lang] = self.translate(text, target_lang=lang, source_lang=source)  
124 - return out  
125 -  
126 -  
127 -def create_translation_provider(query_config: Any = None) -> Any:  
128 - """  
129 - Create translation provider from services config.  
130 -  
131 - query_config: optional, for api_key/glossary_id/context (used by direct provider).  
132 - """  
133 - cfg = get_translation_config()  
134 - provider = cfg.provider  
135 - pc = cfg.get_provider_cfg()  
136 -  
137 - if provider in ("direct", "local", "inprocess"):  
138 - from query.translator import Translator  
139 - model = pc.get("model") or "qwen"  
140 - qc = query_config or _empty_query_config()  
141 - return Translator(  
142 - model=model,  
143 - api_key=getattr(qc, "translation_api_key", None),  
144 - use_cache=True,  
145 - glossary_id=getattr(qc, "translation_glossary_id", None),  
146 - translation_context=getattr(qc, "translation_context", "e-commerce product search"),  
147 - )  
148 -  
149 - if provider in ("http", "service"):  
150 - base_url = get_translation_base_url()  
151 - model = pc.get("model") or "qwen"  
152 - timeout = pc.get("timeout_sec", 10.0)  
153 - qc = query_config or _empty_query_config()  
154 - return HttpTranslationProvider(  
155 - base_url=base_url,  
156 - model=model,  
157 - timeout_sec=float(timeout),  
158 - translation_context=getattr(qc, "translation_context", "e-commerce product search"),  
159 - )  
160 -  
161 - raise ValueError(f"Unsupported translation provider: {provider}")  
162 -  
163 -  
164 -def _empty_query_config() -> Any:  
165 - """Minimal object with default translation attrs."""  
166 - class _QC:  
167 - translation_api_key = None  
168 - translation_glossary_id = None  
169 - translation_context = "e-commerce product search"  
170 - return _QC()  
query/llm_translate.py 0 → 100644
query/translator.py
@@ -5,6 +5,11 @@ Supports multiple translation models: @@ -5,6 +5,11 @@ Supports multiple translation models:
5 - Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model 5 - Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model
6 - DeepL: DeepL API for high-quality translations 6 - DeepL: DeepL API for high-quality translations
7 7
  8 +重要说明(Qwen 机翻限速):
  9 +- 当前默认使用的 `qwen-mt-flash` 为云端机翻模型,**官方限速较低,约 RPM=60(每分钟约 60 请求)**
  10 +- 在高并发场景必须依赖 Redis 翻译缓存与批量预热,避免在用户实时请求路径上直接触发 DashScope 限流
  11 +- 若业务侧存在大规模离线翻译或更高吞吐需求,建议评估 DeepL 或自建翻译后端
  12 +
8 使用方法 (Usage): 13 使用方法 (Usage):
9 14
10 ```python 15 ```python
reranker/backends/batching_utils.py deleted
@@ -1,41 +0,0 @@ @@ -1,41 +0,0 @@
1 -"""Utilities for reranker batching and deduplication."""  
2 -  
3 -from __future__ import annotations  
4 -  
5 -from typing import Iterable, List, Sequence, Tuple  
6 -  
7 -  
8 -def deduplicate_with_positions(texts: Sequence[str]) -> Tuple[List[str], List[int]]:  
9 - """  
10 - Deduplicate texts globally while preserving first-seen order.  
11 -  
12 - Returns:  
13 - unique_texts: deduplicated texts in first-seen order  
14 - position_to_unique: mapping from each original position to unique index  
15 - """  
16 - unique_texts: List[str] = []  
17 - position_to_unique: List[int] = []  
18 - seen: dict[str, int] = {}  
19 -  
20 - for text in texts:  
21 - idx = seen.get(text)  
22 - if idx is None:  
23 - idx = len(unique_texts)  
24 - seen[text] = idx  
25 - unique_texts.append(text)  
26 - position_to_unique.append(idx)  
27 -  
28 - return unique_texts, position_to_unique  
29 -  
30 -  
31 -def sort_indices_by_length(lengths: Sequence[int]) -> List[int]:  
32 - """Return stable ascending indices by lengths."""  
33 - return sorted(range(len(lengths)), key=lambda i: lengths[i])  
34 -  
35 -  
36 -def iter_batches(indices: Sequence[int], batch_size: int) -> Iterable[List[int]]:  
37 - """Yield consecutive batches from indices."""  
38 - if batch_size <= 0:  
39 - raise ValueError(f"batch_size must be > 0, got {batch_size}")  
40 - for i in range(0, len(indices), batch_size):  
41 - yield list(indices[i : i + batch_size])  
reranker/backends/dashscope_rerank.py
@@ -21,11 +21,33 @@ from typing import Any, Dict, List, Tuple @@ -21,11 +21,33 @@ from typing import Any, Dict, List, Tuple
21 from urllib import error as urllib_error 21 from urllib import error as urllib_error
22 from urllib import request as urllib_request 22 from urllib import request as urllib_request
23 23
24 -from reranker.backends.batching_utils import deduplicate_with_positions, iter_batches  
25 24
26 logger = logging.getLogger("reranker.backends.dashscope_rerank") 25 logger = logging.getLogger("reranker.backends.dashscope_rerank")
27 26
28 27
  28 +def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]:
  29 + """
  30 + Deduplicate texts globally while preserving first-seen order.
  31 +
  32 + Returns:
  33 + unique_texts: deduplicated texts in first-seen order
  34 + position_to_unique: mapping from each original position to unique index
  35 + """
  36 + unique_texts: List[str] = []
  37 + position_to_unique: List[int] = []
  38 + seen: Dict[str, int] = {}
  39 +
  40 + for text in texts:
  41 + idx = seen.get(text)
  42 + if idx is None:
  43 + idx = len(unique_texts)
  44 + seen[text] = idx
  45 + unique_texts.append(text)
  46 + position_to_unique.append(idx)
  47 +
  48 + return unique_texts, position_to_unique
  49 +
  50 +
29 class DashScopeRerankBackend: 51 class DashScopeRerankBackend:
30 """ 52 """
31 DashScope cloud reranker backend. 53 DashScope cloud reranker backend.
@@ -206,7 +228,10 @@ class DashScopeRerankBackend: @@ -206,7 +228,10 @@ class DashScopeRerankBackend:
206 then apply global top_n/top_n_cap truncation after merge if needed. 228 then apply global top_n/top_n_cap truncation after merge if needed.
207 """ 229 """
208 indices = list(range(len(unique_texts))) 230 indices = list(range(len(unique_texts)))
209 - batches = list(iter_batches(indices, batch_size=self._batchsize)) 231 + batches = [
  232 + indices[i : i + self._batchsize]
  233 + for i in range(0, len(indices), self._batchsize)
  234 + ]
210 num_batches = len(batches) 235 num_batches = len(batches)
211 max_workers = min(8, num_batches) if num_batches > 0 else 1 236 max_workers = min(8, num_batches) if num_batches > 0 else 1
212 unique_scores: List[float] = [0.0] * len(unique_texts) 237 unique_scores: List[float] = [0.0] * len(unique_texts)
reranker/backends/qwen3_vllm.py
@@ -14,12 +14,6 @@ import threading @@ -14,12 +14,6 @@ import threading
14 import time 14 import time
15 from typing import Any, Dict, List, Tuple 15 from typing import Any, Dict, List, Tuple
16 16
17 -from reranker.backends.batching_utils import (  
18 - deduplicate_with_positions,  
19 - iter_batches,  
20 - sort_indices_by_length,  
21 -)  
22 -  
23 logger = logging.getLogger("reranker.backends.qwen3_vllm") 17 logger = logging.getLogger("reranker.backends.qwen3_vllm")
24 18
25 try: 19 try:
@@ -34,6 +28,29 @@ except ImportError as e: @@ -34,6 +28,29 @@ except ImportError as e:
34 ) from e 28 ) from e
35 29
36 30
  31 +def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]:
  32 + """
  33 + Deduplicate texts globally while preserving first-seen order.
  34 +
  35 + Returns:
  36 + unique_texts: deduplicated texts in first-seen order
  37 + position_to_unique: mapping from each original position to unique index
  38 + """
  39 + unique_texts: List[str] = []
  40 + position_to_unique: List[int] = []
  41 + seen: Dict[str, int] = {}
  42 +
  43 + for text in texts:
  44 + idx = seen.get(text)
  45 + if idx is None:
  46 + idx = len(unique_texts)
  47 + seen[text] = idx
  48 + unique_texts.append(text)
  49 + position_to_unique.append(idx)
  50 +
  51 + return unique_texts, position_to_unique
  52 +
  53 +
37 def _format_instruction(instruction: str, query: str, doc: str) -> List[Dict[str, str]]: 54 def _format_instruction(instruction: str, query: str, doc: str) -> List[Dict[str, str]]:
38 """Build chat messages for one (query, doc) pair.""" 55 """Build chat messages for one (query, doc) pair."""
39 return [ 56 return [
@@ -71,19 +88,17 @@ class Qwen3VLLMRerankerBackend: @@ -71,19 +88,17 @@ class Qwen3VLLMRerankerBackend:
71 sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH") 88 sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH")
72 if sort_by_doc_length is None: 89 if sort_by_doc_length is None:
73 sort_by_doc_length = self._config.get("sort_by_doc_length", True) 90 sort_by_doc_length = self._config.get("sort_by_doc_length", True)
74 - length_sort_mode = os.getenv("RERANK_VLLM_LENGTH_SORT_MODE") or self._config.get("length_sort_mode", "char")  
75 91
76 self._infer_batch_size = int(infer_batch_size) 92 self._infer_batch_size = int(infer_batch_size)
77 self._sort_by_doc_length = str(sort_by_doc_length).strip().lower() in {"1", "true", "yes", "y", "on"} 93 self._sort_by_doc_length = str(sort_by_doc_length).strip().lower() in {"1", "true", "yes", "y", "on"}
78 - self._length_sort_mode = str(length_sort_mode).strip().lower()  
79 if not torch.cuda.is_available(): 94 if not torch.cuda.is_available():
80 raise RuntimeError("qwen3_vllm backend requires CUDA GPU, but torch.cuda.is_available() is False") 95 raise RuntimeError("qwen3_vllm backend requires CUDA GPU, but torch.cuda.is_available() is False")
81 if dtype not in {"float16", "half", "auto"}: 96 if dtype not in {"float16", "half", "auto"}:
82 raise ValueError(f"Unsupported dtype for qwen3_vllm: {dtype!r}. Use float16/half/auto.") 97 raise ValueError(f"Unsupported dtype for qwen3_vllm: {dtype!r}. Use float16/half/auto.")
83 if self._infer_batch_size <= 0: 98 if self._infer_batch_size <= 0:
84 - raise ValueError(f"infer_batch_size must be > 0, got {self._infer_batch_size}")  
85 - if self._length_sort_mode not in {"char", "token"}:  
86 - raise ValueError(f"length_sort_mode must be 'char' or 'token', got {self._length_sort_mode!r}") 99 + raise ValueError(
  100 + f"infer_batch_size must be > 0, got {self._infer_batch_size}"
  101 + )
87 102
88 logger.info( 103 logger.info(
89 "[Qwen3_VLLM] Loading model %s (max_model_len=%s, tp=%s, gpu_mem=%.2f, dtype=%s, prefix_caching=%s)", 104 "[Qwen3_VLLM] Loading model %s (max_model_len=%s, tp=%s, gpu_mem=%.2f, dtype=%s, prefix_caching=%s)",
@@ -196,21 +211,7 @@ class Qwen3VLLMRerankerBackend: @@ -196,21 +211,7 @@ class Qwen3VLLMRerankerBackend:
196 """ 211 """
197 if not docs: 212 if not docs:
198 return [] 213 return []
199 - if self._length_sort_mode == "char":  
200 - return [len(text) for text in docs]  
201 - try:  
202 - enc = self._tokenizer(  
203 - docs,  
204 - add_special_tokens=False,  
205 - truncation=True,  
206 - max_length=self._max_prompt_len,  
207 - return_length=True,  
208 - )  
209 - lengths = enc.get("length")  
210 - if isinstance(lengths, list) and len(lengths) == len(docs):  
211 - return [int(x) for x in lengths]  
212 - except Exception as exc:  
213 - logger.debug("Length estimation fallback to char length: %s", exc) 214 + # Use simple character length to approximate document length.
214 return [len(text) for text in docs] 215 return [len(text) for text in docs]
215 216
216 def score_with_meta( 217 def score_with_meta(
@@ -247,7 +248,6 @@ class Qwen3VLLMRerankerBackend: @@ -247,7 +248,6 @@ class Qwen3VLLMRerankerBackend:
247 "infer_batch_size": self._infer_batch_size, 248 "infer_batch_size": self._infer_batch_size,
248 "inference_batches": 0, 249 "inference_batches": 0,
249 "sort_by_doc_length": self._sort_by_doc_length, 250 "sort_by_doc_length": self._sort_by_doc_length,
250 - "length_sort_mode": self._length_sort_mode,  
251 } 251 }
252 252
253 # Deduplicate globally by text, keep mapping to original indices. 253 # Deduplicate globally by text, keep mapping to original indices.
@@ -257,11 +257,12 @@ class Qwen3VLLMRerankerBackend: @@ -257,11 +257,12 @@ class Qwen3VLLMRerankerBackend:
257 lengths = self._estimate_doc_lengths(unique_texts) 257 lengths = self._estimate_doc_lengths(unique_texts)
258 order = list(range(len(unique_texts))) 258 order = list(range(len(unique_texts)))
259 if self._sort_by_doc_length and len(unique_texts) > 1: 259 if self._sort_by_doc_length and len(unique_texts) > 1:
260 - order = sort_indices_by_length(lengths) 260 + order = sorted(order, key=lambda i: lengths[i])
261 261
262 unique_scores: List[float] = [0.0] * len(unique_texts) 262 unique_scores: List[float] = [0.0] * len(unique_texts)
263 inference_batches = 0 263 inference_batches = 0
264 - for batch_indices in iter_batches(order, self._infer_batch_size): 264 + for start in range(0, len(order), self._infer_batch_size):
  265 + batch_indices = order[start : start + self._infer_batch_size]
265 inference_batches += 1 266 inference_batches += 1
266 pairs = [(query, unique_texts[i]) for i in batch_indices] 267 pairs = [(query, unique_texts[i]) for i in batch_indices]
267 prompts = self._process_inputs(pairs) 268 prompts = self._process_inputs(pairs)
scripts/service_ctl.sh
@@ -684,6 +684,7 @@ status_one() { @@ -684,6 +684,7 @@ status_one() {
684 local pid_info="-" 684 local pid_info="-"
685 local health="down" 685 local health="down"
686 local health_body="" 686 local health_body=""
  687 + local curl_timeout_opts=(--connect-timeout 8 --max-time 8)
687 688
688 if [ "${service}" = "tei" ]; then 689 if [ "${service}" = "tei" ]; then
689 local cid 690 local cid
@@ -696,7 +697,7 @@ status_one() { @@ -696,7 +697,7 @@ status_one() {
696 local path 697 local path
697 path="$(health_path_for_service "${service}")" 698 path="$(health_path_for_service "${service}")"
698 if [ -n "${port}" ] && [ -n "${path}" ]; then 699 if [ -n "${port}" ] && [ -n "${path}" ]; then
699 - if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then 700 + if health_body="$(curl -fsS "${curl_timeout_opts[@]}" "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
700 health="ok" 701 health="ok"
701 else 702 else
702 health="fail" 703 health="fail"
@@ -723,7 +724,7 @@ status_one() { @@ -723,7 +724,7 @@ status_one() {
723 local path 724 local path
724 path="$(health_path_for_service "${service}")" 725 path="$(health_path_for_service "${service}")"
725 if [ -n "${port}" ] && [ -n "${path}" ]; then 726 if [ -n "${port}" ] && [ -n "${path}" ]; then
726 - if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then 727 + if health_body="$(curl -fsS "${curl_timeout_opts[@]}" "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
727 health="ok" 728 health="ok"
728 else 729 else
729 health="fail" 730 health="fail"
search/searcher.py
@@ -598,7 +598,6 @@ class Searcher: @@ -598,7 +598,6 @@ class Searcher:
598 es_hits = [] 598 es_hits = []
599 if 'hits' in es_response and 'hits' in es_response['hits']: 599 if 'hits' in es_response and 'hits' in es_response['hits']:
600 es_hits = es_response['hits']['hits'] 600 es_hits = es_response['hits']['hits']
601 -  
602 # Extract total and max_score 601 # Extract total and max_score
603 total = es_response.get('hits', {}).get('total', {}) 602 total = es_response.get('hits', {}).get('total', {})
604 if isinstance(total, dict): 603 if isinstance(total, dict):
@@ -616,6 +615,37 @@ class Searcher: @@ -616,6 +615,37 @@ class Searcher:
616 sku_filter_dimension=sku_filter_dimension 615 sku_filter_dimension=sku_filter_dimension
617 ) 616 )
618 617
  618 + # Build per-result debug info (per SPU) when debug mode is enabled
  619 + per_result_debug = []
  620 + if debug and es_hits and formatted_results:
  621 + for hit, spu in zip(es_hits, formatted_results):
  622 + source = hit.get("_source", {}) or {}
  623 + raw_score = hit.get("_score")
  624 + try:
  625 + es_score = float(raw_score) if raw_score is not None else 0.0
  626 + except (TypeError, ValueError):
  627 + es_score = 0.0
  628 + try:
  629 + normalized = float(es_score) / float(max_score) if max_score else None
  630 + except (TypeError, ValueError, ZeroDivisionError):
  631 + normalized = None
  632 +
  633 + title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None
  634 + brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
  635 + vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None
  636 +
  637 + per_result_debug.append(
  638 + {
  639 + "spu_id": spu.spu_id,
  640 + "es_id": hit.get("_id"),
  641 + "es_score": es_score,
  642 + "es_score_normalized": normalized,
  643 + "title_multilingual": title_multilingual,
  644 + "brief_multilingual": brief_multilingual,
  645 + "vendor_multilingual": vendor_multilingual,
  646 + }
  647 + )
  648 +
619 # Format facets 649 # Format facets
620 standardized_facets = None 650 standardized_facets = None
621 if facets: 651 if facets:
@@ -676,6 +706,8 @@ class Searcher: @@ -676,6 +706,8 @@ class Searcher:
676 }, 706 },
677 "search_params": context.metadata.get('search_params', {}) 707 "search_params": context.metadata.get('search_params', {})
678 } 708 }
  709 + if per_result_debug:
  710 + debug_info["per_result"] = per_result_debug
679 711
680 # Build result 712 # Build result
681 result = SearchResult( 713 result = SearchResult(
tests/test_reranker_batching_utils.py deleted
@@ -1,32 +0,0 @@ @@ -1,32 +0,0 @@
1 -import pytest  
2 -  
3 -from reranker.backends.batching_utils import (  
4 - deduplicate_with_positions,  
5 - iter_batches,  
6 - sort_indices_by_length,  
7 -)  
8 -  
9 -  
10 -def test_deduplicate_with_positions_global_not_adjacent():  
11 - texts = ["a", "b", "a", "c", "b", "a"]  
12 - unique, mapping = deduplicate_with_positions(texts)  
13 - assert unique == ["a", "b", "c"]  
14 - assert mapping == [0, 1, 0, 2, 1, 0]  
15 -  
16 -  
17 -def test_sort_indices_by_length_stable():  
18 - lengths = [5, 2, 2, 9, 4]  
19 - order = sort_indices_by_length(lengths)  
20 - # Stable sort: index 1 remains ahead of index 2 when lengths are equal.  
21 - assert order == [1, 2, 4, 0, 3]  
22 -  
23 -  
24 -def test_iter_batches():  
25 - indices = list(range(10))  
26 - batches = list(iter_batches(indices, 4))  
27 - assert batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]  
28 -  
29 -  
30 -def test_iter_batches_invalid_batch_size():  
31 - with pytest.raises(ValueError, match="batch_size must be > 0"):  
32 - list(iter_batches([0, 1], 0))