Commit 985752f51220c83297e5748b210cc55d1531243a

Authored by tangwang
1 parent f201859e

1. 前端调试功能

2. 翻译限速对应处理(qwen-mt 限速)
api/routes/search.py
... ... @@ -18,6 +18,7 @@ from ..models import (
18 18 ErrorResponse
19 19 )
20 20 from context.request_context import create_request_context, set_current_request_context, clear_current_request_context
  21 +from indexer.mapping_generator import get_tenant_index_name
21 22  
22 23 router = APIRouter(prefix="/search", tags=["search"])
23 24 backend_verbose_logger = logging.getLogger("backend.verbose")
... ... @@ -437,3 +438,57 @@ async def get_document(doc_id: str, http_request: Request):
437 438 raise
438 439 except Exception as e:
439 440 raise HTTPException(status_code=500, detail=str(e))
  441 +
  442 +
  443 +@router.get("/es-doc/{spu_id}")
  444 +async def get_es_raw_document(spu_id: str, http_request: Request):
  445 + """
  446 + Get raw Elasticsearch document(s) for a given SPU ID.
  447 +
  448 + This is intended for debugging in the test frontend:
  449 + it queries the tenant-specific ES index with a term filter on spu_id
  450 + and returns the raw ES search response.
  451 + """
  452 + # Extract tenant_id (required)
  453 + tenant_id = http_request.headers.get("X-Tenant-ID")
  454 + if not tenant_id:
  455 + from urllib.parse import parse_qs
  456 + query_string = http_request.url.query
  457 + if query_string:
  458 + params = parse_qs(query_string)
  459 + tenant_id = params.get("tenant_id", [None])[0]
  460 +
  461 + if not tenant_id:
  462 + raise HTTPException(
  463 + status_code=400,
  464 + detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'",
  465 + )
  466 +
  467 + try:
  468 + from api.app import get_searcher
  469 +
  470 + searcher = get_searcher()
  471 + es_client = searcher.es_client
  472 + index_name = get_tenant_index_name(tenant_id)
  473 +
  474 + body = {
  475 + "size": 5,
  476 + "query": {
  477 + "bool": {
  478 + "filter": [
  479 + {
  480 + "term": {
  481 + "spu_id": spu_id,
  482 + }
  483 + }
  484 + ]
  485 + }
  486 + },
  487 + }
  488 +
  489 + es_response = es_client.search(index_name=index_name, body=body, size=5, from_=0)
  490 + return es_response
  491 + except HTTPException:
  492 + raise
  493 + except Exception as e:
  494 + raise HTTPException(status_code=500, detail=str(e))
... ...
docs/翻译模块说明.md
... ... @@ -15,6 +15,12 @@ DEEPL_AUTH_KEY=xxx
15 15 TRANSLATION_MODEL=qwen # 或 deepl
16 16 ```
17 17  
  18 +> **重要限速说明(Qwen 机翻)**
  19 +> 当前默认的 Qwen 翻译后端使用 `qwen-mt-flash` 云端模型,**官方限速较低,约 RPM=60(每分钟约 60 次请求)**。
  20 +> - 推荐通过 Redis 翻译缓存复用结果,避免对相同文本重复打云端
  21 +> - 高并发场景需要在调用端做限流 / 去抖,或改为离线批量翻译
  22 +> - 如需更高吞吐,可考虑 DeepL 或自建翻译服务
  23 +
18 24 ## Provider 配置
19 25  
20 26 Provider 与 URL 在 `config/config.yaml` 的 `services.translation`。详见 [QUICKSTART.md](./QUICKSTART.md) §3 与 [DEVELOPER_GUIDE.md](./DEVELOPER_GUIDE.md) §7.2。
... ...
frontend/static/css/style.css
... ... @@ -322,9 +322,9 @@ body {
322 322  
323 323 /* Product Grid */
324 324 .product-grid {
325   - display: grid;
326   - grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
327   - gap: 20px;
  325 + display: flex;
  326 + flex-direction: column;
  327 + gap: 16px;
328 328 padding: 30px;
329 329 background: #f8f8f8;
330 330 min-height: 400px;
... ... @@ -336,9 +336,51 @@ body {
336 336 border-radius: 8px;
337 337 padding: 15px;
338 338 transition: all 0.3s;
339   - cursor: pointer;
  339 + cursor: default;
  340 + display: flex;
  341 + flex-direction: row;
  342 + align-items: flex-start;
  343 + gap: 16px;
  344 +}
  345 +
  346 +.product-main {
340 347 display: flex;
341 348 flex-direction: column;
  349 + width: 260px;
  350 + flex-shrink: 0;
  351 +}
  352 +
  353 +.product-debug {
  354 + flex: 1;
  355 + font-family: Menlo, Consolas, "Courier New", monospace;
  356 + font-size: 12px;
  357 + color: #555;
  358 + border-left: 1px dashed #eee;
  359 + padding-left: 12px;
  360 + max-height: 260px;
  361 + overflow: auto;
  362 +}
  363 +
  364 +.product-debug-title {
  365 + font-weight: 600;
  366 + margin-bottom: 6px;
  367 + color: #333;
  368 +}
  369 +
  370 +.product-debug-line {
  371 + margin-bottom: 2px;
  372 +}
  373 +
  374 +.product-debug-link {
  375 + display: inline-block;
  376 + margin-top: 6px;
  377 + font-size: 12px;
  378 + color: #e67e22;
  379 + text-decoration: none;
  380 +}
  381 +
  382 +.product-debug-link:hover {
  383 + text-decoration: underline;
342 384 }
343 385  
344 386 .product-card:hover {
... ... @@ -347,7 +389,7 @@ body {
347 389 }
348 390  
349 391 .product-image-wrapper {
350   - width: 100%;
  392 + width: 220px;
351 393 height: 180px;
352 394 display: flex;
353 395 align-items: center;
... ...
frontend/static/js/app.js
... ... @@ -287,7 +287,8 @@ async function performSearch(page = 1) {
287 287 sort_by: state.sortBy || null,
288 288 sort_order: state.sortOrder,
289 289 sku_filter_dimension: skuFilterDimension,
290   - debug: state.debug
  290 + // 测试前端始终开启后端调试信息
  291 + debug: true
291 292 })
292 293 });
293 294  
... ... @@ -336,7 +337,19 @@ function displayResults(data) {
336 337 }
337 338  
338 339 let html = '';
339   -
  340 +
  341 + // Build per-SPU debug lookup from debug_info.per_result (if present)
  342 + let perResultDebugBySpu = {};
  343 + if (state.debug && data.debug_info && Array.isArray(data.debug_info.per_result)) {
  344 + data.debug_info.per_result.forEach((item) => {
  345 + if (item && item.spu_id) {
  346 + perResultDebugBySpu[String(item.spu_id)] = item;
  347 + }
  348 + });
  349 + }
  350 +
  351 + const tenantId = getTenantId();
  352 +
340 353 data.results.forEach((result) => {
341 354 const product = result;
342 355 const title = product.title || product.name || 'N/A';
... ... @@ -344,32 +357,71 @@ function displayResults(data) {
344 357 const imageUrl = product.image_url || product.imageUrl || '';
345 358 const category = product.category || product.categoryName || '';
346 359 const vendor = product.vendor || product.brandName || '';
  360 + const spuId = product.spu_id || '';
  361 + const debug = spuId ? perResultDebugBySpu[String(spuId)] : null;
  362 +
  363 + let debugHtml = '';
  364 + if (debug) {
  365 + const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? '');
  366 + const esNorm = typeof debug.es_score_normalized === 'number'
  367 + ? debug.es_score_normalized.toFixed(4)
  368 + : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
  369 +
  370 + // Build multilingual title info
  371 + let titleLines = '';
  372 + if (debug.title_multilingual && typeof debug.title_multilingual === 'object') {
  373 + Object.entries(debug.title_multilingual).forEach(([lang, val]) => {
  374 + if (val) {
  375 + titleLines += `<div class="product-debug-line">title.${escapeHtml(String(lang))}: ${escapeHtml(String(val))}</div>`;
  376 + }
  377 + });
  378 + }
  379 +
  380 + const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
  381 +
  382 + debugHtml = `
  383 + <div class="product-debug">
  384 + <div class="product-debug-title">Ranking Debug</div>
  385 + <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div>
  386 + <div class="product-debug-line">es_id: ${escapeHtml(String(debug.es_id || ''))}</div>
  387 + <div class="product-debug-line">ES score: ${esScore}</div>
  388 + <div class="product-debug-line">ES normalized: ${esNorm}</div>
  389 + ${titleLines}
  390 + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">
  391 + 查看 ES 原始文档
  392 + </a>
  393 + </div>
  394 + `;
  395 + }
347 396  
348 397 html += `
349 398 <div class="product-card">
350   - <div class="product-image-wrapper">
351   - ${imageUrl ? `
352   - <img src="${escapeHtml(imageUrl)}"
353   - alt="${escapeHtml(title)}"
354   - class="product-image"
355   - onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'">
356   - ` : `
357   - <div style="color: #ccc; font-size: 14px;">No Image</div>
358   - `}
359   - </div>
360   -
361   - <div class="product-price">
362   - ${price !== 'N/A' ? `¥${price}` : 'N/A'}
363   - </div>
364   -
365   - <div class="product-title">
366   - ${escapeHtml(title)}
367   - </div>
368   -
369   - <div class="product-meta">
370   - ${category ? escapeHtml(category) : ''}
371   - ${vendor ? ' | ' + escapeHtml(vendor) : ''}
  399 + <div class="product-main">
  400 + <div class="product-image-wrapper">
  401 + ${imageUrl ? `
  402 + <img src="${escapeHtml(imageUrl)}"
  403 + alt="${escapeHtml(title)}"
  404 + class="product-image"
  405 + onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'">
  406 + ` : `
  407 + <div style="color: #ccc; font-size: 14px;">No Image</div>
  408 + `}
  409 + </div>
  410 +
  411 + <div class="product-price">
  412 + ${price !== 'N/A' ? `¥${price}` : 'N/A'}
  413 + </div>
  414 +
  415 + <div class="product-title">
  416 + ${escapeHtml(title)}
  417 + </div>
  418 +
  419 + <div class="product-meta">
  420 + ${category ? escapeHtml(category) : ''}
  421 + ${vendor ? ' | ' + escapeHtml(vendor) : ''}
  422 + </div>
372 423 </div>
  424 + ${debugHtml}
373 425 </div>
374 426 `;
375 427 });
... ...
providers/translation.py
... ... @@ -1,170 +0,0 @@
1   -"""
2   -Translation provider - direct (in-process) or HTTP service.
3   -"""
4   -
5   -from __future__ import annotations
6   -
7   -import logging
8   -from typing import Any, Dict, List, Optional, Union
9   -
10   -from concurrent.futures import Future, ThreadPoolExecutor
11   -import requests
12   -
13   -from config.services_config import get_translation_config, get_translation_base_url
14   -
15   -logger = logging.getLogger(__name__)
16   -
17   -
18   -class HttpTranslationProvider:
19   - """Translation via HTTP service."""
20   -
21   - def __init__(
22   - self,
23   - base_url: str,
24   - model: str = "qwen",
25   - timeout_sec: float = 10.0,
26   - translation_context: Optional[str] = None,
27   - ):
28   - self.base_url = (base_url or "").rstrip("/")
29   - self.model = model or "qwen"
30   - self.timeout_sec = float(timeout_sec or 10.0)
31   - self.translation_context = translation_context or "e-commerce product search"
32   - self.executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="http-translator")
33   -
34   - def _translate_once(
35   - self,
36   - text: str,
37   - target_lang: str,
38   - source_lang: Optional[str] = None,
39   - ) -> Optional[str]:
40   - if not text or not str(text).strip():
41   - return text
42   - try:
43   - url = f"{self.base_url}/translate"
44   - payload = {
45   - "text": text,
46   - "target_lang": target_lang,
47   - "source_lang": source_lang or "auto",
48   - "model": self.model,
49   - }
50   - response = requests.post(url, json=payload, timeout=self.timeout_sec)
51   - if response.status_code != 200:
52   - logger.warning(
53   - "HTTP translator failed: status=%s body=%s",
54   - response.status_code,
55   - (response.text or "")[:200],
56   - )
57   - return None
58   - data = response.json()
59   - translated = data.get("translated_text")
60   - return translated if translated is not None else None
61   - except Exception as exc:
62   - logger.warning("HTTP translator request failed: %s", exc, exc_info=True)
63   - return None
64   -
65   - def translate(
66   - self,
67   - text: str,
68   - target_lang: str,
69   - source_lang: Optional[str] = None,
70   - context: Optional[str] = None,
71   - prompt: Optional[str] = None,
72   - ) -> Optional[str]:
73   - del context, prompt
74   - result = self._translate_once(text=text, target_lang=target_lang, source_lang=source_lang)
75   - return result if result is not None else text
76   -
77   - def translate_multi(
78   - self,
79   - text: str,
80   - target_langs: List[str],
81   - source_lang: Optional[str] = None,
82   - context: Optional[str] = None,
83   - async_mode: bool = True,
84   - prompt: Optional[str] = None,
85   - ) -> Dict[str, Optional[str]]:
86   - del context, async_mode, prompt
87   - out: Dict[str, Optional[str]] = {}
88   - for lang in target_langs:
89   - out[lang] = self.translate(text, lang, source_lang=source_lang)
90   - return out
91   -
92   - def translate_multi_async(
93   - self,
94   - text: str,
95   - target_langs: List[str],
96   - source_lang: Optional[str] = None,
97   - context: Optional[str] = None,
98   - prompt: Optional[str] = None,
99   - ) -> Dict[str, Union[str, Future]]:
100   - del context, prompt
101   - out: Dict[str, Union[str, Future]] = {}
102   - for lang in target_langs:
103   - out[lang] = self.executor.submit(self.translate, text, lang, source_lang)
104   - return out
105   -
106   - def translate_for_indexing(
107   - self,
108   - text: str,
109   - shop_language: str,
110   - source_lang: Optional[str] = None,
111   - context: Optional[str] = None,
112   - prompt: Optional[str] = None,
113   - index_languages: Optional[List[str]] = None,
114   - ) -> Dict[str, Optional[str]]:
115   - del context, prompt
116   - langs = index_languages if index_languages else ["en", "zh"]
117   - source = source_lang or shop_language or "auto"
118   - out: Dict[str, Optional[str]] = {}
119   - for lang in langs:
120   - if lang == shop_language:
121   - out[lang] = text
122   - else:
123   - out[lang] = self.translate(text, target_lang=lang, source_lang=source)
124   - return out
125   -
126   -
127   -def create_translation_provider(query_config: Any = None) -> Any:
128   - """
129   - Create translation provider from services config.
130   -
131   - query_config: optional, for api_key/glossary_id/context (used by direct provider).
132   - """
133   - cfg = get_translation_config()
134   - provider = cfg.provider
135   - pc = cfg.get_provider_cfg()
136   -
137   - if provider in ("direct", "local", "inprocess"):
138   - from query.translator import Translator
139   - model = pc.get("model") or "qwen"
140   - qc = query_config or _empty_query_config()
141   - return Translator(
142   - model=model,
143   - api_key=getattr(qc, "translation_api_key", None),
144   - use_cache=True,
145   - glossary_id=getattr(qc, "translation_glossary_id", None),
146   - translation_context=getattr(qc, "translation_context", "e-commerce product search"),
147   - )
148   -
149   - if provider in ("http", "service"):
150   - base_url = get_translation_base_url()
151   - model = pc.get("model") or "qwen"
152   - timeout = pc.get("timeout_sec", 10.0)
153   - qc = query_config or _empty_query_config()
154   - return HttpTranslationProvider(
155   - base_url=base_url,
156   - model=model,
157   - timeout_sec=float(timeout),
158   - translation_context=getattr(qc, "translation_context", "e-commerce product search"),
159   - )
160   -
161   - raise ValueError(f"Unsupported translation provider: {provider}")
162   -
163   -
164   -def _empty_query_config() -> Any:
165   - """Minimal object with default translation attrs."""
166   - class _QC:
167   - translation_api_key = None
168   - translation_glossary_id = None
169   - translation_context = "e-commerce product search"
170   - return _QC()
query/llm_translate.py 0 → 100644
query/translator.py
... ... @@ -5,6 +5,11 @@ Supports multiple translation models:
5 5 - Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model
6 6 - DeepL: DeepL API for high-quality translations
7 7  
  8 +重要说明(Qwen 机翻限速):
  9 +- 当前默认使用的 `qwen-mt-flash` 为云端机翻模型,**官方限速较低,约 RPM=60(每分钟约 60 次请求)**
  10 +- 在高并发场景必须依赖 Redis 翻译缓存与批量预热,避免在用户实时请求路径上直接打满 DashScope 限流
  11 +- 若业务侧存在大规模离线翻译或更高吞吐需求,建议评估 DeepL 或自建翻译后端
  12 +
8 13 使用方法 (Usage):
9 14  
10 15 ```python
... ...
reranker/backends/batching_utils.py deleted
... ... @@ -1,41 +0,0 @@
1   -"""Utilities for reranker batching and deduplication."""
2   -
3   -from __future__ import annotations
4   -
5   -from typing import Iterable, List, Sequence, Tuple
6   -
7   -
8   -def deduplicate_with_positions(texts: Sequence[str]) -> Tuple[List[str], List[int]]:
9   - """
10   - Deduplicate texts globally while preserving first-seen order.
11   -
12   - Returns:
13   - unique_texts: deduplicated texts in first-seen order
14   - position_to_unique: mapping from each original position to unique index
15   - """
16   - unique_texts: List[str] = []
17   - position_to_unique: List[int] = []
18   - seen: dict[str, int] = {}
19   -
20   - for text in texts:
21   - idx = seen.get(text)
22   - if idx is None:
23   - idx = len(unique_texts)
24   - seen[text] = idx
25   - unique_texts.append(text)
26   - position_to_unique.append(idx)
27   -
28   - return unique_texts, position_to_unique
29   -
30   -
31   -def sort_indices_by_length(lengths: Sequence[int]) -> List[int]:
32   - """Return stable ascending indices by lengths."""
33   - return sorted(range(len(lengths)), key=lambda i: lengths[i])
34   -
35   -
36   -def iter_batches(indices: Sequence[int], batch_size: int) -> Iterable[List[int]]:
37   - """Yield consecutive batches from indices."""
38   - if batch_size <= 0:
39   - raise ValueError(f"batch_size must be > 0, got {batch_size}")
40   - for i in range(0, len(indices), batch_size):
41   - yield list(indices[i : i + batch_size])
reranker/backends/dashscope_rerank.py
... ... @@ -21,11 +21,33 @@ from typing import Any, Dict, List, Tuple
21 21 from urllib import error as urllib_error
22 22 from urllib import request as urllib_request
23 23  
24   -from reranker.backends.batching_utils import deduplicate_with_positions, iter_batches
25 24  
26 25 logger = logging.getLogger("reranker.backends.dashscope_rerank")
27 26  
28 27  
def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]:
    """
    Collapse duplicate texts while keeping first-seen order.

    Returns:
        unique_texts: each distinct text exactly once, ordered by first
            appearance in the input
        position_to_unique: for every input position, the index of that
            text within unique_texts
    """
    first_index: Dict[str, int] = {}
    uniques: List[str] = []
    mapping: List[int] = []

    for item in texts:
        if item not in first_index:
            first_index[item] = len(uniques)
            uniques.append(item)
        mapping.append(first_index[item])

    return uniques, mapping
  49 +
  50 +
29 51 class DashScopeRerankBackend:
30 52 """
31 53 DashScope cloud reranker backend.
... ... @@ -206,7 +228,10 @@ class DashScopeRerankBackend:
206 228 then apply global top_n/top_n_cap truncation after merge if needed.
207 229 """
208 230 indices = list(range(len(unique_texts)))
209   - batches = list(iter_batches(indices, batch_size=self._batchsize))
  231 + batches = [
  232 + indices[i : i + self._batchsize]
  233 + for i in range(0, len(indices), self._batchsize)
  234 + ]
210 235 num_batches = len(batches)
211 236 max_workers = min(8, num_batches) if num_batches > 0 else 1
212 237 unique_scores: List[float] = [0.0] * len(unique_texts)
... ...
reranker/backends/qwen3_vllm.py
... ... @@ -14,12 +14,6 @@ import threading
14 14 import time
15 15 from typing import Any, Dict, List, Tuple
16 16  
17   -from reranker.backends.batching_utils import (
18   - deduplicate_with_positions,
19   - iter_batches,
20   - sort_indices_by_length,
21   -)
22   -
23 17 logger = logging.getLogger("reranker.backends.qwen3_vllm")
24 18  
25 19 try:
... ... @@ -34,6 +28,29 @@ except ImportError as e:
34 28 ) from e
35 29  
36 30  
def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]:
    """
    Deduplicate texts globally, preserving first-seen order.

    Returns a pair ``(unique_texts, position_to_unique)`` where
    ``position_to_unique[i]`` is the index in ``unique_texts`` of
    ``texts[i]``.
    """
    # Assign each distinct text the next free index on first encounter;
    # setdefault leaves existing entries untouched.
    index_of: Dict[str, int] = {}
    for candidate in texts:
        index_of.setdefault(candidate, len(index_of))

    # dict preserves insertion order, so keys come out in first-seen order.
    unique_texts = list(index_of)
    position_to_unique = [index_of[t] for t in texts]
    return unique_texts, position_to_unique
  52 +
  53 +
37 54 def _format_instruction(instruction: str, query: str, doc: str) -> List[Dict[str, str]]:
38 55 """Build chat messages for one (query, doc) pair."""
39 56 return [
... ... @@ -71,19 +88,17 @@ class Qwen3VLLMRerankerBackend:
71 88 sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH")
72 89 if sort_by_doc_length is None:
73 90 sort_by_doc_length = self._config.get("sort_by_doc_length", True)
74   - length_sort_mode = os.getenv("RERANK_VLLM_LENGTH_SORT_MODE") or self._config.get("length_sort_mode", "char")
75 91  
76 92 self._infer_batch_size = int(infer_batch_size)
77 93 self._sort_by_doc_length = str(sort_by_doc_length).strip().lower() in {"1", "true", "yes", "y", "on"}
78   - self._length_sort_mode = str(length_sort_mode).strip().lower()
79 94 if not torch.cuda.is_available():
80 95 raise RuntimeError("qwen3_vllm backend requires CUDA GPU, but torch.cuda.is_available() is False")
81 96 if dtype not in {"float16", "half", "auto"}:
82 97 raise ValueError(f"Unsupported dtype for qwen3_vllm: {dtype!r}. Use float16/half/auto.")
83 98 if self._infer_batch_size <= 0:
84   - raise ValueError(f"infer_batch_size must be > 0, got {self._infer_batch_size}")
85   - if self._length_sort_mode not in {"char", "token"}:
86   - raise ValueError(f"length_sort_mode must be 'char' or 'token', got {self._length_sort_mode!r}")
  99 + raise ValueError(
  100 + f"infer_batch_size must be > 0, got {self._infer_batch_size}"
  101 + )
87 102  
88 103 logger.info(
89 104 "[Qwen3_VLLM] Loading model %s (max_model_len=%s, tp=%s, gpu_mem=%.2f, dtype=%s, prefix_caching=%s)",
... ... @@ -196,21 +211,7 @@ class Qwen3VLLMRerankerBackend:
196 211 """
197 212 if not docs:
198 213 return []
199   - if self._length_sort_mode == "char":
200   - return [len(text) for text in docs]
201   - try:
202   - enc = self._tokenizer(
203   - docs,
204   - add_special_tokens=False,
205   - truncation=True,
206   - max_length=self._max_prompt_len,
207   - return_length=True,
208   - )
209   - lengths = enc.get("length")
210   - if isinstance(lengths, list) and len(lengths) == len(docs):
211   - return [int(x) for x in lengths]
212   - except Exception as exc:
213   - logger.debug("Length estimation fallback to char length: %s", exc)
  214 + # Use simple character length to approximate document length.
214 215 return [len(text) for text in docs]
215 216  
216 217 def score_with_meta(
... ... @@ -247,7 +248,6 @@ class Qwen3VLLMRerankerBackend:
247 248 "infer_batch_size": self._infer_batch_size,
248 249 "inference_batches": 0,
249 250 "sort_by_doc_length": self._sort_by_doc_length,
250   - "length_sort_mode": self._length_sort_mode,
251 251 }
252 252  
253 253 # Deduplicate globally by text, keep mapping to original indices.
... ... @@ -257,11 +257,12 @@ class Qwen3VLLMRerankerBackend:
257 257 lengths = self._estimate_doc_lengths(unique_texts)
258 258 order = list(range(len(unique_texts)))
259 259 if self._sort_by_doc_length and len(unique_texts) > 1:
260   - order = sort_indices_by_length(lengths)
  260 + order = sorted(order, key=lambda i: lengths[i])
261 261  
262 262 unique_scores: List[float] = [0.0] * len(unique_texts)
263 263 inference_batches = 0
264   - for batch_indices in iter_batches(order, self._infer_batch_size):
  264 + for start in range(0, len(order), self._infer_batch_size):
  265 + batch_indices = order[start : start + self._infer_batch_size]
265 266 inference_batches += 1
266 267 pairs = [(query, unique_texts[i]) for i in batch_indices]
267 268 prompts = self._process_inputs(pairs)
... ...
scripts/service_ctl.sh
... ... @@ -684,6 +684,7 @@ status_one() {
684 684 local pid_info="-"
685 685 local health="down"
686 686 local health_body=""
  687 + local curl_timeout_opts=(--connect-timeout 8 --max-time 8)
687 688  
688 689 if [ "${service}" = "tei" ]; then
689 690 local cid
... ... @@ -696,7 +697,7 @@ status_one() {
696 697 local path
697 698 path="$(health_path_for_service "${service}")"
698 699 if [ -n "${port}" ] && [ -n "${path}" ]; then
699   - if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
  700 + if health_body="$(curl -fsS "${curl_timeout_opts[@]}" "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
700 701 health="ok"
701 702 else
702 703 health="fail"
... ... @@ -723,7 +724,7 @@ status_one() {
723 724 local path
724 725 path="$(health_path_for_service "${service}")"
725 726 if [ -n "${port}" ] && [ -n "${path}" ]; then
726   - if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
  727 + if health_body="$(curl -fsS "${curl_timeout_opts[@]}" "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
727 728 health="ok"
728 729 else
729 730 health="fail"
... ...
search/searcher.py
... ... @@ -598,7 +598,6 @@ class Searcher:
598 598 es_hits = []
599 599 if 'hits' in es_response and 'hits' in es_response['hits']:
600 600 es_hits = es_response['hits']['hits']
601   -
602 601 # Extract total and max_score
603 602 total = es_response.get('hits', {}).get('total', {})
604 603 if isinstance(total, dict):
... ... @@ -616,6 +615,37 @@ class Searcher:
616 615 sku_filter_dimension=sku_filter_dimension
617 616 )
618 617  
  618 + # Build per-result debug info (per SPU) when debug mode is enabled
  619 + per_result_debug = []
  620 + if debug and es_hits and formatted_results:
  621 + for hit, spu in zip(es_hits, formatted_results):
  622 + source = hit.get("_source", {}) or {}
  623 + raw_score = hit.get("_score")
  624 + try:
  625 + es_score = float(raw_score) if raw_score is not None else 0.0
  626 + except (TypeError, ValueError):
  627 + es_score = 0.0
  628 + try:
  629 + normalized = float(es_score) / float(max_score) if max_score else None
  630 + except (TypeError, ValueError, ZeroDivisionError):
  631 + normalized = None
  632 +
  633 + title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None
  634 + brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
  635 + vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None
  636 +
  637 + per_result_debug.append(
  638 + {
  639 + "spu_id": spu.spu_id,
  640 + "es_id": hit.get("_id"),
  641 + "es_score": es_score,
  642 + "es_score_normalized": normalized,
  643 + "title_multilingual": title_multilingual,
  644 + "brief_multilingual": brief_multilingual,
  645 + "vendor_multilingual": vendor_multilingual,
  646 + }
  647 + )
  648 +
619 649 # Format facets
620 650 standardized_facets = None
621 651 if facets:
... ... @@ -676,6 +706,8 @@ class Searcher:
676 706 },
677 707 "search_params": context.metadata.get('search_params', {})
678 708 }
  709 + if per_result_debug:
  710 + debug_info["per_result"] = per_result_debug
679 711  
680 712 # Build result
681 713 result = SearchResult(
... ...
tests/test_reranker_batching_utils.py deleted
... ... @@ -1,32 +0,0 @@
1   -import pytest
2   -
3   -from reranker.backends.batching_utils import (
4   - deduplicate_with_positions,
5   - iter_batches,
6   - sort_indices_by_length,
7   -)
8   -
9   -
10   -def test_deduplicate_with_positions_global_not_adjacent():
11   - texts = ["a", "b", "a", "c", "b", "a"]
12   - unique, mapping = deduplicate_with_positions(texts)
13   - assert unique == ["a", "b", "c"]
14   - assert mapping == [0, 1, 0, 2, 1, 0]
15   -
16   -
17   -def test_sort_indices_by_length_stable():
18   - lengths = [5, 2, 2, 9, 4]
19   - order = sort_indices_by_length(lengths)
20   - # Stable sort: index 1 remains ahead of index 2 when lengths are equal.
21   - assert order == [1, 2, 4, 0, 3]
22   -
23   -
24   -def test_iter_batches():
25   - indices = list(range(10))
26   - batches = list(iter_batches(indices, 4))
27   - assert batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
28   -
29   -
30   -def test_iter_batches_invalid_batch_size():
31   - with pytest.raises(ValueError, match="batch_size must be > 0"):
32   - list(iter_batches([0, 1], 0))