Commit 985752f51220c83297e5748b210cc55d1531243a
1 parent: f201859e
1. 前端调试功能
2. 翻译限速 对应处理(qwen-mt限速)
Showing 13 changed files with 282 additions and 306 deletions (show diff stats)
api/routes/search.py
| ... | ... | @@ -18,6 +18,7 @@ from ..models import ( |
| 18 | 18 | ErrorResponse |
| 19 | 19 | ) |
| 20 | 20 | from context.request_context import create_request_context, set_current_request_context, clear_current_request_context |
| 21 | +from indexer.mapping_generator import get_tenant_index_name | |
| 21 | 22 | |
| 22 | 23 | router = APIRouter(prefix="/search", tags=["search"]) |
| 23 | 24 | backend_verbose_logger = logging.getLogger("backend.verbose") |
| ... | ... | @@ -437,3 +438,57 @@ async def get_document(doc_id: str, http_request: Request): |
| 437 | 438 | raise |
| 438 | 439 | except Exception as e: |
| 439 | 440 | raise HTTPException(status_code=500, detail=str(e)) |
| 441 | + | |
| 442 | + | |
| 443 | +@router.get("/es-doc/{spu_id}") | |
| 444 | +async def get_es_raw_document(spu_id: str, http_request: Request): | |
| 445 | + """ | |
| 446 | + Get raw Elasticsearch document(s) for a given SPU ID. | |
| 447 | + | |
| 448 | + This is intended for debugging in the test frontend: | |
| 449 | + it queries the tenant-specific ES index with a term filter on spu_id | |
| 450 | + and returns the raw ES search response. | |
| 451 | + """ | |
| 452 | + # Extract tenant_id (required) | |
| 453 | + tenant_id = http_request.headers.get("X-Tenant-ID") | |
| 454 | + if not tenant_id: | |
| 455 | + from urllib.parse import parse_qs | |
| 456 | + query_string = http_request.url.query | |
| 457 | + if query_string: | |
| 458 | + params = parse_qs(query_string) | |
| 459 | + tenant_id = params.get("tenant_id", [None])[0] | |
| 460 | + | |
| 461 | + if not tenant_id: | |
| 462 | + raise HTTPException( | |
| 463 | + status_code=400, | |
| 464 | + detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'", | |
| 465 | + ) | |
| 466 | + | |
| 467 | + try: | |
| 468 | + from api.app import get_searcher | |
| 469 | + | |
| 470 | + searcher = get_searcher() | |
| 471 | + es_client = searcher.es_client | |
| 472 | + index_name = get_tenant_index_name(tenant_id) | |
| 473 | + | |
| 474 | + body = { | |
| 475 | + "size": 5, | |
| 476 | + "query": { | |
| 477 | + "bool": { | |
| 478 | + "filter": [ | |
| 479 | + { | |
| 480 | + "term": { | |
| 481 | + "spu_id": spu_id, | |
| 482 | + } | |
| 483 | + } | |
| 484 | + ] | |
| 485 | + } | |
| 486 | + }, | |
| 487 | + } | |
| 488 | + | |
| 489 | + es_response = es_client.search(index_name=index_name, body=body, size=5, from_=0) | |
| 490 | + return es_response | |
| 491 | + except HTTPException: | |
| 492 | + raise | |
| 493 | + except Exception as e: | |
| 494 | + raise HTTPException(status_code=500, detail=str(e)) | ... | ... |
docs/翻译模块说明.md
| ... | ... | @@ -15,6 +15,12 @@ DEEPL_AUTH_KEY=xxx |
| 15 | 15 | TRANSLATION_MODEL=qwen # 或 deepl |
| 16 | 16 | ``` |
| 17 | 17 | |
| 18 | +> **重要限速说明(Qwen 机翻)** | |
| 19 | +> 当前默认的 Qwen 翻译后端使用 `qwen-mt-flash` 云端模型,**官方限速较低,约 RPM=60(每分钟约 60 请求)**。 | |
| 20 | +> - 推荐通过 Redis 翻译缓存复用结果,避免对相同文本重复打云端 | |
| 21 | +> - 高并发场景需要在调用端做限流 / 去抖,或改为离线批量翻译 | |
| 22 | +> - 如需更高吞吐,可考虑 DeepL 或自建翻译服务 | |
| 23 | + | |
| 18 | 24 | ## Provider 配置 |
| 19 | 25 | |
| 20 | 26 | Provider 与 URL 在 `config/config.yaml` 的 `services.translation`。详见 [QUICKSTART.md](./QUICKSTART.md) §3 与 [DEVELOPER_GUIDE.md](./DEVELOPER_GUIDE.md) §7.2。 | ... | ... |
frontend/static/css/style.css
| ... | ... | @@ -322,9 +322,9 @@ body { |
| 322 | 322 | |
| 323 | 323 | /* Product Grid */ |
| 324 | 324 | .product-grid { |
| 325 | - display: grid; | |
| 326 | - grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); | |
| 327 | - gap: 20px; | |
| 325 | + display: flex; | |
| 326 | + flex-direction: column; | |
| 327 | + gap: 16px; | |
| 328 | 328 | padding: 30px; |
| 329 | 329 | background: #f8f8f8; |
| 330 | 330 | min-height: 400px; |
| ... | ... | @@ -336,9 +336,51 @@ body { |
| 336 | 336 | border-radius: 8px; |
| 337 | 337 | padding: 15px; |
| 338 | 338 | transition: all 0.3s; |
| 339 | - cursor: pointer; | |
| 339 | + cursor: default; | |
| 340 | + display: flex; | |
| 341 | + flex-direction: row; | |
| 342 | + align-items: flex-start; | |
| 343 | + gap: 16px; | |
| 344 | +} | |
| 345 | + | |
| 346 | +.product-main { | |
| 340 | 347 | display: flex; |
| 341 | 348 | flex-direction: column; |
| 349 | + width: 260px; | |
| 350 | + flex-shrink: 0; | |
| 351 | +} | |
| 352 | + | |
| 353 | +.product-debug { | |
| 354 | + flex: 1; | |
| 355 | + font-family: Menlo, Consolas, "Courier New", monospace; | |
| 356 | + font-size: 12px; | |
| 357 | + color: #555; | |
| 358 | + border-left: 1px dashed #eee; | |
| 359 | + padding-left: 12px; | |
| 360 | + max-height: 260px; | |
| 361 | + overflow: auto; | |
| 362 | +} | |
| 363 | + | |
| 364 | +.product-debug-title { | |
| 365 | + font-weight: 600; | |
| 366 | + margin-bottom: 6px; | |
| 367 | + color: #333; | |
| 368 | +} | |
| 369 | + | |
| 370 | +.product-debug-line { | |
| 371 | + margin-bottom: 2px; | |
| 372 | +} | |
| 373 | + | |
| 374 | +.product-debug-link { | |
| 375 | + display: inline-block; | |
| 376 | + margin-top: 6px; | |
| 377 | + font-size: 12px; | |
| 378 | + color: #e67e22; | |
| 379 | + text-decoration: none; | |
| 380 | +} | |
| 381 | + | |
| 382 | +.product-debug-link:hover { | |
| 383 | + text-decoration: underline; | |
| 342 | 384 | } |
| 343 | 385 | |
| 344 | 386 | .product-card:hover { |
| ... | ... | @@ -347,7 +389,7 @@ body { |
| 347 | 389 | } |
| 348 | 390 | |
| 349 | 391 | .product-image-wrapper { |
| 350 | - width: 100%; | |
| 392 | + width: 220px; | |
| 351 | 393 | height: 180px; |
| 352 | 394 | display: flex; |
| 353 | 395 | align-items: center; | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -287,7 +287,8 @@ async function performSearch(page = 1) { |
| 287 | 287 | sort_by: state.sortBy || null, |
| 288 | 288 | sort_order: state.sortOrder, |
| 289 | 289 | sku_filter_dimension: skuFilterDimension, |
| 290 | - debug: state.debug | |
| 290 | + // 测试前端始终开启后端调试信息 | |
| 291 | + debug: true | |
| 291 | 292 | }) |
| 292 | 293 | }); |
| 293 | 294 | |
| ... | ... | @@ -336,7 +337,19 @@ function displayResults(data) { |
| 336 | 337 | } |
| 337 | 338 | |
| 338 | 339 | let html = ''; |
| 339 | - | |
| 340 | + | |
| 341 | + // Build per-SPU debug lookup from debug_info.per_result (if present) | |
| 342 | + let perResultDebugBySpu = {}; | |
| 343 | + if (state.debug && data.debug_info && Array.isArray(data.debug_info.per_result)) { | |
| 344 | + data.debug_info.per_result.forEach((item) => { | |
| 345 | + if (item && item.spu_id) { | |
| 346 | + perResultDebugBySpu[String(item.spu_id)] = item; | |
| 347 | + } | |
| 348 | + }); | |
| 349 | + } | |
| 350 | + | |
| 351 | + const tenantId = getTenantId(); | |
| 352 | + | |
| 340 | 353 | data.results.forEach((result) => { |
| 341 | 354 | const product = result; |
| 342 | 355 | const title = product.title || product.name || 'N/A'; |
| ... | ... | @@ -344,32 +357,71 @@ function displayResults(data) { |
| 344 | 357 | const imageUrl = product.image_url || product.imageUrl || ''; |
| 345 | 358 | const category = product.category || product.categoryName || ''; |
| 346 | 359 | const vendor = product.vendor || product.brandName || ''; |
| 360 | + const spuId = product.spu_id || ''; | |
| 361 | + const debug = spuId ? perResultDebugBySpu[String(spuId)] : null; | |
| 362 | + | |
| 363 | + let debugHtml = ''; | |
| 364 | + if (debug) { | |
| 365 | + const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? ''); | |
| 366 | + const esNorm = typeof debug.es_score_normalized === 'number' | |
| 367 | + ? debug.es_score_normalized.toFixed(4) | |
| 368 | + : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); | |
| 369 | + | |
| 370 | + // Build multilingual title info | |
| 371 | + let titleLines = ''; | |
| 372 | + if (debug.title_multilingual && typeof debug.title_multilingual === 'object') { | |
| 373 | + Object.entries(debug.title_multilingual).forEach(([lang, val]) => { | |
| 374 | + if (val) { | |
| 375 | + titleLines += `<div class="product-debug-line">title.${escapeHtml(String(lang))}: ${escapeHtml(String(val))}</div>`; | |
| 376 | + } | |
| 377 | + }); | |
| 378 | + } | |
| 379 | + | |
| 380 | + const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; | |
| 381 | + | |
| 382 | + debugHtml = ` | |
| 383 | + <div class="product-debug"> | |
| 384 | + <div class="product-debug-title">Ranking Debug</div> | |
| 385 | + <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> | |
| 386 | + <div class="product-debug-line">es_id: ${escapeHtml(String(debug.es_id || ''))}</div> | |
| 387 | + <div class="product-debug-line">ES score: ${esScore}</div> | |
| 388 | + <div class="product-debug-line">ES normalized: ${esNorm}</div> | |
| 389 | + ${titleLines} | |
| 390 | + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | |
| 391 | + 查看 ES 原始文档 | |
| 392 | + </a> | |
| 393 | + </div> | |
| 394 | + `; | |
| 395 | + } | |
| 347 | 396 | |
| 348 | 397 | html += ` |
| 349 | 398 | <div class="product-card"> |
| 350 | - <div class="product-image-wrapper"> | |
| 351 | - ${imageUrl ? ` | |
| 352 | - <img src="${escapeHtml(imageUrl)}" | |
| 353 | - alt="${escapeHtml(title)}" | |
| 354 | - class="product-image" | |
| 355 | - onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'"> | |
| 356 | - ` : ` | |
| 357 | - <div style="color: #ccc; font-size: 14px;">No Image</div> | |
| 358 | - `} | |
| 359 | - </div> | |
| 360 | - | |
| 361 | - <div class="product-price"> | |
| 362 | - ${price !== 'N/A' ? `¥${price}` : 'N/A'} | |
| 363 | - </div> | |
| 364 | - | |
| 365 | - <div class="product-title"> | |
| 366 | - ${escapeHtml(title)} | |
| 367 | - </div> | |
| 368 | - | |
| 369 | - <div class="product-meta"> | |
| 370 | - ${category ? escapeHtml(category) : ''} | |
| 371 | - ${vendor ? ' | ' + escapeHtml(vendor) : ''} | |
| 399 | + <div class="product-main"> | |
| 400 | + <div class="product-image-wrapper"> | |
| 401 | + ${imageUrl ? ` | |
| 402 | + <img src="${escapeHtml(imageUrl)}" | |
| 403 | + alt="${escapeHtml(title)}" | |
| 404 | + class="product-image" | |
| 405 | + onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'"> | |
| 406 | + ` : ` | |
| 407 | + <div style="color: #ccc; font-size: 14px;">No Image</div> | |
| 408 | + `} | |
| 409 | + </div> | |
| 410 | + | |
| 411 | + <div class="product-price"> | |
| 412 | + ${price !== 'N/A' ? `¥${price}` : 'N/A'} | |
| 413 | + </div> | |
| 414 | + | |
| 415 | + <div class="product-title"> | |
| 416 | + ${escapeHtml(title)} | |
| 417 | + </div> | |
| 418 | + | |
| 419 | + <div class="product-meta"> | |
| 420 | + ${category ? escapeHtml(category) : ''} | |
| 421 | + ${vendor ? ' | ' + escapeHtml(vendor) : ''} | |
| 422 | + </div> | |
| 372 | 423 | </div> |
| 424 | + ${debugHtml} | |
| 373 | 425 | </div> |
| 374 | 426 | `; |
| 375 | 427 | }); | ... | ... |
providers/translation.py
| ... | ... | @@ -1,170 +0,0 @@ |
| 1 | -""" | |
| 2 | -Translation provider - direct (in-process) or HTTP service. | |
| 3 | -""" | |
| 4 | - | |
| 5 | -from __future__ import annotations | |
| 6 | - | |
| 7 | -import logging | |
| 8 | -from typing import Any, Dict, List, Optional, Union | |
| 9 | - | |
| 10 | -from concurrent.futures import Future, ThreadPoolExecutor | |
| 11 | -import requests | |
| 12 | - | |
| 13 | -from config.services_config import get_translation_config, get_translation_base_url | |
| 14 | - | |
| 15 | -logger = logging.getLogger(__name__) | |
| 16 | - | |
| 17 | - | |
| 18 | -class HttpTranslationProvider: | |
| 19 | - """Translation via HTTP service.""" | |
| 20 | - | |
| 21 | - def __init__( | |
| 22 | - self, | |
| 23 | - base_url: str, | |
| 24 | - model: str = "qwen", | |
| 25 | - timeout_sec: float = 10.0, | |
| 26 | - translation_context: Optional[str] = None, | |
| 27 | - ): | |
| 28 | - self.base_url = (base_url or "").rstrip("/") | |
| 29 | - self.model = model or "qwen" | |
| 30 | - self.timeout_sec = float(timeout_sec or 10.0) | |
| 31 | - self.translation_context = translation_context or "e-commerce product search" | |
| 32 | - self.executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="http-translator") | |
| 33 | - | |
| 34 | - def _translate_once( | |
| 35 | - self, | |
| 36 | - text: str, | |
| 37 | - target_lang: str, | |
| 38 | - source_lang: Optional[str] = None, | |
| 39 | - ) -> Optional[str]: | |
| 40 | - if not text or not str(text).strip(): | |
| 41 | - return text | |
| 42 | - try: | |
| 43 | - url = f"{self.base_url}/translate" | |
| 44 | - payload = { | |
| 45 | - "text": text, | |
| 46 | - "target_lang": target_lang, | |
| 47 | - "source_lang": source_lang or "auto", | |
| 48 | - "model": self.model, | |
| 49 | - } | |
| 50 | - response = requests.post(url, json=payload, timeout=self.timeout_sec) | |
| 51 | - if response.status_code != 200: | |
| 52 | - logger.warning( | |
| 53 | - "HTTP translator failed: status=%s body=%s", | |
| 54 | - response.status_code, | |
| 55 | - (response.text or "")[:200], | |
| 56 | - ) | |
| 57 | - return None | |
| 58 | - data = response.json() | |
| 59 | - translated = data.get("translated_text") | |
| 60 | - return translated if translated is not None else None | |
| 61 | - except Exception as exc: | |
| 62 | - logger.warning("HTTP translator request failed: %s", exc, exc_info=True) | |
| 63 | - return None | |
| 64 | - | |
| 65 | - def translate( | |
| 66 | - self, | |
| 67 | - text: str, | |
| 68 | - target_lang: str, | |
| 69 | - source_lang: Optional[str] = None, | |
| 70 | - context: Optional[str] = None, | |
| 71 | - prompt: Optional[str] = None, | |
| 72 | - ) -> Optional[str]: | |
| 73 | - del context, prompt | |
| 74 | - result = self._translate_once(text=text, target_lang=target_lang, source_lang=source_lang) | |
| 75 | - return result if result is not None else text | |
| 76 | - | |
| 77 | - def translate_multi( | |
| 78 | - self, | |
| 79 | - text: str, | |
| 80 | - target_langs: List[str], | |
| 81 | - source_lang: Optional[str] = None, | |
| 82 | - context: Optional[str] = None, | |
| 83 | - async_mode: bool = True, | |
| 84 | - prompt: Optional[str] = None, | |
| 85 | - ) -> Dict[str, Optional[str]]: | |
| 86 | - del context, async_mode, prompt | |
| 87 | - out: Dict[str, Optional[str]] = {} | |
| 88 | - for lang in target_langs: | |
| 89 | - out[lang] = self.translate(text, lang, source_lang=source_lang) | |
| 90 | - return out | |
| 91 | - | |
| 92 | - def translate_multi_async( | |
| 93 | - self, | |
| 94 | - text: str, | |
| 95 | - target_langs: List[str], | |
| 96 | - source_lang: Optional[str] = None, | |
| 97 | - context: Optional[str] = None, | |
| 98 | - prompt: Optional[str] = None, | |
| 99 | - ) -> Dict[str, Union[str, Future]]: | |
| 100 | - del context, prompt | |
| 101 | - out: Dict[str, Union[str, Future]] = {} | |
| 102 | - for lang in target_langs: | |
| 103 | - out[lang] = self.executor.submit(self.translate, text, lang, source_lang) | |
| 104 | - return out | |
| 105 | - | |
| 106 | - def translate_for_indexing( | |
| 107 | - self, | |
| 108 | - text: str, | |
| 109 | - shop_language: str, | |
| 110 | - source_lang: Optional[str] = None, | |
| 111 | - context: Optional[str] = None, | |
| 112 | - prompt: Optional[str] = None, | |
| 113 | - index_languages: Optional[List[str]] = None, | |
| 114 | - ) -> Dict[str, Optional[str]]: | |
| 115 | - del context, prompt | |
| 116 | - langs = index_languages if index_languages else ["en", "zh"] | |
| 117 | - source = source_lang or shop_language or "auto" | |
| 118 | - out: Dict[str, Optional[str]] = {} | |
| 119 | - for lang in langs: | |
| 120 | - if lang == shop_language: | |
| 121 | - out[lang] = text | |
| 122 | - else: | |
| 123 | - out[lang] = self.translate(text, target_lang=lang, source_lang=source) | |
| 124 | - return out | |
| 125 | - | |
| 126 | - | |
| 127 | -def create_translation_provider(query_config: Any = None) -> Any: | |
| 128 | - """ | |
| 129 | - Create translation provider from services config. | |
| 130 | - | |
| 131 | - query_config: optional, for api_key/glossary_id/context (used by direct provider). | |
| 132 | - """ | |
| 133 | - cfg = get_translation_config() | |
| 134 | - provider = cfg.provider | |
| 135 | - pc = cfg.get_provider_cfg() | |
| 136 | - | |
| 137 | - if provider in ("direct", "local", "inprocess"): | |
| 138 | - from query.translator import Translator | |
| 139 | - model = pc.get("model") or "qwen" | |
| 140 | - qc = query_config or _empty_query_config() | |
| 141 | - return Translator( | |
| 142 | - model=model, | |
| 143 | - api_key=getattr(qc, "translation_api_key", None), | |
| 144 | - use_cache=True, | |
| 145 | - glossary_id=getattr(qc, "translation_glossary_id", None), | |
| 146 | - translation_context=getattr(qc, "translation_context", "e-commerce product search"), | |
| 147 | - ) | |
| 148 | - | |
| 149 | - if provider in ("http", "service"): | |
| 150 | - base_url = get_translation_base_url() | |
| 151 | - model = pc.get("model") or "qwen" | |
| 152 | - timeout = pc.get("timeout_sec", 10.0) | |
| 153 | - qc = query_config or _empty_query_config() | |
| 154 | - return HttpTranslationProvider( | |
| 155 | - base_url=base_url, | |
| 156 | - model=model, | |
| 157 | - timeout_sec=float(timeout), | |
| 158 | - translation_context=getattr(qc, "translation_context", "e-commerce product search"), | |
| 159 | - ) | |
| 160 | - | |
| 161 | - raise ValueError(f"Unsupported translation provider: {provider}") | |
| 162 | - | |
| 163 | - | |
| 164 | -def _empty_query_config() -> Any: | |
| 165 | - """Minimal object with default translation attrs.""" | |
| 166 | - class _QC: | |
| 167 | - translation_api_key = None | |
| 168 | - translation_glossary_id = None | |
| 169 | - translation_context = "e-commerce product search" | |
| 170 | - return _QC() |
query/translator.py
| ... | ... | @@ -5,6 +5,11 @@ Supports multiple translation models: |
| 5 | 5 | - Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model |
| 6 | 6 | - DeepL: DeepL API for high-quality translations |
| 7 | 7 | |
| 8 | +重要说明(Qwen 机翻限速): | |
| 9 | +- 当前默认使用的 `qwen-mt-flash` 为云端机翻模型,**官方限速较低,约 RPM=60(每分钟约 60 请求)** | |
| 10 | +- 在高并发场景必须依赖 Redis 翻译缓存与批量预热,避免在用户实时请求路径上直接打满 DashScope 限流 | |
| 11 | +- 若业务侧存在大规模离线翻译或更高吞吐需求,建议评估 DeepL 或自建翻译后端 | |
| 12 | + | |
| 8 | 13 | 使用方法 (Usage): |
| 9 | 14 | |
| 10 | 15 | ```python | ... | ... |
reranker/backends/batching_utils.py deleted
| ... | ... | @@ -1,41 +0,0 @@ |
| 1 | -"""Utilities for reranker batching and deduplication.""" | |
| 2 | - | |
| 3 | -from __future__ import annotations | |
| 4 | - | |
| 5 | -from typing import Iterable, List, Sequence, Tuple | |
| 6 | - | |
| 7 | - | |
| 8 | -def deduplicate_with_positions(texts: Sequence[str]) -> Tuple[List[str], List[int]]: | |
| 9 | - """ | |
| 10 | - Deduplicate texts globally while preserving first-seen order. | |
| 11 | - | |
| 12 | - Returns: | |
| 13 | - unique_texts: deduplicated texts in first-seen order | |
| 14 | - position_to_unique: mapping from each original position to unique index | |
| 15 | - """ | |
| 16 | - unique_texts: List[str] = [] | |
| 17 | - position_to_unique: List[int] = [] | |
| 18 | - seen: dict[str, int] = {} | |
| 19 | - | |
| 20 | - for text in texts: | |
| 21 | - idx = seen.get(text) | |
| 22 | - if idx is None: | |
| 23 | - idx = len(unique_texts) | |
| 24 | - seen[text] = idx | |
| 25 | - unique_texts.append(text) | |
| 26 | - position_to_unique.append(idx) | |
| 27 | - | |
| 28 | - return unique_texts, position_to_unique | |
| 29 | - | |
| 30 | - | |
| 31 | -def sort_indices_by_length(lengths: Sequence[int]) -> List[int]: | |
| 32 | - """Return stable ascending indices by lengths.""" | |
| 33 | - return sorted(range(len(lengths)), key=lambda i: lengths[i]) | |
| 34 | - | |
| 35 | - | |
| 36 | -def iter_batches(indices: Sequence[int], batch_size: int) -> Iterable[List[int]]: | |
| 37 | - """Yield consecutive batches from indices.""" | |
| 38 | - if batch_size <= 0: | |
| 39 | - raise ValueError(f"batch_size must be > 0, got {batch_size}") | |
| 40 | - for i in range(0, len(indices), batch_size): | |
| 41 | - yield list(indices[i : i + batch_size]) |
reranker/backends/dashscope_rerank.py
| ... | ... | @@ -21,11 +21,33 @@ from typing import Any, Dict, List, Tuple |
| 21 | 21 | from urllib import error as urllib_error |
| 22 | 22 | from urllib import request as urllib_request |
| 23 | 23 | |
| 24 | -from reranker.backends.batching_utils import deduplicate_with_positions, iter_batches | |
| 25 | 24 | |
| 26 | 25 | logger = logging.getLogger("reranker.backends.dashscope_rerank") |
| 27 | 26 | |
| 28 | 27 | |
| 28 | +def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]: | |
| 29 | + """ | |
| 30 | + Deduplicate texts globally while preserving first-seen order. | |
| 31 | + | |
| 32 | + Returns: | |
| 33 | + unique_texts: deduplicated texts in first-seen order | |
| 34 | + position_to_unique: mapping from each original position to unique index | |
| 35 | + """ | |
| 36 | + unique_texts: List[str] = [] | |
| 37 | + position_to_unique: List[int] = [] | |
| 38 | + seen: Dict[str, int] = {} | |
| 39 | + | |
| 40 | + for text in texts: | |
| 41 | + idx = seen.get(text) | |
| 42 | + if idx is None: | |
| 43 | + idx = len(unique_texts) | |
| 44 | + seen[text] = idx | |
| 45 | + unique_texts.append(text) | |
| 46 | + position_to_unique.append(idx) | |
| 47 | + | |
| 48 | + return unique_texts, position_to_unique | |
| 49 | + | |
| 50 | + | |
| 29 | 51 | class DashScopeRerankBackend: |
| 30 | 52 | """ |
| 31 | 53 | DashScope cloud reranker backend. |
| ... | ... | @@ -206,7 +228,10 @@ class DashScopeRerankBackend: |
| 206 | 228 | then apply global top_n/top_n_cap truncation after merge if needed. |
| 207 | 229 | """ |
| 208 | 230 | indices = list(range(len(unique_texts))) |
| 209 | - batches = list(iter_batches(indices, batch_size=self._batchsize)) | |
| 231 | + batches = [ | |
| 232 | + indices[i : i + self._batchsize] | |
| 233 | + for i in range(0, len(indices), self._batchsize) | |
| 234 | + ] | |
| 210 | 235 | num_batches = len(batches) |
| 211 | 236 | max_workers = min(8, num_batches) if num_batches > 0 else 1 |
| 212 | 237 | unique_scores: List[float] = [0.0] * len(unique_texts) | ... | ... |
reranker/backends/qwen3_vllm.py
| ... | ... | @@ -14,12 +14,6 @@ import threading |
| 14 | 14 | import time |
| 15 | 15 | from typing import Any, Dict, List, Tuple |
| 16 | 16 | |
| 17 | -from reranker.backends.batching_utils import ( | |
| 18 | - deduplicate_with_positions, | |
| 19 | - iter_batches, | |
| 20 | - sort_indices_by_length, | |
| 21 | -) | |
| 22 | - | |
| 23 | 17 | logger = logging.getLogger("reranker.backends.qwen3_vllm") |
| 24 | 18 | |
| 25 | 19 | try: |
| ... | ... | @@ -34,6 +28,29 @@ except ImportError as e: |
| 34 | 28 | ) from e |
| 35 | 29 | |
| 36 | 30 | |
| 31 | +def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]: | |
| 32 | + """ | |
| 33 | + Deduplicate texts globally while preserving first-seen order. | |
| 34 | + | |
| 35 | + Returns: | |
| 36 | + unique_texts: deduplicated texts in first-seen order | |
| 37 | + position_to_unique: mapping from each original position to unique index | |
| 38 | + """ | |
| 39 | + unique_texts: List[str] = [] | |
| 40 | + position_to_unique: List[int] = [] | |
| 41 | + seen: Dict[str, int] = {} | |
| 42 | + | |
| 43 | + for text in texts: | |
| 44 | + idx = seen.get(text) | |
| 45 | + if idx is None: | |
| 46 | + idx = len(unique_texts) | |
| 47 | + seen[text] = idx | |
| 48 | + unique_texts.append(text) | |
| 49 | + position_to_unique.append(idx) | |
| 50 | + | |
| 51 | + return unique_texts, position_to_unique | |
| 52 | + | |
| 53 | + | |
| 37 | 54 | def _format_instruction(instruction: str, query: str, doc: str) -> List[Dict[str, str]]: |
| 38 | 55 | """Build chat messages for one (query, doc) pair.""" |
| 39 | 56 | return [ |
| ... | ... | @@ -71,19 +88,17 @@ class Qwen3VLLMRerankerBackend: |
| 71 | 88 | sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH") |
| 72 | 89 | if sort_by_doc_length is None: |
| 73 | 90 | sort_by_doc_length = self._config.get("sort_by_doc_length", True) |
| 74 | - length_sort_mode = os.getenv("RERANK_VLLM_LENGTH_SORT_MODE") or self._config.get("length_sort_mode", "char") | |
| 75 | 91 | |
| 76 | 92 | self._infer_batch_size = int(infer_batch_size) |
| 77 | 93 | self._sort_by_doc_length = str(sort_by_doc_length).strip().lower() in {"1", "true", "yes", "y", "on"} |
| 78 | - self._length_sort_mode = str(length_sort_mode).strip().lower() | |
| 79 | 94 | if not torch.cuda.is_available(): |
| 80 | 95 | raise RuntimeError("qwen3_vllm backend requires CUDA GPU, but torch.cuda.is_available() is False") |
| 81 | 96 | if dtype not in {"float16", "half", "auto"}: |
| 82 | 97 | raise ValueError(f"Unsupported dtype for qwen3_vllm: {dtype!r}. Use float16/half/auto.") |
| 83 | 98 | if self._infer_batch_size <= 0: |
| 84 | - raise ValueError(f"infer_batch_size must be > 0, got {self._infer_batch_size}") | |
| 85 | - if self._length_sort_mode not in {"char", "token"}: | |
| 86 | - raise ValueError(f"length_sort_mode must be 'char' or 'token', got {self._length_sort_mode!r}") | |
| 99 | + raise ValueError( | |
| 100 | + f"infer_batch_size must be > 0, got {self._infer_batch_size}" | |
| 101 | + ) | |
| 87 | 102 | |
| 88 | 103 | logger.info( |
| 89 | 104 | "[Qwen3_VLLM] Loading model %s (max_model_len=%s, tp=%s, gpu_mem=%.2f, dtype=%s, prefix_caching=%s)", |
| ... | ... | @@ -196,21 +211,7 @@ class Qwen3VLLMRerankerBackend: |
| 196 | 211 | """ |
| 197 | 212 | if not docs: |
| 198 | 213 | return [] |
| 199 | - if self._length_sort_mode == "char": | |
| 200 | - return [len(text) for text in docs] | |
| 201 | - try: | |
| 202 | - enc = self._tokenizer( | |
| 203 | - docs, | |
| 204 | - add_special_tokens=False, | |
| 205 | - truncation=True, | |
| 206 | - max_length=self._max_prompt_len, | |
| 207 | - return_length=True, | |
| 208 | - ) | |
| 209 | - lengths = enc.get("length") | |
| 210 | - if isinstance(lengths, list) and len(lengths) == len(docs): | |
| 211 | - return [int(x) for x in lengths] | |
| 212 | - except Exception as exc: | |
| 213 | - logger.debug("Length estimation fallback to char length: %s", exc) | |
| 214 | + # Use simple character length to approximate document length. | |
| 214 | 215 | return [len(text) for text in docs] |
| 215 | 216 | |
| 216 | 217 | def score_with_meta( |
| ... | ... | @@ -247,7 +248,6 @@ class Qwen3VLLMRerankerBackend: |
| 247 | 248 | "infer_batch_size": self._infer_batch_size, |
| 248 | 249 | "inference_batches": 0, |
| 249 | 250 | "sort_by_doc_length": self._sort_by_doc_length, |
| 250 | - "length_sort_mode": self._length_sort_mode, | |
| 251 | 251 | } |
| 252 | 252 | |
| 253 | 253 | # Deduplicate globally by text, keep mapping to original indices. |
| ... | ... | @@ -257,11 +257,12 @@ class Qwen3VLLMRerankerBackend: |
| 257 | 257 | lengths = self._estimate_doc_lengths(unique_texts) |
| 258 | 258 | order = list(range(len(unique_texts))) |
| 259 | 259 | if self._sort_by_doc_length and len(unique_texts) > 1: |
| 260 | - order = sort_indices_by_length(lengths) | |
| 260 | + order = sorted(order, key=lambda i: lengths[i]) | |
| 261 | 261 | |
| 262 | 262 | unique_scores: List[float] = [0.0] * len(unique_texts) |
| 263 | 263 | inference_batches = 0 |
| 264 | - for batch_indices in iter_batches(order, self._infer_batch_size): | |
| 264 | + for start in range(0, len(order), self._infer_batch_size): | |
| 265 | + batch_indices = order[start : start + self._infer_batch_size] | |
| 265 | 266 | inference_batches += 1 |
| 266 | 267 | pairs = [(query, unique_texts[i]) for i in batch_indices] |
| 267 | 268 | prompts = self._process_inputs(pairs) | ... | ... |
scripts/service_ctl.sh
| ... | ... | @@ -684,6 +684,7 @@ status_one() { |
| 684 | 684 | local pid_info="-" |
| 685 | 685 | local health="down" |
| 686 | 686 | local health_body="" |
| 687 | + local curl_timeout_opts=(--connect-timeout 8 --max-time 8) | |
| 687 | 688 | |
| 688 | 689 | if [ "${service}" = "tei" ]; then |
| 689 | 690 | local cid |
| ... | ... | @@ -696,7 +697,7 @@ status_one() { |
| 696 | 697 | local path |
| 697 | 698 | path="$(health_path_for_service "${service}")" |
| 698 | 699 | if [ -n "${port}" ] && [ -n "${path}" ]; then |
| 699 | - if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then | |
| 700 | + if health_body="$(curl -fsS "${curl_timeout_opts[@]}" "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then | |
| 700 | 701 | health="ok" |
| 701 | 702 | else |
| 702 | 703 | health="fail" |
| ... | ... | @@ -723,7 +724,7 @@ status_one() { |
| 723 | 724 | local path |
| 724 | 725 | path="$(health_path_for_service "${service}")" |
| 725 | 726 | if [ -n "${port}" ] && [ -n "${path}" ]; then |
| 726 | - if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then | |
| 727 | + if health_body="$(curl -fsS "${curl_timeout_opts[@]}" "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then | |
| 727 | 728 | health="ok" |
| 728 | 729 | else |
| 729 | 730 | health="fail" | ... | ... |
search/searcher.py
| ... | ... | @@ -598,7 +598,6 @@ class Searcher: |
| 598 | 598 | es_hits = [] |
| 599 | 599 | if 'hits' in es_response and 'hits' in es_response['hits']: |
| 600 | 600 | es_hits = es_response['hits']['hits'] |
| 601 | - | |
| 602 | 601 | # Extract total and max_score |
| 603 | 602 | total = es_response.get('hits', {}).get('total', {}) |
| 604 | 603 | if isinstance(total, dict): |
| ... | ... | @@ -616,6 +615,37 @@ class Searcher: |
| 616 | 615 | sku_filter_dimension=sku_filter_dimension |
| 617 | 616 | ) |
| 618 | 617 | |
| 618 | + # Build per-result debug info (per SPU) when debug mode is enabled | |
| 619 | + per_result_debug = [] | |
| 620 | + if debug and es_hits and formatted_results: | |
| 621 | + for hit, spu in zip(es_hits, formatted_results): | |
| 622 | + source = hit.get("_source", {}) or {} | |
| 623 | + raw_score = hit.get("_score") | |
| 624 | + try: | |
| 625 | + es_score = float(raw_score) if raw_score is not None else 0.0 | |
| 626 | + except (TypeError, ValueError): | |
| 627 | + es_score = 0.0 | |
| 628 | + try: | |
| 629 | + normalized = float(es_score) / float(max_score) if max_score else None | |
| 630 | + except (TypeError, ValueError, ZeroDivisionError): | |
| 631 | + normalized = None | |
| 632 | + | |
| 633 | + title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None | |
| 634 | + brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None | |
| 635 | + vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None | |
| 636 | + | |
| 637 | + per_result_debug.append( | |
| 638 | + { | |
| 639 | + "spu_id": spu.spu_id, | |
| 640 | + "es_id": hit.get("_id"), | |
| 641 | + "es_score": es_score, | |
| 642 | + "es_score_normalized": normalized, | |
| 643 | + "title_multilingual": title_multilingual, | |
| 644 | + "brief_multilingual": brief_multilingual, | |
| 645 | + "vendor_multilingual": vendor_multilingual, | |
| 646 | + } | |
| 647 | + ) | |
| 648 | + | |
| 619 | 649 | # Format facets |
| 620 | 650 | standardized_facets = None |
| 621 | 651 | if facets: |
| ... | ... | @@ -676,6 +706,8 @@ class Searcher: |
| 676 | 706 | }, |
| 677 | 707 | "search_params": context.metadata.get('search_params', {}) |
| 678 | 708 | } |
| 709 | + if per_result_debug: | |
| 710 | + debug_info["per_result"] = per_result_debug | |
| 679 | 711 | |
| 680 | 712 | # Build result |
| 681 | 713 | result = SearchResult( | ... | ... |
tests/test_reranker_batching_utils.py deleted
| ... | ... | @@ -1,32 +0,0 @@ |
| 1 | -import pytest | |
| 2 | - | |
| 3 | -from reranker.backends.batching_utils import ( | |
| 4 | - deduplicate_with_positions, | |
| 5 | - iter_batches, | |
| 6 | - sort_indices_by_length, | |
| 7 | -) | |
| 8 | - | |
| 9 | - | |
| 10 | -def test_deduplicate_with_positions_global_not_adjacent(): | |
| 11 | - texts = ["a", "b", "a", "c", "b", "a"] | |
| 12 | - unique, mapping = deduplicate_with_positions(texts) | |
| 13 | - assert unique == ["a", "b", "c"] | |
| 14 | - assert mapping == [0, 1, 0, 2, 1, 0] | |
| 15 | - | |
| 16 | - | |
| 17 | -def test_sort_indices_by_length_stable(): | |
| 18 | - lengths = [5, 2, 2, 9, 4] | |
| 19 | - order = sort_indices_by_length(lengths) | |
| 20 | - # Stable sort: index 1 remains ahead of index 2 when lengths are equal. | |
| 21 | - assert order == [1, 2, 4, 0, 3] | |
| 22 | - | |
| 23 | - | |
| 24 | -def test_iter_batches(): | |
| 25 | - indices = list(range(10)) | |
| 26 | - batches = list(iter_batches(indices, 4)) | |
| 27 | - assert batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]] | |
| 28 | - | |
| 29 | - | |
| 30 | -def test_iter_batches_invalid_batch_size(): | |
| 31 | - with pytest.raises(ValueError, match="batch_size must be > 0"): | |
| 32 | - list(iter_batches([0, 1], 0)) |