Commit af827ce958e53ce67a2f0ee48052d5830893aed0
1 parent
33f8f578
rerank
Showing
8 changed files
with
143 additions
and
19 deletions
Show diff stats
docs/TODO.txt
| 1 | 1 | ||
| 2 | 2 | ||
| 3 | + | ||
| 4 | + | ||
| 5 | + | ||
| 6 | + | ||
| 7 | + | ||
| 8 | + | ||
| 9 | +翻译的health很慢 | ||
| 10 | + | ||
| 11 | + | ||
| 12 | + | ||
| 13 | + | ||
| 14 | +translator的设计 : | ||
| 15 | + | ||
| 16 | +QueryParser 里面并不是调用 6006;目前是把 6006 做成了一个 provider,而 translation 的总体配置里又有 6006 的 base_url,很混乱!!! | ||
| 17 | + | ||
| 18 | +config.yaml 里面的翻译配置不是“6006 专用配置”,而是搜索服务和翻译服务共用的统一配置。 | ||
| 19 | +6006本来之前是做一个provider。 | ||
| 20 | +结果后面改造成了综合体,但是还没改完,改到一半发现之前的实现跟我的设计或者想法有偏差。 | ||
| 21 | + | ||
| 22 | + | ||
| 23 | +需要继续改完!!!!!!!! | ||
| 24 | + | ||
| 25 | + | ||
| 26 | +- `config.yaml` **不是“6006 专用配置”**,而是整个系统的 **统一 services 配置**,由 `config/services_config.py` 读取,**搜索 API 进程和翻译服务进程都会用到它**。 | ||
| 27 | +- 关键决定行为的是这一行: | ||
| 28 | + | ||
| 29 | +```yaml | ||
| 30 | +translation: | ||
| 31 | + provider: "llm" | ||
| 32 | +``` | ||
| 33 | + | ||
| 34 | +在当前配置下: | ||
| 35 | + | ||
| 36 | +- 搜索 API 进程里,`QueryParser` 初始化翻译器时走的是: | ||
| 37 | + | ||
| 38 | +```python | ||
| 39 | +create_translation_provider(...) # provider == "llm" | ||
| 40 | +``` | ||
| 41 | + | ||
| 42 | +进而返回的是 `LLMTranslatorProvider`(本进程内调用),**不会走 `base_url`,也不会走 6006 端口**。 | ||
| 43 | +- `base_url: "http://127.0.0.1:6006"` 只在 `provider: "http"` / `"service"` 时被 `HttpTranslationProvider` 使用;在 `provider: "llm"` 时,这个字段对 `QueryParser` 是完全被忽略的。 | ||
| 44 | + | ||
| 45 | +所以现在的实际情况是: | ||
| 46 | + | ||
| 47 | +- **QueryParser 中的翻译是“本进程直连 LLM API”**,所以日志在搜索后端自己的日志文件里。 | ||
| 48 | +- 如果你希望「QueryParser 永远通过 6006 端口的翻译服务」,需要把 provider 改成 HTTP: | ||
| 49 | + | ||
| 50 | +```yaml | ||
| 51 | +translation: | ||
| 52 | + provider: "http" # ← 改成 http 或 service | ||
| 53 | + cache: ... | ||
| 54 | + providers: | ||
| 55 | + http: | ||
| 56 | + base_url: "http://127.0.0.1:6006" | ||
| 57 | + model: "llm" # 或 "qwen-mt-flash",看你想用哪个 | ||
| 58 | + timeout_sec: 10.0 | ||
| 59 | + llm: | ||
| 60 | + model: "qwen-flash" # 留给翻译服务自身内部使用 | ||
| 61 | + qwen-mt: ... | ||
| 62 | + deepl: ... | ||
| 63 | +``` | ||
| 64 | + | ||
| 65 | + | ||
| 66 | + | ||
| 67 | + | ||
| 68 | + | ||
| 69 | + | ||
| 70 | + | ||
| 71 | + | ||
| 72 | + | ||
| 73 | + | ||
| 74 | + | ||
| 75 | + | ||
| 76 | +查看翻译的缓存情况 | ||
| 77 | + | ||
| 78 | +向量的缓存 | ||
| 79 | + | ||
| 80 | + | ||
| 81 | + | ||
| 82 | + | ||
| 83 | + | ||
| 84 | + | ||
| 85 | + | ||
| 86 | + | ||
| 3 | AI - 生产 - MySQL | 87 | AI - 生产 - MySQL |
| 4 | HOST:10.200.16.14 / localhost | 88 | HOST:10.200.16.14 / localhost |
| 5 | 端口:3316 | 89 | 端口:3316 |
frontend/static/js/app.js
| @@ -367,6 +367,18 @@ function displayResults(data) { | @@ -367,6 +367,18 @@ function displayResults(data) { | ||
| 367 | ? debug.es_score_normalized.toFixed(4) | 367 | ? debug.es_score_normalized.toFixed(4) |
| 368 | : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); | 368 | : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); |
| 369 | 369 | ||
| 370 | + const esNormRerank = typeof debug.es_score_norm === 'number' | ||
| 371 | + ? debug.es_score_norm.toFixed(4) | ||
| 372 | + : (debug.es_score_norm == null ? '' : String(debug.es_score_norm)); | ||
| 373 | + | ||
| 374 | + const rerankScore = typeof debug.rerank_score === 'number' | ||
| 375 | + ? debug.rerank_score.toFixed(4) | ||
| 376 | + : (debug.rerank_score == null ? '' : String(debug.rerank_score)); | ||
| 377 | + | ||
| 378 | + const fusedScore = typeof debug.fused_score === 'number' | ||
| 379 | + ? debug.fused_score.toFixed(4) | ||
| 380 | + : (debug.fused_score == null ? '' : String(debug.fused_score)); | ||
| 381 | + | ||
| 370 | // Build multilingual title info | 382 | // Build multilingual title info |
| 371 | let titleLines = ''; | 383 | let titleLines = ''; |
| 372 | if (debug.title_multilingual && typeof debug.title_multilingual === 'object') { | 384 | if (debug.title_multilingual && typeof debug.title_multilingual === 'object') { |
| @@ -385,6 +397,9 @@ function displayResults(data) { | @@ -385,6 +397,9 @@ function displayResults(data) { | ||
| 385 | <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> | 397 | <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> |
| 386 | <div class="product-debug-line">ES score: ${esScore}</div> | 398 | <div class="product-debug-line">ES score: ${esScore}</div> |
| 387 | <div class="product-debug-line">ES normalized: ${esNorm}</div> | 399 | <div class="product-debug-line">ES normalized: ${esNorm}</div> |
| 400 | + <div class="product-debug-line">ES norm (rerank input): ${esNormRerank}</div> | ||
| 401 | + <div class="product-debug-line">Rerank score: ${rerankScore}</div> | ||
| 402 | + <div class="product-debug-line">Fused score: ${fusedScore}</div> | ||
| 388 | ${titleLines} | 403 | ${titleLines} |
| 389 | <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | 404 | <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> |
| 390 | 查看 ES 原始文档 | 405 | 查看 ES 原始文档 |
indexer/document_transformer.py
| @@ -345,7 +345,7 @@ class SPUDocumentTransformer: | @@ -345,7 +345,7 @@ class SPUDocumentTransformer: | ||
| 345 | text=title_text, | 345 | text=title_text, |
| 346 | source_lang=primary_lang, | 346 | source_lang=primary_lang, |
| 347 | index_languages=index_langs, | 347 | index_languages=index_langs, |
| 348 | - scene="product_title", | 348 | + scene="sku_name", |
| 349 | ) | 349 | ) |
| 350 | _set_lang_obj("title", title_text, translations) | 350 | _set_lang_obj("title", title_text, translations) |
| 351 | 351 |
reranker/DEPLOYMENT_AND_TUNING.md
| @@ -144,7 +144,6 @@ curl -sS http://127.0.0.1:6007/health | @@ -144,7 +144,6 @@ curl -sS http://127.0.0.1:6007/health | ||
| 144 | 144 | ||
| 145 | - `RERANK_VLLM_INFER_BATCH_SIZE` | 145 | - `RERANK_VLLM_INFER_BATCH_SIZE` |
| 146 | - `RERANK_VLLM_SORT_BY_DOC_LENGTH` | 146 | - `RERANK_VLLM_SORT_BY_DOC_LENGTH` |
| 147 | -- `RERANK_VLLM_LENGTH_SORT_MODE` | ||
| 148 | 147 | ||
| 149 | ## 6. 本轮关键结论(2026-03-11) | 148 | ## 6. 本轮关键结论(2026-03-11) |
| 150 | 149 |
reranker/README.md
| @@ -158,6 +158,6 @@ uvicorn reranker.server:app --host 0.0.0.0 --port 6007 --log-level info | @@ -158,6 +158,6 @@ uvicorn reranker.server:app --host 0.0.0.0 --port 6007 --log-level info | ||
| 158 | - 无请求级缓存;输入按字符串去重后推理,再按原始顺序回填分数。 | 158 | - 无请求级缓存;输入按字符串去重后推理,再按原始顺序回填分数。 |
| 159 | - 空或 null 的 doc 跳过并计为 0。 | 159 | - 空或 null 的 doc 跳过并计为 0。 |
| 160 | - **Qwen3-vLLM 分批策略**:`docs` 请求体可为 1000+,服务端会按 `infer_batch_size` 拆分;当 `sort_by_doc_length=true` 时,会先按文档长度排序后分批,减少 padding 开销,最终再按输入顺序回填分数。`length_sort_mode` 支持 `char`(默认,更快)与 `token`(更精确)。 | 160 | - **Qwen3-vLLM 分批策略**:`docs` 请求体可为 1000+,服务端会按 `infer_batch_size` 拆分;当 `sort_by_doc_length=true` 时,会先按文档长度排序后分批,减少 padding 开销,最终再按输入顺序回填分数。`length_sort_mode` 支持 `char`(默认,更快)与 `token`(更精确)。 |
| 161 | -- 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`、`RERANK_VLLM_LENGTH_SORT_MODE`。 | 161 | +- 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`。 |
| 162 | - **Qwen3-vLLM**:参考 [Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B),需 GPU 与较多显存;与 BGE 相比适合长文本、高吞吐场景(vLLM 前缀缓存)。 | 162 | - **Qwen3-vLLM**:参考 [Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B),需 GPU 与较多显存;与 BGE 相比适合长文本、高吞吐场景(vLLM 前缀缓存)。 |
| 163 | - **Qwen3-Transformers**:官方 Transformers Usage 方式,无需 vLLM;适合 CPU 或小显存,可选 `attn_implementation: "flash_attention_2"` 加速。 | 163 | - **Qwen3-Transformers**:官方 Transformers Usage 方式,无需 vLLM;适合 CPU 或小显存,可选 `attn_implementation: "flash_attention_2"` 加速。 |
reranker/backends/qwen3_vllm.py
| @@ -295,7 +295,6 @@ class Qwen3VLLMRerankerBackend: | @@ -295,7 +295,6 @@ class Qwen3VLLMRerankerBackend: | ||
| 295 | "normalize": normalize, | 295 | "normalize": normalize, |
| 296 | "infer_batch_size": self._infer_batch_size, | 296 | "infer_batch_size": self._infer_batch_size, |
| 297 | "inference_batches": inference_batches, | 297 | "inference_batches": inference_batches, |
| 298 | - "sort_by_doc_length": self._sort_by_doc_length, | ||
| 299 | - "length_sort_mode": self._length_sort_mode, | 298 | + "sort_by_doc_length": self._sort_by_doc_length |
| 300 | } | 299 | } |
| 301 | return output_scores, meta | 300 | return output_scores, meta |
search/rerank_client.py
| @@ -103,13 +103,12 @@ def fuse_scores_and_resort( | @@ -103,13 +103,12 @@ def fuse_scores_and_resort( | ||
| 103 | weight_ai: float = DEFAULT_WEIGHT_AI, | 103 | weight_ai: float = DEFAULT_WEIGHT_AI, |
| 104 | ) -> List[Dict[str, Any]]: | 104 | ) -> List[Dict[str, Any]]: |
| 105 | """ | 105 | """ |
| 106 | - 将 ES 分数与重排分数线性融合,写回每条 hit 的 _score,并按融合分数降序重排。 | 106 | + 将 ES 分数与重排分数线性融合(不修改原始 _score),并按融合分数降序重排。 |
| 107 | 107 | ||
| 108 | 对每条 hit 会写入: | 108 | 对每条 hit 会写入: |
| 109 | - _original_score: 原始 ES 分数 | 109 | - _original_score: 原始 ES 分数 |
| 110 | - _rerank_score: 重排服务返回的分数 | 110 | - _rerank_score: 重排服务返回的分数 |
| 111 | - _fused_score: 融合分数 | 111 | - _fused_score: 融合分数 |
| 112 | - - _score: 置为融合分数(供后续 ResultFormatter 使用) | ||
| 113 | 112 | ||
| 114 | Args: | 113 | Args: |
| 115 | es_hits: ES hits 列表(会被原地修改) | 114 | es_hits: ES hits 列表(会被原地修改) |
| @@ -150,7 +149,6 @@ def fuse_scores_and_resort( | @@ -150,7 +149,6 @@ def fuse_scores_and_resort( | ||
| 150 | hit["_original_score"] = hit.get("_score") | 149 | hit["_original_score"] = hit.get("_score") |
| 151 | hit["_rerank_score"] = rerank_score | 150 | hit["_rerank_score"] = rerank_score |
| 152 | hit["_fused_score"] = fused | 151 | hit["_fused_score"] = fused |
| 153 | - hit["_score"] = fused | ||
| 154 | 152 | ||
| 155 | fused_debug.append({ | 153 | fused_debug.append({ |
| 156 | "doc_id": hit.get("_id"), | 154 | "doc_id": hit.get("_id"), |
search/searcher.py
| @@ -540,7 +540,11 @@ class Searcher: | @@ -540,7 +540,11 @@ class Searcher: | ||
| 540 | sliced = hits[from_ : from_ + size] | 540 | sliced = hits[from_ : from_ + size] |
| 541 | es_response.setdefault("hits", {})["hits"] = sliced | 541 | es_response.setdefault("hits", {})["hits"] = sliced |
| 542 | if sliced: | 542 | if sliced: |
| 543 | - slice_max = max((h.get("_score") for h in sliced), default=0.0) | 543 | + # 对于启用重排的结果,优先使用 _fused_score 计算 max_score;否则退回原始 _score |
| 544 | + slice_max = max( | ||
| 545 | + (h.get("_fused_score", h.get("_score", 0.0)) for h in sliced), | ||
| 546 | + default=0.0, | ||
| 547 | + ) | ||
| 544 | try: | 548 | try: |
| 545 | es_response["hits"]["max_score"] = float(slice_max) | 549 | es_response["hits"]["max_score"] = float(slice_max) |
| 546 | except (TypeError, ValueError): | 550 | except (TypeError, ValueError): |
| @@ -607,6 +611,18 @@ class Searcher: | @@ -607,6 +611,18 @@ class Searcher: | ||
| 607 | # max_score 会在启用 AI 搜索时被更新为融合分数的最大值 | 611 | # max_score 会在启用 AI 搜索时被更新为融合分数的最大值 |
| 608 | max_score = es_response.get('hits', {}).get('max_score') or 0.0 | 612 | max_score = es_response.get('hits', {}).get('max_score') or 0.0 |
| 609 | 613 | ||
| 614 | + # 从上下文中取出重排调试信息(若有) | ||
| 615 | + rerank_debug_raw = context.get_intermediate_result('rerank_scores', None) | ||
| 616 | + rerank_debug_by_doc: Dict[str, Dict[str, Any]] = {} | ||
| 617 | + if isinstance(rerank_debug_raw, list): | ||
| 618 | + for item in rerank_debug_raw: | ||
| 619 | + if not isinstance(item, dict): | ||
| 620 | + continue | ||
| 621 | + doc_id = item.get("doc_id") | ||
| 622 | + if doc_id is None: | ||
| 623 | + continue | ||
| 624 | + rerank_debug_by_doc[str(doc_id)] = item | ||
| 625 | + | ||
| 610 | # Format results using ResultFormatter | 626 | # Format results using ResultFormatter |
| 611 | formatted_results = ResultFormatter.format_search_results( | 627 | formatted_results = ResultFormatter.format_search_results( |
| 612 | es_hits, | 628 | es_hits, |
| @@ -620,6 +636,11 @@ class Searcher: | @@ -620,6 +636,11 @@ class Searcher: | ||
| 620 | if debug and es_hits and formatted_results: | 636 | if debug and es_hits and formatted_results: |
| 621 | for hit, spu in zip(es_hits, formatted_results): | 637 | for hit, spu in zip(es_hits, formatted_results): |
| 622 | source = hit.get("_source", {}) or {} | 638 | source = hit.get("_source", {}) or {} |
| 639 | + doc_id = hit.get("_id") | ||
| 640 | + rerank_debug = None | ||
| 641 | + if doc_id is not None: | ||
| 642 | + rerank_debug = rerank_debug_by_doc.get(str(doc_id)) | ||
| 643 | + | ||
| 623 | raw_score = hit.get("_score") | 644 | raw_score = hit.get("_score") |
| 624 | try: | 645 | try: |
| 625 | es_score = float(raw_score) if raw_score is not None else 0.0 | 646 | es_score = float(raw_score) if raw_score is not None else 0.0 |
| @@ -634,16 +655,24 @@ class Searcher: | @@ -634,16 +655,24 @@ class Searcher: | ||
| 634 | brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None | 655 | brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None |
| 635 | vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None | 656 | vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None |
| 636 | 657 | ||
| 637 | - per_result_debug.append( | ||
| 638 | - { | ||
| 639 | - "spu_id": spu.spu_id, | ||
| 640 | - "es_score": es_score, | ||
| 641 | - "es_score_normalized": normalized, | ||
| 642 | - "title_multilingual": title_multilingual, | ||
| 643 | - "brief_multilingual": brief_multilingual, | ||
| 644 | - "vendor_multilingual": vendor_multilingual, | ||
| 645 | - } | ||
| 646 | - ) | 658 | + debug_entry: Dict[str, Any] = { |
| 659 | + "spu_id": spu.spu_id, | ||
| 660 | + "es_score": es_score, | ||
| 661 | + "es_score_normalized": normalized, | ||
| 662 | + "title_multilingual": title_multilingual, | ||
| 663 | + "brief_multilingual": brief_multilingual, | ||
| 664 | + "vendor_multilingual": vendor_multilingual, | ||
| 665 | + } | ||
| 666 | + | ||
| 667 | + # 若存在重排调试信息,则补充 doc 级别的融合分数信息 | ||
| 668 | + if rerank_debug: | ||
| 669 | + debug_entry["doc_id"] = rerank_debug.get("doc_id") | ||
| 670 | + # 与 rerank_client 中字段保持一致,便于前端直接使用 | ||
| 671 | + debug_entry["es_score_norm"] = rerank_debug.get("es_score_norm") | ||
| 672 | + debug_entry["rerank_score"] = rerank_debug.get("rerank_score") | ||
| 673 | + debug_entry["fused_score"] = rerank_debug.get("fused_score") | ||
| 674 | + | ||
| 675 | + per_result_debug.append(debug_entry) | ||
| 647 | 676 | ||
| 648 | # Format facets | 677 | # Format facets |
| 649 | standardized_facets = None | 678 | standardized_facets = None |