Commit af827ce958e53ce67a2f0ee48052d5830893aed0
1 parent
33f8f578
rerank
Showing
8 changed files
with
143 additions
and
19 deletions
Show diff stats
docs/TODO.txt
| 1 | 1 | |
| 2 | 2 | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | +翻译的 health 检查很慢 | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | +translator的设计 : | |
| 15 | + | |
| 16 | +QueryParser 里面 并不是调用的6006,目前是把6006做了一个provider,然后translate的总体配置又有6006的baseurl,很混乱!!! | |
| 17 | + | |
| 18 | +config.yaml 里面的翻译配置不是"6006 专用配置",而是搜索服务和翻译服务共用的统一配置 | |
| 19 | +6006本来之前是做一个provider。 | |
| 20 | +结果后面改造成了综合体,但是还没改完,改到一半发现之前的实现跟我的设计或者想法有偏差。 | |
| 21 | + | |
| 22 | + | |
| 23 | +需要继续改完!!!!!!!! | |
| 24 | + | |
| 25 | + | |
| 26 | +- `config.yaml` **不是“6006 专用配置”**,而是整个系统的 **统一 services 配置**,由 `config/services_config.py` 读取,**搜索 API 进程和翻译服务进程都会用到它**。 | |
| 27 | +- 关键决定行为的是这一行: | |
| 28 | + | |
| 29 | +```yaml | |
| 30 | +translation: | |
| 31 | + provider: "llm" | |
| 32 | +``` | |
| 33 | + | |
| 34 | +在当前配置下: | |
| 35 | + | |
| 36 | +- 搜索 API 进程里,`QueryParser` 初始化翻译器时走的是: | |
| 37 | + | |
| 38 | +```python | |
| 39 | +create_translation_provider(...) # provider == "llm" | |
| 40 | +``` | |
| 41 | + | |
| 42 | +进而返回的是 `LLMTranslatorProvider`(本进程内调用),**不会走 `base_url`,也不会走 6006 端口**。 | |
| 43 | +- `base_url: "http://127.0.0.1:6006"` 只在 `provider: "http"` / `"service"` 时被 `HttpTranslationProvider` 使用;在 `provider: "llm"` 时,这个字段对 `QueryParser` 是完全被忽略的。 | |
| 44 | + | |
| 45 | +所以现在的实际情况是: | |
| 46 | + | |
| 47 | +- **QueryParser 中的翻译是“本进程直连 LLM API”**,所以日志在搜索后端自己的日志文件里。 | |
| 48 | +- 如果你希望「QueryParser 永远通过 6006 端口的翻译服务」,需要把 provider 改成 HTTP: | |
| 49 | + | |
| 50 | +```yaml | |
| 51 | +translation: | |
| 52 | + provider: "http" # ← 改成 http 或 service | |
| 53 | + cache: ... | |
| 54 | + providers: | |
| 55 | + http: | |
| 56 | + base_url: "http://127.0.0.1:6006" | |
| 57 | + model: "llm" # 或 "qwen-mt-flush",看你想用哪个 | |
| 58 | + timeout_sec: 10.0 | |
| 59 | + llm: | |
| 60 | + model: "qwen-flash" # 留给翻译服务自身内部使用 | |
| 61 | + qwen-mt: ... | |
| 62 | + deepl: ... | |
| 63 | +``` | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | +查看翻译的缓存情况 | |
| 77 | + | |
| 78 | +向量的缓存 | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 3 | 87 | AI - 生产 - MySQL |
| 4 | 88 | HOST:10.200.16.14 / localhost |
| 5 | 89 | 端口:3316 | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -367,6 +367,18 @@ function displayResults(data) { |
| 367 | 367 | ? debug.es_score_normalized.toFixed(4) |
| 368 | 368 | : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); |
| 369 | 369 | |
| 370 | + const esNormRerank = typeof debug.es_score_norm === 'number' | |
| 371 | + ? debug.es_score_norm.toFixed(4) | |
| 372 | + : (debug.es_score_norm == null ? '' : String(debug.es_score_norm)); | |
| 373 | + | |
| 374 | + const rerankScore = typeof debug.rerank_score === 'number' | |
| 375 | + ? debug.rerank_score.toFixed(4) | |
| 376 | + : (debug.rerank_score == null ? '' : String(debug.rerank_score)); | |
| 377 | + | |
| 378 | + const fusedScore = typeof debug.fused_score === 'number' | |
| 379 | + ? debug.fused_score.toFixed(4) | |
| 380 | + : (debug.fused_score == null ? '' : String(debug.fused_score)); | |
| 381 | + | |
| 370 | 382 | // Build multilingual title info |
| 371 | 383 | let titleLines = ''; |
| 372 | 384 | if (debug.title_multilingual && typeof debug.title_multilingual === 'object') { |
| ... | ... | @@ -385,6 +397,9 @@ function displayResults(data) { |
| 385 | 397 | <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> |
| 386 | 398 | <div class="product-debug-line">ES score: ${esScore}</div> |
| 387 | 399 | <div class="product-debug-line">ES normalized: ${esNorm}</div> |
| 400 | + <div class="product-debug-line">ES norm (rerank input): ${esNormRerank}</div> | |
| 401 | + <div class="product-debug-line">Rerank score: ${rerankScore}</div> | |
| 402 | + <div class="product-debug-line">Fused score: ${fusedScore}</div> | |
| 388 | 403 | ${titleLines} |
| 389 | 404 | <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> |
| 390 | 405 | 查看 ES 原始文档 | ... | ... |
indexer/document_transformer.py
reranker/DEPLOYMENT_AND_TUNING.md
reranker/README.md
| ... | ... | @@ -158,6 +158,6 @@ uvicorn reranker.server:app --host 0.0.0.0 --port 6007 --log-level info |
| 158 | 158 | - 无请求级缓存;输入按字符串去重后推理,再按原始顺序回填分数。 |
| 159 | 159 | - 空或 null 的 doc 跳过并计为 0。 |
| 160 | 160 | - **Qwen3-vLLM 分批策略**:`docs` 请求体可为 1000+,服务端会按 `infer_batch_size` 拆分;当 `sort_by_doc_length=true` 时,会先按文档长度排序后分批,减少 padding 开销,最终再按输入顺序回填分数。`length_sort_mode` 支持 `char`(默认,更快)与 `token`(更精确)。 |
| 161 | -- 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`、`RERANK_VLLM_LENGTH_SORT_MODE`。 | |
| 161 | +- 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`。 | |
| 162 | 162 | - **Qwen3-vLLM**:参考 [Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B),需 GPU 与较多显存;与 BGE 相比适合长文本、高吞吐场景(vLLM 前缀缓存)。 |
| 163 | 163 | - **Qwen3-Transformers**:官方 Transformers Usage 方式,无需 vLLM;适合 CPU 或小显存,可选 `attn_implementation: "flash_attention_2"` 加速。 | ... | ... |
reranker/backends/qwen3_vllm.py
| ... | ... | @@ -295,7 +295,6 @@ class Qwen3VLLMRerankerBackend: |
| 295 | 295 | "normalize": normalize, |
| 296 | 296 | "infer_batch_size": self._infer_batch_size, |
| 297 | 297 | "inference_batches": inference_batches, |
| 298 | - "sort_by_doc_length": self._sort_by_doc_length, | |
| 299 | - "length_sort_mode": self._length_sort_mode, | |
| 298 | + "sort_by_doc_length": self._sort_by_doc_length | |
| 300 | 299 | } |
| 301 | 300 | return output_scores, meta | ... | ... |
search/rerank_client.py
| ... | ... | @@ -103,13 +103,12 @@ def fuse_scores_and_resort( |
| 103 | 103 | weight_ai: float = DEFAULT_WEIGHT_AI, |
| 104 | 104 | ) -> List[Dict[str, Any]]: |
| 105 | 105 | """ |
| 106 | - 将 ES 分数与重排分数线性融合,写回每条 hit 的 _score,并按融合分数降序重排。 | |
| 106 | + 将 ES 分数与重排分数线性融合(不修改原始 _score),并按融合分数降序重排。 | |
| 107 | 107 | |
| 108 | 108 | 对每条 hit 会写入: |
| 109 | 109 | - _original_score: 原始 ES 分数 |
| 110 | 110 | - _rerank_score: 重排服务返回的分数 |
| 111 | 111 | - _fused_score: 融合分数 |
| 112 | - - _score: 置为融合分数(供后续 ResultFormatter 使用) | |
| 113 | 112 | |
| 114 | 113 | Args: |
| 115 | 114 | es_hits: ES hits 列表(会被原地修改) |
| ... | ... | @@ -150,7 +149,6 @@ def fuse_scores_and_resort( |
| 150 | 149 | hit["_original_score"] = hit.get("_score") |
| 151 | 150 | hit["_rerank_score"] = rerank_score |
| 152 | 151 | hit["_fused_score"] = fused |
| 153 | - hit["_score"] = fused | |
| 154 | 152 | |
| 155 | 153 | fused_debug.append({ |
| 156 | 154 | "doc_id": hit.get("_id"), | ... | ... |
search/searcher.py
| ... | ... | @@ -540,7 +540,11 @@ class Searcher: |
| 540 | 540 | sliced = hits[from_ : from_ + size] |
| 541 | 541 | es_response.setdefault("hits", {})["hits"] = sliced |
| 542 | 542 | if sliced: |
| 543 | - slice_max = max((h.get("_score") for h in sliced), default=0.0) | |
| 543 | + # 对于启用重排的结果,优先使用 _fused_score 计算 max_score;否则退回原始 _score | |
| 544 | + slice_max = max( | |
| 545 | + (h.get("_fused_score", h.get("_score", 0.0)) for h in sliced), | |
| 546 | + default=0.0, | |
| 547 | + ) | |
| 544 | 548 | try: |
| 545 | 549 | es_response["hits"]["max_score"] = float(slice_max) |
| 546 | 550 | except (TypeError, ValueError): |
| ... | ... | @@ -607,6 +611,18 @@ class Searcher: |
| 607 | 611 | # max_score 会在启用 AI 搜索时被更新为融合分数的最大值 |
| 608 | 612 | max_score = es_response.get('hits', {}).get('max_score') or 0.0 |
| 609 | 613 | |
| 614 | + # 从上下文中取出重排调试信息(若有) | |
| 615 | + rerank_debug_raw = context.get_intermediate_result('rerank_scores', None) | |
| 616 | + rerank_debug_by_doc: Dict[str, Dict[str, Any]] = {} | |
| 617 | + if isinstance(rerank_debug_raw, list): | |
| 618 | + for item in rerank_debug_raw: | |
| 619 | + if not isinstance(item, dict): | |
| 620 | + continue | |
| 621 | + doc_id = item.get("doc_id") | |
| 622 | + if doc_id is None: | |
| 623 | + continue | |
| 624 | + rerank_debug_by_doc[str(doc_id)] = item | |
| 625 | + | |
| 610 | 626 | # Format results using ResultFormatter |
| 611 | 627 | formatted_results = ResultFormatter.format_search_results( |
| 612 | 628 | es_hits, |
| ... | ... | @@ -620,6 +636,11 @@ class Searcher: |
| 620 | 636 | if debug and es_hits and formatted_results: |
| 621 | 637 | for hit, spu in zip(es_hits, formatted_results): |
| 622 | 638 | source = hit.get("_source", {}) or {} |
| 639 | + doc_id = hit.get("_id") | |
| 640 | + rerank_debug = None | |
| 641 | + if doc_id is not None: | |
| 642 | + rerank_debug = rerank_debug_by_doc.get(str(doc_id)) | |
| 643 | + | |
| 623 | 644 | raw_score = hit.get("_score") |
| 624 | 645 | try: |
| 625 | 646 | es_score = float(raw_score) if raw_score is not None else 0.0 |
| ... | ... | @@ -634,16 +655,24 @@ class Searcher: |
| 634 | 655 | brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None |
| 635 | 656 | vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None |
| 636 | 657 | |
| 637 | - per_result_debug.append( | |
| 638 | - { | |
| 639 | - "spu_id": spu.spu_id, | |
| 640 | - "es_score": es_score, | |
| 641 | - "es_score_normalized": normalized, | |
| 642 | - "title_multilingual": title_multilingual, | |
| 643 | - "brief_multilingual": brief_multilingual, | |
| 644 | - "vendor_multilingual": vendor_multilingual, | |
| 645 | - } | |
| 646 | - ) | |
| 658 | + debug_entry: Dict[str, Any] = { | |
| 659 | + "spu_id": spu.spu_id, | |
| 660 | + "es_score": es_score, | |
| 661 | + "es_score_normalized": normalized, | |
| 662 | + "title_multilingual": title_multilingual, | |
| 663 | + "brief_multilingual": brief_multilingual, | |
| 664 | + "vendor_multilingual": vendor_multilingual, | |
| 665 | + } | |
| 666 | + | |
| 667 | + # 若存在重排调试信息,则补充 doc 级别的融合分数信息 | |
| 668 | + if rerank_debug: | |
| 669 | + debug_entry["doc_id"] = rerank_debug.get("doc_id") | |
| 670 | + # 与 rerank_client 中字段保持一致,便于前端直接使用 | |
| 671 | + debug_entry["es_score_norm"] = rerank_debug.get("es_score_norm") | |
| 672 | + debug_entry["rerank_score"] = rerank_debug.get("rerank_score") | |
| 673 | + debug_entry["fused_score"] = rerank_debug.get("fused_score") | |
| 674 | + | |
| 675 | + per_result_debug.append(debug_entry) | |
| 647 | 676 | |
| 648 | 677 | # Format facets |
| 649 | 678 | standardized_facets = None | ... | ... |