diff --git a/docs/TODO.txt b/docs/TODO.txt
index 6edc6c3..682c2fd 100644
--- a/docs/TODO.txt
+++ b/docs/TODO.txt
@@ -1,5 +1,89 @@
+
+
+
+
+
+
+翻译服务的 health 检查接口响应很慢,需要排查
+
+
+
+
+translator的设计 :
+
+QueryParser 里面 并不是调用的6006,目前是把6006做了一个provider,然后translate的总体配置又有6006的baseurl,很混乱!!!
+
+config.yaml 里面的翻译配置不是“6006 专用配置”,而是整个系统共享的 services 配置(搜索 API 进程和翻译服务进程都会读取)
+6006本来之前是做一个provider。
+结果后面改造成了综合体,但是还没改完,改到一半发现之前的实现跟我的设计或者想法有偏差。
+
+
+需要继续改完!!!!!!!!
+
+
+- `config.yaml` **不是“6006 专用配置”**,而是整个系统的 **统一 services 配置**,由 `config/services_config.py` 读取,**搜索 API 进程和翻译服务进程都会用到它**。
+- 关键决定行为的是这一行:
+
+```yaml
+translation:
+ provider: "llm"
+```
+
+在当前配置下:
+
+- 搜索 API 进程里,`QueryParser` 初始化翻译器时走的是:
+
+```python
+create_translation_provider(...) # provider == "llm"
+```
+
+进而返回的是 `LLMTranslatorProvider`(本进程内调用),**不会走 `base_url`,也不会走 6006 端口**。
+- `base_url: "http://127.0.0.1:6006"` 只在 `provider: "http"` / `"service"` 时被 `HttpTranslationProvider` 使用;在 `provider: "llm"` 时,这个字段对 `QueryParser` 是完全被忽略的。
+
+所以现在的实际情况是:
+
+- **QueryParser 中的翻译是“本进程直连 LLM API”**,所以日志在搜索后端自己的日志文件里。
+- 如果你希望「QueryParser 永远通过 6006 端口的翻译服务」,需要把 provider 改成 HTTP:
+
+```yaml
+translation:
+ provider: "http" # ← 改成 http 或 service
+ cache: ...
+ providers:
+ http:
+ base_url: "http://127.0.0.1:6006"
+      model: "llm"              # 或 "qwen-mt-flash",看你想用哪个
+ timeout_sec: 10.0
+ llm:
+ model: "qwen-flash" # 留给翻译服务自身内部使用
+ qwen-mt: ...
+ deepl: ...
+```
+
+
+
+
+
+
+
+
+
+
+
+
+查看翻译的缓存情况
+
+向量的缓存
+
+
+
+
+
+
+
+
AI - 生产 - MySQL
HOST:10.200.16.14 / localhost
端口:3316
diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js
index 0a92ea2..4f45b22 100644
--- a/frontend/static/js/app.js
+++ b/frontend/static/js/app.js
@@ -367,6 +367,18 @@ function displayResults(data) {
? debug.es_score_normalized.toFixed(4)
: (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
+ const esNormRerank = typeof debug.es_score_norm === 'number'
+ ? debug.es_score_norm.toFixed(4)
+ : (debug.es_score_norm == null ? '' : String(debug.es_score_norm));
+
+ const rerankScore = typeof debug.rerank_score === 'number'
+ ? debug.rerank_score.toFixed(4)
+ : (debug.rerank_score == null ? '' : String(debug.rerank_score));
+
+ const fusedScore = typeof debug.fused_score === 'number'
+ ? debug.fused_score.toFixed(4)
+ : (debug.fused_score == null ? '' : String(debug.fused_score));
+
// Build multilingual title info
let titleLines = '';
if (debug.title_multilingual && typeof debug.title_multilingual === 'object') {
@@ -385,6 +397,9 @@ function displayResults(data) {
spu_id: ${escapeHtml(String(spuId || ''))}
ES score: ${esScore}
ES normalized: ${esNorm}
+ ES norm (rerank input): ${esNormRerank}
+ Rerank score: ${rerankScore}
+ Fused score: ${fusedScore}
${titleLines}
查看 ES 原始文档
diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py
index 7de8899..a7d7e7b 100644
--- a/indexer/document_transformer.py
+++ b/indexer/document_transformer.py
@@ -345,7 +345,7 @@ class SPUDocumentTransformer:
text=title_text,
source_lang=primary_lang,
index_languages=index_langs,
- scene="product_title",
+ scene="sku_name",
)
_set_lang_obj("title", title_text, translations)
diff --git a/reranker/DEPLOYMENT_AND_TUNING.md b/reranker/DEPLOYMENT_AND_TUNING.md
index 54b03a2..fc181ed 100644
--- a/reranker/DEPLOYMENT_AND_TUNING.md
+++ b/reranker/DEPLOYMENT_AND_TUNING.md
@@ -144,7 +144,6 @@ curl -sS http://127.0.0.1:6007/health
- `RERANK_VLLM_INFER_BATCH_SIZE`
- `RERANK_VLLM_SORT_BY_DOC_LENGTH`
-- `RERANK_VLLM_LENGTH_SORT_MODE`
## 6. 本轮关键结论(2026-03-11)
diff --git a/reranker/README.md b/reranker/README.md
index 102516e..9d13809 100644
--- a/reranker/README.md
+++ b/reranker/README.md
@@ -158,6 +158,6 @@ uvicorn reranker.server:app --host 0.0.0.0 --port 6007 --log-level info
- 无请求级缓存;输入按字符串去重后推理,再按原始顺序回填分数。
- 空或 null 的 doc 跳过并计为 0。
- **Qwen3-vLLM 分批策略**:`docs` 请求体可为 1000+,服务端会按 `infer_batch_size` 拆分;当 `sort_by_doc_length=true` 时,会先按文档长度排序后分批,减少 padding 开销,最终再按输入顺序回填分数。`length_sort_mode` 支持 `char`(默认,更快)与 `token`(更精确)。
-- 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`、`RERANK_VLLM_LENGTH_SORT_MODE`。
+- 运行时可用环境变量临时覆盖批量参数:`RERANK_VLLM_INFER_BATCH_SIZE`、`RERANK_VLLM_SORT_BY_DOC_LENGTH`。
- **Qwen3-vLLM**:参考 [Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B),需 GPU 与较多显存;与 BGE 相比适合长文本、高吞吐场景(vLLM 前缀缓存)。
- **Qwen3-Transformers**:官方 Transformers Usage 方式,无需 vLLM;适合 CPU 或小显存,可选 `attn_implementation: "flash_attention_2"` 加速。
diff --git a/reranker/backends/qwen3_vllm.py b/reranker/backends/qwen3_vllm.py
index 4b23f8d..d953107 100644
--- a/reranker/backends/qwen3_vllm.py
+++ b/reranker/backends/qwen3_vllm.py
@@ -295,7 +295,6 @@ class Qwen3VLLMRerankerBackend:
"normalize": normalize,
"infer_batch_size": self._infer_batch_size,
"inference_batches": inference_batches,
- "sort_by_doc_length": self._sort_by_doc_length,
- "length_sort_mode": self._length_sort_mode,
+ "sort_by_doc_length": self._sort_by_doc_length
}
return output_scores, meta
diff --git a/search/rerank_client.py b/search/rerank_client.py
index 52431a7..24c3686 100644
--- a/search/rerank_client.py
+++ b/search/rerank_client.py
@@ -103,13 +103,12 @@ def fuse_scores_and_resort(
weight_ai: float = DEFAULT_WEIGHT_AI,
) -> List[Dict[str, Any]]:
"""
- 将 ES 分数与重排分数线性融合,写回每条 hit 的 _score,并按融合分数降序重排。
+ 将 ES 分数与重排分数线性融合(不修改原始 _score),并按融合分数降序重排。
对每条 hit 会写入:
- _original_score: 原始 ES 分数
- _rerank_score: 重排服务返回的分数
- _fused_score: 融合分数
- - _score: 置为融合分数(供后续 ResultFormatter 使用)
Args:
es_hits: ES hits 列表(会被原地修改)
@@ -150,7 +149,6 @@ def fuse_scores_and_resort(
hit["_original_score"] = hit.get("_score")
hit["_rerank_score"] = rerank_score
hit["_fused_score"] = fused
- hit["_score"] = fused
fused_debug.append({
"doc_id": hit.get("_id"),
diff --git a/search/searcher.py b/search/searcher.py
index d06730e..33e57f0 100644
--- a/search/searcher.py
+++ b/search/searcher.py
@@ -540,7 +540,11 @@ class Searcher:
sliced = hits[from_ : from_ + size]
es_response.setdefault("hits", {})["hits"] = sliced
if sliced:
- slice_max = max((h.get("_score") for h in sliced), default=0.0)
+ # 对于启用重排的结果,优先使用 _fused_score 计算 max_score;否则退回原始 _score
+ slice_max = max(
+ (h.get("_fused_score", h.get("_score", 0.0)) for h in sliced),
+ default=0.0,
+ )
try:
es_response["hits"]["max_score"] = float(slice_max)
except (TypeError, ValueError):
@@ -607,6 +611,18 @@ class Searcher:
# max_score 会在启用 AI 搜索时被更新为融合分数的最大值
max_score = es_response.get('hits', {}).get('max_score') or 0.0
+ # 从上下文中取出重排调试信息(若有)
+ rerank_debug_raw = context.get_intermediate_result('rerank_scores', None)
+ rerank_debug_by_doc: Dict[str, Dict[str, Any]] = {}
+ if isinstance(rerank_debug_raw, list):
+ for item in rerank_debug_raw:
+ if not isinstance(item, dict):
+ continue
+ doc_id = item.get("doc_id")
+ if doc_id is None:
+ continue
+ rerank_debug_by_doc[str(doc_id)] = item
+
# Format results using ResultFormatter
formatted_results = ResultFormatter.format_search_results(
es_hits,
@@ -620,6 +636,11 @@ class Searcher:
if debug and es_hits and formatted_results:
for hit, spu in zip(es_hits, formatted_results):
source = hit.get("_source", {}) or {}
+ doc_id = hit.get("_id")
+ rerank_debug = None
+ if doc_id is not None:
+ rerank_debug = rerank_debug_by_doc.get(str(doc_id))
+
raw_score = hit.get("_score")
try:
es_score = float(raw_score) if raw_score is not None else 0.0
@@ -634,16 +655,24 @@ class Searcher:
brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
vendor_multilingual = source.get("vendor") if isinstance(source.get("vendor"), dict) else None
- per_result_debug.append(
- {
- "spu_id": spu.spu_id,
- "es_score": es_score,
- "es_score_normalized": normalized,
- "title_multilingual": title_multilingual,
- "brief_multilingual": brief_multilingual,
- "vendor_multilingual": vendor_multilingual,
- }
- )
+ debug_entry: Dict[str, Any] = {
+ "spu_id": spu.spu_id,
+ "es_score": es_score,
+ "es_score_normalized": normalized,
+ "title_multilingual": title_multilingual,
+ "brief_multilingual": brief_multilingual,
+ "vendor_multilingual": vendor_multilingual,
+ }
+
+ # 若存在重排调试信息,则补充 doc 级别的融合分数信息
+ if rerank_debug:
+ debug_entry["doc_id"] = rerank_debug.get("doc_id")
+ # 与 rerank_client 中字段保持一致,便于前端直接使用
+ debug_entry["es_score_norm"] = rerank_debug.get("es_score_norm")
+ debug_entry["rerank_score"] = rerank_debug.get("rerank_score")
+ debug_entry["fused_score"] = rerank_debug.get("fused_score")
+
+ per_result_debug.append(debug_entry)
# Format facets
standardized_facets = None
--
libgit2 0.21.2