Commit daa2690ba5559d5c749855fe500db9ae2dc3a4f4

Authored by tangwang
1 parent 16d28bf8

漏斗参数调优&呈现优化

config/config.yaml
... ... @@ -421,19 +421,31 @@ services:
421 421 normalize_embeddings: true
422 422 rerank:
423 423 provider: "http"
424   - base_url: "http://127.0.0.1:6007"
425 424 providers:
426 425 http:
427   - base_url: "http://127.0.0.1:6007"
428   - service_url: "http://127.0.0.1:6007/rerank"
429   - service_urls:
430   - default: "http://127.0.0.1:6007/rerank"
431   - fine: "http://127.0.0.1:6009/rerank"
  426 + instances:
  427 + default:
  428 + base_url: "http://127.0.0.1:6007"
  429 + service_url: "http://127.0.0.1:6007/rerank"
  430 + fine:
  431 + base_url: "http://127.0.0.1:6009"
  432 + service_url: "http://127.0.0.1:6009/rerank"
432 433 request:
433 434 max_docs: 1000
434 435 normalize: true
435   - # 服务内后端(reranker 进程启动时读取)
436   - backend: "qwen3_vllm_score" # bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank
  436 + default_instance: "default"
  437 + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
  438 + instances:
  439 + default:
  440 + host: "0.0.0.0"
  441 + port: 6007
  442 + backend: "qwen3_vllm_score"
  443 + runtime_dir: "./.runtime/reranker/default"
  444 + fine:
  445 + host: "0.0.0.0"
  446 + port: 6009
  447 + backend: "bge"
  448 + runtime_dir: "./.runtime/reranker/fine"
437 449 backends:
438 450 bge:
439 451 model_name: "BAAI/bge-reranker-v2-m3"
... ...
config/env_config.py
... ... @@ -10,6 +10,7 @@ from __future__ import annotations
10 10 from typing import Any, Dict
11 11  
12 12 from config.loader import get_app_config
  13 +from config.services_config import get_rerank_service_url
13 14  
14 15  
15 16 def _app():
... ... @@ -92,10 +93,7 @@ INDEXER_BASE_URL = (
92 93 )
93 94 EMBEDDING_TEXT_SERVICE_URL = _app().services.embedding.get_provider_config().get("text_base_url")
94 95 EMBEDDING_IMAGE_SERVICE_URL = _app().services.embedding.get_provider_config().get("image_base_url")
95   -RERANKER_SERVICE_URL = (
96   - _app().services.rerank.get_provider_config().get("service_url")
97   - or _app().services.rerank.get_provider_config().get("base_url")
98   -)
  96 +RERANKER_SERVICE_URL = get_rerank_service_url()
99 97  
100 98  
101 99 def get_es_config() -> Dict[str, Any]:
... ...
config/loader.py
... ... @@ -43,6 +43,7 @@ from config.schema import (
43 43 RerankConfig,
44 44 RerankFusionConfig,
45 45 RerankServiceConfig,
  46 + RerankServiceInstanceConfig,
46 47 RuntimeConfig,
47 48 SearchConfig,
48 49 SecretsConfig,
... ... @@ -615,13 +616,61 @@ class AppConfigLoader:
615 616 rerank_providers = dict(rerank_raw.get("providers") or {})
616 617 if rerank_provider not in rerank_providers:
617 618 raise ConfigurationError(f"services.rerank.providers.{rerank_provider} must be configured")
618   - rerank_backend = str(rerank_raw.get("backend") or "").strip().lower()
619 619 rerank_backends = {
620 620 str(key).strip().lower(): dict(value)
621 621 for key, value in dict(rerank_raw.get("backends") or {}).items()
622 622 }
623   - if rerank_backend not in rerank_backends:
624   - raise ConfigurationError(f"services.rerank.backends.{rerank_backend} must be configured")
  623 + default_instance = str(rerank_raw.get("default_instance") or "default").strip() or "default"
  624 + raw_instances = rerank_raw.get("instances") if isinstance(rerank_raw.get("instances"), dict) else {}
  625 + if not raw_instances:
  626 + legacy_backend = str(rerank_raw.get("backend") or "").strip().lower()
  627 + if legacy_backend not in rerank_backends:
  628 + raise ConfigurationError(f"services.rerank.backends.{legacy_backend} must be configured")
  629 + provider_cfg = dict(rerank_providers.get(rerank_provider) or {})
  630 + raw_instances = {
  631 + default_instance: {
  632 + "host": "0.0.0.0",
  633 + "port": 6007,
  634 + "backend": legacy_backend,
  635 + "base_url": provider_cfg.get("base_url"),
  636 + "service_url": provider_cfg.get("service_url"),
  637 + }
  638 + }
  639 + rerank_instances = {}
  640 + for instance_name, instance_raw in raw_instances.items():
  641 + if not isinstance(instance_raw, dict):
  642 + raise ConfigurationError(f"services.rerank.instances.{instance_name} must be a mapping")
  643 + normalized_instance_name = str(instance_name).strip()
  644 + backend_name = str(instance_raw.get("backend") or "").strip().lower()
  645 + if backend_name not in rerank_backends:
  646 + raise ConfigurationError(
  647 + f"services.rerank.instances.{normalized_instance_name}.backend must reference configured services.rerank.backends"
  648 + )
  649 + port = int(instance_raw.get("port", 6007))
  650 + rerank_instances[normalized_instance_name] = RerankServiceInstanceConfig(
  651 + host=str(instance_raw.get("host") or "0.0.0.0"),
  652 + port=port,
  653 + backend=backend_name,
  654 + runtime_dir=(
  655 + str(v)
  656 + if (v := instance_raw.get("runtime_dir")) not in (None, "")
  657 + else None
  658 + ),
  659 + base_url=(
  660 + str(v).rstrip("/")
  661 + if (v := instance_raw.get("base_url")) not in (None, "")
  662 + else None
  663 + ),
  664 + service_url=(
  665 + str(v).rstrip("/")
  666 + if (v := instance_raw.get("service_url")) not in (None, "")
  667 + else None
  668 + ),
  669 + )
  670 + if default_instance not in rerank_instances:
  671 + raise ConfigurationError(
  672 + f"services.rerank.default_instance={default_instance!r} must exist in services.rerank.instances"
  673 + )
625 674 rerank_request = dict(rerank_raw.get("request") or {})
626 675 rerank_request.setdefault("max_docs", 1000)
627 676 rerank_request.setdefault("normalize", True)
... ... @@ -629,7 +678,8 @@ class AppConfigLoader:
629 678 rerank_config = RerankServiceConfig(
630 679 provider=rerank_provider,
631 680 providers=rerank_providers,
632   - backend=rerank_backend,
  681 + default_instance=default_instance,
  682 + instances=rerank_instances,
633 683 backends=rerank_backends,
634 684 request=rerank_request,
635 685 )
... ... @@ -754,8 +804,19 @@ class AppConfigLoader:
754 804 errors.append("services.embedding.providers.<provider>.image_base_url is required")
755 805  
756 806 rerank_provider_cfg = app_config.services.rerank.get_provider_config()
757   - if not rerank_provider_cfg.get("service_url") and not rerank_provider_cfg.get("base_url"):
758   - errors.append("services.rerank.providers.<provider>.service_url or base_url is required")
  807 + provider_instances = rerank_provider_cfg.get("instances")
  808 + if not isinstance(provider_instances, dict):
  809 + provider_instances = {}
  810 + for instance_name in app_config.services.rerank.instances:
  811 + instance_cfg = app_config.services.rerank.get_instance(instance_name)
  812 + provider_instance_cfg = provider_instances.get(instance_name) if isinstance(provider_instances, dict) else None
  813 + has_instance_url = False
  814 + if isinstance(provider_instance_cfg, dict):
  815 + has_instance_url = bool(provider_instance_cfg.get("service_url") or provider_instance_cfg.get("base_url"))
  816 + if not has_instance_url and not instance_cfg.service_url and not instance_cfg.base_url:
  817 + errors.append(
  818 + f"services.rerank instance {instance_name!r} must define service_url/base_url either under providers.<provider>.instances or services.rerank.instances"
  819 + )
759 820  
760 821 if errors:
761 822 raise ConfigurationError("Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors))
... ...
config/schema.py
... ... @@ -236,20 +236,41 @@ class EmbeddingServiceConfig:
236 236  
237 237  
238 238 @dataclass(frozen=True)
  239 +class RerankServiceInstanceConfig:
  240 + """One named reranker service instance."""
  241 +
  242 + host: str = "0.0.0.0"
  243 + port: int = 6007
  244 + backend: str = "qwen3_vllm_score"
  245 + runtime_dir: Optional[str] = None
  246 + base_url: Optional[str] = None
  247 + service_url: Optional[str] = None
  248 +
  249 +
  250 +@dataclass(frozen=True)
239 251 class RerankServiceConfig:
240 252 """Reranker service configuration."""
241 253  
242 254 provider: str
243 255 providers: Dict[str, Any]
244   - backend: str
  256 + default_instance: str
  257 + instances: Dict[str, RerankServiceInstanceConfig]
245 258 backends: Dict[str, Dict[str, Any]]
246 259 request: Dict[str, Any]
247 260  
248 261 def get_provider_config(self) -> Dict[str, Any]:
249 262 return dict(self.providers.get(self.provider, {}) or {})
250 263  
251   - def get_backend_config(self) -> Dict[str, Any]:
252   - return dict(self.backends.get(self.backend, {}) or {})
  264 + def get_instance(self, name: Optional[str] = None) -> RerankServiceInstanceConfig:
  265 + instance_name = str(name or self.default_instance).strip() or self.default_instance
  266 + instance = self.instances.get(instance_name)
  267 + if instance is None:
  268 + raise KeyError(f"Unknown rerank service instance: {instance_name!r}")
  269 + return instance
  270 +
  271 + def get_backend_config(self, name: Optional[str] = None) -> Dict[str, Any]:
  272 + instance = self.get_instance(name)
  273 + return dict(self.backends.get(instance.backend, {}) or {})
253 274  
254 275  
255 276 @dataclass(frozen=True)
... ...
config/services_config.py
... ... @@ -11,7 +11,12 @@ import os
11 11 from typing import Any, Dict, Tuple
12 12  
13 13 from config.loader import get_app_config
14   -from config.schema import EmbeddingServiceConfig, RerankServiceConfig, TranslationServiceConfig
  14 +from config.schema import (
  15 + EmbeddingServiceConfig,
  16 + RerankServiceConfig,
  17 + RerankServiceInstanceConfig,
  18 + TranslationServiceConfig,
  19 +)
15 20  
16 21  
17 22 def get_translation_config() -> Dict[str, Any]:
... ... @@ -26,6 +31,16 @@ def get_rerank_config() -> RerankServiceConfig:
26 31 return get_app_config().services.rerank
27 32  
28 33  
  34 +def get_rerank_instance_config(profile: str | None = None) -> RerankServiceInstanceConfig:
  35 + cfg = get_app_config().services.rerank
  36 + instance_name = str(
  37 + profile
  38 + or os.getenv("RERANK_INSTANCE")
  39 + or cfg.default_instance
  40 + ).strip() or cfg.default_instance
  41 + return cfg.get_instance(instance_name)
  42 +
  43 +
29 44 def get_translation_base_url() -> str:
30 45 return get_app_config().services.translation.endpoint
31 46  
... ... @@ -60,31 +75,56 @@ def get_embedding_image_backend_config() -> Tuple[str, Dict[str, Any]]:
60 75 return cfg.image_backend, cfg.get_image_backend_config()
61 76  
62 77  
63   -def get_rerank_backend_config() -> Tuple[str, Dict[str, Any]]:
  78 +def get_rerank_backend_config(profile: str | None = None) -> Tuple[str, Dict[str, Any]]:
64 79 cfg = get_app_config().services.rerank
65   - backend = str(os.getenv("RERANK_BACKEND") or cfg.backend).strip()
66   - if backend != cfg.backend:
  80 + instance = get_rerank_instance_config(profile)
  81 + backend = str(os.getenv("RERANK_BACKEND") or instance.backend).strip()
  82 + if backend != instance.backend:
67 83 backend_cfg = cfg.backends.get(backend)
68 84 if backend_cfg is None:
69 85 raise ValueError(f"Unknown rerank backend override from RERANK_BACKEND: {backend!r}")
70 86 return backend, dict(backend_cfg)
71   - return cfg.backend, cfg.get_backend_config()
  87 + return instance.backend, cfg.get_backend_config(profile)
72 88  
73 89  
74 90 def get_rerank_base_url(profile: str | None = None) -> str:
75   - provider_cfg = get_app_config().services.rerank.get_provider_config()
  91 + cfg = get_app_config().services.rerank
  92 + instance = get_rerank_instance_config(profile)
  93 + provider_cfg = cfg.get_provider_config()
  94 + profile_name = str(profile or os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  95 +
76 96 base = None
77   - profile_name = str(profile).strip() if profile else ""
78   - if profile_name:
79   - service_urls = provider_cfg.get("service_urls")
80   - if isinstance(service_urls, dict):
81   - base = service_urls.get(profile_name)
  97 + provider_instances = provider_cfg.get("instances")
  98 + if isinstance(provider_instances, dict):
  99 + instance_provider_cfg = provider_instances.get(profile_name)
  100 + if isinstance(instance_provider_cfg, dict):
  101 + base = instance_provider_cfg.get("base_url") or instance_provider_cfg.get("service_url")
  102 + if not base:
  103 + base = instance.base_url or instance.service_url
82 104 if not base:
83 105 base = provider_cfg.get("service_url") or provider_cfg.get("base_url")
84 106 if not base:
85 107 raise ValueError("Rerank service URL is not configured")
86   - return str(base).rstrip("/")
  108 + base = str(base).rstrip("/")
  109 + if base.endswith("/rerank"):
  110 + base = base[: -len("/rerank")]
  111 + return base
87 112  
88 113  
89 114 def get_rerank_service_url(profile: str | None = None) -> str:
90   - return get_rerank_base_url(profile=profile)
  115 + cfg = get_app_config().services.rerank
  116 + instance = get_rerank_instance_config(profile)
  117 + provider_cfg = cfg.get_provider_config()
  118 + profile_name = str(profile or os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  119 +
  120 + service_url = None
  121 + provider_instances = provider_cfg.get("instances")
  122 + if isinstance(provider_instances, dict):
  123 + instance_provider_cfg = provider_instances.get(profile_name)
  124 + if isinstance(instance_provider_cfg, dict):
  125 + service_url = instance_provider_cfg.get("service_url")
  126 + if not service_url:
  127 + service_url = instance.service_url
  128 + if not service_url:
  129 + service_url = f"{get_rerank_base_url(profile=profile)}/rerank"
  130 + return str(service_url).rstrip("/")
... ...
docs/issue-2026-03-27-添加粗排精排-第2轮-done-0327.txt 0 → 100644
... ... @@ -0,0 +1,38 @@
  1 +这是上一轮检索效果优化的需求:
  2 +参考
  3 +searcher.py
  4 +rerank_client.py
  5 +schema.py
  6 +es_query_builder.py
  7 +config.yaml
  8 +相关性检索优化说明.md
  9 +
  10 +在ES返回到rerank期间增加一轮粗排+一轮精排。
  11 +1. ES召回,600
  12 +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。
  13 +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。
  14 +4. 精排:240-80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口,服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。
  15 +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。
  16 +测试时,使用跟我同样的环境./scripts/service_ctl.sh reranker backend 重启相关服务进行测试
  17 +
  18 +你已经完成了一般修改,已把三段排序链路接上了,主改动在 search/searcher.py、search/rerank_client.py、config/schema.py、config/loader.py、config/services_config.py 和 config/config.yaml。
  19 +
  20 +现在链路是:
  21 +ES 拉取 coarse_rank.input_window 条 -> 粗排按 text/knn 融合裁到 coarse_rank.output_window -> 再做 SKU 选择和 title suffix -> 精排调用轻量 reranker 裁到 fine_rank.output_window -> 最终重排调用现有 reranker,并在最终融合里加入 fine_score。同时把 reranker client/provider 改成了按 service_profile 选不同 service_url,这样 fine/final 可以共用同一套服务代码,只起不同实例。
  22 +
  23 +你是用了./.venv/bin/pytest -q tests/test_rerank_client.py tests/test_search_rerank_window.py 进行验证,结果是 18 passed。也按要求尝试了 ./scripts/service_ctl.sh restart reranker backend 做联调,但当前环境里只有 6007 的现有 reranker,配置里的 fine 端口 6009 还没对应实例,而且服务重启后进程状态不太稳定,没能完成完整线上链路验证。当前最关键的落地前置是把轻量 bge 实例补起来,再用同样脚本重启联调。
  24 +
  25 +因此,需要回顾这个漏斗的代码,进行进一步的优化:
  26 +1. Fine Rank环节的信息都是N/A。fine 实例也需要在config.yaml中体现,并且需要被server_ctl.sh管理,参考现在是怎么管理reranker的。fine和reranker分别调用哪个模型(两者代码功能,但是只是启动不同的实例),请你深度思考后决定。这里是需要参考当前代码、根据实际需要(原来只有一个reranker、现在多了一个精排,也要调用一个重排模型,因此带来的必要的部分重构。
  27 +2. Ranking Funnel、Fusion Factors、Signal Breakdown
  28 +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。
  29 +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息
  30 +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。
  31 +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。
  32 +reranker:类似
  33 +
  34 +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。
  35 +特别是1关于将现在的单独的reranker服务,在不增加代码复杂度的情况下,如何可以通过配置启动多个实例、各自可以配置选择的模型等相关参数、分别服务于精排和重排,是设计的重中之重。这里需要思考配置如何拆分,需要多读相关代码、思考最佳实践。
  36 +
  37 +
  38 +
... ...
docs/issue-2026-03-27-添加粗排精排-第一轮-done-0327.txt 0 → 100644
... ... @@ -0,0 +1,136 @@
  1 +我:
  2 +进行一轮检索效果优化:
  3 +参考
  4 +[searcher.py](search/searcher.py)
  5 +[rerank_client.py](search/rerank_client.py)
  6 +[schema.py](config/schema.py)
  7 +[es_query_builder.py](search/es_query_builder.py)
  8 +[config.yaml](config/config.yaml)
  9 +[相关性检索优化说明.md](docs/相关性检索优化说明.md)
  10 +
  11 +在ES返回到rerank期间增加一轮粗排+一轮精排。
  12 +1. ES召回,600
  13 +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。
  14 +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。
  15 +4. 精排:240-80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口,服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。
  16 +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。
  17 +测试时,使用跟我同样的环境./scripts/service_ctl.sh reranker backend 重启相关服务进行测试
  18 +
  19 +
  20 +
  21 +
  22 +
  23 +codex:
  24 +前端这轮我把调试展示重构成了“两层漏斗”:
  25 +
  26 +1. 整体漏斗
  27 +在 [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js) 里把原来散着的 `Query Analysis / Rerank / Stage Timings / ES Query` 重新组织成了:
  28 +- `Query Context`
  29 +- `Ranking Funnel`
  30 +- `Timing Breakdown`
  31 +- `Raw Payloads`
  32 +
  33 +其中 `Ranking Funnel` 现在按阶段卡片展示:
  34 +- `ES Recall`
  35 +- `Coarse Rank`
  36 +- `Fine Rank`
  37 +- `Final Rerank`
  38 +- `Page Return`
  39 +
  40 +每张卡都尽量只放调参最关键的信息:输入/输出条数、模板、service URL、top_n、fusion/meta。这样你看漏斗收缩、看每段服务表现、看融合参数会更直接。
  41 +
  42 +2. 单条结果漏斗
  43 +在结果卡片右侧的 debug 区,我改成了真正的单条漏斗卡片,按“分数结果”和“公式分解”分开看:
  44 +- 顶部先看 `Initial Rank / Final Rank / Rank Delta`
  45 +- 再看阶段分数:`ES / ES Norm / Coarse / Fine / Rerank / Fused`
  46 +- 再看融合因子:`coarse_text_factor / coarse_knn_factor / text_factor / knn_factor / fine_factor / rerank_factor`
  47 +- 再看信号拆解:`text_source / text_translation / text_primary / text_support / text_knn / image_knn`
  48 +
  49 +同时保留了可展开的:
  50 +- `Rerank Input`
  51 +- `Selected SKU`
  52 +- `Matched Queries`
  53 +- `Multilingual Fields`
  54 +
  55 +为了支撑前端,我顺手在 [search/searcher.py](/data/saas-search/search/searcher.py) 里补了单条 `coarse_score / coarse_text_factor / coarse_knn_factor` 到 `per_result`,这样单条卡片现在能把粗排也完整展示出来。
  56 +
  57 +样式上在 [frontend/static/css/style.css](/data/saas-search/frontend/static/css/style.css) 加了阶段卡片、分数 pill、时延条形图和移动端适配。JS 语法我用 `node --check frontend/static/js/app.js` 过了,后端补充字段也用 `./.venv/bin/python -m py_compile search/searcher.py` 过了。
  58 +
  59 +如果你愿意,我下一步可以继续把这个视图再往“调参工作台”方向推一层,比如再加:
  60 +- 漏斗阶段间的 drop rate
  61 +- 每条结果的 rank jump 高亮
  62 +- 批量对比 top N 结果里某个因子分布
  63 +- fusion 参数和结果联动的可视化摘要
  64 +
  65 +
  66 +
  67 +
  68 +
  69 +
  70 +
  71 +
  72 +我:
  73 +1. Fine Rank环节的信息都是N/A,是没有配置吗。fine rank是使用bge-reranker,复用当前reranker模型的代码,但是需要单独起一个服务、单独加载一个模型。
  74 +2. Ranking Funnel、Fusion Factors、Signal Breakdown
  75 +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。
  76 +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息
  77 +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。
  78 +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。
  79 +reranker:类似
  80 +
  81 +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。
  82 +
  83 +
  84 +
  85 +
  86 +
  87 +
  88 +codex:
  89 +`Fine Rank` 之前显示 `N/A`,根因不是前端单纯没写,而是后端原来只有“精排成功拿到分数”时才会填那块 debug;一旦 `fine` 服务没起、没配 `service_url`、调用失败,前端只能看到空值。现在我把这层补齐了:
  90 +
  91 +- 顶层 `debug_info.fine_rank` 现在会始终带 `status / service_profile / service_url / input_window / output_window / query_template / doc_template`
  92 +- 失败时会带 `error`
  93 +- 成功时再补 `docs / top_n / meta`
  94 +- 所以你现在能区分:
  95 + - `disabled`
  96 + - `pending`
  97 + - `ok`
  98 + - `failed_no_scores`
  99 + - `failed`
  100 +
  101 +也就是说,`fine rank` 本身仍然需要你按设计单独起一个 `bge-reranker` 实例,这一层代码已经按“独立服务 + 共用 reranker 代码”的方式接好了,但服务本身还是要单独启动。
  102 +
  103 +这轮我也把调试展示做了结构化重构,不再继续在旧视图上打补丁。现在是按阶段组织信息:
  104 +
  105 +- 整体漏斗:
  106 + - `ES Recall`
  107 + - `Coarse Rank`
  108 + - `Fine Rank`
  109 + - `Final Rerank`
  110 + - `Page Return`
  111 +- 单条结果:
  112 + - 用统一的 `ranking_funnel` 结构来表达每个阶段的
  113 + - `rank`
  114 + - `delta`
  115 + - `score`
  116 + - 关键中间量
  117 + - 关键 factor
  118 + - 输入 payload
  119 +
  120 +具体改动在:
  121 +- [search/searcher.py](/data/saas-search/search/searcher.py)
  122 +- [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js)
  123 +- [frontend/static/css/style.css](/data/saas-search/frontend/static/css/style.css)
  124 +
  125 +单条结果现在按 stage card 展示,而不是把 `Ranking Funnel / Fusion Factors / Signal Breakdown` 拆成三块平铺,这样你可以顺着 ES -> 粗排 -> 精排 -> rerank 看每段的输入、过程、输出和位次变化,更适合调融合公式。
  126 +
  127 +检查我也跑过了:
  128 +- `node --check frontend/static/js/app.js`
  129 +- `./.venv/bin/python -m py_compile search/searcher.py`
  130 +- `./.venv/bin/pytest -q tests/test_search_rerank_window.py`
  131 +
  132 +都通过了。
  133 +
  134 +下一步如果你要,我建议直接继续做两件事中的一个:
  135 +1. 把 `fine` 实例真正拉起来,然后我再把前端里 `status=failed/pending` 的场景跑成真实数据。
  136 +2. 在当前漏斗视图上继续加“阶段 rank 分布 / delta 高亮 / topN 因子对比”,把它做成更像调参工作台。
0 137 \ No newline at end of file
... ...
frontend/static/js/app.js
... ... @@ -498,6 +498,12 @@ function renderJsonDetails(title, payload, open = false) {
498 498 function buildProductDebugHtml({ debug, result, spuId, tenantId }) {
499 499 const resultJson = customStringify(result);
500 500 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
  501 + const funnel = debug.ranking_funnel || {};
  502 + const esStage = funnel.es_recall || {};
  503 + const coarseStage = funnel.coarse_rank || {};
  504 + const fineStage = funnel.fine_rank || {};
  505 + const rerankStage = funnel.rerank || {};
  506 + const finalPageStage = funnel.final_page || {};
501 507  
502 508 const rankSummary = renderMetricList([
503 509 { label: 'Initial Rank', value: debug.initial_rank ?? 'N/A' },
... ... @@ -507,33 +513,57 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) {
507 513 ]);
508 514  
509 515 const stageScores = renderScorePills([
510   - { label: 'ES', value: formatDebugNumber(debug.es_score), tone: 'tone-es' },
511   - { label: 'ES Norm', value: formatDebugNumber(debug.es_score_normalized), tone: 'tone-neutral' },
512   - { label: 'Coarse', value: formatDebugNumber(debug.coarse_score), tone: 'tone-coarse' },
513   - { label: 'Fine', value: formatDebugNumber(debug.fine_score), tone: 'tone-fine' },
514   - { label: 'Rerank', value: formatDebugNumber(debug.rerank_score), tone: 'tone-rerank' },
515   - { label: 'Fused', value: formatDebugNumber(debug.fused_score), tone: 'tone-final' },
  516 + { label: 'ES', value: formatDebugNumber(esStage.score ?? debug.es_score), tone: 'tone-es' },
  517 + { label: 'ES Norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized), tone: 'tone-neutral' },
  518 + { label: 'Coarse', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score), tone: 'tone-coarse' },
  519 + { label: 'Fine', value: formatDebugNumber(fineStage.score ?? debug.fine_score), tone: 'tone-fine' },
  520 + { label: 'Rerank', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score), tone: 'tone-rerank' },
  521 + { label: 'Fused', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score), tone: 'tone-final' },
516 522 ]);
517 523  
518   - const factorMetrics = renderMetricList([
519   - { label: 'coarse_text_factor', value: formatDebugNumber(debug.coarse_text_factor) },
520   - { label: 'coarse_knn_factor', value: formatDebugNumber(debug.coarse_knn_factor) },
521   - { label: 'text_factor', value: formatDebugNumber(debug.text_factor) },
522   - { label: 'knn_factor', value: formatDebugNumber(debug.knn_factor) },
523   - { label: 'fine_factor', value: formatDebugNumber(debug.fine_factor) },
524   - { label: 'rerank_factor', value: formatDebugNumber(debug.rerank_factor) },
525   - ]);
526   -
527   - const signalMetrics = renderMetricList([
528   - { label: 'text_score', value: formatDebugNumber(debug.text_score) },
529   - { label: 'text_source', value: formatDebugNumber(debug.text_source_score) },
530   - { label: 'text_translation', value: formatDebugNumber(debug.text_translation_score) },
531   - { label: 'text_primary', value: formatDebugNumber(debug.text_primary_score) },
532   - { label: 'text_support', value: formatDebugNumber(debug.text_support_score) },
533   - { label: 'knn_score', value: formatDebugNumber(debug.knn_score) },
534   - { label: 'text_knn', value: formatDebugNumber(debug.text_knn_score) },
535   - { label: 'image_knn', value: formatDebugNumber(debug.image_knn_score) },
536   - ]);
  524 + const stageGrid = `
  525 + <div class="debug-stage-grid">
  526 + ${buildStageCard('ES Recall', 'Matched queries and ES raw score', [
  527 + { label: 'rank', value: esStage.rank ?? debug.initial_rank ?? 'N/A' },
  528 + { label: 'es_score', value: formatDebugNumber(esStage.score ?? debug.es_score) },
  529 + { label: 'es_norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized) },
  530 + ], renderJsonDetails('Matched Queries', esStage.matched_queries ?? debug.matched_queries, false))}
  531 + ${buildStageCard('Coarse Rank', 'Text + vector fusion', [
  532 + { label: 'rank', value: coarseStage.rank ?? 'N/A' },
  533 + { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' },
  534 + { label: 'coarse_score', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score) },
  535 + { label: 'text_score', value: formatDebugNumber(coarseStage.text_score ?? debug.text_score) },
  536 + { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score ?? debug.text_source_score) },
  537 + { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score ?? debug.text_translation_score) },
  538 + { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score ?? debug.text_primary_score) },
  539 + { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score ?? debug.text_support_score) },
  540 + { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score ?? debug.knn_score) },
  541 + { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score ?? debug.text_knn_score) },
  542 + { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score ?? debug.image_knn_score) },
  543 + { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor ?? debug.coarse_text_factor) },
  544 + { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor ?? debug.coarse_knn_factor) },
  545 + ], renderJsonDetails('Coarse Signals', coarseStage.signals, false))}
  546 + ${buildStageCard('Fine Rank', 'Lightweight reranker output', [
  547 + { label: 'rank', value: fineStage.rank ?? 'N/A' },
  548 + { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' },
  549 + { label: 'fine_score', value: formatDebugNumber(fineStage.score ?? debug.fine_score) },
  550 + ], renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, false))}
  551 + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
  552 + { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' },
  553 + { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' },
  554 + { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) },
  555 + { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) },
  556 + { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) },
  557 + { label: 'text_source', value: formatDebugNumber(rerankStage.signals?.text_source_score ?? debug.text_source_score) },
  558 + { label: 'text_translation', value: formatDebugNumber(rerankStage.signals?.text_translation_score ?? debug.text_translation_score) },
  559 + { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) },
  560 + { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) },
  561 + { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) },
  562 + { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) },
  563 + { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) },
  564 + ], renderJsonDetails('Rerank Signals', rerankStage.signals, false))}
  565 + </div>
  566 + `;
537 567  
538 568 const titlePayload = {};
539 569 if (debug.title_multilingual) titlePayload.title = debug.title_multilingual;
... ... @@ -545,13 +575,8 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) {
545 575 <div class="product-debug-title">Ranking Funnel</div>
546 576 ${rankSummary}
547 577 ${stageScores}
548   - <div class="product-debug-subtitle">Fusion Factors</div>
549   - ${factorMetrics}
550   - <div class="product-debug-subtitle">Signal Breakdown</div>
551   - ${signalMetrics}
552   - ${renderJsonDetails('Rerank Input', debug.rerank_input, true)}
  578 + ${stageGrid}
553 579 ${renderJsonDetails('Selected SKU', debug.style_intent_sku, true)}
554   - ${renderJsonDetails('Matched Queries', debug.matched_queries, false)}
555 580 ${renderJsonDetails('Multilingual Fields', titlePayload, false)}
556 581 <div class="product-debug-actions">
557 582 <button type="button" class="product-debug-inline-result-btn"
... ... @@ -1019,9 +1044,10 @@ function buildGlobalFunnelHtml(data, debugInfo) {
1019 1044 const featureFlags = debugInfo.feature_flags || {};
1020 1045 const esResponse = debugInfo.es_response || {};
1021 1046 const esQueryContext = debugInfo.es_query_context || {};
1022   - const coarseInfo = debugInfo.coarse_rank || {};
1023   - const fineInfo = debugInfo.fine_rank || {};
1024   - const rerankInfo = debugInfo.rerank || {};
  1047 + const rankingFunnel = debugInfo.ranking_funnel || {};
  1048 + const coarseInfo = rankingFunnel.coarse_rank || debugInfo.coarse_rank || {};
  1049 + const fineInfo = rankingFunnel.fine_rank || debugInfo.fine_rank || {};
  1050 + const rerankInfo = rankingFunnel.rerank || debugInfo.rerank || {};
1025 1051 const translations = queryAnalysis.translations || {};
1026 1052  
1027 1053 const summaryHtml = `
... ... @@ -1060,14 +1086,20 @@ function buildGlobalFunnelHtml(data, debugInfo) {
1060 1086 ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')}
1061 1087 ${buildStageCard('Fine Rank', 'Lightweight reranker', [
1062 1088 { label: 'service_url', value: fineInfo.service_url || 'N/A' },
1063   - { label: 'docs', value: fineInfo.docs ?? fineInfo.top_n ?? 'N/A' },
  1089 + { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' },
  1090 + { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' },
1064 1091 { label: 'top_n', value: fineInfo.top_n ?? 'N/A' },
  1092 + { label: 'backend', value: fineInfo.backend || 'N/A' },
  1093 + { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' },
1065 1094 { label: 'query_template', value: fineInfo.query_template || 'N/A' },
1066 1095 ], fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')}
1067 1096 ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
1068 1097 { label: 'service_url', value: rerankInfo.service_url || 'N/A' },
1069   - { label: 'docs', value: rerankInfo.docs ?? 'N/A' },
  1098 + { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' },
  1099 + { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' },
1070 1100 { label: 'top_n', value: rerankInfo.top_n ?? 'N/A' },
  1101 + { label: 'backend', value: rerankInfo.backend || 'N/A' },
  1102 + { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' },
1071 1103 { label: 'query_template', value: rerankInfo.query_template || 'N/A' },
1072 1104 ], `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)}
1073 1105 ${buildStageCard('Page Return', 'Final slice returned to UI', [
... ...
reranker/README.md
... ... @@ -71,7 +71,7 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Jin
71 71 - `qwen3_transformers_packed` -> `.venv-reranker-transformers-packed`
72 72 - `qwen3_gguf` -> `.venv-reranker-gguf`
73 73 - `qwen3_gguf_06b` -> `.venv-reranker-gguf-06b`
74   -- `bge` -> `.venv-reranker-bge`
  74 +- `bge` -> `.venv-reranker`
75 75 - `dashscope_rerank` -> `.venv-reranker-dashscope`
76 76  
77 77  
... ...
reranker/config.py
... ... @@ -2,19 +2,29 @@
2 2  
3 3 from __future__ import annotations
4 4  
  5 +import os
  6 +
5 7 from config.loader import get_app_config
6 8  
7 9  
8 10 class RerankerConfig(object):
9 11 def __init__(self) -> None:
10 12 app_config = get_app_config()
11   - runtime = app_config.runtime
12 13 service = app_config.services.rerank
13   - backend = service.get_backend_config()
  14 + instance_name = str(os.getenv("RERANK_INSTANCE") or service.default_instance).strip() or service.default_instance
  15 + instance = service.get_instance(instance_name)
  16 + backend = service.get_backend_config(instance_name)
14 17 request = service.request
15 18  
16   - self.HOST = runtime.reranker_host
17   - self.PORT = runtime.reranker_port
  19 + self.INSTANCE = instance_name
  20 + self.HOST = str(os.getenv("RERANKER_HOST") or instance.host)
  21 + self.PORT = int(os.getenv("RERANKER_PORT") or instance.port)
  22 + self.BACKEND = str(os.getenv("RERANK_BACKEND") or instance.backend)
  23 + self.RUNTIME_DIR = str(
  24 + os.getenv("RERANKER_RUNTIME_DIR")
  25 + or instance.runtime_dir
  26 + or f"./.runtime/reranker/{instance_name}"
  27 + )
18 28  
19 29 self.MODEL_NAME = str(backend.get("model_name") or "Qwen/Qwen3-Reranker-0.6B")
20 30 self.DEVICE = backend.get("device")
... ...
reranker/server.py
... ... @@ -6,7 +6,7 @@ POST /rerank
6 6 Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool }
7 7 Response: { "scores": [float], "meta": {...} }
8 8  
9   -Backend selected via config: services.rerank.backend
  9 +Backend selected via config: services.rerank.instances.<name>.backend
10 10 (bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND.
11 11 """
12 12  
... ... @@ -76,14 +76,15 @@ class RerankResponse(BaseModel):
76 76 @app.on_event("startup")
77 77 def load_model() -> None:
78 78 global _reranker, _backend_name
79   - logger.info("Starting reranker service on port %s", CONFIG.PORT)
  79 + logger.info("Starting reranker service | instance=%s port=%s", CONFIG.INSTANCE, CONFIG.PORT)
80 80 try:
81   - backend_name, backend_cfg = get_rerank_backend_config()
  81 + backend_name, backend_cfg = get_rerank_backend_config(CONFIG.INSTANCE)
82 82 _backend_name = backend_name
83 83 _reranker = get_rerank_backend(backend_name, backend_cfg)
84 84 model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get("model_name", backend_name)
85 85 logger.info(
86   - "Reranker ready | backend=%s model=%s",
  86 + "Reranker ready | instance=%s backend=%s model=%s",
  87 + CONFIG.INSTANCE,
87 88 _backend_name,
88 89 model_info,
89 90 )
... ... @@ -101,6 +102,7 @@ def health() -> Dict[str, Any]:
101 102 ).get("model_name", _backend_name)
102 103 payload: Dict[str, Any] = {
103 104 "status": "ok" if _reranker is not None else "unavailable",
  105 + "instance": CONFIG.INSTANCE,
104 106 "model_loaded": _reranker is not None,
105 107 "model": model_info,
106 108 "backend": _backend_name,
... ...
scripts/lib/reranker_backend_env.sh
... ... @@ -45,7 +45,7 @@ reranker_backend_venv_dir() {
45 45 qwen3_gguf_06b) printf '%s/.venv-reranker-gguf-06b\n' "${project_root}" ;;
46 46 qwen3_transformers) printf '%s/.venv-reranker-transformers\n' "${project_root}" ;;
47 47 qwen3_transformers_packed) printf '%s/.venv-reranker-transformers-packed\n' "${project_root}" ;;
48   - bge) printf '%s/.venv-reranker-bge\n' "${project_root}" ;;
  48 + bge) printf '%s/.venv-reranker\n' "${project_root}" ;;
49 49 dashscope_rerank) printf '%s/.venv-reranker-dashscope\n' "${project_root}" ;;
50 50 *) printf '%s/.venv-reranker-%s\n' "${project_root}" "${backend}" ;;
51 51 esac
... ...
scripts/service_ctl.sh
... ... @@ -16,14 +16,45 @@ mkdir -p &quot;${LOG_DIR}&quot;
16 16 source "${PROJECT_ROOT}/scripts/lib/load_env.sh"
17 17  
18 18 CORE_SERVICES=("backend" "indexer" "frontend")
19   -OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker")
  19 +OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker" "reranker-fine")
20 20 FULL_SERVICES=("${OPTIONAL_SERVICES[@]}" "${CORE_SERVICES[@]}")
21   -STOP_ORDER_SERVICES=("frontend" "indexer" "backend" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei")
  21 +STOP_ORDER_SERVICES=("frontend" "indexer" "backend" "reranker-fine" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei")
22 22  
23 23 all_services() {
24 24 echo "${FULL_SERVICES[@]}"
25 25 }
26 26  
  27 +config_python_bin() {
  28 + if [ -x "${PROJECT_ROOT}/.venv/bin/python" ]; then
  29 + echo "${PROJECT_ROOT}/.venv/bin/python"
  30 + else
  31 + echo "${PYTHON:-python3}"
  32 + fi
  33 +}
  34 +
  35 +reranker_instance_for_service() {
  36 + local service="$1"
  37 + case "${service}" in
  38 + reranker) echo "default" ;;
  39 + reranker-fine) echo "fine" ;;
  40 + *) echo "" ;;
  41 + esac
  42 +}
  43 +
  44 +get_reranker_instance_port() {
  45 + local instance="$1"
  46 + local pybin
  47 + pybin="$(config_python_bin)"
  48 + RERANK_INSTANCE="${instance}" PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${pybin}" - <<'PY'
  49 +from config.loader import get_app_config
  50 +import os
  51 +
  52 +cfg = get_app_config().services.rerank
  53 +name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  54 +print(cfg.get_instance(name).port)
  55 +PY
  56 +}
  57 +
27 58 get_port() {
28 59 local service="$1"
29 60 case "${service}" in
... ... @@ -33,7 +64,20 @@ get_port() {
33 64 embedding) echo "${EMBEDDING_TEXT_PORT:-6005}" ;;
34 65 embedding-image) echo "${EMBEDDING_IMAGE_PORT:-6008}" ;;
35 66 translator) echo "${TRANSLATION_PORT:-6006}" ;;
36   - reranker) echo "${RERANKER_PORT:-6007}" ;;
  67 + reranker)
  68 + if [ -n "${RERANKER_PORT:-}" ]; then
  69 + echo "${RERANKER_PORT}"
  70 + else
  71 + get_reranker_instance_port "default"
  72 + fi
  73 + ;;
  74 + reranker-fine)
  75 + if [ -n "${RERANKER_FINE_PORT:-}" ]; then
  76 + echo "${RERANKER_FINE_PORT}"
  77 + else
  78 + get_reranker_instance_port "fine"
  79 + fi
  80 + ;;
37 81 tei) echo "${TEI_PORT:-8080}" ;;
38 82 cnclip) echo "${CNCLIP_PORT:-51000}" ;;
39 83 *) echo "" ;;
... ... @@ -70,6 +114,7 @@ service_start_cmd() {
70 114 embedding-image) echo "./scripts/start_embedding_image_service.sh" ;;
71 115 translator) echo "./scripts/start_translator.sh" ;;
72 116 reranker) echo "./scripts/start_reranker.sh" ;;
  117 + reranker-fine) echo "./scripts/start_reranker.sh" ;;
73 118 tei) echo "./scripts/start_tei_service.sh" ;;
74 119 cnclip) echo "./scripts/start_cnclip_service.sh" ;;
75 120 *) return 1 ;;
... ... @@ -79,7 +124,7 @@ service_start_cmd() {
79 124 service_exists() {
80 125 local service="$1"
81 126 case "${service}" in
82   - backend|indexer|frontend|embedding|embedding-image|translator|reranker|tei|cnclip) return 0 ;;
  127 + backend|indexer|frontend|embedding|embedding-image|translator|reranker|reranker-fine|tei|cnclip) return 0 ;;
83 128 *) return 1 ;;
84 129 esac
85 130 }
... ... @@ -97,7 +142,7 @@ validate_targets() {
97 142 health_path_for_service() {
98 143 local service="$1"
99 144 case "${service}" in
100   - backend|indexer|embedding|embedding-image|translator|reranker|tei) echo "/health" ;;
  145 + backend|indexer|embedding|embedding-image|translator|reranker|reranker-fine|tei) echo "/health" ;;
101 146 *) echo "" ;;
102 147 esac
103 148 }
... ... @@ -505,7 +550,7 @@ get_cnclip_flow_device() {
505 550 start_health_retries_for_service() {
506 551 local service="$1"
507 552 case "${service}" in
508   - reranker) echo 90 ;;
  553 + reranker|reranker-fine) echo 90 ;;
509 554 *) echo 30 ;;
510 555 esac
511 556 }
... ... @@ -593,9 +638,15 @@ start_one() {
593 638 return 1
594 639 fi
595 640 ;;
596   - backend|indexer|frontend|embedding|embedding-image|translator|reranker)
  641 + backend|indexer|frontend|embedding|embedding-image|translator|reranker|reranker-fine)
597 642 echo "[start] ${service}"
598   - nohup "${cmd}" >> "${lf}" 2>&1 &
  643 + local rerank_instance=""
  644 + rerank_instance="$(reranker_instance_for_service "${service}")"
  645 + if [ -n "${rerank_instance}" ]; then
  646 + nohup env RERANK_INSTANCE="${rerank_instance}" "${cmd}" >> "${lf}" 2>&1 &
  647 + else
  648 + nohup "${cmd}" >> "${lf}" 2>&1 &
  649 + fi
599 650 local pid=$!
600 651 echo "${pid}" > "${pf}"
601 652 wait_for_startup_health "${service}" "${pid}" "${lf}"
... ... @@ -673,7 +724,7 @@ stop_one() {
673 724 fi
674 725 fi
675 726  
676   - if [ "${service}" = "reranker" ]; then
  727 + if [[ "${service}" == reranker* ]] && ! service_is_running "reranker" && ! service_is_running "reranker-fine"; then
677 728 cleanup_reranker_orphans
678 729 fi
679 730 }
... ... @@ -871,7 +922,7 @@ Special targets:
871 922  
872 923 Examples:
873 924 ./scripts/service_ctl.sh up all
874   - ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker
  925 + ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker reranker-fine
875 926 ./scripts/service_ctl.sh up backend indexer frontend
876 927 ./scripts/service_ctl.sh restart
877 928 ./scripts/service_ctl.sh monitor-start all
... ...
scripts/start_reranker.sh
... ... @@ -14,9 +14,31 @@ load_env_file "${PROJECT_ROOT}/.env"
14 14 # shellcheck source=scripts/lib/reranker_backend_env.sh
15 15 source "${PROJECT_ROOT}/scripts/lib/reranker_backend_env.sh"
16 16  
17   -RERANKER_HOST="${RERANKER_HOST:-0.0.0.0}"
18   -RERANKER_PORT="${RERANKER_PORT:-6007}"
19   -RERANK_BACKEND="${RERANK_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}"
  17 +CONFIG_PYTHON="${PROJECT_ROOT}/.venv/bin/python"
  18 +if [[ ! -x "${CONFIG_PYTHON}" ]]; then
  19 + CONFIG_PYTHON="${PYTHON:-python3}"
  20 +fi
  21 +
  22 +RERANK_INSTANCE="${RERANK_INSTANCE:-default}"
  23 +
  24 +read -r INSTANCE_HOST INSTANCE_PORT INSTANCE_BACKEND INSTANCE_RUNTIME_DIR <<EOF
  25 +$(
  26 + PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${CONFIG_PYTHON}" - <<'PY'
  27 +from config.loader import get_app_config
  28 +import os
  29 +
  30 +cfg = get_app_config().services.rerank
  31 +name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  32 +instance = cfg.get_instance(name)
  33 +runtime_dir = instance.runtime_dir or f"./.runtime/reranker/{name}"
  34 +print(instance.host, instance.port, instance.backend, runtime_dir)
  35 +PY
  36 +)
  37 +EOF
  38 +
  39 +RERANKER_HOST="${RERANKER_HOST:-${INSTANCE_HOST:-0.0.0.0}}"
  40 +RERANKER_PORT="${RERANKER_PORT:-${INSTANCE_PORT:-6007}}"
  41 +RERANK_BACKEND="${RERANK_BACKEND:-${INSTANCE_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}}"
20 42 RERANKER_VENV="${RERANKER_VENV:-$(reranker_backend_venv_dir "${PROJECT_ROOT}" "${RERANK_BACKEND}")}"
21 43 PYTHON_BIN="${RERANKER_VENV}/bin/python"
22 44  
... ... @@ -27,7 +49,10 @@ if [[ ! -x "${PYTHON_BIN}" ]]; then
27 49 fi
28 50  
29 51 # Keep vLLM/triton/torch caches out of system disk.
30   -RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker}"
  52 +RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${INSTANCE_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker/${RERANK_INSTANCE}}}"
  53 +if [[ "${RERANKER_RUNTIME_DIR}" != /* ]]; then
  54 + RERANKER_RUNTIME_DIR="${PROJECT_ROOT}/${RERANKER_RUNTIME_DIR#./}"
  55 +fi
31 56 mkdir -p "${RERANKER_RUNTIME_DIR}/home" \
32 57 "${RERANKER_RUNTIME_DIR}/cache" \
33 58 "${RERANKER_RUNTIME_DIR}/config" \
... ... @@ -102,6 +127,7 @@ fi
102 127 echo "========================================"
103 128 echo "Starting Reranker Service"
104 129 echo "========================================"
  130 +echo "Instance: ${RERANK_INSTANCE}"
105 131 echo "Python: ${PYTHON_BIN}"
106 132 echo "Host: ${RERANKER_HOST}"
107 133 echo "Port: ${RERANKER_PORT}"
... ...
search/searcher.py
... ... @@ -373,6 +373,9 @@ class Searcher:
373 373  
374 374 es_score_normalization_factor: Optional[float] = None
375 375 initial_ranks_by_doc: Dict[str, int] = {}
  376 + coarse_ranks_by_doc: Dict[str, int] = {}
  377 + fine_ranks_by_doc: Dict[str, int] = {}
  378 + rerank_ranks_by_doc: Dict[str, int] = {}
376 379 coarse_debug_info: Optional[Dict[str, Any]] = None
377 380 fine_debug_info: Optional[Dict[str, Any]] = None
378 381 rerank_debug_info: Optional[Dict[str, Any]] = None
... ... @@ -626,7 +629,7 @@ class Searcher:
626 629 style_intent_decisions: Dict[str, SkuSelectionDecision] = {}
627 630 if do_rerank and in_rerank_window:
628 631 from dataclasses import asdict
629   - from config.services_config import get_rerank_service_url
  632 + from config.services_config import get_rerank_backend_config, get_rerank_service_url
630 633 from .rerank_client import coarse_resort_hits, run_lightweight_rerank, run_rerank
631 634  
632 635 rerank_query = parsed_query.text_for_rerank() if parsed_query else query
... ... @@ -642,11 +645,17 @@ class Searcher:
642 645 hits = hits[:coarse_output_window]
643 646 es_response.setdefault("hits", {})["hits"] = hits
644 647 if debug:
645   - coarse_debug_info = {
646   - "docs_in": es_fetch_size,
647   - "docs_out": len(hits),
648   - "fusion": asdict(coarse_cfg.fusion),
  648 + coarse_ranks_by_doc = {
  649 + str(hit.get("_id")): rank
  650 + for rank, hit in enumerate(hits, 1)
  651 + if hit.get("_id") is not None
649 652 }
  653 + if debug:
  654 + coarse_debug_info = {
  655 + "docs_in": es_fetch_size,
  656 + "docs_out": len(hits),
  657 + "fusion": asdict(coarse_cfg.fusion),
  658 + }
650 659 context.store_intermediate_result("coarse_rank_scores", coarse_debug)
651 660 context.logger.info(
652 661 "粗排完成 | docs_in=%s | docs_out=%s",
... ... @@ -717,12 +726,23 @@ class Searcher:
717 726 hits = hits[:fine_output_window]
718 727 es_response["hits"]["hits"] = hits
719 728 if debug:
  729 + fine_ranks_by_doc = {
  730 + str(hit.get("_id")): rank
  731 + for rank, hit in enumerate(hits, 1)
  732 + if hit.get("_id") is not None
  733 + }
  734 + fine_backend_name, fine_backend_cfg = get_rerank_backend_config(fine_cfg.service_profile)
720 735 fine_debug_info = {
  736 + "service_profile": fine_cfg.service_profile,
721 737 "service_url": get_rerank_service_url(profile=fine_cfg.service_profile),
  738 + "backend": fine_backend_name,
  739 + "model": fine_meta.get("model") if isinstance(fine_meta, dict) else None,
  740 + "backend_model_name": fine_backend_cfg.get("model_name"),
722 741 "query_template": fine_query_template,
723 742 "doc_template": fine_doc_template,
724 743 "query_text": str(fine_query_template).format_map({"query": rerank_query}),
725   - "docs": len(hits),
  744 + "docs_in": min(len(fine_scores), fine_input_window),
  745 + "docs_out": len(hits),
726 746 "top_n": fine_output_window,
727 747 "meta": fine_meta,
728 748 }
... ... @@ -768,12 +788,23 @@ class Searcher:
768 788  
769 789 if rerank_meta is not None:
770 790 if debug:
  791 + rerank_ranks_by_doc = {
  792 + str(hit.get("_id")): rank
  793 + for rank, hit in enumerate(es_response.get("hits", {}).get("hits") or [], 1)
  794 + if hit.get("_id") is not None
  795 + }
  796 + rerank_backend_name, rerank_backend_cfg = get_rerank_backend_config(rc.service_profile)
771 797 rerank_debug_info = {
  798 + "service_profile": rc.service_profile,
772 799 "service_url": get_rerank_service_url(profile=rc.service_profile),
  800 + "backend": rerank_backend_name,
  801 + "model": rerank_meta.get("model") if isinstance(rerank_meta, dict) else None,
  802 + "backend_model_name": rerank_backend_cfg.get("model_name"),
773 803 "query_template": effective_query_template,
774 804 "doc_template": effective_doc_template,
775 805 "query_text": str(effective_query_template).format_map({"query": rerank_query}),
776   - "docs": len(es_response.get("hits", {}).get("hits") or []),
  806 + "docs_in": len(final_input),
  807 + "docs_out": len(es_response.get("hits", {}).get("hits") or []),
777 808 "top_n": from_ + size,
778 809 "meta": rerank_meta,
779 810 "fusion": asdict(rc.fusion),
... ... @@ -1019,6 +1050,60 @@ class Searcher:
1019 1050 debug_entry["fine_score"] = fine_debug.get("fine_score")
1020 1051 debug_entry["rerank_input"] = fine_debug.get("rerank_input")
1021 1052  
  1053 + initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1054 + coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1055 + fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1056 + rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1057 + final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1058 +
  1059 + def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]:
  1060 + if previous_rank is None or current_rank is None:
  1061 + return None
  1062 + return previous_rank - current_rank
  1063 +
  1064 + debug_entry["ranking_funnel"] = {
  1065 + "es_recall": {
  1066 + "rank": initial_rank,
  1067 + "score": es_score,
  1068 + "normalized_score": normalized,
  1069 + "matched_queries": hit.get("matched_queries"),
  1070 + },
  1071 + "coarse_rank": {
  1072 + "rank": coarse_rank,
  1073 + "rank_change": _rank_change(initial_rank, coarse_rank),
  1074 + "score": coarse_debug.get("coarse_score") if coarse_debug else None,
  1075 + "text_score": coarse_debug.get("text_score") if coarse_debug else None,
  1076 + "knn_score": coarse_debug.get("knn_score") if coarse_debug else None,
  1077 + "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None,
  1078 + "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None,
  1079 + "signals": coarse_debug,
  1080 + },
  1081 + "fine_rank": {
  1082 + "rank": fine_rank,
  1083 + "rank_change": _rank_change(coarse_rank, fine_rank),
  1084 + "score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"),
  1085 + "rerank_input": fine_debug.get("rerank_input") if fine_debug else None,
  1086 + },
  1087 + "rerank": {
  1088 + "rank": rerank_rank,
  1089 + "rank_change": _rank_change(fine_rank, rerank_rank),
  1090 + "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"),
  1091 + "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"),
  1092 + "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"),
  1093 + "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"),
  1094 + "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"),
  1095 + "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None,
  1096 + "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None,
  1097 + "text_factor": rerank_debug.get("text_factor") if rerank_debug else None,
  1098 + "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None,
  1099 + "signals": rerank_debug,
  1100 + },
  1101 + "final_page": {
  1102 + "rank": final_rank,
  1103 + "rank_change": _rank_change(rerank_rank, final_rank),
  1104 + },
  1105 + }
  1106 +
1022 1107 if style_intent_debug:
1023 1108 debug_entry["style_intent_sku"] = style_intent_debug
1024 1109  
... ... @@ -1091,6 +1176,15 @@ class Searcher:
1091 1176 "coarse_rank": coarse_debug_info,
1092 1177 "fine_rank": fine_debug_info,
1093 1178 "rerank": rerank_debug_info,
  1179 + "ranking_funnel": {
  1180 + "es_recall": {
  1181 + "docs_out": es_fetch_size,
  1182 + "score_normalization_factor": es_score_normalization_factor,
  1183 + },
  1184 + "coarse_rank": coarse_debug_info,
  1185 + "fine_rank": fine_debug_info,
  1186 + "rerank": rerank_debug_info,
  1187 + },
1094 1188 "feature_flags": context.metadata.get('feature_flags', {}),
1095 1189 "stage_timings": {
1096 1190 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items()
... ...
tests/test_search_rerank_window.py
... ... @@ -299,6 +299,73 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
299 299 assert loaded.rerank.enabled is True
300 300  
301 301  
  302 +def test_config_loader_parses_named_rerank_instances(tmp_path: Path):
  303 + from config.loader import AppConfigLoader
  304 +
  305 + config_data = {
  306 + "es_index_name": "test_products",
  307 + "field_boosts": {"title.en": 3.0},
  308 + "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}],
  309 + "query_config": {"supported_languages": ["en"], "default_language": "en"},
  310 + "services": {
  311 + "translation": {
  312 + "service_url": "http://localhost:6005",
  313 + "timeout_sec": 3.0,
  314 + "default_model": "dummy-model",
  315 + "default_scene": "general",
  316 + "cache": {"ttl_seconds": 60, "sliding_expiration": True},
  317 + "capabilities": {
  318 + "dummy-model": {
  319 + "enabled": True,
  320 + "backend": "llm",
  321 + "model": "dummy-model",
  322 + "base_url": "http://localhost:6005/v1",
  323 + "timeout_sec": 3.0,
  324 + "use_cache": True,
  325 + }
  326 + },
  327 + },
  328 + "embedding": {
  329 + "provider": "http",
  330 + "providers": {"http": {"text_base_url": "http://localhost:6005", "image_base_url": "http://localhost:6008"}},
  331 + "backend": "tei",
  332 + "backends": {"tei": {"base_url": "http://localhost:8080", "model_id": "dummy-embedding-model"}},
  333 + },
  334 + "rerank": {
  335 + "provider": "http",
  336 + "providers": {
  337 + "http": {
  338 + "instances": {
  339 + "default": {"service_url": "http://localhost:6007/rerank"},
  340 + "fine": {"service_url": "http://localhost:6009/rerank"},
  341 + }
  342 + }
  343 + },
  344 + "default_instance": "default",
  345 + "instances": {
  346 + "default": {"port": 6007, "backend": "qwen3_vllm_score"},
  347 + "fine": {"port": 6009, "backend": "bge"},
  348 + },
  349 + "backends": {
  350 + "bge": {"model_name": "BAAI/bge-reranker-v2-m3"},
  351 + "qwen3_vllm_score": {"model_name": "Qwen/Qwen3-Reranker-0.6B"},
  352 + },
  353 + },
  354 + },
  355 + "spu_config": {"enabled": False},
  356 + "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []},
  357 + }
  358 + config_path = tmp_path / "config.yaml"
  359 + config_path.write_text(yaml.safe_dump(config_data), encoding="utf-8")
  360 +
  361 + loader = AppConfigLoader(config_file=config_path)
  362 + loaded = loader.load(validate=False)
  363 +
  364 + assert loaded.services.rerank.default_instance == "default"
  365 + assert loaded.services.rerank.get_instance("fine").port == 6009
  366 + assert loaded.services.rerank.get_instance("fine").backend == "bge"
  367 +
  368 +
302 369 def test_searcher_reranks_top_window_by_default(monkeypatch):
303 370 es_client = _FakeESClient()
304 371 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client)
... ... @@ -354,6 +421,82 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
354 421 assert result.results[0].brief == "brief-20"
355 422  
356 423  
  424 +def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch):
  425 + es_client = _FakeESClient(total_hits=120)
  426 + searcher = _build_searcher(_build_search_config(rerank_enabled=True, rerank_window=20), es_client)
  427 + context = create_request_context(reqid="t-debug", uid="u-debug")
  428 +
  429 + monkeypatch.setattr(
  430 + "search.searcher.get_tenant_config_loader",
  431 + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
  432 + )
  433 +
  434 + def _fake_run_lightweight_rerank(**kwargs):
  435 + hits = kwargs["es_hits"]
  436 + scores = []
  437 + debug_rows = []
  438 + for idx, hit in enumerate(hits):
  439 + score = float(len(hits) - idx)
  440 + hit["_fine_score"] = score
  441 + scores.append(score)
  442 + debug_rows.append(
  443 + {
  444 + "doc_id": hit["_id"],
  445 + "fine_score": score,
  446 + "rerank_input": {"doc_preview": f"product-{hit['_id']}"},
  447 + }
  448 + )
  449 + hits.sort(key=lambda item: item["_fine_score"], reverse=True)
  450 + return scores, {"model": "fine-bge"}, debug_rows
  451 +
  452 + def _fake_run_rerank(**kwargs):
  453 + hits = kwargs["es_response"]["hits"]["hits"]
  454 + fused_debug = []
  455 + for idx, hit in enumerate(hits):
  456 + hit["_rerank_score"] = 10.0 - idx
  457 + hit["_fused_score"] = 100.0 - idx
  458 + hit["_text_score"] = hit.get("_score", 0.0)
  459 + hit["_knn_score"] = 0.0
  460 + fused_debug.append(
  461 + {
  462 + "doc_id": hit["_id"],
  463 + "rerank_score": hit["_rerank_score"],
  464 + "fine_score": hit.get("_fine_score"),
  465 + "text_score": hit["_text_score"],
  466 + "knn_score": 0.0,
  467 + "rerank_factor": 1.0,
  468 + "fine_factor": 1.0,
  469 + "text_factor": 1.0,
  470 + "knn_factor": 1.0,
  471 + "fused_score": hit["_fused_score"],
  472 + "matched_queries": {},
  473 + "rerank_input": {"doc_preview": f"product-{hit['_id']}"},
  474 + }
  475 + )
  476 + return kwargs["es_response"], {"model": "final-reranker"}, fused_debug
  477 +
  478 + monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _fake_run_lightweight_rerank)
  479 + monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
  480 +
  481 + result = searcher.search(
  482 + query="toy",
  483 + tenant_id="162",
  484 + from_=0,
  485 + size=5,
  486 + context=context,
  487 + enable_rerank=True,
  488 + debug=True,
  489 + )
  490 +
  491 + assert result.debug_info["ranking_funnel"]["fine_rank"]["docs_out"] == 80
  492 + assert result.debug_info["ranking_funnel"]["rerank"]["docs_out"] == 20
  493 + first = result.debug_info["per_result"][0]["ranking_funnel"]
  494 + assert first["es_recall"]["rank"] is not None
  495 + assert first["coarse_rank"]["score"] is not None
  496 + assert first["fine_rank"]["score"] is not None
  497 + assert first["rerank"]["rerank_score"] is not None
  498 +
  499 +
357 500 def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch):
358 501 es_client = _FakeESClient()
359 502 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client)
... ...