Commit daa2690ba5559d5c749855fe500db9ae2dc3a4f4
1 parent
16d28bf8
漏斗参数调优&呈现优化
Showing
16 changed files
with
765 additions
and
101 deletions
Show diff stats
config/config.yaml
| ... | ... | @@ -421,19 +421,31 @@ services: |
| 421 | 421 | normalize_embeddings: true |
| 422 | 422 | rerank: |
| 423 | 423 | provider: "http" |
| 424 | - base_url: "http://127.0.0.1:6007" | |
| 425 | 424 | providers: |
| 426 | 425 | http: |
| 427 | - base_url: "http://127.0.0.1:6007" | |
| 428 | - service_url: "http://127.0.0.1:6007/rerank" | |
| 429 | - service_urls: | |
| 430 | - default: "http://127.0.0.1:6007/rerank" | |
| 431 | - fine: "http://127.0.0.1:6009/rerank" | |
| 426 | + instances: | |
| 427 | + default: | |
| 428 | + base_url: "http://127.0.0.1:6007" | |
| 429 | + service_url: "http://127.0.0.1:6007/rerank" | |
| 430 | + fine: | |
| 431 | + base_url: "http://127.0.0.1:6009" | |
| 432 | + service_url: "http://127.0.0.1:6009/rerank" | |
| 432 | 433 | request: |
| 433 | 434 | max_docs: 1000 |
| 434 | 435 | normalize: true |
| 435 | - # 服务内后端(reranker 进程启动时读取) | |
| 436 | - backend: "qwen3_vllm_score" # bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank | |
| 436 | + default_instance: "default" | |
| 437 | + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。 | |
| 438 | + instances: | |
| 439 | + default: | |
| 440 | + host: "0.0.0.0" | |
| 441 | + port: 6007 | |
| 442 | + backend: "qwen3_vllm_score" | |
| 443 | + runtime_dir: "./.runtime/reranker/default" | |
| 444 | + fine: | |
| 445 | + host: "0.0.0.0" | |
| 446 | + port: 6009 | |
| 447 | + backend: "bge" | |
| 448 | + runtime_dir: "./.runtime/reranker/fine" | |
| 437 | 449 | backends: |
| 438 | 450 | bge: |
| 439 | 451 | model_name: "BAAI/bge-reranker-v2-m3" | ... | ... |
config/env_config.py
| ... | ... | @@ -10,6 +10,7 @@ from __future__ import annotations |
| 10 | 10 | from typing import Any, Dict |
| 11 | 11 | |
| 12 | 12 | from config.loader import get_app_config |
| 13 | +from config.services_config import get_rerank_service_url | |
| 13 | 14 | |
| 14 | 15 | |
| 15 | 16 | def _app(): |
| ... | ... | @@ -92,10 +93,7 @@ INDEXER_BASE_URL = ( |
| 92 | 93 | ) |
| 93 | 94 | EMBEDDING_TEXT_SERVICE_URL = _app().services.embedding.get_provider_config().get("text_base_url") |
| 94 | 95 | EMBEDDING_IMAGE_SERVICE_URL = _app().services.embedding.get_provider_config().get("image_base_url") |
| 95 | -RERANKER_SERVICE_URL = ( | |
| 96 | - _app().services.rerank.get_provider_config().get("service_url") | |
| 97 | - or _app().services.rerank.get_provider_config().get("base_url") | |
| 98 | -) | |
| 96 | +RERANKER_SERVICE_URL = get_rerank_service_url() | |
| 99 | 97 | |
| 100 | 98 | |
| 101 | 99 | def get_es_config() -> Dict[str, Any]: | ... | ... |
config/loader.py
| ... | ... | @@ -43,6 +43,7 @@ from config.schema import ( |
| 43 | 43 | RerankConfig, |
| 44 | 44 | RerankFusionConfig, |
| 45 | 45 | RerankServiceConfig, |
| 46 | + RerankServiceInstanceConfig, | |
| 46 | 47 | RuntimeConfig, |
| 47 | 48 | SearchConfig, |
| 48 | 49 | SecretsConfig, |
| ... | ... | @@ -615,13 +616,61 @@ class AppConfigLoader: |
| 615 | 616 | rerank_providers = dict(rerank_raw.get("providers") or {}) |
| 616 | 617 | if rerank_provider not in rerank_providers: |
| 617 | 618 | raise ConfigurationError(f"services.rerank.providers.{rerank_provider} must be configured") |
| 618 | - rerank_backend = str(rerank_raw.get("backend") or "").strip().lower() | |
| 619 | 619 | rerank_backends = { |
| 620 | 620 | str(key).strip().lower(): dict(value) |
| 621 | 621 | for key, value in dict(rerank_raw.get("backends") or {}).items() |
| 622 | 622 | } |
| 623 | - if rerank_backend not in rerank_backends: | |
| 624 | - raise ConfigurationError(f"services.rerank.backends.{rerank_backend} must be configured") | |
| 623 | + default_instance = str(rerank_raw.get("default_instance") or "default").strip() or "default" | |
| 624 | + raw_instances = rerank_raw.get("instances") if isinstance(rerank_raw.get("instances"), dict) else {} | |
| 625 | + if not raw_instances: | |
| 626 | + legacy_backend = str(rerank_raw.get("backend") or "").strip().lower() | |
| 627 | + if legacy_backend not in rerank_backends: | |
| 628 | + raise ConfigurationError(f"services.rerank.backends.{legacy_backend} must be configured") | |
| 629 | + provider_cfg = dict(rerank_providers.get(rerank_provider) or {}) | |
| 630 | + raw_instances = { | |
| 631 | + default_instance: { | |
| 632 | + "host": "0.0.0.0", | |
| 633 | + "port": 6007, | |
| 634 | + "backend": legacy_backend, | |
| 635 | + "base_url": provider_cfg.get("base_url"), | |
| 636 | + "service_url": provider_cfg.get("service_url"), | |
| 637 | + } | |
| 638 | + } | |
| 639 | + rerank_instances = {} | |
| 640 | + for instance_name, instance_raw in raw_instances.items(): | |
| 641 | + if not isinstance(instance_raw, dict): | |
| 642 | + raise ConfigurationError(f"services.rerank.instances.{instance_name} must be a mapping") | |
| 643 | + normalized_instance_name = str(instance_name).strip() | |
| 644 | + backend_name = str(instance_raw.get("backend") or "").strip().lower() | |
| 645 | + if backend_name not in rerank_backends: | |
| 646 | + raise ConfigurationError( | |
| 647 | + f"services.rerank.instances.{normalized_instance_name}.backend must reference configured services.rerank.backends" | |
| 648 | + ) | |
| 649 | + port = int(instance_raw.get("port", 6007)) | |
| 650 | + rerank_instances[normalized_instance_name] = RerankServiceInstanceConfig( | |
| 651 | + host=str(instance_raw.get("host") or "0.0.0.0"), | |
| 652 | + port=port, | |
| 653 | + backend=backend_name, | |
| 654 | + runtime_dir=( | |
| 655 | + str(v) | |
| 656 | + if (v := instance_raw.get("runtime_dir")) not in (None, "") | |
| 657 | + else None | |
| 658 | + ), | |
| 659 | + base_url=( | |
| 660 | + str(v).rstrip("/") | |
| 661 | + if (v := instance_raw.get("base_url")) not in (None, "") | |
| 662 | + else None | |
| 663 | + ), | |
| 664 | + service_url=( | |
| 665 | + str(v).rstrip("/") | |
| 666 | + if (v := instance_raw.get("service_url")) not in (None, "") | |
| 667 | + else None | |
| 668 | + ), | |
| 669 | + ) | |
| 670 | + if default_instance not in rerank_instances: | |
| 671 | + raise ConfigurationError( | |
| 672 | + f"services.rerank.default_instance={default_instance!r} must exist in services.rerank.instances" | |
| 673 | + ) | |
| 625 | 674 | rerank_request = dict(rerank_raw.get("request") or {}) |
| 626 | 675 | rerank_request.setdefault("max_docs", 1000) |
| 627 | 676 | rerank_request.setdefault("normalize", True) |
| ... | ... | @@ -629,7 +678,8 @@ class AppConfigLoader: |
| 629 | 678 | rerank_config = RerankServiceConfig( |
| 630 | 679 | provider=rerank_provider, |
| 631 | 680 | providers=rerank_providers, |
| 632 | - backend=rerank_backend, | |
| 681 | + default_instance=default_instance, | |
| 682 | + instances=rerank_instances, | |
| 633 | 683 | backends=rerank_backends, |
| 634 | 684 | request=rerank_request, |
| 635 | 685 | ) |
| ... | ... | @@ -754,8 +804,19 @@ class AppConfigLoader: |
| 754 | 804 | errors.append("services.embedding.providers.<provider>.image_base_url is required") |
| 755 | 805 | |
| 756 | 806 | rerank_provider_cfg = app_config.services.rerank.get_provider_config() |
| 757 | - if not rerank_provider_cfg.get("service_url") and not rerank_provider_cfg.get("base_url"): | |
| 758 | - errors.append("services.rerank.providers.<provider>.service_url or base_url is required") | |
| 807 | + provider_instances = rerank_provider_cfg.get("instances") | |
| 808 | + if not isinstance(provider_instances, dict): | |
| 809 | + provider_instances = {} | |
| 810 | + for instance_name in app_config.services.rerank.instances: | |
| 811 | + instance_cfg = app_config.services.rerank.get_instance(instance_name) | |
| 812 | + provider_instance_cfg = provider_instances.get(instance_name) if isinstance(provider_instances, dict) else None | |
| 813 | + has_instance_url = False | |
| 814 | + if isinstance(provider_instance_cfg, dict): | |
| 815 | + has_instance_url = bool(provider_instance_cfg.get("service_url") or provider_instance_cfg.get("base_url")) | |
| 816 | + if not has_instance_url and not instance_cfg.service_url and not instance_cfg.base_url: | |
| 817 | + errors.append( | |
| 818 | + f"services.rerank instance {instance_name!r} must define service_url/base_url either under providers.<provider>.instances or services.rerank.instances" | |
| 819 | + ) | |
| 759 | 820 | |
| 760 | 821 | if errors: |
| 761 | 822 | raise ConfigurationError("Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors)) | ... | ... |
config/schema.py
| ... | ... | @@ -236,20 +236,41 @@ class EmbeddingServiceConfig: |
| 236 | 236 | |
| 237 | 237 | |
| 238 | 238 | @dataclass(frozen=True) |
| 239 | +class RerankServiceInstanceConfig: | |
| 240 | + """One named reranker service instance.""" | |
| 241 | + | |
| 242 | + host: str = "0.0.0.0" | |
| 243 | + port: int = 6007 | |
| 244 | + backend: str = "qwen3_vllm_score" | |
| 245 | + runtime_dir: Optional[str] = None | |
| 246 | + base_url: Optional[str] = None | |
| 247 | + service_url: Optional[str] = None | |
| 248 | + | |
| 249 | + | |
| 250 | +@dataclass(frozen=True) | |
| 239 | 251 | class RerankServiceConfig: |
| 240 | 252 | """Reranker service configuration.""" |
| 241 | 253 | |
| 242 | 254 | provider: str |
| 243 | 255 | providers: Dict[str, Any] |
| 244 | - backend: str | |
| 256 | + default_instance: str | |
| 257 | + instances: Dict[str, RerankServiceInstanceConfig] | |
| 245 | 258 | backends: Dict[str, Dict[str, Any]] |
| 246 | 259 | request: Dict[str, Any] |
| 247 | 260 | |
| 248 | 261 | def get_provider_config(self) -> Dict[str, Any]: |
| 249 | 262 | return dict(self.providers.get(self.provider, {}) or {}) |
| 250 | 263 | |
| 251 | - def get_backend_config(self) -> Dict[str, Any]: | |
| 252 | - return dict(self.backends.get(self.backend, {}) or {}) | |
| 264 | + def get_instance(self, name: Optional[str] = None) -> RerankServiceInstanceConfig: | |
| 265 | + instance_name = str(name or self.default_instance).strip() or self.default_instance | |
| 266 | + instance = self.instances.get(instance_name) | |
| 267 | + if instance is None: | |
| 268 | + raise KeyError(f"Unknown rerank service instance: {instance_name!r}") | |
| 269 | + return instance | |
| 270 | + | |
| 271 | + def get_backend_config(self, name: Optional[str] = None) -> Dict[str, Any]: | |
| 272 | + instance = self.get_instance(name) | |
| 273 | + return dict(self.backends.get(instance.backend, {}) or {}) | |
| 253 | 274 | |
| 254 | 275 | |
| 255 | 276 | @dataclass(frozen=True) | ... | ... |
config/services_config.py
| ... | ... | @@ -11,7 +11,12 @@ import os |
| 11 | 11 | from typing import Any, Dict, Tuple |
| 12 | 12 | |
| 13 | 13 | from config.loader import get_app_config |
| 14 | -from config.schema import EmbeddingServiceConfig, RerankServiceConfig, TranslationServiceConfig | |
| 14 | +from config.schema import ( | |
| 15 | + EmbeddingServiceConfig, | |
| 16 | + RerankServiceConfig, | |
| 17 | + RerankServiceInstanceConfig, | |
| 18 | + TranslationServiceConfig, | |
| 19 | +) | |
| 15 | 20 | |
| 16 | 21 | |
| 17 | 22 | def get_translation_config() -> Dict[str, Any]: |
| ... | ... | @@ -26,6 +31,16 @@ def get_rerank_config() -> RerankServiceConfig: |
| 26 | 31 | return get_app_config().services.rerank |
| 27 | 32 | |
| 28 | 33 | |
| 34 | +def get_rerank_instance_config(profile: str | None = None) -> RerankServiceInstanceConfig: | |
| 35 | + cfg = get_app_config().services.rerank | |
| 36 | + instance_name = str( | |
| 37 | + profile | |
| 38 | + or os.getenv("RERANK_INSTANCE") | |
| 39 | + or cfg.default_instance | |
| 40 | + ).strip() or cfg.default_instance | |
| 41 | + return cfg.get_instance(instance_name) | |
| 42 | + | |
| 43 | + | |
| 29 | 44 | def get_translation_base_url() -> str: |
| 30 | 45 | return get_app_config().services.translation.endpoint |
| 31 | 46 | |
| ... | ... | @@ -60,31 +75,56 @@ def get_embedding_image_backend_config() -> Tuple[str, Dict[str, Any]]: |
| 60 | 75 | return cfg.image_backend, cfg.get_image_backend_config() |
| 61 | 76 | |
| 62 | 77 | |
| 63 | -def get_rerank_backend_config() -> Tuple[str, Dict[str, Any]]: | |
| 78 | +def get_rerank_backend_config(profile: str | None = None) -> Tuple[str, Dict[str, Any]]: | |
| 64 | 79 | cfg = get_app_config().services.rerank |
| 65 | - backend = str(os.getenv("RERANK_BACKEND") or cfg.backend).strip() | |
| 66 | - if backend != cfg.backend: | |
| 80 | + instance = get_rerank_instance_config(profile) | |
| 81 | + backend = str(os.getenv("RERANK_BACKEND") or instance.backend).strip() | |
| 82 | + if backend != instance.backend: | |
| 67 | 83 | backend_cfg = cfg.backends.get(backend) |
| 68 | 84 | if backend_cfg is None: |
| 69 | 85 | raise ValueError(f"Unknown rerank backend override from RERANK_BACKEND: {backend!r}") |
| 70 | 86 | return backend, dict(backend_cfg) |
| 71 | - return cfg.backend, cfg.get_backend_config() | |
| 87 | + return instance.backend, cfg.get_backend_config(profile) | |
| 72 | 88 | |
| 73 | 89 | |
| 74 | 90 | def get_rerank_base_url(profile: str | None = None) -> str: |
| 75 | - provider_cfg = get_app_config().services.rerank.get_provider_config() | |
| 91 | + cfg = get_app_config().services.rerank | |
| 92 | + instance = get_rerank_instance_config(profile) | |
| 93 | + provider_cfg = cfg.get_provider_config() | |
| 94 | + profile_name = str(profile or os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance | |
| 95 | + | |
| 76 | 96 | base = None |
| 77 | - profile_name = str(profile).strip() if profile else "" | |
| 78 | - if profile_name: | |
| 79 | - service_urls = provider_cfg.get("service_urls") | |
| 80 | - if isinstance(service_urls, dict): | |
| 81 | - base = service_urls.get(profile_name) | |
| 97 | + provider_instances = provider_cfg.get("instances") | |
| 98 | + if isinstance(provider_instances, dict): | |
| 99 | + instance_provider_cfg = provider_instances.get(profile_name) | |
| 100 | + if isinstance(instance_provider_cfg, dict): | |
| 101 | + base = instance_provider_cfg.get("base_url") or instance_provider_cfg.get("service_url") | |
| 102 | + if not base: | |
| 103 | + base = instance.base_url or instance.service_url | |
| 82 | 104 | if not base: |
| 83 | 105 | base = provider_cfg.get("service_url") or provider_cfg.get("base_url") |
| 84 | 106 | if not base: |
| 85 | 107 | raise ValueError("Rerank service URL is not configured") |
| 86 | - return str(base).rstrip("/") | |
| 108 | + base = str(base).rstrip("/") | |
| 109 | + if base.endswith("/rerank"): | |
| 110 | + base = base[: -len("/rerank")] | |
| 111 | + return base | |
| 87 | 112 | |
| 88 | 113 | |
| 89 | 114 | def get_rerank_service_url(profile: str | None = None) -> str: |
| 90 | - return get_rerank_base_url(profile=profile) | |
| 115 | + cfg = get_app_config().services.rerank | |
| 116 | + instance = get_rerank_instance_config(profile) | |
| 117 | + provider_cfg = cfg.get_provider_config() | |
| 118 | + profile_name = str(profile or os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance | |
| 119 | + | |
| 120 | + service_url = None | |
| 121 | + provider_instances = provider_cfg.get("instances") | |
| 122 | + if isinstance(provider_instances, dict): | |
| 123 | + instance_provider_cfg = provider_instances.get(profile_name) | |
| 124 | + if isinstance(instance_provider_cfg, dict): | |
| 125 | + service_url = instance_provider_cfg.get("service_url") | |
| 126 | + if not service_url: | |
| 127 | + service_url = instance.service_url | |
| 128 | + if not service_url: | |
| 129 | + service_url = f"{get_rerank_base_url(profile=profile)}/rerank" | |
| 130 | + return str(service_url).rstrip("/") | ... | ... |
| ... | ... | @@ -0,0 +1,38 @@ |
| 1 | +这是上一轮检索效果优化的需求: | |
| 2 | +参考 | |
| 3 | +searcher.py | |
| 4 | +rerank_client.py | |
| 5 | +schema.py | |
| 6 | +es_query_builder.py | |
| 7 | +config.yaml | |
| 8 | +相关性检索优化说明.md | |
| 9 | + | |
| 10 | +在ES返回到rerank期间增加一轮粗排+一轮精排。 | |
| 11 | +1. ES召回,600 | |
| 12 | +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。 | |
| 13 | +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。 | |
| 14 | +4. 精排:240->80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口和服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。 | |
| 15 | +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。 | |
| 16 | +测试时,使用跟我同样的环境,执行 ./scripts/service_ctl.sh restart reranker backend 重启相关服务进行测试 | |
| 17 | + | |
| 18 | +你已经完成了一般修改,已把三段排序链路接上了,主改动在 search/searcher.py、search/rerank_client.py、config/schema.py、config/loader.py、config/services_config.py 和 config/config.yaml。 | |
| 19 | + | |
| 20 | +现在链路是: | |
| 21 | +ES 拉取 coarse_rank.input_window 条 -> 粗排按 text/knn 融合裁到 coarse_rank.output_window -> 再做 SKU 选择和 title suffix -> 精排调用轻量 reranker 裁到 fine_rank.output_window -> 最终重排调用现有 reranker,并在最终融合里加入 fine_score。同时把 reranker client/provider 改成了按 service_profile 选不同 service_url,这样 fine/final 可以共用同一套服务代码,只起不同实例。 | |
| 22 | + | |
| 23 | +你是用了./.venv/bin/pytest -q tests/test_rerank_client.py tests/test_search_rerank_window.py 进行验证,结果是 18 passed。也按要求尝试了 ./scripts/service_ctl.sh restart reranker backend 做联调,但当前环境里只有 6007 的现有 reranker,配置里的 fine 端口 6009 还没对应实例,而且服务重启后进程状态不太稳定,没能完成完整线上链路验证。当前最关键的落地前置是把轻量 bge 实例补起来,再用同样脚本重启联调。 | |
| 24 | + | |
| 25 | +因此,需要回顾这个漏斗的代码,进行进一步的优化: | |
| 26 | +1. Fine Rank环节的信息都是N/A。fine 实例也需要在config.yaml中体现,并且需要被service_ctl.sh管理,参考现在是怎么管理reranker的。fine和reranker分别调用哪个模型(两者代码功能相同,只是启动不同的实例),请你深度思考后决定。这里需要参考当前代码、根据实际需要(原来只有一个reranker、现在多了一个精排,也要调用一个重排模型)进行必要的部分重构。 | |
| 27 | +2. Ranking Funnel、Fusion Factors、Signal Breakdown | |
| 28 | +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。 | |
| 29 | +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息 | |
| 30 | +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。 | |
| 31 | +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。 | |
| 32 | +reranker:类似 | |
| 33 | + | |
| 34 | +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。 | |
| 35 | +特别是1关于将现在的单独的reranker服务,在不增加代码复杂度的情况下,如何可以通过配置启动多个实例、各自可以配置选择的模型等相关参数、分别服务于精排和重排,是设计的重中之重。这里需要思考配置如何拆分,需要多读相关代码、思考最佳实践。 | |
| 36 | + | |
| 37 | + | |
| 38 | + | ... | ... |
| ... | ... | @@ -0,0 +1,136 @@ |
| 1 | +我: | |
| 2 | +进行一轮检索效果优化: | |
| 3 | +参考 | |
| 4 | +[searcher.py](search/searcher.py) | |
| 5 | +[rerank_client.py](search/rerank_client.py) | |
| 6 | +[schema.py](config/schema.py) | |
| 7 | +[es_query_builder.py](search/es_query_builder.py) | |
| 8 | +[config.yaml](config/config.yaml) | |
| 9 | +[相关性检索优化说明.md](docs/相关性检索优化说明.md) | |
| 10 | + | |
| 11 | +在ES返回到rerank期间增加一轮粗排+一轮精排。 | |
| 12 | +1. ES召回,600 | |
| 13 | +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。 | |
| 14 | +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。 | |
| 15 | +4. 精排:240->80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口和服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。 | |
| 16 | +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。 | |
| 17 | +测试时,使用跟我同样的环境,执行 ./scripts/service_ctl.sh restart reranker backend 重启相关服务进行测试 | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | +codex: | |
| 24 | +前端这轮我把调试展示重构成了“两层漏斗”: | |
| 25 | + | |
| 26 | +1. 整体漏斗 | |
| 27 | +在 [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js) 里把原来散着的 `Query Analysis / Rerank / Stage Timings / ES Query` 重新组织成了: | |
| 28 | +- `Query Context` | |
| 29 | +- `Ranking Funnel` | |
| 30 | +- `Timing Breakdown` | |
| 31 | +- `Raw Payloads` | |
| 32 | + | |
| 33 | +其中 `Ranking Funnel` 现在按阶段卡片展示: | |
| 34 | +- `ES Recall` | |
| 35 | +- `Coarse Rank` | |
| 36 | +- `Fine Rank` | |
| 37 | +- `Final Rerank` | |
| 38 | +- `Page Return` | |
| 39 | + | |
| 40 | +每张卡都尽量只放调参最关键的信息:输入/输出条数、模板、service URL、top_n、fusion/meta。这样你看漏斗收缩、看每段服务表现、看融合参数会更直接。 | |
| 41 | + | |
| 42 | +2. 单条结果漏斗 | |
| 43 | +在结果卡片右侧的 debug 区,我改成了真正的单条漏斗卡片,按“分数结果”和“公式分解”分开看: | |
| 44 | +- 顶部先看 `Initial Rank / Final Rank / Rank Delta` | |
| 45 | +- 再看阶段分数:`ES / ES Norm / Coarse / Fine / Rerank / Fused` | |
| 46 | +- 再看融合因子:`coarse_text_factor / coarse_knn_factor / text_factor / knn_factor / fine_factor / rerank_factor` | |
| 47 | +- 再看信号拆解:`text_source / text_translation / text_primary / text_support / text_knn / image_knn` | |
| 48 | + | |
| 49 | +同时保留了可展开的: | |
| 50 | +- `Rerank Input` | |
| 51 | +- `Selected SKU` | |
| 52 | +- `Matched Queries` | |
| 53 | +- `Multilingual Fields` | |
| 54 | + | |
| 55 | +为了支撑前端,我顺手在 [search/searcher.py](/data/saas-search/search/searcher.py) 里补了单条 `coarse_score / coarse_text_factor / coarse_knn_factor` 到 `per_result`,这样单条卡片现在能把粗排也完整展示出来。 | |
| 56 | + | |
| 57 | +样式上在 [frontend/static/css/style.css](/data/saas-search/frontend/static/css/style.css) 加了阶段卡片、分数 pill、时延条形图和移动端适配。JS 语法我用 `node --check frontend/static/js/app.js` 过了,后端补充字段也用 `./.venv/bin/python -m py_compile search/searcher.py` 过了。 | |
| 58 | + | |
| 59 | +如果你愿意,我下一步可以继续把这个视图再往“调参工作台”方向推一层,比如再加: | |
| 60 | +- 漏斗阶段间的 drop rate | |
| 61 | +- 每条结果的 rank jump 高亮 | |
| 62 | +- 批量对比 top N 结果里某个因子分布 | |
| 63 | +- fusion 参数和结果联动的可视化摘要 | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | +我: | |
| 73 | +1. Fine Rank环节的信息都是N/A,是没有配置吗。fine rank是使用bge-reranker,复用当前reranker模型的代码,但是需要单独起一个服务、单独加载一个模型。 | |
| 74 | +2. Ranking Funnel、Fusion Factors、Signal Breakdown | |
| 75 | +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。 | |
| 76 | +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息 | |
| 77 | +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。 | |
| 78 | +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。 | |
| 79 | +reranker:类似 | |
| 80 | + | |
| 81 | +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。 | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | +codex: | |
| 89 | +`Fine Rank` 之前显示 `N/A`,根因不是前端单纯没写,而是后端原来只有“精排成功拿到分数”时才会填那块 debug;一旦 `fine` 服务没起、没配 `service_url`、调用失败,前端只能看到空值。现在我把这层补齐了: | |
| 90 | + | |
| 91 | +- 顶层 `debug_info.fine_rank` 现在会始终带 `status / service_profile / service_url / input_window / output_window / query_template / doc_template` | |
| 92 | +- 失败时会带 `error` | |
| 93 | +- 成功时再补 `docs / top_n / meta` | |
| 94 | +- 所以你现在能区分: | |
| 95 | + - `disabled` | |
| 96 | + - `pending` | |
| 97 | + - `ok` | |
| 98 | + - `failed_no_scores` | |
| 99 | + - `failed` | |
| 100 | + | |
| 101 | +也就是说,`fine rank` 本身仍然需要你按设计单独起一个 `bge-reranker` 实例,这一层代码已经按“独立服务 + 共用 reranker 代码”的方式接好了,但服务本身还是要单独启动。 | |
| 102 | + | |
| 103 | +这轮我也把调试展示做了结构化重构,不再继续在旧视图上打补丁。现在是按阶段组织信息: | |
| 104 | + | |
| 105 | +- 整体漏斗: | |
| 106 | + - `ES Recall` | |
| 107 | + - `Coarse Rank` | |
| 108 | + - `Fine Rank` | |
| 109 | + - `Final Rerank` | |
| 110 | + - `Page Return` | |
| 111 | +- 单条结果: | |
| 112 | + - 用统一的 `ranking_funnel` 结构来表达每个阶段的 | |
| 113 | + - `rank` | |
| 114 | + - `delta` | |
| 115 | + - `score` | |
| 116 | + - 关键中间量 | |
| 117 | + - 关键 factor | |
| 118 | + - 输入 payload | |
| 119 | + | |
| 120 | +具体改动在: | |
| 121 | +- [search/searcher.py](/data/saas-search/search/searcher.py) | |
| 122 | +- [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js) | |
| 123 | +- [frontend/static/css/style.css](/data/saas-search/frontend/static/css/style.css) | |
| 124 | + | |
| 125 | +单条结果现在按 stage card 展示,而不是把 `Ranking Funnel / Fusion Factors / Signal Breakdown` 拆成三块平铺,这样你可以顺着 ES -> 粗排 -> 精排 -> rerank 看每段的输入、过程、输出和位次变化,更适合调融合公式。 | |
| 126 | + | |
| 127 | +检查我也跑过了: | |
| 128 | +- `node --check frontend/static/js/app.js` | |
| 129 | +- `./.venv/bin/python -m py_compile search/searcher.py` | |
| 130 | +- `./.venv/bin/pytest -q tests/test_search_rerank_window.py` | |
| 131 | + | |
| 132 | +都通过了。 | |
| 133 | + | |
| 134 | +下一步如果你要,我建议直接继续做两件事中的一个: | |
| 135 | +1. 把 `fine` 实例真正拉起来,然后我再把前端里 `status=failed/pending` 的场景跑成真实数据。 | |
| 136 | +2. 在当前漏斗视图上继续加“阶段 rank 分布 / delta 高亮 / topN 因子对比”,把它做成更像调参工作台。 | |
| 0 | 137 | \ No newline at end of file | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -498,6 +498,12 @@ function renderJsonDetails(title, payload, open = false) { |
| 498 | 498 | function buildProductDebugHtml({ debug, result, spuId, tenantId }) { |
| 499 | 499 | const resultJson = customStringify(result); |
| 500 | 500 | const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; |
| 501 | + const funnel = debug.ranking_funnel || {}; | |
| 502 | + const esStage = funnel.es_recall || {}; | |
| 503 | + const coarseStage = funnel.coarse_rank || {}; | |
| 504 | + const fineStage = funnel.fine_rank || {}; | |
| 505 | + const rerankStage = funnel.rerank || {}; | |
| 506 | + const finalPageStage = funnel.final_page || {}; | |
| 501 | 507 | |
| 502 | 508 | const rankSummary = renderMetricList([ |
| 503 | 509 | { label: 'Initial Rank', value: debug.initial_rank ?? 'N/A' }, |
| ... | ... | @@ -507,33 +513,57 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) { |
| 507 | 513 | ]); |
| 508 | 514 | |
| 509 | 515 | const stageScores = renderScorePills([ |
| 510 | - { label: 'ES', value: formatDebugNumber(debug.es_score), tone: 'tone-es' }, | |
| 511 | - { label: 'ES Norm', value: formatDebugNumber(debug.es_score_normalized), tone: 'tone-neutral' }, | |
| 512 | - { label: 'Coarse', value: formatDebugNumber(debug.coarse_score), tone: 'tone-coarse' }, | |
| 513 | - { label: 'Fine', value: formatDebugNumber(debug.fine_score), tone: 'tone-fine' }, | |
| 514 | - { label: 'Rerank', value: formatDebugNumber(debug.rerank_score), tone: 'tone-rerank' }, | |
| 515 | - { label: 'Fused', value: formatDebugNumber(debug.fused_score), tone: 'tone-final' }, | |
| 516 | + { label: 'ES', value: formatDebugNumber(esStage.score ?? debug.es_score), tone: 'tone-es' }, | |
| 517 | + { label: 'ES Norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized), tone: 'tone-neutral' }, | |
| 518 | + { label: 'Coarse', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score), tone: 'tone-coarse' }, | |
| 519 | + { label: 'Fine', value: formatDebugNumber(fineStage.score ?? debug.fine_score), tone: 'tone-fine' }, | |
| 520 | + { label: 'Rerank', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score), tone: 'tone-rerank' }, | |
| 521 | + { label: 'Fused', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score), tone: 'tone-final' }, | |
| 516 | 522 | ]); |
| 517 | 523 | |
| 518 | - const factorMetrics = renderMetricList([ | |
| 519 | - { label: 'coarse_text_factor', value: formatDebugNumber(debug.coarse_text_factor) }, | |
| 520 | - { label: 'coarse_knn_factor', value: formatDebugNumber(debug.coarse_knn_factor) }, | |
| 521 | - { label: 'text_factor', value: formatDebugNumber(debug.text_factor) }, | |
| 522 | - { label: 'knn_factor', value: formatDebugNumber(debug.knn_factor) }, | |
| 523 | - { label: 'fine_factor', value: formatDebugNumber(debug.fine_factor) }, | |
| 524 | - { label: 'rerank_factor', value: formatDebugNumber(debug.rerank_factor) }, | |
| 525 | - ]); | |
| 526 | - | |
| 527 | - const signalMetrics = renderMetricList([ | |
| 528 | - { label: 'text_score', value: formatDebugNumber(debug.text_score) }, | |
| 529 | - { label: 'text_source', value: formatDebugNumber(debug.text_source_score) }, | |
| 530 | - { label: 'text_translation', value: formatDebugNumber(debug.text_translation_score) }, | |
| 531 | - { label: 'text_primary', value: formatDebugNumber(debug.text_primary_score) }, | |
| 532 | - { label: 'text_support', value: formatDebugNumber(debug.text_support_score) }, | |
| 533 | - { label: 'knn_score', value: formatDebugNumber(debug.knn_score) }, | |
| 534 | - { label: 'text_knn', value: formatDebugNumber(debug.text_knn_score) }, | |
| 535 | - { label: 'image_knn', value: formatDebugNumber(debug.image_knn_score) }, | |
| 536 | - ]); | |
| 524 | + const stageGrid = ` | |
| 525 | + <div class="debug-stage-grid"> | |
| 526 | + ${buildStageCard('ES Recall', 'Matched queries and ES raw score', [ | |
| 527 | + { label: 'rank', value: esStage.rank ?? debug.initial_rank ?? 'N/A' }, | |
| 528 | + { label: 'es_score', value: formatDebugNumber(esStage.score ?? debug.es_score) }, | |
| 529 | + { label: 'es_norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized) }, | |
| 530 | + ], renderJsonDetails('Matched Queries', esStage.matched_queries ?? debug.matched_queries, false))} | |
| 531 | + ${buildStageCard('Coarse Rank', 'Text + vector fusion', [ | |
| 532 | + { label: 'rank', value: coarseStage.rank ?? 'N/A' }, | |
| 533 | + { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' }, | |
| 534 | + { label: 'coarse_score', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score) }, | |
| 535 | + { label: 'text_score', value: formatDebugNumber(coarseStage.text_score ?? debug.text_score) }, | |
| 536 | + { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score ?? debug.text_source_score) }, | |
| 537 | + { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score ?? debug.text_translation_score) }, | |
| 538 | + { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score ?? debug.text_primary_score) }, | |
| 539 | + { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score ?? debug.text_support_score) }, | |
| 540 | + { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score ?? debug.knn_score) }, | |
| 541 | + { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score ?? debug.text_knn_score) }, | |
| 542 | + { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score ?? debug.image_knn_score) }, | |
| 543 | + { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor ?? debug.coarse_text_factor) }, | |
| 544 | + { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor ?? debug.coarse_knn_factor) }, | |
| 545 | + ], renderJsonDetails('Coarse Signals', coarseStage.signals, false))} | |
| 546 | + ${buildStageCard('Fine Rank', 'Lightweight reranker output', [ | |
| 547 | + { label: 'rank', value: fineStage.rank ?? 'N/A' }, | |
| 548 | + { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' }, | |
| 549 | + { label: 'fine_score', value: formatDebugNumber(fineStage.score ?? debug.fine_score) }, | |
| 550 | + ], renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, false))} | |
| 551 | + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ | |
| 552 | + { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' }, | |
| 553 | + { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' }, | |
| 554 | + { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) }, | |
| 555 | + { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) }, | |
| 556 | + { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) }, | |
| 557 | + { label: 'text_source', value: formatDebugNumber(rerankStage.signals?.text_source_score ?? debug.text_source_score) }, | |
| 558 | + { label: 'text_translation', value: formatDebugNumber(rerankStage.signals?.text_translation_score ?? debug.text_translation_score) }, | |
| 559 | + { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) }, | |
| 560 | + { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) }, | |
| 561 | + { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) }, | |
| 562 | + { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) }, | |
| 563 | + { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) }, | |
| 564 | + ], renderJsonDetails('Rerank Signals', rerankStage.signals, false))} | |
| 565 | + </div> | |
| 566 | + `; | |
| 537 | 567 | |
| 538 | 568 | const titlePayload = {}; |
| 539 | 569 | if (debug.title_multilingual) titlePayload.title = debug.title_multilingual; |
| ... | ... | @@ -545,13 +575,8 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) { |
| 545 | 575 | <div class="product-debug-title">Ranking Funnel</div> |
| 546 | 576 | ${rankSummary} |
| 547 | 577 | ${stageScores} |
| 548 | - <div class="product-debug-subtitle">Fusion Factors</div> | |
| 549 | - ${factorMetrics} | |
| 550 | - <div class="product-debug-subtitle">Signal Breakdown</div> | |
| 551 | - ${signalMetrics} | |
| 552 | - ${renderJsonDetails('Rerank Input', debug.rerank_input, true)} | |
| 578 | + ${stageGrid} | |
| 553 | 579 | ${renderJsonDetails('Selected SKU', debug.style_intent_sku, true)} |
| 554 | - ${renderJsonDetails('Matched Queries', debug.matched_queries, false)} | |
| 555 | 580 | ${renderJsonDetails('Multilingual Fields', titlePayload, false)} |
| 556 | 581 | <div class="product-debug-actions"> |
| 557 | 582 | <button type="button" class="product-debug-inline-result-btn" |
| ... | ... | @@ -1019,9 +1044,10 @@ function buildGlobalFunnelHtml(data, debugInfo) { |
| 1019 | 1044 | const featureFlags = debugInfo.feature_flags || {}; |
| 1020 | 1045 | const esResponse = debugInfo.es_response || {}; |
| 1021 | 1046 | const esQueryContext = debugInfo.es_query_context || {}; |
| 1022 | - const coarseInfo = debugInfo.coarse_rank || {}; | |
| 1023 | - const fineInfo = debugInfo.fine_rank || {}; | |
| 1024 | - const rerankInfo = debugInfo.rerank || {}; | |
| 1047 | + const rankingFunnel = debugInfo.ranking_funnel || {}; | |
| 1048 | + const coarseInfo = rankingFunnel.coarse_rank || debugInfo.coarse_rank || {}; | |
| 1049 | + const fineInfo = rankingFunnel.fine_rank || debugInfo.fine_rank || {}; | |
| 1050 | + const rerankInfo = rankingFunnel.rerank || debugInfo.rerank || {}; | |
| 1025 | 1051 | const translations = queryAnalysis.translations || {}; |
| 1026 | 1052 | |
| 1027 | 1053 | const summaryHtml = ` |
| ... | ... | @@ -1060,14 +1086,20 @@ function buildGlobalFunnelHtml(data, debugInfo) { |
| 1060 | 1086 | ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')} |
| 1061 | 1087 | ${buildStageCard('Fine Rank', 'Lightweight reranker', [ |
| 1062 | 1088 | { label: 'service_url', value: fineInfo.service_url || 'N/A' }, |
| 1063 | - { label: 'docs', value: fineInfo.docs ?? fineInfo.top_n ?? 'N/A' }, | |
| 1089 | + { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' }, | |
| 1090 | + { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' }, | |
| 1064 | 1091 | { label: 'top_n', value: fineInfo.top_n ?? 'N/A' }, |
| 1092 | + { label: 'backend', value: fineInfo.backend || 'N/A' }, | |
| 1093 | + { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' }, | |
| 1065 | 1094 | { label: 'query_template', value: fineInfo.query_template || 'N/A' }, |
| 1066 | 1095 | ], fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')} |
| 1067 | 1096 | ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ |
| 1068 | 1097 | { label: 'service_url', value: rerankInfo.service_url || 'N/A' }, |
| 1069 | - { label: 'docs', value: rerankInfo.docs ?? 'N/A' }, | |
| 1098 | + { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' }, | |
| 1099 | + { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' }, | |
| 1070 | 1100 | { label: 'top_n', value: rerankInfo.top_n ?? 'N/A' }, |
| 1101 | + { label: 'backend', value: rerankInfo.backend || 'N/A' }, | |
| 1102 | + { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' }, | |
| 1071 | 1103 | { label: 'query_template', value: rerankInfo.query_template || 'N/A' }, |
| 1072 | 1104 | ], `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)} |
| 1073 | 1105 | ${buildStageCard('Page Return', 'Final slice returned to UI', [ | ... | ... |
reranker/README.md
| ... | ... | @@ -71,7 +71,7 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Jin |
| 71 | 71 | - `qwen3_transformers_packed` -> `.venv-reranker-transformers-packed` |
| 72 | 72 | - `qwen3_gguf` -> `.venv-reranker-gguf` |
| 73 | 73 | - `qwen3_gguf_06b` -> `.venv-reranker-gguf-06b` |
| 74 | -- `bge` -> `.venv-reranker-bge` | |
| 74 | +- `bge` -> `.venv-reranker` | |
| 75 | 75 | - `dashscope_rerank` -> `.venv-reranker-dashscope` |
| 76 | 76 | |
| 77 | 77 | ... | ... |
reranker/config.py
| ... | ... | @@ -2,19 +2,29 @@ |
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import os | |
| 6 | + | |
| 5 | 7 | from config.loader import get_app_config |
| 6 | 8 | |
| 7 | 9 | |
| 8 | 10 | class RerankerConfig(object): |
| 9 | 11 | def __init__(self) -> None: |
| 10 | 12 | app_config = get_app_config() |
| 11 | - runtime = app_config.runtime | |
| 12 | 13 | service = app_config.services.rerank |
| 13 | - backend = service.get_backend_config() | |
| 14 | + instance_name = str(os.getenv("RERANK_INSTANCE") or service.default_instance).strip() or service.default_instance | |
| 15 | + instance = service.get_instance(instance_name) | |
| 16 | + backend = service.get_backend_config(instance_name) | |
| 14 | 17 | request = service.request |
| 15 | 18 | |
| 16 | - self.HOST = runtime.reranker_host | |
| 17 | - self.PORT = runtime.reranker_port | |
| 19 | + self.INSTANCE = instance_name | |
| 20 | + self.HOST = str(os.getenv("RERANKER_HOST") or instance.host) | |
| 21 | + self.PORT = int(os.getenv("RERANKER_PORT") or instance.port) | |
| 22 | + self.BACKEND = str(os.getenv("RERANK_BACKEND") or instance.backend) | |
| 23 | + self.RUNTIME_DIR = str( | |
| 24 | + os.getenv("RERANKER_RUNTIME_DIR") | |
| 25 | + or instance.runtime_dir | |
| 26 | + or f"./.runtime/reranker/{instance_name}" | |
| 27 | + ) | |
| 18 | 28 | |
| 19 | 29 | self.MODEL_NAME = str(backend.get("model_name") or "Qwen/Qwen3-Reranker-0.6B") |
| 20 | 30 | self.DEVICE = backend.get("device") | ... | ... |
reranker/server.py
| ... | ... | @@ -6,7 +6,7 @@ POST /rerank |
| 6 | 6 | Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool } |
| 7 | 7 | Response: { "scores": [float], "meta": {...} } |
| 8 | 8 | |
| 9 | -Backend selected via config: services.rerank.backend | |
| 9 | +Backend selected via config: services.rerank.instances.<name>.backend | |
| 10 | 10 | (bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND. |
| 11 | 11 | """ |
| 12 | 12 | |
| ... | ... | @@ -76,14 +76,15 @@ class RerankResponse(BaseModel): |
| 76 | 76 | @app.on_event("startup") |
| 77 | 77 | def load_model() -> None: |
| 78 | 78 | global _reranker, _backend_name |
| 79 | - logger.info("Starting reranker service on port %s", CONFIG.PORT) | |
| 79 | + logger.info("Starting reranker service | instance=%s port=%s", CONFIG.INSTANCE, CONFIG.PORT) | |
| 80 | 80 | try: |
| 81 | - backend_name, backend_cfg = get_rerank_backend_config() | |
| 81 | + backend_name, backend_cfg = get_rerank_backend_config(CONFIG.INSTANCE) | |
| 82 | 82 | _backend_name = backend_name |
| 83 | 83 | _reranker = get_rerank_backend(backend_name, backend_cfg) |
| 84 | 84 | model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get("model_name", backend_name) |
| 85 | 85 | logger.info( |
| 86 | - "Reranker ready | backend=%s model=%s", | |
| 86 | + "Reranker ready | instance=%s backend=%s model=%s", | |
| 87 | + CONFIG.INSTANCE, | |
| 87 | 88 | _backend_name, |
| 88 | 89 | model_info, |
| 89 | 90 | ) |
| ... | ... | @@ -101,6 +102,7 @@ def health() -> Dict[str, Any]: |
| 101 | 102 | ).get("model_name", _backend_name) |
| 102 | 103 | payload: Dict[str, Any] = { |
| 103 | 104 | "status": "ok" if _reranker is not None else "unavailable", |
| 105 | + "instance": CONFIG.INSTANCE, | |
| 104 | 106 | "model_loaded": _reranker is not None, |
| 105 | 107 | "model": model_info, |
| 106 | 108 | "backend": _backend_name, | ... | ... |
scripts/lib/reranker_backend_env.sh
| ... | ... | @@ -45,7 +45,7 @@ reranker_backend_venv_dir() { |
| 45 | 45 | qwen3_gguf_06b) printf '%s/.venv-reranker-gguf-06b\n' "${project_root}" ;; |
| 46 | 46 | qwen3_transformers) printf '%s/.venv-reranker-transformers\n' "${project_root}" ;; |
| 47 | 47 | qwen3_transformers_packed) printf '%s/.venv-reranker-transformers-packed\n' "${project_root}" ;; |
| 48 | - bge) printf '%s/.venv-reranker-bge\n' "${project_root}" ;; | |
| 48 | + bge) printf '%s/.venv-reranker\n' "${project_root}" ;; | |
| 49 | 49 | dashscope_rerank) printf '%s/.venv-reranker-dashscope\n' "${project_root}" ;; |
| 50 | 50 | *) printf '%s/.venv-reranker-%s\n' "${project_root}" "${backend}" ;; |
| 51 | 51 | esac | ... | ... |
scripts/service_ctl.sh
| ... | ... | @@ -16,14 +16,45 @@ mkdir -p "${LOG_DIR}" |
| 16 | 16 | source "${PROJECT_ROOT}/scripts/lib/load_env.sh" |
| 17 | 17 | |
| 18 | 18 | CORE_SERVICES=("backend" "indexer" "frontend") |
| 19 | -OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker") | |
| 19 | +OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker" "reranker-fine") | |
| 20 | 20 | FULL_SERVICES=("${OPTIONAL_SERVICES[@]}" "${CORE_SERVICES[@]}") |
| 21 | -STOP_ORDER_SERVICES=("frontend" "indexer" "backend" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei") | |
| 21 | +STOP_ORDER_SERVICES=("frontend" "indexer" "backend" "reranker-fine" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei") | |
| 22 | 22 | |
| 23 | 23 | all_services() { |
| 24 | 24 | echo "${FULL_SERVICES[@]}" |
| 25 | 25 | } |
| 26 | 26 | |
| 27 | +config_python_bin() { | |
| 28 | + if [ -x "${PROJECT_ROOT}/.venv/bin/python" ]; then | |
| 29 | + echo "${PROJECT_ROOT}/.venv/bin/python" | |
| 30 | + else | |
| 31 | + echo "${PYTHON:-python3}" | |
| 32 | + fi | |
| 33 | +} | |
| 34 | + | |
| 35 | +reranker_instance_for_service() { | |
| 36 | + local service="$1" | |
| 37 | + case "${service}" in | |
| 38 | + reranker) echo "default" ;; | |
| 39 | + reranker-fine) echo "fine" ;; | |
| 40 | + *) echo "" ;; | |
| 41 | + esac | |
| 42 | +} | |
| 43 | + | |
| 44 | +get_reranker_instance_port() { | |
| 45 | + local instance="$1" | |
| 46 | + local pybin | |
| 47 | + pybin="$(config_python_bin)" | |
| 48 | + RERANK_INSTANCE="${instance}" PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${pybin}" - <<'PY' | |
| 49 | +from config.loader import get_app_config | |
| 50 | +import os | |
| 51 | + | |
| 52 | +cfg = get_app_config().services.rerank | |
| 53 | +name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance | |
| 54 | +print(cfg.get_instance(name).port) | |
| 55 | +PY | |
| 56 | +} | |
| 57 | + | |
| 27 | 58 | get_port() { |
| 28 | 59 | local service="$1" |
| 29 | 60 | case "${service}" in |
| ... | ... | @@ -33,7 +64,20 @@ get_port() { |
| 33 | 64 | embedding) echo "${EMBEDDING_TEXT_PORT:-6005}" ;; |
| 34 | 65 | embedding-image) echo "${EMBEDDING_IMAGE_PORT:-6008}" ;; |
| 35 | 66 | translator) echo "${TRANSLATION_PORT:-6006}" ;; |
| 36 | - reranker) echo "${RERANKER_PORT:-6007}" ;; | |
| 67 | + reranker) | |
| 68 | + if [ -n "${RERANKER_PORT:-}" ]; then | |
| 69 | + echo "${RERANKER_PORT}" | |
| 70 | + else | |
| 71 | + get_reranker_instance_port "default" | |
| 72 | + fi | |
| 73 | + ;; | |
| 74 | + reranker-fine) | |
| 75 | + if [ -n "${RERANKER_FINE_PORT:-}" ]; then | |
| 76 | + echo "${RERANKER_FINE_PORT}" | |
| 77 | + else | |
| 78 | + get_reranker_instance_port "fine" | |
| 79 | + fi | |
| 80 | + ;; | |
| 37 | 81 | tei) echo "${TEI_PORT:-8080}" ;; |
| 38 | 82 | cnclip) echo "${CNCLIP_PORT:-51000}" ;; |
| 39 | 83 | *) echo "" ;; |
| ... | ... | @@ -70,6 +114,7 @@ service_start_cmd() { |
| 70 | 114 | embedding-image) echo "./scripts/start_embedding_image_service.sh" ;; |
| 71 | 115 | translator) echo "./scripts/start_translator.sh" ;; |
| 72 | 116 | reranker) echo "./scripts/start_reranker.sh" ;; |
| 117 | + reranker-fine) echo "./scripts/start_reranker.sh" ;; | |
| 73 | 118 | tei) echo "./scripts/start_tei_service.sh" ;; |
| 74 | 119 | cnclip) echo "./scripts/start_cnclip_service.sh" ;; |
| 75 | 120 | *) return 1 ;; |
| ... | ... | @@ -79,7 +124,7 @@ service_start_cmd() { |
| 79 | 124 | service_exists() { |
| 80 | 125 | local service="$1" |
| 81 | 126 | case "${service}" in |
| 82 | - backend|indexer|frontend|embedding|embedding-image|translator|reranker|tei|cnclip) return 0 ;; | |
| 127 | + backend|indexer|frontend|embedding|embedding-image|translator|reranker|reranker-fine|tei|cnclip) return 0 ;; | |
| 83 | 128 | *) return 1 ;; |
| 84 | 129 | esac |
| 85 | 130 | } |
| ... | ... | @@ -97,7 +142,7 @@ validate_targets() { |
| 97 | 142 | health_path_for_service() { |
| 98 | 143 | local service="$1" |
| 99 | 144 | case "${service}" in |
| 100 | - backend|indexer|embedding|embedding-image|translator|reranker|tei) echo "/health" ;; | |
| 145 | + backend|indexer|embedding|embedding-image|translator|reranker|reranker-fine|tei) echo "/health" ;; | |
| 101 | 146 | *) echo "" ;; |
| 102 | 147 | esac |
| 103 | 148 | } |
| ... | ... | @@ -505,7 +550,7 @@ get_cnclip_flow_device() { |
| 505 | 550 | start_health_retries_for_service() { |
| 506 | 551 | local service="$1" |
| 507 | 552 | case "${service}" in |
| 508 | - reranker) echo 90 ;; | |
| 553 | + reranker|reranker-fine) echo 90 ;; | |
| 509 | 554 | *) echo 30 ;; |
| 510 | 555 | esac |
| 511 | 556 | } |
| ... | ... | @@ -593,9 +638,15 @@ start_one() { |
| 593 | 638 | return 1 |
| 594 | 639 | fi |
| 595 | 640 | ;; |
| 596 | - backend|indexer|frontend|embedding|embedding-image|translator|reranker) | |
| 641 | + backend|indexer|frontend|embedding|embedding-image|translator|reranker|reranker-fine) | |
| 597 | 642 | echo "[start] ${service}" |
| 598 | - nohup "${cmd}" >> "${lf}" 2>&1 & | |
| 643 | + local rerank_instance="" | |
| 644 | + rerank_instance="$(reranker_instance_for_service "${service}")" | |
| 645 | + if [ -n "${rerank_instance}" ]; then | |
| 646 | + nohup env RERANK_INSTANCE="${rerank_instance}" "${cmd}" >> "${lf}" 2>&1 & | |
| 647 | + else | |
| 648 | + nohup "${cmd}" >> "${lf}" 2>&1 & | |
| 649 | + fi | |
| 599 | 650 | local pid=$! |
| 600 | 651 | echo "${pid}" > "${pf}" |
| 601 | 652 | wait_for_startup_health "${service}" "${pid}" "${lf}" |
| ... | ... | @@ -673,7 +724,7 @@ stop_one() { |
| 673 | 724 | fi |
| 674 | 725 | fi |
| 675 | 726 | |
| 676 | - if [ "${service}" = "reranker" ]; then | |
| 727 | + if [[ "${service}" == reranker* ]] && ! service_is_running "reranker" && ! service_is_running "reranker-fine"; then | |
| 677 | 728 | cleanup_reranker_orphans |
| 678 | 729 | fi |
| 679 | 730 | } |
| ... | ... | @@ -871,7 +922,7 @@ Special targets: |
| 871 | 922 | |
| 872 | 923 | Examples: |
| 873 | 924 | ./scripts/service_ctl.sh up all |
| 874 | - ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker | |
| 925 | + ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker reranker-fine | |
| 875 | 926 | ./scripts/service_ctl.sh up backend indexer frontend |
| 876 | 927 | ./scripts/service_ctl.sh restart |
| 877 | 928 | ./scripts/service_ctl.sh monitor-start all | ... | ... |
scripts/start_reranker.sh
| ... | ... | @@ -14,9 +14,31 @@ load_env_file "${PROJECT_ROOT}/.env" |
| 14 | 14 | # shellcheck source=scripts/lib/reranker_backend_env.sh |
| 15 | 15 | source "${PROJECT_ROOT}/scripts/lib/reranker_backend_env.sh" |
| 16 | 16 | |
| 17 | -RERANKER_HOST="${RERANKER_HOST:-0.0.0.0}" | |
| 18 | -RERANKER_PORT="${RERANKER_PORT:-6007}" | |
| 19 | -RERANK_BACKEND="${RERANK_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}" | |
| 17 | +CONFIG_PYTHON="${PROJECT_ROOT}/.venv/bin/python" | |
| 18 | +if [[ ! -x "${CONFIG_PYTHON}" ]]; then | |
| 19 | + CONFIG_PYTHON="${PYTHON:-python3}" | |
| 20 | +fi | |
| 21 | + | |
| 22 | +RERANK_INSTANCE="${RERANK_INSTANCE:-default}" | |
| 23 | + | |
| 24 | +read -r INSTANCE_HOST INSTANCE_PORT INSTANCE_BACKEND INSTANCE_RUNTIME_DIR <<EOF | |
| 25 | +$( | |
| 26 | + PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${CONFIG_PYTHON}" - <<'PY' | |
| 27 | +from config.loader import get_app_config | |
| 28 | +import os | |
| 29 | + | |
| 30 | +cfg = get_app_config().services.rerank | |
| 31 | +name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance | |
| 32 | +instance = cfg.get_instance(name) | |
| 33 | +runtime_dir = instance.runtime_dir or f"./.runtime/reranker/{name}" | |
| 34 | +print(instance.host, instance.port, instance.backend, runtime_dir) | |
| 35 | +PY | |
| 36 | +) | |
| 37 | +EOF | |
| 38 | + | |
| 39 | +RERANKER_HOST="${RERANKER_HOST:-${INSTANCE_HOST:-0.0.0.0}}" | |
| 40 | +RERANKER_PORT="${RERANKER_PORT:-${INSTANCE_PORT:-6007}}" | |
| 41 | +RERANK_BACKEND="${RERANK_BACKEND:-${INSTANCE_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}}" | |
| 20 | 42 | RERANKER_VENV="${RERANKER_VENV:-$(reranker_backend_venv_dir "${PROJECT_ROOT}" "${RERANK_BACKEND}")}" |
| 21 | 43 | PYTHON_BIN="${RERANKER_VENV}/bin/python" |
| 22 | 44 | |
| ... | ... | @@ -27,7 +49,10 @@ if [[ ! -x "${PYTHON_BIN}" ]]; then |
| 27 | 49 | fi |
| 28 | 50 | |
| 29 | 51 | # Keep vLLM/triton/torch caches out of system disk. |
| 30 | -RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker}" | |
| 52 | +RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${INSTANCE_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker/${RERANK_INSTANCE}}}" | |
| 53 | +if [[ "${RERANKER_RUNTIME_DIR}" != /* ]]; then | |
| 54 | + RERANKER_RUNTIME_DIR="${PROJECT_ROOT}/${RERANKER_RUNTIME_DIR#./}" | |
| 55 | +fi | |
| 31 | 56 | mkdir -p "${RERANKER_RUNTIME_DIR}/home" \ |
| 32 | 57 | "${RERANKER_RUNTIME_DIR}/cache" \ |
| 33 | 58 | "${RERANKER_RUNTIME_DIR}/config" \ |
| ... | ... | @@ -102,6 +127,7 @@ fi |
| 102 | 127 | echo "========================================" |
| 103 | 128 | echo "Starting Reranker Service" |
| 104 | 129 | echo "========================================" |
| 130 | +echo "Instance: ${RERANK_INSTANCE}" | |
| 105 | 131 | echo "Python: ${PYTHON_BIN}" |
| 106 | 132 | echo "Host: ${RERANKER_HOST}" |
| 107 | 133 | echo "Port: ${RERANKER_PORT}" | ... | ... |
search/searcher.py
| ... | ... | @@ -373,6 +373,9 @@ class Searcher: |
| 373 | 373 | |
| 374 | 374 | es_score_normalization_factor: Optional[float] = None |
| 375 | 375 | initial_ranks_by_doc: Dict[str, int] = {} |
| 376 | + coarse_ranks_by_doc: Dict[str, int] = {} | |
| 377 | + fine_ranks_by_doc: Dict[str, int] = {} | |
| 378 | + rerank_ranks_by_doc: Dict[str, int] = {} | |
| 376 | 379 | coarse_debug_info: Optional[Dict[str, Any]] = None |
| 377 | 380 | fine_debug_info: Optional[Dict[str, Any]] = None |
| 378 | 381 | rerank_debug_info: Optional[Dict[str, Any]] = None |
| ... | ... | @@ -626,7 +629,7 @@ class Searcher: |
| 626 | 629 | style_intent_decisions: Dict[str, SkuSelectionDecision] = {} |
| 627 | 630 | if do_rerank and in_rerank_window: |
| 628 | 631 | from dataclasses import asdict |
| 629 | - from config.services_config import get_rerank_service_url | |
| 632 | + from config.services_config import get_rerank_backend_config, get_rerank_service_url | |
| 630 | 633 | from .rerank_client import coarse_resort_hits, run_lightweight_rerank, run_rerank |
| 631 | 634 | |
| 632 | 635 | rerank_query = parsed_query.text_for_rerank() if parsed_query else query |
| ... | ... | @@ -642,11 +645,17 @@ class Searcher: |
| 642 | 645 | hits = hits[:coarse_output_window] |
| 643 | 646 | es_response.setdefault("hits", {})["hits"] = hits |
| 644 | 647 | if debug: |
| 645 | - coarse_debug_info = { | |
| 646 | - "docs_in": es_fetch_size, | |
| 647 | - "docs_out": len(hits), | |
| 648 | - "fusion": asdict(coarse_cfg.fusion), | |
| 648 | + coarse_ranks_by_doc = { | |
| 649 | + str(hit.get("_id")): rank | |
| 650 | + for rank, hit in enumerate(hits, 1) | |
| 651 | + if hit.get("_id") is not None | |
| 649 | 652 | } |
| 653 | + if debug: | |
| 654 | + coarse_debug_info = { | |
| 655 | + "docs_in": es_fetch_size, | |
| 656 | + "docs_out": len(hits), | |
| 657 | + "fusion": asdict(coarse_cfg.fusion), | |
| 658 | + } | |
| 650 | 659 | context.store_intermediate_result("coarse_rank_scores", coarse_debug) |
| 651 | 660 | context.logger.info( |
| 652 | 661 | "粗排完成 | docs_in=%s | docs_out=%s", |
| ... | ... | @@ -717,12 +726,23 @@ class Searcher: |
| 717 | 726 | hits = hits[:fine_output_window] |
| 718 | 727 | es_response["hits"]["hits"] = hits |
| 719 | 728 | if debug: |
| 729 | + fine_ranks_by_doc = { | |
| 730 | + str(hit.get("_id")): rank | |
| 731 | + for rank, hit in enumerate(hits, 1) | |
| 732 | + if hit.get("_id") is not None | |
| 733 | + } | |
| 734 | + fine_backend_name, fine_backend_cfg = get_rerank_backend_config(fine_cfg.service_profile) | |
| 720 | 735 | fine_debug_info = { |
| 736 | + "service_profile": fine_cfg.service_profile, | |
| 721 | 737 | "service_url": get_rerank_service_url(profile=fine_cfg.service_profile), |
| 738 | + "backend": fine_backend_name, | |
| 739 | + "model": fine_meta.get("model") if isinstance(fine_meta, dict) else None, | |
| 740 | + "backend_model_name": fine_backend_cfg.get("model_name"), | |
| 722 | 741 | "query_template": fine_query_template, |
| 723 | 742 | "doc_template": fine_doc_template, |
| 724 | 743 | "query_text": str(fine_query_template).format_map({"query": rerank_query}), |
| 725 | - "docs": len(hits), | |
| 744 | + "docs_in": min(len(fine_scores), fine_input_window), | |
| 745 | + "docs_out": len(hits), | |
| 726 | 746 | "top_n": fine_output_window, |
| 727 | 747 | "meta": fine_meta, |
| 728 | 748 | } |
| ... | ... | @@ -768,12 +788,23 @@ class Searcher: |
| 768 | 788 | |
| 769 | 789 | if rerank_meta is not None: |
| 770 | 790 | if debug: |
| 791 | + rerank_ranks_by_doc = { | |
| 792 | + str(hit.get("_id")): rank | |
| 793 | + for rank, hit in enumerate(es_response.get("hits", {}).get("hits") or [], 1) | |
| 794 | + if hit.get("_id") is not None | |
| 795 | + } | |
| 796 | + rerank_backend_name, rerank_backend_cfg = get_rerank_backend_config(rc.service_profile) | |
| 771 | 797 | rerank_debug_info = { |
| 798 | + "service_profile": rc.service_profile, | |
| 772 | 799 | "service_url": get_rerank_service_url(profile=rc.service_profile), |
| 800 | + "backend": rerank_backend_name, | |
| 801 | + "model": rerank_meta.get("model") if isinstance(rerank_meta, dict) else None, | |
| 802 | + "backend_model_name": rerank_backend_cfg.get("model_name"), | |
| 773 | 803 | "query_template": effective_query_template, |
| 774 | 804 | "doc_template": effective_doc_template, |
| 775 | 805 | "query_text": str(effective_query_template).format_map({"query": rerank_query}), |
| 776 | - "docs": len(es_response.get("hits", {}).get("hits") or []), | |
| 806 | + "docs_in": len(final_input), | |
| 807 | + "docs_out": len(es_response.get("hits", {}).get("hits") or []), | |
| 777 | 808 | "top_n": from_ + size, |
| 778 | 809 | "meta": rerank_meta, |
| 779 | 810 | "fusion": asdict(rc.fusion), |
| ... | ... | @@ -1019,6 +1050,60 @@ class Searcher: |
| 1019 | 1050 | debug_entry["fine_score"] = fine_debug.get("fine_score") |
| 1020 | 1051 | debug_entry["rerank_input"] = fine_debug.get("rerank_input") |
| 1021 | 1052 | |
| 1053 | + initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | |
| 1054 | + coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | |
| 1055 | + fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | |
| 1056 | + rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | |
| 1057 | + final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | |
| 1058 | + | |
| 1059 | + def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: | |
| 1060 | + if previous_rank is None or current_rank is None: | |
| 1061 | + return None | |
| 1062 | + return previous_rank - current_rank | |
| 1063 | + | |
| 1064 | + debug_entry["ranking_funnel"] = { | |
| 1065 | + "es_recall": { | |
| 1066 | + "rank": initial_rank, | |
| 1067 | + "score": es_score, | |
| 1068 | + "normalized_score": normalized, | |
| 1069 | + "matched_queries": hit.get("matched_queries"), | |
| 1070 | + }, | |
| 1071 | + "coarse_rank": { | |
| 1072 | + "rank": coarse_rank, | |
| 1073 | + "rank_change": _rank_change(initial_rank, coarse_rank), | |
| 1074 | + "score": coarse_debug.get("coarse_score") if coarse_debug else None, | |
| 1075 | + "text_score": coarse_debug.get("text_score") if coarse_debug else None, | |
| 1076 | + "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, | |
| 1077 | + "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None, | |
| 1078 | + "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None, | |
| 1079 | + "signals": coarse_debug, | |
| 1080 | + }, | |
| 1081 | + "fine_rank": { | |
| 1082 | + "rank": fine_rank, | |
| 1083 | + "rank_change": _rank_change(coarse_rank, fine_rank), | |
| 1084 | + "score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), | |
| 1085 | + "rerank_input": fine_debug.get("rerank_input") if fine_debug else None, | |
| 1086 | + }, | |
| 1087 | + "rerank": { | |
| 1088 | + "rank": rerank_rank, | |
| 1089 | + "rank_change": _rank_change(fine_rank, rerank_rank), | |
| 1090 | + "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), | |
| 1091 | + "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), | |
| 1092 | + "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), | |
| 1093 | + "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"), | |
| 1094 | + "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"), | |
| 1095 | + "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, | |
| 1096 | + "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, | |
| 1097 | + "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, | |
| 1098 | + "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None, | |
| 1099 | + "signals": rerank_debug, | |
| 1100 | + }, | |
| 1101 | + "final_page": { | |
| 1102 | + "rank": final_rank, | |
| 1103 | + "rank_change": _rank_change(rerank_rank, final_rank), | |
| 1104 | + }, | |
| 1105 | + } | |
| 1106 | + | |
| 1022 | 1107 | if style_intent_debug: |
| 1023 | 1108 | debug_entry["style_intent_sku"] = style_intent_debug |
| 1024 | 1109 | |
| ... | ... | @@ -1091,6 +1176,15 @@ class Searcher: |
| 1091 | 1176 | "coarse_rank": coarse_debug_info, |
| 1092 | 1177 | "fine_rank": fine_debug_info, |
| 1093 | 1178 | "rerank": rerank_debug_info, |
| 1179 | + "ranking_funnel": { | |
| 1180 | + "es_recall": { | |
| 1181 | + "docs_out": es_fetch_size, | |
| 1182 | + "score_normalization_factor": es_score_normalization_factor, | |
| 1183 | + }, | |
| 1184 | + "coarse_rank": coarse_debug_info, | |
| 1185 | + "fine_rank": fine_debug_info, | |
| 1186 | + "rerank": rerank_debug_info, | |
| 1187 | + }, | |
| 1094 | 1188 | "feature_flags": context.metadata.get('feature_flags', {}), |
| 1095 | 1189 | "stage_timings": { |
| 1096 | 1190 | k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() | ... | ... |
tests/test_search_rerank_window.py
| ... | ... | @@ -299,6 +299,73 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): |
| 299 | 299 | assert loaded.rerank.enabled is True |
| 300 | 300 | |
| 301 | 301 | |
| 302 | +def test_config_loader_parses_named_rerank_instances(tmp_path: Path): | |
| 303 | + from config.loader import AppConfigLoader | |
| 304 | + | |
| 305 | + config_data = { | |
| 306 | + "es_index_name": "test_products", | |
| 307 | + "field_boosts": {"title.en": 3.0}, | |
| 308 | + "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}], | |
| 309 | + "query_config": {"supported_languages": ["en"], "default_language": "en"}, | |
| 310 | + "services": { | |
| 311 | + "translation": { | |
| 312 | + "service_url": "http://localhost:6005", | |
| 313 | + "timeout_sec": 3.0, | |
| 314 | + "default_model": "dummy-model", | |
| 315 | + "default_scene": "general", | |
| 316 | + "cache": {"ttl_seconds": 60, "sliding_expiration": True}, | |
| 317 | + "capabilities": { | |
| 318 | + "dummy-model": { | |
| 319 | + "enabled": True, | |
| 320 | + "backend": "llm", | |
| 321 | + "model": "dummy-model", | |
| 322 | + "base_url": "http://localhost:6005/v1", | |
| 323 | + "timeout_sec": 3.0, | |
| 324 | + "use_cache": True, | |
| 325 | + } | |
| 326 | + }, | |
| 327 | + }, | |
| 328 | + "embedding": { | |
| 329 | + "provider": "http", | |
| 330 | + "providers": {"http": {"text_base_url": "http://localhost:6005", "image_base_url": "http://localhost:6008"}}, | |
| 331 | + "backend": "tei", | |
| 332 | + "backends": {"tei": {"base_url": "http://localhost:8080", "model_id": "dummy-embedding-model"}}, | |
| 333 | + }, | |
| 334 | + "rerank": { | |
| 335 | + "provider": "http", | |
| 336 | + "providers": { | |
| 337 | + "http": { | |
| 338 | + "instances": { | |
| 339 | + "default": {"service_url": "http://localhost:6007/rerank"}, | |
| 340 | + "fine": {"service_url": "http://localhost:6009/rerank"}, | |
| 341 | + } | |
| 342 | + } | |
| 343 | + }, | |
| 344 | + "default_instance": "default", | |
| 345 | + "instances": { | |
| 346 | + "default": {"port": 6007, "backend": "qwen3_vllm_score"}, | |
| 347 | + "fine": {"port": 6009, "backend": "bge"}, | |
| 348 | + }, | |
| 349 | + "backends": { | |
| 350 | + "bge": {"model_name": "BAAI/bge-reranker-v2-m3"}, | |
| 351 | + "qwen3_vllm_score": {"model_name": "Qwen/Qwen3-Reranker-0.6B"}, | |
| 352 | + }, | |
| 353 | + }, | |
| 354 | + }, | |
| 355 | + "spu_config": {"enabled": False}, | |
| 356 | + "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []}, | |
| 357 | + } | |
| 358 | + config_path = tmp_path / "config.yaml" | |
| 359 | + config_path.write_text(yaml.safe_dump(config_data), encoding="utf-8") | |
| 360 | + | |
| 361 | + loader = AppConfigLoader(config_file=config_path) | |
| 362 | + loaded = loader.load(validate=False) | |
| 363 | + | |
| 364 | + assert loaded.services.rerank.default_instance == "default" | |
| 365 | + assert loaded.services.rerank.get_instance("fine").port == 6009 | |
| 366 | + assert loaded.services.rerank.get_instance("fine").backend == "bge" | |
| 367 | + | |
| 368 | + | |
| 302 | 369 | def test_searcher_reranks_top_window_by_default(monkeypatch): |
| 303 | 370 | es_client = _FakeESClient() |
| 304 | 371 | searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client) |
| ... | ... | @@ -354,6 +421,82 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): |
| 354 | 421 | assert result.results[0].brief == "brief-20" |
| 355 | 422 | |
| 356 | 423 | |
def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch):
    """Debug mode must expose the ranking funnel: ES recall -> coarse -> fine -> rerank."""
    fake_es = _FakeESClient(total_hits=120)
    searcher = _build_searcher(
        _build_search_config(rerank_enabled=True, rerank_window=20), fake_es
    )
    ctx = create_request_context(reqid="t-debug", uid="u-debug")

    # Stub the tenant config lookup so the searcher sees a single-language tenant.
    monkeypatch.setattr(
        "search.searcher.get_tenant_config_loader",
        lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
    )

    def _stub_lightweight_rerank(**kwargs):
        # Assign descending fine scores in input order, then re-sort hits by them.
        hits = kwargs["es_hits"]
        total = len(hits)
        scores = []
        debug_rows = []
        for position, hit in enumerate(hits):
            fine = float(total - position)
            hit["_fine_score"] = fine
            scores.append(fine)
            debug_rows.append(
                {
                    "doc_id": hit["_id"],
                    "fine_score": fine,
                    "rerank_input": {"doc_preview": f"product-{hit['_id']}"},
                }
            )
        hits.sort(key=lambda entry: entry["_fine_score"], reverse=True)
        return scores, {"model": "fine-bge"}, debug_rows

    def _stub_final_rerank(**kwargs):
        # Attach rerank/fused/component scores and emit one fused-debug row per hit.
        response = kwargs["es_response"]
        fused_debug = []
        for position, hit in enumerate(response["hits"]["hits"]):
            hit["_rerank_score"] = 10.0 - position
            hit["_fused_score"] = 100.0 - position
            hit["_text_score"] = hit.get("_score", 0.0)
            hit["_knn_score"] = 0.0
            fused_debug.append(
                {
                    "doc_id": hit["_id"],
                    "rerank_score": hit["_rerank_score"],
                    "fine_score": hit.get("_fine_score"),
                    "text_score": hit["_text_score"],
                    "knn_score": 0.0,
                    "rerank_factor": 1.0,
                    "fine_factor": 1.0,
                    "text_factor": 1.0,
                    "knn_factor": 1.0,
                    "fused_score": hit["_fused_score"],
                    "matched_queries": {},
                    "rerank_input": {"doc_preview": f"product-{hit['_id']}"},
                }
            )
        return response, {"model": "final-reranker"}, fused_debug

    monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _stub_lightweight_rerank)
    monkeypatch.setattr("search.rerank_client.run_rerank", _stub_final_rerank)

    result = searcher.search(
        query="toy",
        tenant_id="162",
        from_=0,
        size=5,
        context=ctx,
        enable_rerank=True,
        debug=True,
    )

    # Stage-level funnel counts: fine rank narrows to 80 docs, rerank to the 20-doc window.
    funnel = result.debug_info["ranking_funnel"]
    assert funnel["fine_rank"]["docs_out"] == 80
    assert funnel["rerank"]["docs_out"] == 20
    # Per-result funnel: every stage of the top hit carries a populated rank/score.
    top_hit = result.debug_info["per_result"][0]["ranking_funnel"]
    assert top_hit["es_recall"]["rank"] is not None
    assert top_hit["coarse_rank"]["score"] is not None
    assert top_hit["fine_rank"]["score"] is not None
    assert top_hit["rerank"]["rerank_score"] is not None
| 498 | + | |
| 499 | + | |
| 357 | 500 | def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch): |
| 358 | 501 | es_client = _FakeESClient() |
| 359 | 502 | searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client) | ... | ... |