Compare View

switch
from
...
to
 
Commits (5)
config/__init__.py
@@ -4,6 +4,9 @@ from config.config_loader import ConfigLoader, ConfigurationError @@ -4,6 +4,9 @@ from config.config_loader import ConfigLoader, ConfigurationError
4 from config.loader import AppConfigLoader, get_app_config, reload_app_config 4 from config.loader import AppConfigLoader, get_app_config, reload_app_config
5 from config.schema import ( 5 from config.schema import (
6 AppConfig, 6 AppConfig,
  7 + CoarseRankConfig,
  8 + CoarseRankFusionConfig,
  9 + FineRankConfig,
7 FunctionScoreConfig, 10 FunctionScoreConfig,
8 IndexConfig, 11 IndexConfig,
9 QueryConfig, 12 QueryConfig,
@@ -31,8 +34,11 @@ from config.utils import get_domain_fields, get_match_fields_for_index @@ -31,8 +34,11 @@ from config.utils import get_domain_fields, get_match_fields_for_index
31 __all__ = [ 34 __all__ = [
32 "AppConfig", 35 "AppConfig",
33 "AppConfigLoader", 36 "AppConfigLoader",
  37 + "CoarseRankConfig",
  38 + "CoarseRankFusionConfig",
34 "ConfigLoader", 39 "ConfigLoader",
35 "ConfigurationError", 40 "ConfigurationError",
  41 + "FineRankConfig",
36 "FunctionScoreConfig", 42 "FunctionScoreConfig",
37 "IndexConfig", 43 "IndexConfig",
38 "QueryConfig", 44 "QueryConfig",
config/config.yaml
@@ -100,16 +100,19 @@ query_config: @@ -100,16 +100,19 @@ query_config:
100 100
101 # 查询翻译模型(须与 services.translation.capabilities 中某项一致) 101 # 查询翻译模型(须与 services.translation.capabilities 中某项一致)
102 # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。 102 # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。
103 - # zh_to_en_model: "opus-mt-zh-en"  
104 - # en_to_zh_model: "opus-mt-en-zh"  
105 - # default_translation_model: "nllb-200-distilled-600m"  
106 - zh_to_en_model: "deepl"  
107 - en_to_zh_model: "deepl"  
108 - default_translation_model: "deepl" 103 + zh_to_en_model: "nllb-200-distilled-600m" # "opus-mt-zh-en"
  104 + en_to_zh_model: "nllb-200-distilled-600m" # "opus-mt-en-zh"
  105 + default_translation_model: "nllb-200-distilled-600m"
  106 + # zh_to_en_model: "deepl"
  107 + # en_to_zh_model: "deepl"
  108 + # default_translation_model: "deepl"
109 # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同) 109 # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)
110 - zh_to_en_model__source_not_in_index: "deepl"  
111 - en_to_zh_model__source_not_in_index: "deepl"  
112 - default_translation_model__source_not_in_index: "deepl" 110 + zh_to_en_model__source_not_in_index: "nllb-200-distilled-600m"
  111 + en_to_zh_model__source_not_in_index: "nllb-200-distilled-600m"
  112 + default_translation_model__source_not_in_index: "nllb-200-distilled-600m"
  113 + # zh_to_en_model__source_not_in_index: "deepl"
  114 + # en_to_zh_model__source_not_in_index: "deepl"
  115 + # default_translation_model__source_not_in_index: "deepl"
113 116
114 # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。 117 # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。
115 # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。 118 # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。
@@ -153,8 +156,8 @@ query_config: @@ -153,8 +156,8 @@ query_config:
153 156
154 # 统一文本召回策略(主查询 + 翻译查询) 157 # 统一文本召回策略(主查询 + 翻译查询)
155 text_query_strategy: 158 text_query_strategy:
156 - base_minimum_should_match: "75%"  
157 - translation_minimum_should_match: "75%" 159 + base_minimum_should_match: "60%"
  160 + translation_minimum_should_match: "60%"
158 translation_boost: 0.75 161 translation_boost: 0.75
159 tie_breaker_base_query: 0.5 162 tie_breaker_base_query: 0.5
160 best_fields_boost: 2.0 163 best_fields_boost: 2.0
@@ -206,8 +209,18 @@ query_config: @@ -206,8 +209,18 @@ query_config:
206 - specifications 209 - specifications
207 - skus 210 - skus
208 211
209 - # KNN boost配置(向量召回的boost值)  
210 - knn_boost: 2.0 # Lower boost for embedding recall 212 + # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
  213 + knn_text_boost: 4
  214 + knn_image_boost: 4
  215 +
  216 + knn_text_k: 150
  217 + knn_text_num_candidates: 400
  218 +
  219 + knn_text_k_long: 300
  220 + knn_text_num_candidates_long: 720
  221 +
  222 + knn_image_k: 300
  223 + knn_image_num_candidates: 720
211 224
212 # Function Score配置(ES层打分规则) 225 # Function Score配置(ES层打分规则)
213 function_score: 226 function_score:
@@ -215,15 +228,40 @@ function_score: @@ -215,15 +228,40 @@ function_score:
215 boost_mode: "multiply" 228 boost_mode: "multiply"
216 functions: [] 229 functions: []
217 230
  231 +# 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
  232 +coarse_rank:
  233 + enabled: true
  234 + input_window: 700
  235 + output_window: 240
  236 + fusion:
  237 + text_bias: 0.1
  238 + text_exponent: 0.35
  239 + knn_text_weight: 1.0
  240 + knn_image_weight: 1.0
  241 + knn_tie_breaker: 0.1
  242 + knn_bias: 0.6
  243 + knn_exponent: 0.0
  244 +
  245 +# 精排配置(轻量 reranker)
  246 +fine_rank:
  247 + enabled: true
  248 + input_window: 240
  249 + output_window: 80
  250 + timeout_sec: 10.0
  251 + rerank_query_template: "{query}"
  252 + rerank_doc_template: "{title}"
  253 + service_profile: "fine"
  254 +
218 # 重排配置(provider/URL 在 services.rerank) 255 # 重排配置(provider/URL 在 services.rerank)
219 rerank: 256 rerank:
220 enabled: true 257 enabled: true
221 - rerank_window: 400 258 + rerank_window: 80
222 timeout_sec: 15.0 259 timeout_sec: 15.0
223 weight_es: 0.4 260 weight_es: 0.4
224 weight_ai: 0.6 261 weight_ai: 0.6
225 rerank_query_template: "{query}" 262 rerank_query_template: "{query}"
226 rerank_doc_template: "{title}" 263 rerank_doc_template: "{title}"
  264 + service_profile: "default"
227 # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项) 265 # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项)
228 # 其中 knn_score 先做一层 dis_max: 266 # 其中 knn_score 先做一层 dis_max:
229 # max(knn_text_weight * text_knn, knn_image_weight * image_knn) 267 # max(knn_text_weight * text_knn, knn_image_weight * image_knn)
@@ -231,13 +269,15 @@ rerank: @@ -231,13 +269,15 @@ rerank:
231 fusion: 269 fusion:
232 rerank_bias: 0.00001 270 rerank_bias: 0.00001
233 rerank_exponent: 1.0 271 rerank_exponent: 1.0
  272 + fine_bias: 0.00001
  273 + fine_exponent: 1.0
234 text_bias: 0.1 274 text_bias: 0.1
235 text_exponent: 0.35 275 text_exponent: 0.35
236 knn_text_weight: 1.0 276 knn_text_weight: 1.0
237 knn_image_weight: 1.0 277 knn_image_weight: 1.0
238 knn_tie_breaker: 0.1 278 knn_tie_breaker: 0.1
239 knn_bias: 0.6 279 knn_bias: 0.6
240 - knn_exponent: 0.2 280 + knn_exponent: 0.0
241 281
242 # 可扩展服务/provider 注册表(单一配置源) 282 # 可扩展服务/provider 注册表(单一配置源)
243 services: 283 services:
@@ -381,16 +421,31 @@ services: @@ -381,16 +421,31 @@ services:
381 normalize_embeddings: true 421 normalize_embeddings: true
382 rerank: 422 rerank:
383 provider: "http" 423 provider: "http"
384 - base_url: "http://127.0.0.1:6007"  
385 providers: 424 providers:
386 http: 425 http:
387 - base_url: "http://127.0.0.1:6007"  
388 - service_url: "http://127.0.0.1:6007/rerank" 426 + instances:
  427 + default:
  428 + base_url: "http://127.0.0.1:6007"
  429 + service_url: "http://127.0.0.1:6007/rerank"
  430 + fine:
  431 + base_url: "http://127.0.0.1:6009"
  432 + service_url: "http://127.0.0.1:6009/rerank"
389 request: 433 request:
390 max_docs: 1000 434 max_docs: 1000
391 normalize: true 435 normalize: true
392 - # 服务内后端(reranker 进程启动时读取)  
393 - backend: "qwen3_vllm_score" # bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank 436 + default_instance: "default"
  437 + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
  438 + instances:
  439 + default:
  440 + host: "0.0.0.0"
  441 + port: 6007
  442 + backend: "qwen3_vllm_score"
  443 + runtime_dir: "./.runtime/reranker/default"
  444 + fine:
  445 + host: "0.0.0.0"
  446 + port: 6009
  447 + backend: "bge"
  448 + runtime_dir: "./.runtime/reranker/fine"
394 backends: 449 backends:
395 bge: 450 bge:
396 model_name: "BAAI/bge-reranker-v2-m3" 451 model_name: "BAAI/bge-reranker-v2-m3"
config/env_config.py
@@ -10,6 +10,7 @@ from __future__ import annotations @@ -10,6 +10,7 @@ from __future__ import annotations
10 from typing import Any, Dict 10 from typing import Any, Dict
11 11
12 from config.loader import get_app_config 12 from config.loader import get_app_config
  13 +from config.services_config import get_rerank_service_url
13 14
14 15
15 def _app(): 16 def _app():
@@ -92,10 +93,7 @@ INDEXER_BASE_URL = ( @@ -92,10 +93,7 @@ INDEXER_BASE_URL = (
92 ) 93 )
93 EMBEDDING_TEXT_SERVICE_URL = _app().services.embedding.get_provider_config().get("text_base_url") 94 EMBEDDING_TEXT_SERVICE_URL = _app().services.embedding.get_provider_config().get("text_base_url")
94 EMBEDDING_IMAGE_SERVICE_URL = _app().services.embedding.get_provider_config().get("image_base_url") 95 EMBEDDING_IMAGE_SERVICE_URL = _app().services.embedding.get_provider_config().get("image_base_url")
95 -RERANKER_SERVICE_URL = (  
96 - _app().services.rerank.get_provider_config().get("service_url")  
97 - or _app().services.rerank.get_provider_config().get("base_url")  
98 -) 96 +RERANKER_SERVICE_URL = get_rerank_service_url()
99 97
100 98
101 def get_es_config() -> Dict[str, Any]: 99 def get_es_config() -> Dict[str, Any]:
@@ -27,10 +27,13 @@ except Exception: # pragma: no cover @@ -27,10 +27,13 @@ except Exception: # pragma: no cover
27 from config.schema import ( 27 from config.schema import (
28 AppConfig, 28 AppConfig,
29 AssetsConfig, 29 AssetsConfig,
  30 + CoarseRankConfig,
  31 + CoarseRankFusionConfig,
30 ConfigMetadata, 32 ConfigMetadata,
31 DatabaseSettings, 33 DatabaseSettings,
32 ElasticsearchSettings, 34 ElasticsearchSettings,
33 EmbeddingServiceConfig, 35 EmbeddingServiceConfig,
  36 + FineRankConfig,
34 FunctionScoreConfig, 37 FunctionScoreConfig,
35 IndexConfig, 38 IndexConfig,
36 InfrastructureConfig, 39 InfrastructureConfig,
@@ -40,6 +43,7 @@ from config.schema import ( @@ -40,6 +43,7 @@ from config.schema import (
40 RerankConfig, 43 RerankConfig,
41 RerankFusionConfig, 44 RerankFusionConfig,
42 RerankServiceConfig, 45 RerankServiceConfig,
  46 + RerankServiceInstanceConfig,
43 RuntimeConfig, 47 RuntimeConfig,
44 SearchConfig, 48 SearchConfig,
45 SecretsConfig, 49 SecretsConfig,
@@ -376,7 +380,20 @@ class AppConfigLoader: @@ -376,7 +380,20 @@ class AppConfigLoader:
376 text_embedding_field=query_cfg.get("text_embedding_field"), 380 text_embedding_field=query_cfg.get("text_embedding_field"),
377 image_embedding_field=query_cfg.get("image_embedding_field"), 381 image_embedding_field=query_cfg.get("image_embedding_field"),
378 source_fields=query_cfg.get("source_fields"), 382 source_fields=query_cfg.get("source_fields"),
379 - knn_boost=float(query_cfg.get("knn_boost", 0.25)), 383 + knn_text_boost=float(
  384 + query_cfg.get("knn_text_boost", query_cfg.get("knn_boost", 0.25))
  385 + ),
  386 + knn_image_boost=float(
  387 + query_cfg.get("knn_image_boost", query_cfg.get("knn_boost", 0.25))
  388 + ),
  389 + knn_text_k=int(query_cfg.get("knn_text_k", 120)),
  390 + knn_text_num_candidates=int(query_cfg.get("knn_text_num_candidates", 400)),
  391 + knn_text_k_long=int(query_cfg.get("knn_text_k_long", 160)),
  392 + knn_text_num_candidates_long=int(
  393 + query_cfg.get("knn_text_num_candidates_long", 500)
  394 + ),
  395 + knn_image_k=int(query_cfg.get("knn_image_k", 120)),
  396 + knn_image_num_candidates=int(query_cfg.get("knn_image_num_candidates", 400)),
380 multilingual_fields=list( 397 multilingual_fields=list(
381 search_fields.get( 398 search_fields.get(
382 "multilingual_fields", 399 "multilingual_fields",
@@ -451,6 +468,11 @@ class AppConfigLoader: @@ -451,6 +468,11 @@ class AppConfigLoader:
451 ) 468 )
452 469
453 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {} 470 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {}
  471 + coarse_rank_cfg = raw.get("coarse_rank") if isinstance(raw.get("coarse_rank"), dict) else {}
  472 + coarse_fusion_raw = (
  473 + coarse_rank_cfg.get("fusion") if isinstance(coarse_rank_cfg.get("fusion"), dict) else {}
  474 + )
  475 + fine_rank_cfg = raw.get("fine_rank") if isinstance(raw.get("fine_rank"), dict) else {}
454 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {} 476 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {}
455 fusion_raw = rerank_cfg.get("fusion") if isinstance(rerank_cfg.get("fusion"), dict) else {} 477 fusion_raw = rerank_cfg.get("fusion") if isinstance(rerank_cfg.get("fusion"), dict) else {}
456 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {} 478 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {}
@@ -464,6 +486,33 @@ class AppConfigLoader: @@ -464,6 +486,33 @@ class AppConfigLoader:
464 boost_mode=str(function_score_cfg.get("boost_mode") or "multiply"), 486 boost_mode=str(function_score_cfg.get("boost_mode") or "multiply"),
465 functions=list(function_score_cfg.get("functions") or []), 487 functions=list(function_score_cfg.get("functions") or []),
466 ), 488 ),
  489 + coarse_rank=CoarseRankConfig(
  490 + enabled=bool(coarse_rank_cfg.get("enabled", True)),
  491 + input_window=int(coarse_rank_cfg.get("input_window", 700)),
  492 + output_window=int(coarse_rank_cfg.get("output_window", 240)),
  493 + fusion=CoarseRankFusionConfig(
  494 + text_bias=float(coarse_fusion_raw.get("text_bias", 0.1)),
  495 + text_exponent=float(coarse_fusion_raw.get("text_exponent", 0.35)),
  496 + knn_text_weight=float(coarse_fusion_raw.get("knn_text_weight", 1.0)),
  497 + knn_image_weight=float(coarse_fusion_raw.get("knn_image_weight", 1.0)),
  498 + knn_tie_breaker=float(coarse_fusion_raw.get("knn_tie_breaker", 0.0)),
  499 + knn_bias=float(coarse_fusion_raw.get("knn_bias", 0.6)),
  500 + knn_exponent=float(coarse_fusion_raw.get("knn_exponent", 0.2)),
  501 + ),
  502 + ),
  503 + fine_rank=FineRankConfig(
  504 + enabled=bool(fine_rank_cfg.get("enabled", True)),
  505 + input_window=int(fine_rank_cfg.get("input_window", 240)),
  506 + output_window=int(fine_rank_cfg.get("output_window", 80)),
  507 + timeout_sec=float(fine_rank_cfg.get("timeout_sec", 10.0)),
  508 + rerank_query_template=str(fine_rank_cfg.get("rerank_query_template") or "{query}"),
  509 + rerank_doc_template=str(fine_rank_cfg.get("rerank_doc_template") or "{title}"),
  510 + service_profile=(
  511 + str(v)
  512 + if (v := fine_rank_cfg.get("service_profile")) not in (None, "")
  513 + else "fine"
  514 + ),
  515 + ),
467 rerank=RerankConfig( 516 rerank=RerankConfig(
468 enabled=bool(rerank_cfg.get("enabled", True)), 517 enabled=bool(rerank_cfg.get("enabled", True)),
469 rerank_window=int(rerank_cfg.get("rerank_window", 384)), 518 rerank_window=int(rerank_cfg.get("rerank_window", 384)),
@@ -472,6 +521,11 @@ class AppConfigLoader: @@ -472,6 +521,11 @@ class AppConfigLoader:
472 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)), 521 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)),
473 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"), 522 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"),
474 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"), 523 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"),
  524 + service_profile=(
  525 + str(v)
  526 + if (v := rerank_cfg.get("service_profile")) not in (None, "")
  527 + else None
  528 + ),
475 fusion=RerankFusionConfig( 529 fusion=RerankFusionConfig(
476 rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)), 530 rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)),
477 rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)), 531 rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)),
@@ -482,6 +536,8 @@ class AppConfigLoader: @@ -482,6 +536,8 @@ class AppConfigLoader:
482 knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)), 536 knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)),
483 knn_bias=float(fusion_raw.get("knn_bias", 0.6)), 537 knn_bias=float(fusion_raw.get("knn_bias", 0.6)),
484 knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)), 538 knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)),
  539 + fine_bias=float(fusion_raw.get("fine_bias", 0.00001)),
  540 + fine_exponent=float(fusion_raw.get("fine_exponent", 1.0)),
485 ), 541 ),
486 ), 542 ),
487 spu_config=SPUConfig( 543 spu_config=SPUConfig(
@@ -560,13 +616,61 @@ class AppConfigLoader: @@ -560,13 +616,61 @@ class AppConfigLoader:
560 rerank_providers = dict(rerank_raw.get("providers") or {}) 616 rerank_providers = dict(rerank_raw.get("providers") or {})
561 if rerank_provider not in rerank_providers: 617 if rerank_provider not in rerank_providers:
562 raise ConfigurationError(f"services.rerank.providers.{rerank_provider} must be configured") 618 raise ConfigurationError(f"services.rerank.providers.{rerank_provider} must be configured")
563 - rerank_backend = str(rerank_raw.get("backend") or "").strip().lower()  
564 rerank_backends = { 619 rerank_backends = {
565 str(key).strip().lower(): dict(value) 620 str(key).strip().lower(): dict(value)
566 for key, value in dict(rerank_raw.get("backends") or {}).items() 621 for key, value in dict(rerank_raw.get("backends") or {}).items()
567 } 622 }
568 - if rerank_backend not in rerank_backends:  
569 - raise ConfigurationError(f"services.rerank.backends.{rerank_backend} must be configured") 623 + default_instance = str(rerank_raw.get("default_instance") or "default").strip() or "default"
  624 + raw_instances = rerank_raw.get("instances") if isinstance(rerank_raw.get("instances"), dict) else {}
  625 + if not raw_instances:
  626 + legacy_backend = str(rerank_raw.get("backend") or "").strip().lower()
  627 + if legacy_backend not in rerank_backends:
  628 + raise ConfigurationError(f"services.rerank.backends.{legacy_backend} must be configured")
  629 + provider_cfg = dict(rerank_providers.get(rerank_provider) or {})
  630 + raw_instances = {
  631 + default_instance: {
  632 + "host": "0.0.0.0",
  633 + "port": 6007,
  634 + "backend": legacy_backend,
  635 + "base_url": provider_cfg.get("base_url"),
  636 + "service_url": provider_cfg.get("service_url"),
  637 + }
  638 + }
  639 + rerank_instances = {}
  640 + for instance_name, instance_raw in raw_instances.items():
  641 + if not isinstance(instance_raw, dict):
  642 + raise ConfigurationError(f"services.rerank.instances.{instance_name} must be a mapping")
  643 + normalized_instance_name = str(instance_name).strip()
  644 + backend_name = str(instance_raw.get("backend") or "").strip().lower()
  645 + if backend_name not in rerank_backends:
  646 + raise ConfigurationError(
  647 + f"services.rerank.instances.{normalized_instance_name}.backend must reference configured services.rerank.backends"
  648 + )
  649 + port = int(instance_raw.get("port", 6007))
  650 + rerank_instances[normalized_instance_name] = RerankServiceInstanceConfig(
  651 + host=str(instance_raw.get("host") or "0.0.0.0"),
  652 + port=port,
  653 + backend=backend_name,
  654 + runtime_dir=(
  655 + str(v)
  656 + if (v := instance_raw.get("runtime_dir")) not in (None, "")
  657 + else None
  658 + ),
  659 + base_url=(
  660 + str(v).rstrip("/")
  661 + if (v := instance_raw.get("base_url")) not in (None, "")
  662 + else None
  663 + ),
  664 + service_url=(
  665 + str(v).rstrip("/")
  666 + if (v := instance_raw.get("service_url")) not in (None, "")
  667 + else None
  668 + ),
  669 + )
  670 + if default_instance not in rerank_instances:
  671 + raise ConfigurationError(
  672 + f"services.rerank.default_instance={default_instance!r} must exist in services.rerank.instances"
  673 + )
570 rerank_request = dict(rerank_raw.get("request") or {}) 674 rerank_request = dict(rerank_raw.get("request") or {})
571 rerank_request.setdefault("max_docs", 1000) 675 rerank_request.setdefault("max_docs", 1000)
572 rerank_request.setdefault("normalize", True) 676 rerank_request.setdefault("normalize", True)
@@ -574,7 +678,8 @@ class AppConfigLoader: @@ -574,7 +678,8 @@ class AppConfigLoader:
574 rerank_config = RerankServiceConfig( 678 rerank_config = RerankServiceConfig(
575 provider=rerank_provider, 679 provider=rerank_provider,
576 providers=rerank_providers, 680 providers=rerank_providers,
577 - backend=rerank_backend, 681 + default_instance=default_instance,
  682 + instances=rerank_instances,
578 backends=rerank_backends, 683 backends=rerank_backends,
579 request=rerank_request, 684 request=rerank_request,
580 ) 685 )
@@ -699,8 +804,19 @@ class AppConfigLoader: @@ -699,8 +804,19 @@ class AppConfigLoader:
699 errors.append("services.embedding.providers.<provider>.image_base_url is required") 804 errors.append("services.embedding.providers.<provider>.image_base_url is required")
700 805
701 rerank_provider_cfg = app_config.services.rerank.get_provider_config() 806 rerank_provider_cfg = app_config.services.rerank.get_provider_config()
702 - if not rerank_provider_cfg.get("service_url") and not rerank_provider_cfg.get("base_url"):  
703 - errors.append("services.rerank.providers.<provider>.service_url or base_url is required") 807 + provider_instances = rerank_provider_cfg.get("instances")
  808 + if not isinstance(provider_instances, dict):
  809 + provider_instances = {}
  810 + for instance_name in app_config.services.rerank.instances:
  811 + instance_cfg = app_config.services.rerank.get_instance(instance_name)
  812 + provider_instance_cfg = provider_instances.get(instance_name) if isinstance(provider_instances, dict) else None
  813 + has_instance_url = False
  814 + if isinstance(provider_instance_cfg, dict):
  815 + has_instance_url = bool(provider_instance_cfg.get("service_url") or provider_instance_cfg.get("base_url"))
  816 + if not has_instance_url and not instance_cfg.service_url and not instance_cfg.base_url:
  817 + errors.append(
  818 + f"services.rerank instance {instance_name!r} must define service_url/base_url either under providers.<provider>.instances or services.rerank.instances"
  819 + )
704 820
705 if errors: 821 if errors:
706 raise ConfigurationError("Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors)) 822 raise ConfigurationError("Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors))
@@ -34,7 +34,15 @@ class QueryConfig: @@ -34,7 +34,15 @@ class QueryConfig:
34 text_embedding_field: Optional[str] = "title_embedding" 34 text_embedding_field: Optional[str] = "title_embedding"
35 image_embedding_field: Optional[str] = None 35 image_embedding_field: Optional[str] = None
36 source_fields: Optional[List[str]] = None 36 source_fields: Optional[List[str]] = None
37 - knn_boost: float = 0.25 37 + # 文本向量 KNN 与多模态(图片)向量 KNN 各自 boost;未在 YAML 中写时由 loader 用 legacy knn_boost 回填
  38 + knn_text_boost: float = 20.0
  39 + knn_image_boost: float = 20.0
  40 + knn_text_k: int = 120
  41 + knn_text_num_candidates: int = 400
  42 + knn_text_k_long: int = 160
  43 + knn_text_num_candidates_long: int = 500
  44 + knn_image_k: int = 120
  45 + knn_image_num_candidates: int = 400
38 multilingual_fields: List[str] = field( 46 multilingual_fields: List[str] = field(
39 default_factory=lambda: [] 47 default_factory=lambda: []
40 ) 48 )
@@ -109,6 +117,48 @@ class RerankFusionConfig: @@ -109,6 +117,48 @@ class RerankFusionConfig:
109 knn_tie_breaker: float = 0.0 117 knn_tie_breaker: float = 0.0
110 knn_bias: float = 0.6 118 knn_bias: float = 0.6
111 knn_exponent: float = 0.2 119 knn_exponent: float = 0.2
  120 + fine_bias: float = 0.00001
  121 + fine_exponent: float = 1.0
  122 +
  123 +
  124 +@dataclass(frozen=True)
  125 +class CoarseRankFusionConfig:
  126 + """
  127 + Multiplicative fusion without model score:
  128 + fused = (max(text, 0) + text_bias) ** text_exponent
  129 + * (max(knn, 0) + knn_bias) ** knn_exponent
  130 + """
  131 +
  132 + text_bias: float = 0.1
  133 + text_exponent: float = 0.35
  134 + knn_text_weight: float = 1.0
  135 + knn_image_weight: float = 1.0
  136 + knn_tie_breaker: float = 0.0
  137 + knn_bias: float = 0.6
  138 + knn_exponent: float = 0.2
  139 +
  140 +
  141 +@dataclass(frozen=True)
  142 +class CoarseRankConfig:
  143 + """Search-time coarse ranking configuration."""
  144 +
  145 + enabled: bool = True
  146 + input_window: int = 700
  147 + output_window: int = 240
  148 + fusion: CoarseRankFusionConfig = field(default_factory=CoarseRankFusionConfig)
  149 +
  150 +
  151 +@dataclass(frozen=True)
  152 +class FineRankConfig:
  153 + """Search-time lightweight rerank configuration."""
  154 +
  155 + enabled: bool = True
  156 + input_window: int = 240
  157 + output_window: int = 80
  158 + timeout_sec: float = 10.0
  159 + rerank_query_template: str = "{query}"
  160 + rerank_doc_template: str = "{title}"
  161 + service_profile: Optional[str] = "fine"
112 162
113 163
114 @dataclass(frozen=True) 164 @dataclass(frozen=True)
@@ -122,6 +172,7 @@ class RerankConfig: @@ -122,6 +172,7 @@ class RerankConfig:
122 weight_ai: float = 0.6 172 weight_ai: float = 0.6
123 rerank_query_template: str = "{query}" 173 rerank_query_template: str = "{query}"
124 rerank_doc_template: str = "{title}" 174 rerank_doc_template: str = "{title}"
  175 + service_profile: Optional[str] = None
125 fusion: RerankFusionConfig = field(default_factory=RerankFusionConfig) 176 fusion: RerankFusionConfig = field(default_factory=RerankFusionConfig)
126 177
127 178
@@ -133,6 +184,8 @@ class SearchConfig: @@ -133,6 +184,8 @@ class SearchConfig:
133 indexes: List[IndexConfig] = field(default_factory=list) 184 indexes: List[IndexConfig] = field(default_factory=list)
134 query_config: QueryConfig = field(default_factory=QueryConfig) 185 query_config: QueryConfig = field(default_factory=QueryConfig)
135 function_score: FunctionScoreConfig = field(default_factory=FunctionScoreConfig) 186 function_score: FunctionScoreConfig = field(default_factory=FunctionScoreConfig)
  187 + coarse_rank: CoarseRankConfig = field(default_factory=CoarseRankConfig)
  188 + fine_rank: FineRankConfig = field(default_factory=FineRankConfig)
136 rerank: RerankConfig = field(default_factory=RerankConfig) 189 rerank: RerankConfig = field(default_factory=RerankConfig)
137 spu_config: SPUConfig = field(default_factory=SPUConfig) 190 spu_config: SPUConfig = field(default_factory=SPUConfig)
138 es_index_name: str = "search_products" 191 es_index_name: str = "search_products"
@@ -183,20 +236,41 @@ class EmbeddingServiceConfig: @@ -183,20 +236,41 @@ class EmbeddingServiceConfig:
183 236
184 237
185 @dataclass(frozen=True) 238 @dataclass(frozen=True)
  239 +class RerankServiceInstanceConfig:
  240 + """One named reranker service instance."""
  241 +
  242 + host: str = "0.0.0.0"
  243 + port: int = 6007
  244 + backend: str = "qwen3_vllm_score"
  245 + runtime_dir: Optional[str] = None
  246 + base_url: Optional[str] = None
  247 + service_url: Optional[str] = None
  248 +
  249 +
  250 +@dataclass(frozen=True)
186 class RerankServiceConfig: 251 class RerankServiceConfig:
187 """Reranker service configuration.""" 252 """Reranker service configuration."""
188 253
189 provider: str 254 provider: str
190 providers: Dict[str, Any] 255 providers: Dict[str, Any]
191 - backend: str 256 + default_instance: str
  257 + instances: Dict[str, RerankServiceInstanceConfig]
192 backends: Dict[str, Dict[str, Any]] 258 backends: Dict[str, Dict[str, Any]]
193 request: Dict[str, Any] 259 request: Dict[str, Any]
194 260
195 def get_provider_config(self) -> Dict[str, Any]: 261 def get_provider_config(self) -> Dict[str, Any]:
196 return dict(self.providers.get(self.provider, {}) or {}) 262 return dict(self.providers.get(self.provider, {}) or {})
197 263
198 - def get_backend_config(self) -> Dict[str, Any]:  
199 - return dict(self.backends.get(self.backend, {}) or {}) 264 + def get_instance(self, name: Optional[str] = None) -> RerankServiceInstanceConfig:
  265 + instance_name = str(name or self.default_instance).strip() or self.default_instance
  266 + instance = self.instances.get(instance_name)
  267 + if instance is None:
  268 + raise KeyError(f"Unknown rerank service instance: {instance_name!r}")
  269 + return instance
  270 +
  271 + def get_backend_config(self, name: Optional[str] = None) -> Dict[str, Any]:
  272 + instance = self.get_instance(name)
  273 + return dict(self.backends.get(instance.backend, {}) or {})
200 274
201 275
202 @dataclass(frozen=True) 276 @dataclass(frozen=True)
config/services_config.py
@@ -11,7 +11,12 @@ import os @@ -11,7 +11,12 @@ import os
11 from typing import Any, Dict, Tuple 11 from typing import Any, Dict, Tuple
12 12
13 from config.loader import get_app_config 13 from config.loader import get_app_config
14 -from config.schema import EmbeddingServiceConfig, RerankServiceConfig, TranslationServiceConfig 14 +from config.schema import (
  15 + EmbeddingServiceConfig,
  16 + RerankServiceConfig,
  17 + RerankServiceInstanceConfig,
  18 + TranslationServiceConfig,
  19 +)
15 20
16 21
17 def get_translation_config() -> Dict[str, Any]: 22 def get_translation_config() -> Dict[str, Any]:
@@ -26,6 +31,16 @@ def get_rerank_config() -&gt; RerankServiceConfig: @@ -26,6 +31,16 @@ def get_rerank_config() -&gt; RerankServiceConfig:
26 return get_app_config().services.rerank 31 return get_app_config().services.rerank
27 32
28 33
  34 +def get_rerank_instance_config(profile: str | None = None) -> RerankServiceInstanceConfig:
  35 + cfg = get_app_config().services.rerank
  36 + instance_name = str(
  37 + profile
  38 + or os.getenv("RERANK_INSTANCE")
  39 + or cfg.default_instance
  40 + ).strip() or cfg.default_instance
  41 + return cfg.get_instance(instance_name)
  42 +
  43 +
29 def get_translation_base_url() -> str: 44 def get_translation_base_url() -> str:
30 return get_app_config().services.translation.endpoint 45 return get_app_config().services.translation.endpoint
31 46
@@ -60,24 +75,56 @@ def get_embedding_image_backend_config() -> Tuple[str, Dict[str, Any]]: @@ -60,24 +75,56 @@ def get_embedding_image_backend_config() -> Tuple[str, Dict[str, Any]]:
60 return cfg.image_backend, cfg.get_image_backend_config() 75 return cfg.image_backend, cfg.get_image_backend_config()
61 76
62 77
63 -def get_rerank_backend_config() -> Tuple[str, Dict[str, Any]]: 78 +def get_rerank_backend_config(profile: str | None = None) -> Tuple[str, Dict[str, Any]]:
64 cfg = get_app_config().services.rerank 79 cfg = get_app_config().services.rerank
65 - backend = str(os.getenv("RERANK_BACKEND") or cfg.backend).strip()  
66 - if backend != cfg.backend: 80 + instance = get_rerank_instance_config(profile)
  81 + backend = str(os.getenv("RERANK_BACKEND") or instance.backend).strip()
  82 + if backend != instance.backend:
67 backend_cfg = cfg.backends.get(backend) 83 backend_cfg = cfg.backends.get(backend)
68 if backend_cfg is None: 84 if backend_cfg is None:
69 raise ValueError(f"Unknown rerank backend override from RERANK_BACKEND: {backend!r}") 85 raise ValueError(f"Unknown rerank backend override from RERANK_BACKEND: {backend!r}")
70 return backend, dict(backend_cfg) 86 return backend, dict(backend_cfg)
71 - return cfg.backend, cfg.get_backend_config() 87 + return instance.backend, cfg.get_backend_config(profile)
72 88
73 89
74 -def get_rerank_base_url() -> str:  
75 - provider_cfg = get_app_config().services.rerank.get_provider_config()  
76 - base = provider_cfg.get("service_url") or provider_cfg.get("base_url") 90 +def get_rerank_base_url(profile: str | None = None) -> str:
  91 + cfg = get_app_config().services.rerank
  92 + instance = get_rerank_instance_config(profile)
  93 + provider_cfg = cfg.get_provider_config()
  94 + profile_name = str(profile or os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  95 +
  96 + base = None
  97 + provider_instances = provider_cfg.get("instances")
  98 + if isinstance(provider_instances, dict):
  99 + instance_provider_cfg = provider_instances.get(profile_name)
  100 + if isinstance(instance_provider_cfg, dict):
  101 + base = instance_provider_cfg.get("base_url") or instance_provider_cfg.get("service_url")
  102 + if not base:
  103 + base = instance.base_url or instance.service_url
  104 + if not base:
  105 + base = provider_cfg.get("service_url") or provider_cfg.get("base_url")
77 if not base: 106 if not base:
78 raise ValueError("Rerank service URL is not configured") 107 raise ValueError("Rerank service URL is not configured")
79 - return str(base).rstrip("/") 108 + base = str(base).rstrip("/")
  109 + if base.endswith("/rerank"):
  110 + base = base[: -len("/rerank")]
  111 + return base
80 112
81 113
82 -def get_rerank_service_url() -> str:  
83 - return get_rerank_base_url() 114 +def get_rerank_service_url(profile: str | None = None) -> str:
  115 + cfg = get_app_config().services.rerank
  116 + instance = get_rerank_instance_config(profile)
  117 + provider_cfg = cfg.get_provider_config()
  118 + profile_name = str(profile or os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  119 +
  120 + service_url = None
  121 + provider_instances = provider_cfg.get("instances")
  122 + if isinstance(provider_instances, dict):
  123 + instance_provider_cfg = provider_instances.get(profile_name)
  124 + if isinstance(instance_provider_cfg, dict):
  125 + service_url = instance_provider_cfg.get("service_url")
  126 + if not service_url:
  127 + service_url = instance.service_url
  128 + if not service_url:
  129 + service_url = f"{get_rerank_base_url(profile=profile)}/rerank"
  130 + return str(service_url).rstrip("/")
context/request_context.py
@@ -26,6 +26,8 @@ class RequestContextStage(Enum): @@ -26,6 +26,8 @@ class RequestContextStage(Enum):
26 # ES 按 ID 回源分页详情回填 26 # ES 按 ID 回源分页详情回填
27 ELASTICSEARCH_PAGE_FILL = "elasticsearch_page_fill" 27 ELASTICSEARCH_PAGE_FILL = "elasticsearch_page_fill"
28 RESULT_PROCESSING = "result_processing" 28 RESULT_PROCESSING = "result_processing"
  29 + COARSE_RANKING = "coarse_ranking"
  30 + FINE_RANKING = "fine_ranking"
29 RERANKING = "reranking" 31 RERANKING = "reranking"
30 # 款式意图 SKU 预筛选(StyleSkuSelector.prepare_hits) 32 # 款式意图 SKU 预筛选(StyleSkuSelector.prepare_hits)
31 STYLE_SKU_PREPARE_HITS = "style_sku_prepare_hits" 33 STYLE_SKU_PREPARE_HITS = "style_sku_prepare_hits"
@@ -407,4 +409,4 @@ def clear_current_request_context() -> None: @@ -407,4 +409,4 @@ def clear_current_request_context() -> None:
407 reset_request_log_context(tokens) 409 reset_request_log_context(tokens)
408 delattr(threading.current_thread(), 'request_log_tokens') 410 delattr(threading.current_thread(), 'request_log_tokens')
409 if hasattr(threading.current_thread(), 'request_context'): 411 if hasattr(threading.current_thread(), 'request_context'):
410 - delattr(threading.current_thread(), 'request_context')  
411 \ No newline at end of file 412 \ No newline at end of file
  413 + delattr(threading.current_thread(), 'request_context')
docs/TODO-ES能力提升.md renamed to docs/issue-2026-03-21-ES能力提升.md
docs/TODO-意图判断-done.md renamed to docs/issue-2026-03-21-意图判断-done03-24.md
docs/issue-2026-03-26-ES文本搜索-补充多模态knn放入should-done-0327.md 0 → 100644
@@ -0,0 +1,72 @@ @@ -0,0 +1,72 @@
  1 +目前knn跟query里面是并列的层级,如下:
  2 +{
  3 + "size": 400,
  4 + "from": 0,
  5 + "query": {
  6 + "bool": {
  7 + "must": [...
  8 + ],
  9 + }
  10 + },
  11 + "knn": {
  12 + "field": "title_embedding",
  13 + "query_vector": [...],
  14 + "k": 120,
  15 + "num_candidates": 400,
  16 + "boost": 2,
  17 + "_name": "knn_query"
  18 + },
  19 +其中query的结构是这样的:
  20 +"query": {
  21 + "bool": {
  22 + "should": [
  23 + {
  24 + "bool": {
  25 + "_name": "base_query",
  26 +\# 原始query
  27 + }
  28 + },
  29 + {
  30 + "bool": {
  31 + "_name": "base_query_trans_zh",
  32 +\# 翻译query。有可能是base_query_trans_en,也有可能两者都有
  33 + "boost": 0.75
  34 + }
  35 + }
  36 + ],
  37 + "minimum_should_match": 1
  38 + }
  39 + },
  40 +我想把knn放到should里面,和base_query、base_query_trans_zh并列。
  41 +另外,现在过滤是在knn里面单独加了一遍:
  42 + "knn": {
  43 + "field": "title_embedding",
  44 + "query_vector": [...],
  45 + "k": 120,
  46 + "num_candidates": 400,
  47 + "boost": 2,
  48 + "_name": "knn_query",
  49 + "filter": {
  50 + "range": {
  51 + "min_price": {
  52 + "gte": 100,
  53 + "lt": 200
  54 + }
  55 + }
  56 + }
  57 + }
  58 +现在不需要了。因为knn在query的内层了。共用过滤。
  59 +
  60 +另外:
  61 +我需要再增加一个knn。
  62 +需要参考文本embedding获得的逻辑,
  63 +通过
  64 +curl -X POST "http://localhost:6008/embed/clip_text?normalize=true&priority=1" \
  65 + -H "Content-Type: application/json" \
  66 + -d '["纯棉短袖", "street tee"]'
  67 +(用 POST /embed/clip_text 生成多模态文本向量。和文本embedding获取方法类似。注意思考代码如何精简,不要冗余。)
  68 +得到文本的多模态embedding。
  69 +然后在这里补充一个多模态embedding,寻找图片相似的结果,对应的商品图片字段为image_embedding.vector。
  70 +重排融合:之前有knn的配置bias和exponential。现在,文本和图片的embedding相似需要融合,融合方式是dis_max,因此需要配置:
  71 +1)各自的权重和tie_breaker
  72 +2)整个向量方面的权重(bias和exponential)
0 \ No newline at end of file 73 \ No newline at end of file
docs/issue-2026-03-27-keywords限定-done-0327.txt 0 → 100644
@@ -0,0 +1,93 @@ @@ -0,0 +1,93 @@
  1 +@query/query_parser.py @scripts/es_debug_search.py
  2 +原始query、以及每一个翻译,都要有一个对应的keywords_query(token分词后,得到名词)
  3 +参考这段代码,获取每一个长度大于 1 的名词,然后用空格拼接起来,作为keywords_query
  4 +import hanlp
  5 +from typing import List, Tuple, Dict, Any
  6 +
  7 +class KeywordExtractor:
  8 + """
  9 + 基于 HanLP 的名词关键词提取器
  10 + """
  11 + def __init__(self):
  12 + # 加载带位置信息的分词模型(细粒度)
  13 + self.tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF)
  14 + self.tok.config.output_spans = True # 启用位置输出
  15 +
  16 + # 加载词性标注模型
  17 + self.pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)
  18 +
  19 + def extract_keywords(self, query: str) -> str:
  20 + """
  21 + 从查询中提取关键词(名词,长度 ≥ 2)
  22 +
  23 + Args:
  24 + query: 输入文本
  25 +
  26 + Returns:
  27 + 拼接后的关键词字符串,非连续词之间自动插入空格
  28 + """
  29 + query = query.strip()
  30 + # 分词结果带位置:[[word, start, end], ...]
  31 + tok_result_with_position = self.tok(query)
  32 + tok_result = [x[0] for x in tok_result_with_position]
  33 +
  34 + # 词性标注
  35 + pos_tag_result = list(zip(tok_result, self.pos_tag(tok_result)))
  36 +
  37 + # 需要忽略的词
  38 + ignore_keywords = ['玩具']
  39 +
  40 + keywords = []
  41 + last_end_pos = 0
  42 +
  43 + for (word, postag), (_, start_pos, end_pos) in zip(pos_tag_result, tok_result_with_position):
  44 + if len(word) >= 2 and postag.startswith('N'):
  45 + if word in ignore_keywords:
  46 + continue
  47 + # 如果当前词与上一个词在原文中不连续,插入空格
  48 + if start_pos != last_end_pos and keywords:
  49 + keywords.append(" ")
  50 + keywords.append(word)
  51 + last_end_pos = end_pos
  52 + # 可选:打印调试信息
  53 + # print(f'分词: {word} | 词性: {postag} | 起始: {start_pos} | 结束: {end_pos}')
  54 +
  55 + return "".join(keywords).strip()
  56 +
  57 +
  58 +最后,在组织检索表达式时,目前是每一个 query (base_query base_query_trans_en base_query_trans_zh 三种情况)。 会组成一个bool查询,以base_query为例:
  59 + "bool": {
  60 + "should": [
  61 + {
  62 + "bool": {
  63 + "_name": "base_query",
  64 + "must": [
  65 + {
  66 + "combined_fields": {
  67 +...
  68 + }
  69 + }
  70 + ],
  71 + "should": [
  72 + {
  73 + "multi_match": {
  74 +... "type": "best_fields",
  75 +...
  76 + },
  77 + {
  78 + "multi_match": {
  79 +...
  80 + "type": "phrase",
  81 +...
  82 + }
  83 + }
  84 + ]
  85 + }
  86 + },
  87 +
  88 +base_query_trans_en base_query_trans_zh 也是同样
  89 +
  90 +在这个布尔查询的must里面加一项:keywords,搜索的字段和combined_fields一样,命中比例要求50%
  91 +
  92 +
  93 +结合现有代码做出合理的设计,呈现简单清晰的数据接口,而不是打补丁
0 \ No newline at end of file 94 \ No newline at end of file
docs/issue-2026-03-27-添加粗排精排-第2轮-done-0327.txt 0 → 100644
@@ -0,0 +1,38 @@ @@ -0,0 +1,38 @@
  1 +这是上一轮检索效果优化的需求:
  2 +参考
  3 +searcher.py
  4 +rerank_client.py
  5 +schema.py
  6 +es_query_builder.py
  7 +config.yaml
  8 +相关性检索优化说明.md
  9 +
  10 +在ES返回到rerank期间增加一轮粗排+一轮精排。
  11 +1. ES召回,600
  12 +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。
  13 +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。
  14 +4. 精排:240->80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口,服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。
  15 +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。
  16 +测试时,使用跟我同样的环境./scripts/service_ctl.sh reranker backend 重启相关服务进行测试
  17 +
  18 +你已经完成了一般修改,已把三段排序链路接上了,主改动在 search/searcher.py、search/rerank_client.py、config/schema.py、config/loader.py、config/services_config.py 和 config/config.yaml。
  19 +
  20 +现在链路是:
  21 +ES 拉取 coarse_rank.input_window 条 -> 粗排按 text/knn 融合裁到 coarse_rank.output_window -> 再做 SKU 选择和 title suffix -> 精排调用轻量 reranker 裁到 fine_rank.output_window -> 最终重排调用现有 reranker,并在最终融合里加入 fine_score。同时把 reranker client/provider 改成了按 service_profile 选不同 service_url,这样 fine/final 可以共用同一套服务代码,只起不同实例。
  22 +
  23 +你是用了./.venv/bin/pytest -q tests/test_rerank_client.py tests/test_search_rerank_window.py 进行验证,结果是 18 passed。也按要求尝试了 ./scripts/service_ctl.sh restart reranker backend 做联调,但当前环境里只有 6007 的现有 reranker,配置里的 fine 端口 6009 还没对应实例,而且服务重启后进程状态不太稳定,没能完成完整线上链路验证。当前最关键的落地前置是把轻量 bge 实例补起来,再用同样脚本重启联调。
  24 +
  25 +因此,需要回顾这个漏斗的代码,进行进一步的优化:
  26 +1. Fine Rank环节的信息都是N/A。fine 实例也需要在config.yaml中体现,并且需要被service_ctl.sh管理,参考现在是怎么管理reranker的。fine和reranker分别调用哪个模型(两者代码功能相同,只是启动不同的实例),请你深度思考后决定。这里需要参考当前代码、根据实际需要(原来只有一个reranker、现在多了一个精排,也要调用一个重排模型)进行必要的部分重构。
  27 +2. Ranking Funnel、Fusion Factors、Signal Breakdown
  28 +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。
  29 +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息
  30 +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。
  31 +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。
  32 +reranker:类似
  33 +
  34 +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。
  35 +特别是1关于将现在的单独的reranker服务,在不增加代码复杂度的情况下,如何可以通过配置启动多个实例、各自可以配置选择的模型等相关参数、分别服务于精排和重排,是设计的重中之重。这里需要思考配置如何拆分,需要多读相关代码、思考最佳实践。
  36 +
  37 +
  38 +
docs/issue-2026-03-27-添加粗排精排-第一轮-done-0327.txt 0 → 100644
@@ -0,0 +1,136 @@ @@ -0,0 +1,136 @@
  1 +我:
  2 +进行一轮检索效果优化:
  3 +参考
  4 +[searcher.py](search/searcher.py)
  5 +[rerank_client.py](search/rerank_client.py)
  6 +[schema.py](config/schema.py)
  7 +[es_query_builder.py](search/es_query_builder.py)
  8 +[config.yaml](config/config.yaml)
  9 +[相关性检索优化说明.md](docs/相关性检索优化说明.md)
  10 +
  11 +在ES返回到rerank期间增加一轮粗排+一轮精排。
  12 +1. ES召回,600
  13 +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。
  14 +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。
  15 +4. 精排:240->80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口,服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。
  16 +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。
  17 +测试时,使用跟我同样的环境./scripts/service_ctl.sh reranker backend 重启相关服务进行测试
  18 +
  19 +
  20 +
  21 +
  22 +
  23 +codex:
  24 +前端这轮我把调试展示重构成了“两层漏斗”:
  25 +
  26 +1. 整体漏斗
  27 +在 [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js) 里把原来散着的 `Query Analysis / Rerank / Stage Timings / ES Query` 重新组织成了:
  28 +- `Query Context`
  29 +- `Ranking Funnel`
  30 +- `Timing Breakdown`
  31 +- `Raw Payloads`
  32 +
  33 +其中 `Ranking Funnel` 现在按阶段卡片展示:
  34 +- `ES Recall`
  35 +- `Coarse Rank`
  36 +- `Fine Rank`
  37 +- `Final Rerank`
  38 +- `Page Return`
  39 +
  40 +每张卡都尽量只放调参最关键的信息:输入/输出条数、模板、service URL、top_n、fusion/meta。这样你看漏斗收缩、看每段服务表现、看融合参数会更直接。
  41 +
  42 +2. 单条结果漏斗
  43 +在结果卡片右侧的 debug 区,我改成了真正的单条漏斗卡片,按“分数结果”和“公式分解”分开看:
  44 +- 顶部先看 `Initial Rank / Final Rank / Rank Delta`
  45 +- 再看阶段分数:`ES / ES Norm / Coarse / Fine / Rerank / Fused`
  46 +- 再看融合因子:`coarse_text_factor / coarse_knn_factor / text_factor / knn_factor / fine_factor / rerank_factor`
  47 +- 再看信号拆解:`text_source / text_translation / text_primary / text_support / text_knn / image_knn`
  48 +
  49 +同时保留了可展开的:
  50 +- `Rerank Input`
  51 +- `Selected SKU`
  52 +- `Matched Queries`
  53 +- `Multilingual Fields`
  54 +
  55 +为了支撑前端,我顺手在 [search/searcher.py](/data/saas-search/search/searcher.py) 里补了单条 `coarse_score / coarse_text_factor / coarse_knn_factor` 到 `per_result`,这样单条卡片现在能把粗排也完整展示出来。
  56 +
  57 +样式上在 [frontend/static/css/style.css](/data/saas-search/frontend/static/css/style.css) 加了阶段卡片、分数 pill、时延条形图和移动端适配。JS 语法我用 `node --check frontend/static/js/app.js` 过了,后端补充字段也用 `./.venv/bin/python -m py_compile search/searcher.py` 过了。
  58 +
  59 +如果你愿意,我下一步可以继续把这个视图再往“调参工作台”方向推一层,比如再加:
  60 +- 漏斗阶段间的 drop rate
  61 +- 每条结果的 rank jump 高亮
  62 +- 批量对比 top N 结果里某个因子分布
  63 +- fusion 参数和结果联动的可视化摘要
  64 +
  65 +
  66 +
  67 +
  68 +
  69 +
  70 +
  71 +
  72 +我:
  73 +1. Fine Rank环节的信息都是N/A,是没有配置吗。fine rank是使用bge-reranker,复用当前reranker模型的代码,但是需要单独起一个服务、单独加载一个模型。
  74 +2. Ranking Funnel、Fusion Factors、Signal Breakdown
  75 +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。
  76 +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息
  77 +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。
  78 +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。
  79 +reranker:类似
  80 +
  81 +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。
  82 +
  83 +
  84 +
  85 +
  86 +
  87 +
  88 +codex:
  89 +`Fine Rank` 之前显示 `N/A`,根因不是前端单纯没写,而是后端原来只有“精排成功拿到分数”时才会填那块 debug;一旦 `fine` 服务没起、没配 `service_url`、调用失败,前端只能看到空值。现在我把这层补齐了:
  90 +
  91 +- 顶层 `debug_info.fine_rank` 现在会始终带 `status / service_profile / service_url / input_window / output_window / query_template / doc_template`
  92 +- 失败时会带 `error`
  93 +- 成功时再补 `docs / top_n / meta`
  94 +- 所以你现在能区分:
  95 + - `disabled`
  96 + - `pending`
  97 + - `ok`
  98 + - `failed_no_scores`
  99 + - `failed`
  100 +
  101 +也就是说,`fine rank` 本身仍然需要你按设计单独起一个 `bge-reranker` 实例,这一层代码已经按“独立服务 + 共用 reranker 代码”的方式接好了,但服务本身还是要单独启动。
  102 +
  103 +这轮我也把调试展示做了结构化重构,不再继续在旧视图上打补丁。现在是按阶段组织信息:
  104 +
  105 +- 整体漏斗:
  106 + - `ES Recall`
  107 + - `Coarse Rank`
  108 + - `Fine Rank`
  109 + - `Final Rerank`
  110 + - `Page Return`
  111 +- 单条结果:
  112 + - 用统一的 `ranking_funnel` 结构来表达每个阶段的
  113 + - `rank`
  114 + - `delta`
  115 + - `score`
  116 + - 关键中间量
  117 + - 关键 factor
  118 + - 输入 payload
  119 +
  120 +具体改动在:
  121 +- [search/searcher.py](/data/saas-search/search/searcher.py)
  122 +- [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js)
  123 +- [frontend/static/css/style.css](/data/saas-search/frontend/static/css/style.css)
  124 +
  125 +单条结果现在按 stage card 展示,而不是把 `Ranking Funnel / Fusion Factors / Signal Breakdown` 拆成三块平铺,这样你可以顺着 ES -> 粗排 -> 精排 -> rerank 看每段的输入、过程、输出和位次变化,更适合调融合公式。
  126 +
  127 +检查我也跑过了:
  128 +- `node --check frontend/static/js/app.js`
  129 +- `./.venv/bin/python -m py_compile search/searcher.py`
  130 +- `./.venv/bin/pytest -q tests/test_search_rerank_window.py`
  131 +
  132 +都通过了。
  133 +
  134 +下一步如果你要,我建议直接继续做两件事中的一个:
  135 +1. 把 `fine` 实例真正拉起来,然后我再把前端里 `status=failed/pending` 的场景跑成真实数据。
  136 +2. 在当前漏斗视图上继续加“阶段 rank 分布 / delta 高亮 / topN 因子对比”,把它做成更像调参工作台。
0 \ No newline at end of file 137 \ No newline at end of file
docs/TODO.md renamed to docs/issue.md
docs/TODO.txt renamed to docs/issue.txt
docs/常用查询 - sql.sql
@@ -584,18 +584,14 @@ SELECT 'shoplazza_product_index_increment', COUNT(*) FROM shoplazza_product_inde @@ -584,18 +584,14 @@ SELECT 'shoplazza_product_index_increment', COUNT(*) FROM shoplazza_product_inde
584 " 584 "
585 585
586 # 执行删除 586 # 执行删除
587 -cd /data/saas-search && MYSQL_PWD='qY8tgodLoA&KT#yQ' mysql -h 10.200.16.14 -P 3316 -u root saas -N -e "  
588 -SELECT 'shoplazza_sync_log', COUNT(*) FROM shoplazza_sync_log WHERE tenant_id = 163  
589 -UNION ALL  
590 -SELECT 'shoplazza_product_index_increment', COUNT(*) FROM shoplazza_product_index_increment WHERE tenant_id = 163; 587 +cd /data/saas-search && MYSQL_PWD='qY8tgodLoA&KT#yQ' mysql -h 10.200.16.14 -P 3316 -u root saas -e "
  588 +SET SESSION sql_safe_updates = 0;
  589 +DELETE FROM shoplazza_sync_log WHERE tenant_id = 163;
  590 +SELECT ROW_COUNT() AS deleted_sync_log;
  591 +DELETE FROM shoplazza_product_index_increment WHERE tenant_id = 163;
  592 +SELECT ROW_COUNT() AS deleted_index_increment;
591 " 593 "
592 594
593 -# 再次统计 tenant_id=163 的行数  
594 -MYSQL_PWD='qY8tgodLoA&KT#yQ' mysql -h 10.200.16.14 -P 3316 -u root saas -N -e "  
595 -SELECT 'shoplazza_sync_log', COUNT(*) FROM shoplazza_sync_log WHERE tenant_id = 163  
596 -UNION ALL  
597 -SELECT 'shoplazza_product_index_increment', COUNT(*) FROM shoplazza_product_index_increment WHERE tenant_id = 163;  
598 -"  
599 ``` 595 ```
600 596
601 然后触发重新安装: 597 然后触发重新安装:
frontend/static/css/style.css
@@ -357,16 +357,22 @@ body { @@ -357,16 +357,22 @@ body {
357 color: #555; 357 color: #555;
358 border-left: 1px dashed #eee; 358 border-left: 1px dashed #eee;
359 padding-left: 12px; 359 padding-left: 12px;
360 - max-height: 260px; 360 + max-height: 540px;
361 overflow: auto; 361 overflow: auto;
362 } 362 }
363 363
364 .product-debug-title { 364 .product-debug-title {
365 font-weight: 600; 365 font-weight: 600;
366 - margin-bottom: 6px; 366 + margin-bottom: 8px;
367 color: #333; 367 color: #333;
368 } 368 }
369 369
  370 +.product-debug-subtitle {
  371 + margin: 10px 0 6px;
  372 + font-weight: 600;
  373 + color: #666;
  374 +}
  375 +
370 .product-debug-line { 376 .product-debug-line {
371 margin-bottom: 2px; 377 margin-bottom: 2px;
372 } 378 }
@@ -418,6 +424,191 @@ body { @@ -418,6 +424,191 @@ body {
418 word-break: break-word; 424 word-break: break-word;
419 } 425 }
420 426
  427 +.debug-panel {
  428 + display: flex;
  429 + flex-direction: column;
  430 + gap: 14px;
  431 + padding: 12px;
  432 + font-family: Menlo, Consolas, "Courier New", monospace;
  433 + font-size: 12px;
  434 +}
  435 +
  436 +.debug-section-block {
  437 + background: #fff;
  438 + border: 1px solid #e8e8e8;
  439 + border-radius: 10px;
  440 + padding: 14px;
  441 +}
  442 +
  443 +.debug-section-title {
  444 + font-size: 13px;
  445 + font-weight: 700;
  446 + color: #222;
  447 + margin-bottom: 10px;
  448 +}
  449 +
  450 +.debug-stage-grid {
  451 + display: grid;
  452 + grid-template-columns: repeat(auto-fit, minmax(210px, 1fr));
  453 + gap: 12px;
  454 +}
  455 +
  456 +.debug-stage-card {
  457 + border: 1px solid #ececec;
  458 + border-radius: 8px;
  459 + padding: 12px;
  460 + background: linear-gradient(180deg, #fff 0%, #fafafa 100%);
  461 +}
  462 +
  463 +.debug-stage-title {
  464 + font-size: 13px;
  465 + font-weight: 700;
  466 + color: #333;
  467 +}
  468 +
  469 +.debug-stage-subtitle {
  470 + margin: 4px 0 8px;
  471 + color: #888;
  472 + font-size: 11px;
  473 +}
  474 +
  475 +.debug-metrics {
  476 + display: grid;
  477 + grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
  478 + gap: 8px;
  479 +}
  480 +
  481 +.debug-metric {
  482 + padding: 8px 9px;
  483 + background: #f7f7f7;
  484 + border-radius: 6px;
  485 + border: 1px solid #efefef;
  486 +}
  487 +
  488 +.debug-metric-label {
  489 + font-size: 11px;
  490 + color: #777;
  491 + margin-bottom: 2px;
  492 +}
  493 +
  494 +.debug-metric-value {
  495 + color: #222;
  496 + font-weight: 600;
  497 + word-break: break-word;
  498 +}
  499 +
  500 +.debug-score-pills {
  501 + display: flex;
  502 + flex-wrap: wrap;
  503 + gap: 8px;
  504 + margin-bottom: 6px;
  505 +}
  506 +
  507 +.debug-score-pill {
  508 + display: inline-flex;
  509 + align-items: center;
  510 + gap: 8px;
  511 + padding: 6px 10px;
  512 + border-radius: 999px;
  513 + border: 1px solid #e3e3e3;
  514 + background: #f7f7f7;
  515 +}
  516 +
  517 +.debug-score-pill-label {
  518 + color: #666;
  519 +}
  520 +
  521 +.debug-score-pill-value {
  522 + color: #111;
  523 + font-weight: 700;
  524 +}
  525 +
  526 +.tone-es {
  527 + background: #f8f1ff;
  528 + border-color: #e6d5ff;
  529 +}
  530 +
  531 +.tone-coarse {
  532 + background: #eef8ff;
  533 + border-color: #cae8ff;
  534 +}
  535 +
  536 +.tone-fine {
  537 + background: #f3fbef;
  538 + border-color: #d8f1c8;
  539 +}
  540 +
  541 +.tone-rerank {
  542 + background: #fff4e8;
  543 + border-color: #ffd9b0;
  544 +}
  545 +
  546 +.tone-final {
  547 + background: #fff1f0;
  548 + border-color: #ffc9c4;
  549 +}
  550 +
  551 +.tone-neutral {
  552 + background: #f5f5f5;
  553 +}
  554 +
  555 +.debug-details {
  556 + margin-top: 10px;
  557 +}
  558 +
  559 +.debug-details summary {
  560 + cursor: pointer;
  561 + color: #555;
  562 + font-weight: 600;
  563 +}
  564 +
  565 +.debug-json-pre {
  566 + margin-top: 8px;
  567 + padding: 10px;
  568 + background: #f5f5f5;
  569 + border-radius: 6px;
  570 + overflow: auto;
  571 + max-height: 240px;
  572 + white-space: pre-wrap;
  573 + word-break: break-word;
  574 +}
  575 +
  576 +.debug-timing-list {
  577 + display: flex;
  578 + flex-direction: column;
  579 + gap: 8px;
  580 +}
  581 +
  582 +.debug-timing-row {
  583 + display: grid;
  584 + grid-template-columns: 220px 1fr 90px;
  585 + gap: 10px;
  586 + align-items: center;
  587 +}
  588 +
  589 +.debug-timing-label {
  590 + color: #444;
  591 +}
  592 +
  593 +.debug-timing-bar-wrap {
  594 + height: 10px;
  595 + background: #f0f0f0;
  596 + border-radius: 999px;
  597 + overflow: hidden;
  598 +}
  599 +
  600 +.debug-timing-bar {
  601 + height: 100%;
  602 + background: linear-gradient(90deg, #f39c12 0%, #e74c3c 100%);
  603 + border-radius: 999px;
  604 +}
  605 +
  606 +.debug-timing-value {
  607 + text-align: right;
  608 + color: #666;
  609 + font-weight: 600;
  610 +}
  611 +
421 .product-debug-link { 612 .product-debug-link {
422 display: inline-block; 613 display: inline-block;
423 margin-top: 0; 614 margin-top: 0;
@@ -687,10 +878,41 @@ footer span { @@ -687,10 +878,41 @@ footer span {
687 } 878 }
688 879
689 .product-grid { 880 .product-grid {
690 - grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));  
691 - gap: 15px;  
692 padding: 15px; 881 padding: 15px;
693 } 882 }
  883 +
  884 + .product-card {
  885 + flex-direction: column;
  886 + }
  887 +
  888 + .product-main {
  889 + width: 100%;
  890 + }
  891 +
  892 + .product-image-wrapper {
  893 + width: 100%;
  894 + max-width: 320px;
  895 + }
  896 +
  897 + .product-debug {
  898 + width: 100%;
  899 + border-left: none;
  900 + border-top: 1px dashed #eee;
  901 + padding-left: 0;
  902 + padding-top: 12px;
  903 + }
  904 +
  905 + .debug-stage-grid {
  906 + grid-template-columns: 1fr;
  907 + }
  908 +
  909 + .debug-timing-row {
  910 + grid-template-columns: 1fr;
  911 + }
  912 +
  913 + .debug-timing-value {
  914 + text-align: left;
  915 + }
694 916
695 .pagination { 917 .pagination {
696 padding: 20px 15px; 918 padding: 20px 15px;
@@ -699,10 +921,6 @@ footer span { @@ -699,10 +921,6 @@ footer span {
699 } 921 }
700 922
701 @media (max-width: 480px) { 923 @media (max-width: 480px) {
702 - .product-grid {  
703 - grid-template-columns: repeat(2, 1fr);  
704 - }  
705 -  
706 .header-left { 924 .header-left {
707 gap: 15px; 925 gap: 15px;
708 } 926 }
frontend/static/js/app.js
@@ -407,89 +407,12 @@ function displayResults(data) { @@ -407,89 +407,12 @@ function displayResults(data) {
407 407
408 let debugHtml = ''; 408 let debugHtml = '';
409 if (debug) { 409 if (debug) {
410 - const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? '');  
411 - const es_score_normalized = typeof debug.es_score_normalized === 'number'  
412 - ? debug.es_score_normalized.toFixed(4)  
413 - : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));  
414 - const rerankScore = typeof debug.rerank_score === 'number'  
415 - ? debug.rerank_score.toFixed(4)  
416 - : (debug.rerank_score == null ? '' : String(debug.rerank_score));  
417 -  
418 - const fusedScore = typeof debug.fused_score === 'number'  
419 - ? debug.fused_score.toFixed(4)  
420 - : (debug.fused_score == null ? '' : String(debug.fused_score));  
421 -  
422 - // Build multilingual title info  
423 - let titleLines = '';  
424 - if (debug.title_multilingual && typeof debug.title_multilingual === 'object') {  
425 - Object.entries(debug.title_multilingual).forEach(([lang, val]) => {  
426 - if (val) {  
427 - titleLines += `<div class="product-debug-line">title.${escapeHtml(String(lang))}: ${escapeHtml(String(val))}</div>`;  
428 - }  
429 - });  
430 - }  
431 -  
432 - const resultJson = customStringify(result);  
433 - const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;  
434 - const rerankInputHtml = debug.rerank_input  
435 - ? `  
436 - <details open>  
437 - <summary>Rerank input</summary>  
438 - <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre>  
439 - </details>  
440 - `  
441 - : '';  
442 - const styleIntentHtml = debug.style_intent_sku  
443 - ? `  
444 - <details open>  
445 - <summary>Selected SKU</summary>  
446 - <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre>  
447 - </details>  
448 - `  
449 - : '';  
450 - const matchedQueriesHtml = debug.matched_queries  
451 - ? `  
452 - <details open>  
453 - <summary>matched_queries</summary>  
454 - <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre>  
455 - </details>  
456 - `  
457 - : '';  
458 -  
459 - debugHtml = `  
460 - <div class="product-debug">  
461 - <div class="product-debug-title">Ranking Debug</div>  
462 - <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div>  
463 - <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>  
464 - <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>  
465 - <div class="product-debug-line">ES score: ${esScore}</div>  
466 - <div class="product-debug-line">ES normalized: ${es_score_normalized}</div>  
467 - <div class="product-debug-line">Rerank score: ${rerankScore}</div>  
468 - <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>  
469 - <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>  
470 - <div class="product-debug-line">text_factor: ${escapeHtml(String(debug.text_factor ?? ''))}</div>  
471 - <div class="product-debug-line">knn_score: ${escapeHtml(String(debug.knn_score ?? ''))}</div>  
472 - <div class="product-debug-line">knn_factor: ${escapeHtml(String(debug.knn_factor ?? ''))}</div>  
473 - <div class="product-debug-line">Fused score: ${fusedScore}</div>  
474 - ${titleLines}  
475 - ${rerankInputHtml}  
476 - ${styleIntentHtml}  
477 - ${matchedQueriesHtml}  
478 - <div class="product-debug-actions">  
479 - <button type="button" class="product-debug-inline-result-btn"  
480 - data-action="toggle-result-inline-doc"  
481 - data-result-json="${escapeAttr(resultJson)}">  
482 - 在结果中显示当前结果数据  
483 - </button>  
484 - <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">  
485 - 查看 ES 原始文档  
486 - </a>  
487 - </div>  
488 - <div class="product-result-doc-panel" hidden>  
489 - <pre class="product-result-doc-pre"></pre>  
490 - </div>  
491 - </div>  
492 - `; 410 + debugHtml = buildProductDebugHtml({
  411 + debug,
  412 + result,
  413 + spuId,
  414 + tenantId,
  415 + });
493 } 416 }
494 417
495 html += ` 418 html += `
@@ -527,6 +450,151 @@ function displayResults(data) { @@ -527,6 +450,151 @@ function displayResults(data) {
527 grid.innerHTML = html; 450 grid.innerHTML = html;
528 } 451 }
529 452
  453 +function formatDebugNumber(value, digits = 4) {
  454 + if (typeof value === 'number' && Number.isFinite(value)) {
  455 + return value.toFixed(digits);
  456 + }
  457 + return value == null || value === '' ? 'N/A' : String(value);
  458 +}
  459 +
  460 +function renderMetricList(items) {
  461 + const rows = items
  462 + .filter((item) => item && item.value !== undefined && item.value !== null && item.value !== '')
  463 + .map((item) => `
  464 + <div class="debug-metric">
  465 + <div class="debug-metric-label">${escapeHtml(item.label)}</div>
  466 + <div class="debug-metric-value">${escapeHtml(String(item.value))}</div>
  467 + </div>
  468 + `)
  469 + .join('');
  470 + return rows ? `<div class="debug-metrics">${rows}</div>` : '';
  471 +}
  472 +
  473 +function renderScorePills(items) {
  474 + const pills = items
  475 + .filter((item) => item && item.value !== undefined && item.value !== null && item.value !== '')
  476 + .map((item) => `
  477 + <div class="debug-score-pill ${item.tone || ''}">
  478 + <span class="debug-score-pill-label">${escapeHtml(item.label)}</span>
  479 + <span class="debug-score-pill-value">${escapeHtml(String(item.value))}</span>
  480 + </div>
  481 + `)
  482 + .join('');
  483 + return pills ? `<div class="debug-score-pills">${pills}</div>` : '';
  484 +}
  485 +
  486 +function renderJsonDetails(title, payload, open = false) {
  487 + if (!payload || (typeof payload === 'object' && Object.keys(payload).length === 0)) {
  488 + return '';
  489 + }
  490 + return `
  491 + <details class="debug-details" ${open ? 'open' : ''}>
  492 + <summary>${escapeHtml(title)}</summary>
  493 + <pre class="debug-json-pre">${escapeHtml(customStringify(payload))}</pre>
  494 + </details>
  495 + `;
  496 +}
  497 +
  498 +function buildProductDebugHtml({ debug, result, spuId, tenantId }) {
  499 + const resultJson = customStringify(result);
  500 + const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
  501 + const funnel = debug.ranking_funnel || {};
  502 + const esStage = funnel.es_recall || {};
  503 + const coarseStage = funnel.coarse_rank || {};
  504 + const fineStage = funnel.fine_rank || {};
  505 + const rerankStage = funnel.rerank || {};
  506 + const finalPageStage = funnel.final_page || {};
  507 +
  508 + const rankSummary = renderMetricList([
  509 + { label: 'Initial Rank', value: debug.initial_rank ?? 'N/A' },
  510 + { label: 'Final Rank', value: debug.final_rank ?? 'N/A' },
  511 + { label: 'Rank Delta', value: (debug.initial_rank && debug.final_rank) ? String(debug.initial_rank - debug.final_rank) : 'N/A' },
  512 + { label: 'SPU', value: spuId || 'N/A' },
  513 + ]);
  514 +
  515 + const stageScores = renderScorePills([
  516 + { label: 'ES', value: formatDebugNumber(esStage.score ?? debug.es_score), tone: 'tone-es' },
  517 + { label: 'ES Norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized), tone: 'tone-neutral' },
  518 + { label: 'Coarse', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score), tone: 'tone-coarse' },
  519 + { label: 'Fine', value: formatDebugNumber(fineStage.score ?? debug.fine_score), tone: 'tone-fine' },
  520 + { label: 'Rerank', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score), tone: 'tone-rerank' },
  521 + { label: 'Fused', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score), tone: 'tone-final' },
  522 + ]);
  523 +
  524 + const stageGrid = `
  525 + <div class="debug-stage-grid">
  526 + ${buildStageCard('ES Recall', 'Matched queries and ES raw score', [
  527 + { label: 'rank', value: esStage.rank ?? debug.initial_rank ?? 'N/A' },
  528 + { label: 'es_score', value: formatDebugNumber(esStage.score ?? debug.es_score) },
  529 + { label: 'es_norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized) },
  530 + ], renderJsonDetails('Matched Queries', esStage.matched_queries ?? debug.matched_queries, false))}
  531 + ${buildStageCard('Coarse Rank', 'Text + vector fusion', [
  532 + { label: 'rank', value: coarseStage.rank ?? 'N/A' },
  533 + { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' },
  534 + { label: 'coarse_score', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score) },
  535 + { label: 'text_score', value: formatDebugNumber(coarseStage.text_score ?? debug.text_score) },
  536 + { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score ?? debug.text_source_score) },
  537 + { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score ?? debug.text_translation_score) },
  538 + { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score ?? debug.text_primary_score) },
  539 + { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score ?? debug.text_support_score) },
  540 + { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score ?? debug.knn_score) },
  541 + { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score ?? debug.text_knn_score) },
  542 + { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score ?? debug.image_knn_score) },
  543 + { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor ?? debug.coarse_text_factor) },
  544 + { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor ?? debug.coarse_knn_factor) },
  545 + ], renderJsonDetails('Coarse Signals', coarseStage.signals, false))}
  546 + ${buildStageCard('Fine Rank', 'Lightweight reranker output', [
  547 + { label: 'rank', value: fineStage.rank ?? 'N/A' },
  548 + { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' },
  549 + { label: 'fine_score', value: formatDebugNumber(fineStage.score ?? debug.fine_score) },
  550 + ], renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, false))}
  551 + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
  552 + { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' },
  553 + { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' },
  554 + { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) },
  555 + { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) },
  556 + { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) },
  557 + { label: 'text_source', value: formatDebugNumber(rerankStage.signals?.text_source_score ?? debug.text_source_score) },
  558 + { label: 'text_translation', value: formatDebugNumber(rerankStage.signals?.text_translation_score ?? debug.text_translation_score) },
  559 + { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) },
  560 + { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) },
  561 + { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) },
  562 + { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) },
  563 + { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) },
  564 + ], renderJsonDetails('Rerank Signals', rerankStage.signals, false))}
  565 + </div>
  566 + `;
  567 +
  568 + const titlePayload = {};
  569 + if (debug.title_multilingual) titlePayload.title = debug.title_multilingual;
  570 + if (debug.brief_multilingual) titlePayload.brief = debug.brief_multilingual;
  571 + if (debug.vendor_multilingual) titlePayload.vendor = debug.vendor_multilingual;
  572 +
  573 + return `
  574 + <div class="product-debug">
  575 + <div class="product-debug-title">Ranking Funnel</div>
  576 + ${rankSummary}
  577 + ${stageScores}
  578 + ${stageGrid}
  579 + ${renderJsonDetails('Selected SKU', debug.style_intent_sku, true)}
  580 + ${renderJsonDetails('Multilingual Fields', titlePayload, false)}
  581 + <div class="product-debug-actions">
  582 + <button type="button" class="product-debug-inline-result-btn"
  583 + data-action="toggle-result-inline-doc"
  584 + data-result-json="${escapeAttr(resultJson)}">
  585 + 在结果中显示当前结果数据
  586 + </button>
  587 + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">
  588 + 查看 ES 原始文档
  589 + </a>
  590 + </div>
  591 + <div class="product-result-doc-panel" hidden>
  592 + <pre class="product-result-doc-pre"></pre>
  593 + </div>
  594 + </div>
  595 + `;
  596 +}
  597 +
530 // Display facets as filter tags (一级分类 + 三个属性分面) 598 // Display facets as filter tags (一级分类 + 三个属性分面)
531 function displayFacets(facets) { 599 function displayFacets(facets) {
532 if (!facets || !Array.isArray(facets)) { 600 if (!facets || !Array.isArray(facets)) {
@@ -919,127 +987,181 @@ function formatIntentDetectionHtml(intent) { @@ -919,127 +987,181 @@ function formatIntentDetectionHtml(intent) {
919 return block; 987 return block;
920 } 988 }
921 989
  990 +function buildStageCard(title, subtitle, metrics, extraHtml = '') {
  991 + return `
  992 + <div class="debug-stage-card">
  993 + <div class="debug-stage-title">${escapeHtml(title)}</div>
  994 + ${subtitle ? `<div class="debug-stage-subtitle">${escapeHtml(subtitle)}</div>` : ''}
  995 + ${renderMetricList(metrics)}
  996 + ${extraHtml}
  997 + </div>
  998 + `;
  999 +}
  1000 +
  1001 +function renderTimingBars(stageTimings) {
  1002 + if (!stageTimings || typeof stageTimings !== 'object') {
  1003 + return '';
  1004 + }
  1005 + const orderedStages = [
  1006 + 'query_parsing',
  1007 + 'query_building',
  1008 + 'elasticsearch_search_primary',
  1009 + 'coarse_ranking',
  1010 + 'style_sku_prepare_hits',
  1011 + 'fine_ranking',
  1012 + 'reranking',
  1013 + 'elasticsearch_page_fill',
  1014 + 'result_processing',
  1015 + 'total_search',
  1016 + ];
  1017 + const entries = Object.entries(stageTimings)
  1018 + .sort((a, b) => {
  1019 + const ai = orderedStages.indexOf(a[0]);
  1020 + const bi = orderedStages.indexOf(b[0]);
  1021 + return (ai === -1 ? 999 : ai) - (bi === -1 ? 999 : bi);
  1022 + });
  1023 + const total = Number(stageTimings.total_search || 0);
  1024 + return `
  1025 + <div class="debug-timing-list">
  1026 + ${entries.map(([stage, duration]) => {
  1027 + const numeric = Number(duration) || 0;
  1028 + const width = total > 0 ? Math.max(2, Math.round((numeric / total) * 100)) : 2;
  1029 + return `
  1030 + <div class="debug-timing-row">
  1031 + <div class="debug-timing-label">${escapeHtml(stage)}</div>
  1032 + <div class="debug-timing-bar-wrap"><div class="debug-timing-bar" style="width:${width}%"></div></div>
  1033 + <div class="debug-timing-value">${numeric.toFixed(2)}ms</div>
  1034 + </div>
  1035 + `;
  1036 + }).join('')}
  1037 + </div>
  1038 + `;
  1039 +}
  1040 +
  1041 +function buildGlobalFunnelHtml(data, debugInfo) {
  1042 + const queryAnalysis = debugInfo.query_analysis || {};
  1043 + const searchParams = debugInfo.search_params || {};
  1044 + const featureFlags = debugInfo.feature_flags || {};
  1045 + const esResponse = debugInfo.es_response || {};
  1046 + const esQueryContext = debugInfo.es_query_context || {};
  1047 + const rankingFunnel = debugInfo.ranking_funnel || {};
  1048 + const coarseInfo = rankingFunnel.coarse_rank || debugInfo.coarse_rank || {};
  1049 + const fineInfo = rankingFunnel.fine_rank || debugInfo.fine_rank || {};
  1050 + const rerankInfo = rankingFunnel.rerank || debugInfo.rerank || {};
  1051 + const translations = queryAnalysis.translations || {};
  1052 +
  1053 + const summaryHtml = `
  1054 + <div class="debug-section-block">
  1055 + <div class="debug-section-title">Query Context</div>
  1056 + ${renderMetricList([
  1057 + { label: 'original_query', value: queryAnalysis.original_query || 'N/A' },
  1058 + { label: 'rewritten_query', value: queryAnalysis.rewritten_query || 'N/A' },
  1059 + { label: 'detected_language', value: queryAnalysis.detected_language || 'N/A' },
  1060 + { label: 'index_languages', value: (queryAnalysis.index_languages || []).join(', ') || 'N/A' },
  1061 + { label: 'query_tokens', value: (queryAnalysis.query_tokens || []).join(', ') || 'N/A' },
  1062 + { label: 'translation_enabled', value: featureFlags.translation_enabled ? 'enabled' : 'disabled' },
  1063 + { label: 'embedding_enabled', value: featureFlags.embedding_enabled ? 'enabled' : 'disabled' },
  1064 + { label: 'style_intent_active', value: featureFlags.style_intent_active ? 'yes' : 'no' },
  1065 + ])}
  1066 + ${Object.keys(translations).length ? renderJsonDetails('Translations', translations, true) : ''}
  1067 + ${formatIntentDetectionHtml(queryAnalysis.intent_detection ?? queryAnalysis.style_intent_profile)}
  1068 + </div>
  1069 + `;
  1070 +
  1071 + const funnelHtml = `
  1072 + <div class="debug-section-block">
  1073 + <div class="debug-section-title">Ranking Funnel</div>
  1074 + <div class="debug-stage-grid">
  1075 + ${buildStageCard('ES Recall', 'First-pass retrieval', [
  1076 + { label: 'fetch_from', value: searchParams.es_fetch_from ?? 0 },
  1077 + { label: 'fetch_size', value: searchParams.es_fetch_size ?? 'N/A' },
  1078 + { label: 'total_hits', value: esResponse.total_hits ?? 'N/A' },
  1079 + { label: 'es_took_ms', value: esResponse.took_ms ?? 'N/A' },
  1080 + { label: 'include_named_queries_score', value: esQueryContext.include_named_queries_score ? 'yes' : 'no' },
  1081 + ])}
  1082 + ${buildStageCard('Coarse Rank', 'Lexical + vector fusion only', [
  1083 + { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' },
  1084 + { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' },
  1085 + { label: 'formula', value: 'text x knn' },
  1086 + ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')}
  1087 + ${buildStageCard('Fine Rank', 'Lightweight reranker', [
  1088 + { label: 'service_url', value: fineInfo.service_url || 'N/A' },
  1089 + { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' },
  1090 + { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' },
  1091 + { label: 'top_n', value: fineInfo.top_n ?? 'N/A' },
  1092 + { label: 'backend', value: fineInfo.backend || 'N/A' },
  1093 + { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' },
  1094 + { label: 'query_template', value: fineInfo.query_template || 'N/A' },
  1095 + ], fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')}
  1096 + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
  1097 + { label: 'service_url', value: rerankInfo.service_url || 'N/A' },
  1098 + { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' },
  1099 + { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' },
  1100 + { label: 'top_n', value: rerankInfo.top_n ?? 'N/A' },
  1101 + { label: 'backend', value: rerankInfo.backend || 'N/A' },
  1102 + { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' },
  1103 + { label: 'query_template', value: rerankInfo.query_template || 'N/A' },
  1104 + ], `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)}
  1105 + ${buildStageCard('Page Return', 'Final slice returned to UI', [
  1106 + { label: 'from', value: searchParams.from_ ?? 0 },
  1107 + { label: 'size', value: searchParams.size ?? 'N/A' },
  1108 + { label: 'returned', value: (data.results || []).length },
  1109 + { label: 'max_score', value: formatDebugNumber(esResponse.max_score, 3) },
  1110 + ])}
  1111 + </div>
  1112 + </div>
  1113 + `;
  1114 +
  1115 + const timingHtml = `
  1116 + <div class="debug-section-block">
  1117 + <div class="debug-section-title">Timing Breakdown</div>
  1118 + ${renderTimingBars(debugInfo.stage_timings)}
  1119 + </div>
  1120 + `;
  1121 +
  1122 + const rawPayloadHtml = `
  1123 + <div class="debug-section-block">
  1124 + <div class="debug-section-title">Raw Payloads</div>
  1125 + ${renderJsonDetails('ES Query DSL', debugInfo.es_query, false)}
  1126 + ${renderJsonDetails('ES Query Context', debugInfo.es_query_context, false)}
  1127 + ${renderJsonDetails('Search Params', debugInfo.search_params, false)}
  1128 + </div>
  1129 + `;
  1130 +
  1131 + return `
  1132 + <div class="debug-panel">
  1133 + ${summaryHtml}
  1134 + ${funnelHtml}
  1135 + ${timingHtml}
  1136 + ${rawPayloadHtml}
  1137 + </div>
  1138 + `;
  1139 +}
  1140 +
922 // Display debug info 1141 // Display debug info
923 function displayDebugInfo(data) { 1142 function displayDebugInfo(data) {
924 const debugInfoDiv = document.getElementById('debugInfo'); 1143 const debugInfoDiv = document.getElementById('debugInfo');
925 - 1144 +
926 if (!state.debug || !data.debug_info) { 1145 if (!state.debug || !data.debug_info) {
927 - // If debug mode is off or no debug info, show basic query info  
928 if (data.query_info) { 1146 if (data.query_info) {
929 - let html = '<div style="padding: 10px;">';  
930 - html += `<div><strong>original_query:</strong> ${escapeHtml(data.query_info.original_query || 'N/A')}</div>`;  
931 - html += `<div><strong>detected_language:</strong> ${escapeHtml(data.query_info.detected_language || 'N/A')}</div>`;  
932 - html += '</div>';  
933 - debugInfoDiv.innerHTML = html; 1147 + debugInfoDiv.innerHTML = `
  1148 + <div class="debug-panel">
  1149 + <div class="debug-section-block">
  1150 + <div class="debug-section-title">Query Context</div>
  1151 + ${renderMetricList([
  1152 + { label: 'original_query', value: data.query_info.original_query || 'N/A' },
  1153 + { label: 'detected_language', value: data.query_info.detected_language || 'N/A' },
  1154 + ])}
  1155 + </div>
  1156 + </div>
  1157 + `;
934 } else { 1158 } else {
935 debugInfoDiv.innerHTML = ''; 1159 debugInfoDiv.innerHTML = '';
936 } 1160 }
937 return; 1161 return;
938 } 1162 }
939 -  
940 - // Display comprehensive debug info when debug mode is on  
941 - const debugInfo = data.debug_info;  
942 - let html = '<div style="padding: 10px; font-family: monospace; font-size: 12px;">';  
943 -  
944 - // Query Analysis  
945 - if (debugInfo.query_analysis) {  
946 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Query Analysis:</strong>';  
947 - html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`;  
948 - html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`;  
949 - html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`;  
950 - html += `<div>detected_language: ${escapeHtml(debugInfo.query_analysis.detected_language || 'N/A')}</div>`;  
951 - html += `<div>index_languages: ${escapeHtml((debugInfo.query_analysis.index_languages || []).join(', ') || 'N/A')}</div>`;  
952 - html += `<div>query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}</div>`;  
953 -  
954 - if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) {  
955 - html += '<div>translations: ';  
956 - for (const [lang, translation] of Object.entries(debugInfo.query_analysis.translations)) {  
957 - if (translation) {  
958 - html += `${lang}: ${escapeHtml(translation)}; `;  
959 - }  
960 - }  
961 - html += '</div>';  
962 - }  
963 -  
964 - if (debugInfo.query_analysis.boolean_ast) {  
965 - html += `<div>boolean_ast: ${escapeHtml(debugInfo.query_analysis.boolean_ast)}</div>`;  
966 - }  
967 1163
968 - const intentPayload = debugInfo.query_analysis.intent_detection ?? debugInfo.query_analysis.style_intent_profile;  
969 - html += formatIntentDetectionHtml(intentPayload);  
970 -  
971 - html += '</div>';  
972 - }  
973 -  
974 - // Feature Flags  
975 - if (debugInfo.feature_flags) {  
976 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Feature Flags:</strong>';  
977 - html += `<div>translation_enabled: ${debugInfo.feature_flags.translation_enabled ? 'enabled' : 'disabled'}</div>`;  
978 - html += `<div>embedding_enabled: ${debugInfo.feature_flags.embedding_enabled ? 'enabled' : 'disabled'}</div>`;  
979 - html += `<div>rerank_enabled: ${debugInfo.feature_flags.rerank_enabled ? 'enabled' : 'disabled'}</div>`;  
980 - if (debugInfo.feature_flags.style_intent_enabled !== undefined) {  
981 - html += `<div>style_intent_enabled: ${debugInfo.feature_flags.style_intent_enabled ? 'enabled' : 'disabled'}</div>`;  
982 - }  
983 - if (debugInfo.feature_flags.style_intent_active !== undefined) {  
984 - html += `<div>style_intent_active: ${debugInfo.feature_flags.style_intent_active ? 'yes' : 'no'}</div>`;  
985 - }  
986 - html += '</div>';  
987 - }  
988 -  
989 - // ES Response  
990 - if (debugInfo.es_response) {  
991 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Response:</strong>';  
992 - html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`;  
993 - html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`;  
994 - html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`;  
995 - html += `<div>es_score_normalization_factor: ${escapeHtml(String(debugInfo.es_response.es_score_normalization_factor ?? ''))}</div>`;  
996 - html += '</div>';  
997 - }  
998 -  
999 - if (debugInfo.rerank) {  
1000 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Rerank:</strong>';  
1001 - html += `<div>query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}</div>`;  
1002 - html += `<div>doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}</div>`;  
1003 - html += `<div>query_text: ${escapeHtml(debugInfo.rerank.query_text || 'N/A')}</div>`;  
1004 - html += `<div>docs: ${escapeHtml(String(debugInfo.rerank.docs ?? ''))}</div>`;  
1005 - html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`;  
1006 - if (debugInfo.rerank.fusion) {  
1007 - html += '<div>fusion:</div>';  
1008 - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 160px;">${escapeHtml(customStringify(debugInfo.rerank.fusion))}</pre>`;  
1009 - }  
1010 - html += '</div>';  
1011 - }  
1012 -  
1013 - // Stage Timings  
1014 - if (debugInfo.stage_timings) {  
1015 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Stage Timings:</strong>';  
1016 - const bounds = debugInfo.stage_time_bounds_ms || {};  
1017 - for (const [stage, duration] of Object.entries(debugInfo.stage_timings)) {  
1018 - const b = bounds[stage];  
1019 - if (b && b.start_unix_ms != null && b.end_unix_ms != null) {  
1020 - html += `<div>${stage}: ${Number(duration).toFixed(2)}ms <span style="color:#666">(start ${b.start_unix_ms} → end ${b.end_unix_ms} unix ms)</span></div>`;  
1021 - } else {  
1022 - html += `<div>${stage}: ${Number(duration).toFixed(2)}ms</div>`;  
1023 - }  
1024 - }  
1025 - html += '</div>';  
1026 - }  
1027 -  
1028 - // ES Query  
1029 - if (debugInfo.es_query) {  
1030 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query DSL:</strong>';  
1031 - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 400px;">${escapeHtml(customStringify(debugInfo.es_query))}</pre>`;  
1032 - html += '</div>';  
1033 - }  
1034 -  
1035 - if (debugInfo.es_query_context) {  
1036 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query Context:</strong>';  
1037 - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 240px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`;  
1038 - html += '</div>';  
1039 - }  
1040 -  
1041 - html += '</div>';  
1042 - debugInfoDiv.innerHTML = html; 1164 + debugInfoDiv.innerHTML = buildGlobalFunnelHtml(data, data.debug_info);
1043 } 1165 }
1044 1166
1045 // Custom JSON stringify that compresses numeric arrays (like embeddings) to single line 1167 // Custom JSON stringify that compresses numeric arrays (like embeddings) to single line
@@ -1070,4 +1192,3 @@ function formatDate(dateStr) { @@ -1070,4 +1192,3 @@ function formatDate(dateStr) {
1070 return dateStr; 1192 return dateStr;
1071 } 1193 }
1072 } 1194 }
1073 -  
providers/rerank.py
@@ -57,7 +57,7 @@ class HttpRerankProvider: @@ -57,7 +57,7 @@ class HttpRerankProvider:
57 return None, None 57 return None, None
58 58
59 59
60 -def create_rerank_provider() -> HttpRerankProvider: 60 +def create_rerank_provider(service_profile: Optional[str] = None) -> HttpRerankProvider:
61 """Create rerank provider from services config.""" 61 """Create rerank provider from services config."""
62 cfg = get_rerank_config() 62 cfg = get_rerank_config()
63 provider = (cfg.provider or "http").strip().lower() 63 provider = (cfg.provider or "http").strip().lower()
@@ -65,5 +65,5 @@ def create_rerank_provider() -> HttpRerankProvider: @@ -65,5 +65,5 @@
65 if provider != "http": 65 if provider != "http":
66 raise ValueError(f"Unsupported rerank provider: {provider}") 66 raise ValueError(f"Unsupported rerank provider: {provider}")
67 67
68 - url = get_rerank_service_url() 68 + url = get_rerank_service_url(profile=service_profile)
69 return HttpRerankProvider(service_url=url) 69 return HttpRerankProvider(service_url=url)
@@ -2,6 +2,7 @@ @@ -2,6 +2,7 @@
2 2
3 from .language_detector import LanguageDetector 3 from .language_detector import LanguageDetector
4 from .query_rewriter import QueryRewriter, QueryNormalizer 4 from .query_rewriter import QueryRewriter, QueryNormalizer
  5 +from .keyword_extractor import KEYWORDS_QUERY_BASE_KEY
5 from .query_parser import QueryParser, ParsedQuery 6 from .query_parser import QueryParser, ParsedQuery
6 7
7 __all__ = [ 8 __all__ = [
@@ -10,4 +11,5 @@ __all__ = [ @@ -10,4 +11,5 @@ __all__ = [
10 'QueryNormalizer', 11 'QueryNormalizer',
11 'QueryParser', 12 'QueryParser',
12 'ParsedQuery', 13 'ParsedQuery',
  14 + 'KEYWORDS_QUERY_BASE_KEY',
13 ] 15 ]
query/keyword_extractor.py 0 → 100644
@@ -0,0 +1,86 @@ @@ -0,0 +1,86 @@
  1 +"""
  2 +HanLP-based noun keyword string for lexical constraints (token POS starts with N, length >= 2).
  3 +
  4 +``ParsedQuery.keywords_queries`` uses the same key layout as text variants:
  5 +``KEYWORDS_QUERY_BASE_KEY`` for the rewritten source query, and ISO-like language
  6 +codes for each ``ParsedQuery.translations`` entry (non-empty extractions only).
  7 +"""
  8 +
  9 +from __future__ import annotations
  10 +
  11 +import logging
  12 +from typing import Any, Dict, List, Optional
  13 +
  14 +logger = logging.getLogger(__name__)
  15 +
  16 +import hanlp # type: ignore
  17 +
  18 +# Aligns with ``rewritten_query`` / ES ``base_query`` (not a language code).
  19 +KEYWORDS_QUERY_BASE_KEY = "base"
  20 +
  21 +
  22 +class KeywordExtractor:
  23 + """基于 HanLP 的名词关键词提取器(与分词位置对齐,非连续名词间插入空格)。"""
  24 +
  25 + def __init__(
  26 + self,
  27 + tokenizer: Optional[Any] = None,
  28 + *,
  29 + ignore_keywords: Optional[List[str]] = None,
  30 + ):
  31 + if tokenizer is not None:
  32 + self.tok = tokenizer
  33 + else:
  34 + self.tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF)
  35 + self.tok.config.output_spans = True
  36 + self.pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)
  37 + self.ignore_keywords = frozenset(ignore_keywords or ["玩具"])
  38 +
  39 + def extract_keywords(self, query: str) -> str:
  40 + """
  41 + 从查询中提取关键词(名词,长度 ≥ 2),以空格分隔非连续片段。
  42 + """
  43 + query = (query or "").strip()
  44 + if not query:
  45 + return ""
  46 + tok_result_with_position = self.tok(query)
  47 + tok_result = [x[0] for x in tok_result_with_position]
  48 + if not tok_result:
  49 + return ""
  50 + pos_tags = self.pos_tag(tok_result)
  51 + pos_tag_result = list(zip(tok_result, pos_tags))
  52 + keywords: List[str] = []
  53 + last_end_pos = 0
  54 + for (word, postag), (_, start_pos, end_pos) in zip(pos_tag_result, tok_result_with_position):
  55 + if len(word) >= 2 and str(postag).startswith("N"):
  56 + if word in self.ignore_keywords:
  57 + continue
  58 + if start_pos != last_end_pos and keywords:
  59 + keywords.append(" ")
  60 + keywords.append(word)
  61 + last_end_pos = end_pos
  62 + return "".join(keywords).strip()
  63 +
  64 +
  65 +def collect_keywords_queries(
  66 + extractor: KeywordExtractor,
  67 + rewritten_query: str,
  68 + translations: Dict[str, str],
  69 +) -> Dict[str, str]:
  70 + """
  71 + Build the keyword map for all lexical variants (base + translations).
  72 +
  73 + Omits entries when extraction yields an empty string.
  74 + """
  75 + out: Dict[str, str] = {}
  76 + base_kw = extractor.extract_keywords(rewritten_query)
  77 + if base_kw:
  78 + out[KEYWORDS_QUERY_BASE_KEY] = base_kw
  79 + for lang, text in translations.items():
  80 + lang_key = str(lang or "").strip().lower()
  81 + if not lang_key or not (text or "").strip():
  82 + continue
  83 + kw = extractor.extract_keywords(text)
  84 + if kw:
  85 + out[lang_key] = kw
  86 + return out
query/query_parser.py
@@ -27,6 +27,7 @@ from .product_title_exclusion import ( @@ -27,6 +27,7 @@ from .product_title_exclusion import (
27 from .query_rewriter import QueryRewriter, QueryNormalizer 27 from .query_rewriter import QueryRewriter, QueryNormalizer
28 from .style_intent import StyleIntentDetector, StyleIntentProfile, StyleIntentRegistry 28 from .style_intent import StyleIntentDetector, StyleIntentProfile, StyleIntentRegistry
29 from .tokenization import extract_token_strings, simple_tokenize_query 29 from .tokenization import extract_token_strings, simple_tokenize_query
  30 +from .keyword_extractor import KeywordExtractor, collect_keywords_queries
30 31
31 logger = logging.getLogger(__name__) 32 logger = logging.getLogger(__name__)
32 33
@@ -59,7 +60,14 @@ def rerank_query_text( @@ -59,7 +60,14 @@ def rerank_query_text(
59 60
60 @dataclass(slots=True) 61 @dataclass(slots=True)
61 class ParsedQuery: 62 class ParsedQuery:
62 - """Container for query parser facts.""" 63 + """
  64 + Container for query parser facts.
  65 +
  66 + ``keywords_queries`` parallels text variants: key ``base`` (see
  67 + ``keyword_extractor.KEYWORDS_QUERY_BASE_KEY``) for ``rewritten_query``,
  68 + and the same language codes as ``translations`` for each translated string.
  69 + Entries with no extracted nouns are omitted.
  70 + """
63 71
64 original_query: str 72 original_query: str
65 query_normalized: str 73 query_normalized: str
@@ -69,6 +77,7 @@ class ParsedQuery: @@ -69,6 +77,7 @@ class ParsedQuery:
69 query_vector: Optional[np.ndarray] = None 77 query_vector: Optional[np.ndarray] = None
70 image_query_vector: Optional[np.ndarray] = None 78 image_query_vector: Optional[np.ndarray] = None
71 query_tokens: List[str] = field(default_factory=list) 79 query_tokens: List[str] = field(default_factory=list)
  80 + keywords_queries: Dict[str, str] = field(default_factory=dict)
72 style_intent_profile: Optional[StyleIntentProfile] = None 81 style_intent_profile: Optional[StyleIntentProfile] = None
73 product_title_exclusion_profile: Optional[ProductTitleExclusionProfile] = None 82 product_title_exclusion_profile: Optional[ProductTitleExclusionProfile] = None
74 83
@@ -91,6 +100,7 @@ class ParsedQuery: @@ -91,6 +100,7 @@ class ParsedQuery:
91 "has_query_vector": self.query_vector is not None, 100 "has_query_vector": self.query_vector is not None,
92 "has_image_query_vector": self.image_query_vector is not None, 101 "has_image_query_vector": self.image_query_vector is not None,
93 "query_tokens": self.query_tokens, 102 "query_tokens": self.query_tokens,
  103 + "keywords_queries": dict(self.keywords_queries),
94 "style_intent_profile": ( 104 "style_intent_profile": (
95 self.style_intent_profile.to_dict() if self.style_intent_profile is not None else None 105 self.style_intent_profile.to_dict() if self.style_intent_profile is not None else None
96 ), 106 ),
@@ -138,6 +148,7 @@ class QueryParser: @@ -138,6 +148,7 @@ class QueryParser:
138 self.language_detector = LanguageDetector() 148 self.language_detector = LanguageDetector()
139 self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary) 149 self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary)
140 self._tokenizer = tokenizer or self._build_tokenizer() 150 self._tokenizer = tokenizer or self._build_tokenizer()
  151 + self._keyword_extractor = KeywordExtractor(tokenizer=self._tokenizer)
141 self.style_intent_registry = StyleIntentRegistry.from_query_config(config.query_config) 152 self.style_intent_registry = StyleIntentRegistry.from_query_config(config.query_config)
142 self.style_intent_detector = StyleIntentDetector( 153 self.style_intent_detector = StyleIntentDetector(
143 self.style_intent_registry, 154 self.style_intent_registry,
@@ -523,6 +534,16 @@ class QueryParser: @@ -523,6 +534,16 @@ class QueryParser:
523 if translations and context: 534 if translations and context:
524 context.store_intermediate_result("translations", translations) 535 context.store_intermediate_result("translations", translations)
525 536
  537 + keywords_queries: Dict[str, str] = {}
  538 + try:
  539 + keywords_queries = collect_keywords_queries(
  540 + self._keyword_extractor,
  541 + query_text,
  542 + translations,
  543 + )
  544 + except Exception as e:
  545 + log_info(f"Keyword extraction failed | Error: {e}")
  546 +
526 # Build result 547 # Build result
527 base_result = ParsedQuery( 548 base_result = ParsedQuery(
528 original_query=query, 549 original_query=query,
@@ -533,6 +554,7 @@ class QueryParser: @@ -533,6 +554,7 @@ class QueryParser:
533 query_vector=query_vector, 554 query_vector=query_vector,
534 image_query_vector=image_query_vector, 555 image_query_vector=image_query_vector,
535 query_tokens=query_tokens, 556 query_tokens=query_tokens,
  557 + keywords_queries=keywords_queries,
536 ) 558 )
537 style_intent_profile = self.style_intent_detector.detect(base_result) 559 style_intent_profile = self.style_intent_detector.detect(base_result)
538 product_title_exclusion_profile = self.product_title_exclusion_detector.detect(base_result) 560 product_title_exclusion_profile = self.product_title_exclusion_detector.detect(base_result)
@@ -555,6 +577,7 @@ class QueryParser: @@ -555,6 +577,7 @@ class QueryParser:
555 query_vector=query_vector, 577 query_vector=query_vector,
556 image_query_vector=image_query_vector, 578 image_query_vector=image_query_vector,
557 query_tokens=query_tokens, 579 query_tokens=query_tokens,
  580 + keywords_queries=keywords_queries,
558 style_intent_profile=style_intent_profile, 581 style_intent_profile=style_intent_profile,
559 product_title_exclusion_profile=product_title_exclusion_profile, 582 product_title_exclusion_profile=product_title_exclusion_profile,
560 ) 583 )
reranker/README.md
@@ -71,7 +71,7 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Jin @@ -71,7 +71,7 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Jin
71 - `qwen3_transformers_packed` -> `.venv-reranker-transformers-packed` 71 - `qwen3_transformers_packed` -> `.venv-reranker-transformers-packed`
72 - `qwen3_gguf` -> `.venv-reranker-gguf` 72 - `qwen3_gguf` -> `.venv-reranker-gguf`
73 - `qwen3_gguf_06b` -> `.venv-reranker-gguf-06b` 73 - `qwen3_gguf_06b` -> `.venv-reranker-gguf-06b`
74 -- `bge` -> `.venv-reranker-bge` 74 +- `bge` -> `.venv-reranker`
75 - `dashscope_rerank` -> `.venv-reranker-dashscope` 75 - `dashscope_rerank` -> `.venv-reranker-dashscope`
76 76
77 77
reranker/config.py
@@ -2,19 +2,29 @@ @@ -2,19 +2,29 @@
2 2
3 from __future__ import annotations 3 from __future__ import annotations
4 4
  5 +import os
  6 +
5 from config.loader import get_app_config 7 from config.loader import get_app_config
6 8
7 9
8 class RerankerConfig(object): 10 class RerankerConfig(object):
9 def __init__(self) -> None: 11 def __init__(self) -> None:
10 app_config = get_app_config() 12 app_config = get_app_config()
11 - runtime = app_config.runtime  
12 service = app_config.services.rerank 13 service = app_config.services.rerank
13 - backend = service.get_backend_config() 14 + instance_name = str(os.getenv("RERANK_INSTANCE") or service.default_instance).strip() or service.default_instance
  15 + instance = service.get_instance(instance_name)
  16 + backend = service.get_backend_config(instance_name)
14 request = service.request 17 request = service.request
15 18
16 - self.HOST = runtime.reranker_host  
17 - self.PORT = runtime.reranker_port 19 + self.INSTANCE = instance_name
  20 + self.HOST = str(os.getenv("RERANKER_HOST") or instance.host)
  21 + self.PORT = int(os.getenv("RERANKER_PORT") or instance.port)
  22 + self.BACKEND = str(os.getenv("RERANK_BACKEND") or instance.backend)
  23 + self.RUNTIME_DIR = str(
  24 + os.getenv("RERANKER_RUNTIME_DIR")
  25 + or instance.runtime_dir
  26 + or f"./.runtime/reranker/{instance_name}"
  27 + )
18 28
19 self.MODEL_NAME = str(backend.get("model_name") or "Qwen/Qwen3-Reranker-0.6B") 29 self.MODEL_NAME = str(backend.get("model_name") or "Qwen/Qwen3-Reranker-0.6B")
20 self.DEVICE = backend.get("device") 30 self.DEVICE = backend.get("device")
reranker/server.py
@@ -6,7 +6,7 @@ POST /rerank @@ -6,7 +6,7 @@ POST /rerank
6 Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool } 6 Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool }
7 Response: { "scores": [float], "meta": {...} } 7 Response: { "scores": [float], "meta": {...} }
8 8
9 -Backend selected via config: services.rerank.backend 9 +Backend selected via config: services.rerank.instances.<name>.backend
10 (bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND. 10 (bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND.
11 """ 11 """
12 12
@@ -76,14 +76,15 @@ class RerankResponse(BaseModel): @@ -76,14 +76,15 @@ class RerankResponse(BaseModel):
76 @app.on_event("startup") 76 @app.on_event("startup")
77 def load_model() -> None: 77 def load_model() -> None:
78 global _reranker, _backend_name 78 global _reranker, _backend_name
79 - logger.info("Starting reranker service on port %s", CONFIG.PORT) 79 + logger.info("Starting reranker service | instance=%s port=%s", CONFIG.INSTANCE, CONFIG.PORT)
80 try: 80 try:
81 - backend_name, backend_cfg = get_rerank_backend_config() 81 + backend_name, backend_cfg = get_rerank_backend_config(CONFIG.INSTANCE)
82 _backend_name = backend_name 82 _backend_name = backend_name
83 _reranker = get_rerank_backend(backend_name, backend_cfg) 83 _reranker = get_rerank_backend(backend_name, backend_cfg)
84 model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get("model_name", backend_name) 84 model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get("model_name", backend_name)
85 logger.info( 85 logger.info(
86 - "Reranker ready | backend=%s model=%s", 86 + "Reranker ready | instance=%s backend=%s model=%s",
  87 + CONFIG.INSTANCE,
87 _backend_name, 88 _backend_name,
88 model_info, 89 model_info,
89 ) 90 )
@@ -101,6 +102,7 @@ def health() -&gt; Dict[str, Any]: @@ -101,6 +102,7 @@ def health() -&gt; Dict[str, Any]:
101 ).get("model_name", _backend_name) 102 ).get("model_name", _backend_name)
102 payload: Dict[str, Any] = { 103 payload: Dict[str, Any] = {
103 "status": "ok" if _reranker is not None else "unavailable", 104 "status": "ok" if _reranker is not None else "unavailable",
  105 + "instance": CONFIG.INSTANCE,
104 "model_loaded": _reranker is not None, 106 "model_loaded": _reranker is not None,
105 "model": model_info, 107 "model": model_info,
106 "backend": _backend_name, 108 "backend": _backend_name,
scripts/experiments/english_query_bucketing_demo.py 0 → 100644
@@ -0,0 +1,554 @@ @@ -0,0 +1,554 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Offline experiment: English query bucketing (intersection / boost / drop).
  4 +
  5 +Scheme A: spaCy noun_chunks + head + lemma + rule buckets
  6 +Scheme B: spaCy NP candidates + KeyBERT rerank → intersection vs boost
  7 +Scheme C: YAKE + spaCy noun/POS filter
  8 +
  9 +Run (after deps): python scripts/experiments/english_query_bucketing_demo.py
  10 +Optional: pip install -r scripts/experiments/requirements_query_bucketing_experiments.txt
  11 +"""
  12 +
  13 +from __future__ import annotations
  14 +
  15 +import argparse
  16 +import json
  17 +import re
  18 +import sys
  19 +from dataclasses import dataclass, field
  20 +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
  21 +
  22 +
# --- shared -----------------------------------------------------------------

# English possessive suffix ("'s" or typographic "’s") at a word boundary.
_POSSESSIVE_RE = re.compile(r"(['’]s)\b", re.IGNORECASE)


def normalize_query(s: str) -> str:
    """Trim surrounding whitespace and remove possessive "'s" suffixes."""
    trimmed = (s or "").strip()
    return _POSSESSIVE_RE.sub("", trimmed)
  32 +
  33 +
@dataclass
class BucketResult:
    """Three-way split of query terms: must-match, should-boost, droppable."""

    intersection_terms: List[str] = field(default_factory=list)
    boost_terms: List[str] = field(default_factory=list)
    drop_terms: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable mapping of the three buckets."""
        return dict(
            intersection_terms=self.intersection_terms,
            boost_terms=self.boost_terms,
            drop_terms=self.drop_terms,
        )
  46 +
  47 +
  48 +def _dedupe_preserve(seq: Sequence[str]) -> List[str]:
  49 + seen: Set[str] = set()
  50 + out: List[str] = []
  51 + for x in seq:
  52 + k = x.strip().lower()
  53 + if not k or k in seen:
  54 + continue
  55 + seen.add(k)
  56 + out.append(x.strip())
  57 + return out
  58 +
  59 +
  60 +# --- Scheme A: spaCy + rules -------------------------------------------------
  61 +
  62 +WEAK_BOOST_ADJS = frozenset(
  63 + {
  64 + "best",
  65 + "good",
  66 + "great",
  67 + "new",
  68 + "free",
  69 + "cheap",
  70 + "top",
  71 + "fine",
  72 + "real",
  73 + }
  74 +)
  75 +
  76 +FUNCTIONAL_DEP = frozenset(
  77 + {
  78 + "det",
  79 + "aux",
  80 + "auxpass",
  81 + "prep",
  82 + "mark",
  83 + "expl",
  84 + "cc",
  85 + "punct",
  86 + "case",
  87 + }
  88 +)
  89 +
  90 +# Second pobj under list-like INTJ roots often encodes audience/size (boost, not must-match).
  91 +_DEMOGRAPHIC_NOUNS = frozenset(
  92 + {
  93 + "women",
  94 + "woman",
  95 + "men",
  96 + "man",
  97 + "kids",
  98 + "kid",
  99 + "boys",
  100 + "boy",
  101 + "girls",
  102 + "girl",
  103 + "baby",
  104 + "babies",
  105 + "toddler",
  106 + "adult",
  107 + "adults",
  108 + }
  109 +)
  110 +
  111 +
  112 +def _lemma_lower(t) -> str:
  113 + return ((t.lemma_ or t.text) or "").lower().strip()
  114 +
  115 +
  116 +def _surface_lower(t) -> str:
  117 + """Lowercased surface form (keeps plural 'headphones' vs lemma 'headphone')."""
  118 + return (t.text or "").lower().strip()
  119 +
  120 +
  121 +_PRICE_PREP_LEMMAS = frozenset({"under", "over", "below", "above", "within", "between", "near"})
  122 +
  123 +
def bucket_scheme_a_spacy(query: str, nlp) -> BucketResult:
    """
    Dependency-first bucketing of an English query into intersection /
    boost / drop terms.

    noun_chunks alone mis-parse verbal queries like "noise cancelling
    headphones" (ROOT verb), so this prefers dobj / ROOT product nouns,
    purpose PPs ("for ..."), and brand INTJ/PROPN tokens, falling back to
    noun_chunks only when no product head was found.

    Args:
        query: Raw user query. Possessives are deliberately NOT stripped
            (see the note below).
        nlp: A loaded spaCy English pipeline.

    Returns:
        BucketResult with deduplicated, sorted term lists.
    """
    import spacy  # noqa: F401  - fail fast with a clear ImportError if spaCy is missing

    # Do not strip possessives ('s) before spaCy: it changes the parse tree
    # (e.g. "women's running shoes size 8" vs "women running shoes size 8").
    text = (query or "").strip()
    doc = nlp(text)
    intersection: Set[str] = set()
    boost: Set[str] = set()
    drop: Set[str] = set()

    stops = nlp.Defaults.stop_words | WEAK_BOOST_ADJS

    def mark_drop(t) -> None:
        # Record a surface form as droppable; whitespace/punct never recorded.
        if not t.is_space and not t.is_punct:
            drop.add(t.text.lower())

    # --- Drops: function words / question words ---
    for token in doc:
        if token.is_space or token.is_punct:
            continue
        lem = _lemma_lower(token)
        if token.pos_ in ("DET", "PRON", "AUX", "ADP", "PART", "SCONJ", "CCONJ"):
            mark_drop(token)
            continue
        if token.dep_ in FUNCTIONAL_DEP:
            mark_drop(token)
            continue
        if token.pos_ == "ADV" and lem in {"where", "how", "when", "why", "what", "which"}:
            mark_drop(token)
            continue
        if token.text.lower() in ("'s", "’s"):
            mark_drop(token)
            continue
        if lem in stops and token.pos_ != "PROPN":
            mark_drop(token)

    # Token indices whose bare head must NOT end up in the intersection bucket.
    pobj_heads_to_demote: Set[int] = set()

    # Purpose / context: "for airplane travel" → boost phrase; demote bare head from intersection
    for token in doc:
        if token.dep_ == "prep" and token.text.lower() == "for":
            for c in token.children:
                if c.dep_ == "pobj" and c.pos_ in ("NOUN", "PROPN"):
                    span = doc[c.left_edge.i : c.right_edge.i + 1]
                    phrase = span.text.strip().lower()
                    if phrase:
                        boost.add(phrase)
                    pobj_heads_to_demote.add(c.i)

    # Price / range: "under 500 dollars" → boost only
    for token in doc:
        if token.dep_ != "prep" or _lemma_lower(token) not in _PRICE_PREP_LEMMAS:
            continue
        for c in token.children:
            if c.dep_ == "pobj" and c.pos_ in ("NOUN", "PROPN"):
                span = doc[c.left_edge.i : c.right_edge.i + 1]
                phrase = span.text.strip().lower()
                if phrase:
                    boost.add(phrase)
                pobj_heads_to_demote.add(c.i)

    # Direct object product nouns (handles "noise cancelling … headphones")
    for token in doc:
        if token.dep_ == "dobj" and token.pos_ in ("NOUN", "PROPN"):
            if token.i in pobj_heads_to_demote:
                continue
            intersection.add(_surface_lower(token))

    # Copular questions / definitions: "what is the best smartphone …"
    for token in doc:
        if token.dep_ != "nsubj" or token.pos_ not in ("NOUN", "PROPN"):
            continue
        h = token.head
        if h.pos_ == "AUX" and h.dep_ == "ROOT":
            intersection.add(_surface_lower(token))

    # Verbal ROOT: modifiers left of dobj → boost phrase (e.g. "noise cancelling")
    roots = [t for t in doc if t.dep_ == "ROOT"]
    if roots and roots[0].pos_ == "VERB":
        # NOTE: fixed — the original bound roots[0] to an unused local (root_v).
        for t in doc:
            if t.dep_ != "dobj" or t.pos_ not in ("NOUN", "PROPN"):
                continue
            if t.i in pobj_heads_to_demote:
                continue
            parts: List[str] = []
            for x in doc[: t.i]:
                if x.is_punct or x.is_space:
                    continue
                if x.pos_ in ("DET", "ADP", "PRON"):
                    continue
                xl = _lemma_lower(x)
                if xl in stops:
                    continue
                parts.append(x.text.lower())
            if len(parts) >= 1:
                boost.add(" ".join(parts))

    # Brand / query lead: INTJ/PROPN ROOT (e.g. Nike …)
    for token in doc:
        if token.dep_ == "ROOT" and token.pos_ in ("INTJ", "PROPN"):
            intersection.add(_surface_lower(token))
        if token.pos_ == "PROPN":
            intersection.add(_surface_lower(token))

    _DIMENSION_ROOTS = frozenset({"size", "width", "length", "height", "weight"})

    # "women's running shoes size 8" → shoes ∩, "size 8" boost (not size alone)
    for token in doc:
        if token.dep_ != "ROOT" or token.pos_ != "NOUN":
            continue
        if _lemma_lower(token) not in _DIMENSION_ROOTS:
            continue
        for c in token.children:
            if c.dep_ == "nsubj" and c.pos_ in ("NOUN", "PROPN"):
                intersection.add(_surface_lower(c))
                for ch in c.children:
                    if ch.dep_ == "compound" and ch.pos_ in ("NOUN", "VERB", "ADJ"):
                        boost.add(_surface_lower(ch))
        # Only the dimension head + numbers (not full subtree: left_edge/right_edge is huge)
        dim_parts = [token.text.lower()]
        for ch in token.children:
            if ch.dep_ == "nummod":
                dim_parts.append(ch.text.lower())
        boost.add(" ".join(dim_parts))

    # ROOT noun product (e.g. "plastic toy car")
    for token in doc:
        if token.dep_ == "ROOT" and token.pos_ in ("NOUN", "PROPN"):
            # A dimension ROOT with a noun subject was already handled above.
            if _lemma_lower(token) in _DIMENSION_ROOTS and any(
                c.dep_ == "nsubj" and c.pos_ in ("NOUN", "PROPN") for c in token.children
            ):
                continue
            intersection.add(_surface_lower(token))
            for c in token.children:
                if c.dep_ == "compound" and c.pos_ == "NOUN":
                    boost.add(c.text.lower())
            if token.i - token.left_edge.i >= 1:
                comps = [x.text.lower() for x in doc[token.left_edge.i : token.i] if x.dep_ == "compound"]
                if len(comps) >= 2:
                    boost.add(" ".join(comps))

    # List-like INTJ head with multiple pobj: first pobj = product head, rest often demographic
    for token in doc:
        if token.dep_ != "ROOT" or token.pos_ not in ("INTJ", "VERB", "NOUN"):
            continue
        pobjs = sorted(
            [c for c in token.children if c.dep_ == "pobj" and c.pos_ in ("NOUN", "PROPN")],
            key=lambda x: x.i,
        )
        if len(pobjs) >= 2 and token.pos_ == "INTJ":
            intersection.add(_surface_lower(pobjs[0]))
            for extra in pobjs[1:]:
                if _lemma_lower(extra) in _DEMOGRAPHIC_NOUNS:
                    boost.add(_surface_lower(extra))
                else:
                    intersection.add(_surface_lower(extra))
        elif len(pobjs) == 1 and token.pos_ == "INTJ":
            intersection.add(_surface_lower(pobjs[0]))

    # amod under pobj (running → shoes)
    for token in doc:
        if token.dep_ == "amod" and token.head.pos_ in ("NOUN", "PROPN"):
            if token.pos_ == "VERB":
                boost.add(_surface_lower(token))
            elif token.pos_ == "ADJ":
                boost.add(_lemma_lower(token))

    # Genitive possessor (women's shoes → women boost)
    for token in doc:
        if token.dep_ == "poss" and token.head.pos_ in ("NOUN", "PROPN"):
            boost.add(_surface_lower(token))

    # noun_chunks fallback when no dobj/ROOT intersection yet
    if not intersection:
        for chunk in doc.noun_chunks:
            head = chunk.root
            if head.pos_ not in ("NOUN", "PROPN"):
                continue
            # Price / range: "under 500 dollars" → boost, not a product head
            if head.dep_ == "pobj" and head.head.dep_ == "prep":
                prep = head.head
                if _lemma_lower(prep) in _PRICE_PREP_LEMMAS:
                    boost.add(chunk.text.strip().lower())
                    continue
            hl = _surface_lower(head)
            if hl:
                intersection.add(hl)
            for t in chunk:
                if t == head or t.pos_ != "PROPN":
                    continue
                intersection.add(_surface_lower(t))
            for t in chunk:
                if t == head:
                    continue
                if t.pos_ == "ADJ" or (t.pos_ == "NOUN" and t.dep_ == "compound"):
                    boost.add(_lemma_lower(t))

    # Remove demoted pobj heads from intersection (purpose / price clause)
    for i in pobj_heads_to_demote:
        t = doc[i]
        intersection.discard(_lemma_lower(t))
        intersection.discard(_surface_lower(t))

    boost -= intersection
    boost = {b for b in boost if b.lower() not in stops and b.strip()}

    return BucketResult(
        intersection_terms=_dedupe_preserve(sorted(intersection)),
        boost_terms=_dedupe_preserve(sorted(boost)),
        drop_terms=_dedupe_preserve(sorted(drop)),
    )
  342 +
  343 +
  344 +# --- Scheme B: spaCy candidates + KeyBERT -----------------------------------
  345 +
  346 +def _spacy_np_candidates(doc) -> List[str]:
  347 + phrases: List[str] = []
  348 + for chunk in doc.noun_chunks:
  349 + t = chunk.text.strip()
  350 + if len(t) < 2:
  351 + continue
  352 + root = chunk.root
  353 + if root.pos_ not in ("NOUN", "PROPN"):
  354 + continue
  355 + phrases.append(t)
  356 + return phrases
  357 +
  358 +
def bucket_scheme_b_keybert(query: str, nlp, kw_model) -> BucketResult:
    """Rank spaCy NP candidates with KeyBERT: top hit → intersection, rest → boost."""
    text = (query or "").strip()
    doc = nlp(text)

    candidates = _spacy_np_candidates(doc) or [text]
    top_n = min(8, max(4, len(candidates) + 2))

    # KeyBERT releases disagree on the candidate-list parameter name; try both.
    try:
        raw_keywords = kw_model.extract_keywords(
            text,
            candidates=candidates,
            top_n=top_n,
        )
    except TypeError:
        raw_keywords = kw_model.extract_keywords(
            text,
            candidate_keywords=candidates,
            top_n=top_n,
        )
    ranked = [kw[0].lower().strip() for kw in (raw_keywords or []) if kw and kw[0].strip()]

    # Best-ranked phrase is the must-match candidate; everything else boosts.
    intersection: List[str] = ranked[:1]
    boost: List[str] = list(ranked[1:])

    # Fold in noun-chunk heads KeyBERT did not surface.
    heads: List[str] = []
    for chunk in doc.noun_chunks:
        root = chunk.root
        if root.pos_ in ("NOUN", "PROPN"):
            heads.append(_surface_lower(root))
    for head in heads:
        if head and head not in intersection and head not in boost:
            boost.append(head)
    if not intersection and heads:
        intersection.append(heads[0])
        boost = [term for term in boost if term != heads[0]]

    stops = nlp.Defaults.stop_words | WEAK_BOOST_ADJS
    dropped: Set[str] = set()
    for token in doc:
        if token.is_punct:
            continue
        lemma = (token.lemma_ or token.text).lower()
        if token.pos_ in ("DET", "ADP", "PART", "PRON", "AUX") or lemma in stops:
            dropped.add(token.text.lower())

    return BucketResult(
        intersection_terms=_dedupe_preserve(intersection),
        boost_terms=_dedupe_preserve(boost),
        drop_terms=sorted(dropped),
    )
  414 +
  415 +
  416 +# --- Scheme C: YAKE + noun filter --------------------------------------------
  417 +
def bucket_scheme_c_yake(query: str, nlp, yake_extractor) -> BucketResult:
    """YAKE keyphrases filtered by spaCy POS: short noun-headed top phrases → intersection."""
    text = (query or "").strip()
    doc = nlp(text)

    raw = yake_extractor.extract_keywords(text)  # usually List[Tuple[str, float]]; newest yake API may differ

    if raw and isinstance(raw[0], (list, tuple)) and len(raw[0]) >= 2:
        scored: List[Tuple[str, float]] = [(str(kw).strip(), float(sc)) for kw, sc in raw]
    else:
        # Older yake releases return plain (keyword, score) tuples.
        scored = [(str(item[0]).strip(), float(item[1])) for item in raw]

    intersection: List[str] = []
    boost: List[str] = []
    # In YAKE a lower score means more important, so iterate ascending.
    for phrase, _ in sorted(scored, key=lambda pair: pair[1]):
        phrase = phrase.lower().strip()
        if not phrase or len(phrase) < 2:
            continue
        parsed = nlp(phrase)
        has_noun = False
        noun_is_head = False
        for tok in parsed:
            if tok.is_punct or tok.is_space:
                continue
            if tok.pos_ in ("NOUN", "PROPN"):
                has_noun = True
                if tok.dep_ == "ROOT" or tok == parsed[-1]:
                    noun_is_head = True
        if not has_noun:
            continue
        # Keep the intersection bucket tiny: at most two short, noun-headed phrases.
        if len(intersection) < 2 and noun_is_head and len(phrase.split()) <= 2:
            intersection.append(phrase)
        else:
            boost.append(phrase)

    stops = nlp.Defaults.stop_words | WEAK_BOOST_ADJS
    dropped: Set[str] = set()
    for token in doc:
        if token.is_punct:
            continue
        lemma = (token.lemma_ or token.text).lower()
        if token.pos_ in ("DET", "ADP", "PART", "PRON", "AUX") or lemma in stops:
            dropped.add(token.text.lower())

    return BucketResult(
        intersection_terms=_dedupe_preserve(intersection),
        boost_terms=_dedupe_preserve(boost),
        drop_terms=sorted(dropped),
    )
  469 +
  470 +
# --- CLI ---------------------------------------------------------------------

# Built-in sample queries covering verbal, brand, attribute, question and
# possessive-with-size query patterns.
DEFAULT_QUERIES = [
    "best noise cancelling headphones for airplane travel",
    "nike running shoes women",
    "plastic toy car",
    "what is the best smartphone under 500 dollars",
    "women's running shoes size 8",
]
  480 +
  481 +
def _load_spacy():
    """Load the small English spaCy pipeline; print an install hint on failure."""
    import spacy

    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # spacy.load raises OSError when the model package is not installed.
        print("Missing model: run: python -m spacy download en_core_web_sm", file=sys.stderr)
        raise
  493 +
  494 +
def _load_keybert():
    """Build a KeyBERT instance on a small sentence-transformers model.

    Chosen for demo speed; swap for a larger model if quality matters more.
    """
    from keybert import KeyBERT

    return KeyBERT(model="paraphrase-MiniLM-L6-v2")
  500 +
  501 +
def _load_yake():
    """Configure a YAKE extractor for short English queries (up to trigrams)."""
    import yake

    return yake.KeywordExtractor(
        lan="en",
        n=3,            # up to trigram keyphrases
        dedupLim=0.9,   # near-duplicate suppression threshold
        top=20,
        features=None,
    )
  512 +
  513 +
def main() -> None:
    """CLI entry point: run the selected bucketing scheme(s) over the queries."""
    parser = argparse.ArgumentParser(description="English query bucketing experiments")
    parser.add_argument(
        "--queries",
        nargs="*",
        default=DEFAULT_QUERIES,
        help="Queries to run (default: built-in examples)",
    )
    parser.add_argument(
        "--scheme",
        choices=("a", "b", "c", "all"),
        default="all",
    )
    args = parser.parse_args()

    nlp = _load_spacy()
    # Only pay the model-load cost for the schemes actually requested.
    keybert_model = _load_keybert() if args.scheme in ("b", "all") else None
    yake_model = _load_yake() if args.scheme in ("c", "all") else None

    for query in args.queries:
        print("=" * 72)
        print("QUERY:", query)
        print("-" * 72)
        if args.scheme in ("a", "all"):
            result_a = bucket_scheme_a_spacy(query, nlp)
            print("A spaCy+rules:", json.dumps(result_a.to_dict(), ensure_ascii=False))
        if args.scheme in ("b", "all") and keybert_model is not None:
            result_b = bucket_scheme_b_keybert(query, nlp, keybert_model)
            print("B spaCy+KeyBERT:", json.dumps(result_b.to_dict(), ensure_ascii=False))
        if args.scheme in ("c", "all") and yake_model is not None:
            result_c = bucket_scheme_c_yake(query, nlp, yake_model)
            print("C YAKE+noun filter:", json.dumps(result_c.to_dict(), ensure_ascii=False))
        print()


if __name__ == "__main__":
    main()
scripts/experiments/requirements_query_bucketing_experiments.txt 0 → 100644
@@ -0,0 +1,6 @@ @@ -0,0 +1,6 @@
  1 +# Optional: English query bucketing experiments
  2 +# After install: python -m spacy download en_core_web_sm
  3 +spacy>=3.7.0
  4 +keybert>=0.8.0
  5 +sentence-transformers>=2.2.0
  6 +yake>=0.4.8
scripts/lib/reranker_backend_env.sh
@@ -45,7 +45,7 @@ reranker_backend_venv_dir() { @@ -45,7 +45,7 @@ reranker_backend_venv_dir() {
45 qwen3_gguf_06b) printf '%s/.venv-reranker-gguf-06b\n' "${project_root}" ;; 45 qwen3_gguf_06b) printf '%s/.venv-reranker-gguf-06b\n' "${project_root}" ;;
46 qwen3_transformers) printf '%s/.venv-reranker-transformers\n' "${project_root}" ;; 46 qwen3_transformers) printf '%s/.venv-reranker-transformers\n' "${project_root}" ;;
47 qwen3_transformers_packed) printf '%s/.venv-reranker-transformers-packed\n' "${project_root}" ;; 47 qwen3_transformers_packed) printf '%s/.venv-reranker-transformers-packed\n' "${project_root}" ;;
48 - bge) printf '%s/.venv-reranker-bge\n' "${project_root}" ;; 48 + bge) printf '%s/.venv-reranker\n' "${project_root}" ;;
49 dashscope_rerank) printf '%s/.venv-reranker-dashscope\n' "${project_root}" ;; 49 dashscope_rerank) printf '%s/.venv-reranker-dashscope\n' "${project_root}" ;;
50 *) printf '%s/.venv-reranker-%s\n' "${project_root}" "${backend}" ;; 50 *) printf '%s/.venv-reranker-%s\n' "${project_root}" "${backend}" ;;
51 esac 51 esac
scripts/service_ctl.sh
@@ -16,14 +16,45 @@ mkdir -p &quot;${LOG_DIR}&quot; @@ -16,14 +16,45 @@ mkdir -p &quot;${LOG_DIR}&quot;
16 source "${PROJECT_ROOT}/scripts/lib/load_env.sh" 16 source "${PROJECT_ROOT}/scripts/lib/load_env.sh"
17 17
18 CORE_SERVICES=("backend" "indexer" "frontend") 18 CORE_SERVICES=("backend" "indexer" "frontend")
19 -OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker") 19 +OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker" "reranker-fine")
20 FULL_SERVICES=("${OPTIONAL_SERVICES[@]}" "${CORE_SERVICES[@]}") 20 FULL_SERVICES=("${OPTIONAL_SERVICES[@]}" "${CORE_SERVICES[@]}")
21 -STOP_ORDER_SERVICES=("frontend" "indexer" "backend" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei") 21 +STOP_ORDER_SERVICES=("frontend" "indexer" "backend" "reranker-fine" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei")
22 22
23 all_services() { 23 all_services() {
24 echo "${FULL_SERVICES[@]}" 24 echo "${FULL_SERVICES[@]}"
25 } 25 }
26 26
# Prefer the project virtualenv interpreter; fall back to $PYTHON or python3.
config_python_bin() {
    local venv_python="${PROJECT_ROOT}/.venv/bin/python"
    if [ -x "${venv_python}" ]; then
        echo "${venv_python}"
    else
        echo "${PYTHON:-python3}"
    fi
}
  34 +
# Map a service name to its reranker config instance; empty for non-reranker services.
reranker_instance_for_service() {
    case "$1" in
        reranker) echo "default" ;;
        reranker-fine) echo "fine" ;;
        *) echo "" ;;
    esac
}
  43 +
# Resolve the configured port for a reranker instance by querying the app
# config through Python (config.loader honors RERANK_INSTANCE / defaults).
# Prints the port number on stdout.
get_reranker_instance_port() {
    local instance="$1"
    local pybin
    pybin="$(config_python_bin)"
    # Quoted 'PY' heredoc: the Python snippet is passed verbatim (no shell expansion).
    RERANK_INSTANCE="${instance}" PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${pybin}" - <<'PY'
from config.loader import get_app_config
import os

cfg = get_app_config().services.rerank
name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
print(cfg.get_instance(name).port)
PY
}
  57 +
27 get_port() { 58 get_port() {
28 local service="$1" 59 local service="$1"
29 case "${service}" in 60 case "${service}" in
@@ -33,7 +64,20 @@ get_port() { @@ -33,7 +64,20 @@ get_port() {
33 embedding) echo "${EMBEDDING_TEXT_PORT:-6005}" ;; 64 embedding) echo "${EMBEDDING_TEXT_PORT:-6005}" ;;
34 embedding-image) echo "${EMBEDDING_IMAGE_PORT:-6008}" ;; 65 embedding-image) echo "${EMBEDDING_IMAGE_PORT:-6008}" ;;
35 translator) echo "${TRANSLATION_PORT:-6006}" ;; 66 translator) echo "${TRANSLATION_PORT:-6006}" ;;
36 - reranker) echo "${RERANKER_PORT:-6007}" ;; 67 + reranker)
  68 + if [ -n "${RERANKER_PORT:-}" ]; then
  69 + echo "${RERANKER_PORT}"
  70 + else
  71 + get_reranker_instance_port "default"
  72 + fi
  73 + ;;
  74 + reranker-fine)
  75 + if [ -n "${RERANKER_FINE_PORT:-}" ]; then
  76 + echo "${RERANKER_FINE_PORT}"
  77 + else
  78 + get_reranker_instance_port "fine"
  79 + fi
  80 + ;;
37 tei) echo "${TEI_PORT:-8080}" ;; 81 tei) echo "${TEI_PORT:-8080}" ;;
38 cnclip) echo "${CNCLIP_PORT:-51000}" ;; 82 cnclip) echo "${CNCLIP_PORT:-51000}" ;;
39 *) echo "" ;; 83 *) echo "" ;;
@@ -70,6 +114,7 @@ service_start_cmd() { @@ -70,6 +114,7 @@ service_start_cmd() {
70 embedding-image) echo "./scripts/start_embedding_image_service.sh" ;; 114 embedding-image) echo "./scripts/start_embedding_image_service.sh" ;;
71 translator) echo "./scripts/start_translator.sh" ;; 115 translator) echo "./scripts/start_translator.sh" ;;
72 reranker) echo "./scripts/start_reranker.sh" ;; 116 reranker) echo "./scripts/start_reranker.sh" ;;
  117 + reranker-fine) echo "./scripts/start_reranker.sh" ;;
73 tei) echo "./scripts/start_tei_service.sh" ;; 118 tei) echo "./scripts/start_tei_service.sh" ;;
74 cnclip) echo "./scripts/start_cnclip_service.sh" ;; 119 cnclip) echo "./scripts/start_cnclip_service.sh" ;;
75 *) return 1 ;; 120 *) return 1 ;;
@@ -79,7 +124,7 @@ service_start_cmd() { @@ -79,7 +124,7 @@ service_start_cmd() {
79 service_exists() { 124 service_exists() {
80 local service="$1" 125 local service="$1"
81 case "${service}" in 126 case "${service}" in
82 - backend|indexer|frontend|embedding|embedding-image|translator|reranker|tei|cnclip) return 0 ;; 127 + backend|indexer|frontend|embedding|embedding-image|translator|reranker|reranker-fine|tei|cnclip) return 0 ;;
83 *) return 1 ;; 128 *) return 1 ;;
84 esac 129 esac
85 } 130 }
@@ -97,7 +142,7 @@ validate_targets() { @@ -97,7 +142,7 @@ validate_targets() {
97 health_path_for_service() { 142 health_path_for_service() {
98 local service="$1" 143 local service="$1"
99 case "${service}" in 144 case "${service}" in
100 - backend|indexer|embedding|embedding-image|translator|reranker|tei) echo "/health" ;; 145 + backend|indexer|embedding|embedding-image|translator|reranker|reranker-fine|tei) echo "/health" ;;
101 *) echo "" ;; 146 *) echo "" ;;
102 esac 147 esac
103 } 148 }
@@ -505,7 +550,7 @@ get_cnclip_flow_device() { @@ -505,7 +550,7 @@ get_cnclip_flow_device() {
505 start_health_retries_for_service() { 550 start_health_retries_for_service() {
506 local service="$1" 551 local service="$1"
507 case "${service}" in 552 case "${service}" in
508 - reranker) echo 90 ;; 553 + reranker|reranker-fine) echo 90 ;;
509 *) echo 30 ;; 554 *) echo 30 ;;
510 esac 555 esac
511 } 556 }
@@ -593,9 +638,15 @@ start_one() { @@ -593,9 +638,15 @@ start_one() {
593 return 1 638 return 1
594 fi 639 fi
595 ;; 640 ;;
596 - backend|indexer|frontend|embedding|embedding-image|translator|reranker) 641 + backend|indexer|frontend|embedding|embedding-image|translator|reranker|reranker-fine)
597 echo "[start] ${service}" 642 echo "[start] ${service}"
598 - nohup "${cmd}" >> "${lf}" 2>&1 & 643 + local rerank_instance=""
  644 + rerank_instance="$(reranker_instance_for_service "${service}")"
  645 + if [ -n "${rerank_instance}" ]; then
  646 + nohup env RERANK_INSTANCE="${rerank_instance}" "${cmd}" >> "${lf}" 2>&1 &
  647 + else
  648 + nohup "${cmd}" >> "${lf}" 2>&1 &
  649 + fi
599 local pid=$! 650 local pid=$!
600 echo "${pid}" > "${pf}" 651 echo "${pid}" > "${pf}"
601 wait_for_startup_health "${service}" "${pid}" "${lf}" 652 wait_for_startup_health "${service}" "${pid}" "${lf}"
@@ -673,7 +724,7 @@ stop_one() { @@ -673,7 +724,7 @@ stop_one() {
673 fi 724 fi
674 fi 725 fi
675 726
676 - if [ "${service}" = "reranker" ]; then 727 + if [[ "${service}" == reranker* ]] && ! service_is_running "reranker" && ! service_is_running "reranker-fine"; then
677 cleanup_reranker_orphans 728 cleanup_reranker_orphans
678 fi 729 fi
679 } 730 }
@@ -871,7 +922,7 @@ Special targets: @@ -871,7 +922,7 @@ Special targets:
871 922
872 Examples: 923 Examples:
873 ./scripts/service_ctl.sh up all 924 ./scripts/service_ctl.sh up all
874 - ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker 925 + ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker reranker-fine
875 ./scripts/service_ctl.sh up backend indexer frontend 926 ./scripts/service_ctl.sh up backend indexer frontend
876 ./scripts/service_ctl.sh restart 927 ./scripts/service_ctl.sh restart
877 ./scripts/service_ctl.sh monitor-start all 928 ./scripts/service_ctl.sh monitor-start all
scripts/start_reranker.sh
@@ -14,9 +14,31 @@ load_env_file &quot;${PROJECT_ROOT}/.env&quot; @@ -14,9 +14,31 @@ load_env_file &quot;${PROJECT_ROOT}/.env&quot;
14 # shellcheck source=scripts/lib/reranker_backend_env.sh 14 # shellcheck source=scripts/lib/reranker_backend_env.sh
15 source "${PROJECT_ROOT}/scripts/lib/reranker_backend_env.sh" 15 source "${PROJECT_ROOT}/scripts/lib/reranker_backend_env.sh"
16 16
17 -RERANKER_HOST="${RERANKER_HOST:-0.0.0.0}"  
18 -RERANKER_PORT="${RERANKER_PORT:-6007}"  
19 -RERANK_BACKEND="${RERANK_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}" 17 +CONFIG_PYTHON="${PROJECT_ROOT}/.venv/bin/python"
  18 +if [[ ! -x "${CONFIG_PYTHON}" ]]; then
  19 + CONFIG_PYTHON="${PYTHON:-python3}"
  20 +fi
  21 +
  22 +RERANK_INSTANCE="${RERANK_INSTANCE:-default}"
  23 +
  24 +read -r INSTANCE_HOST INSTANCE_PORT INSTANCE_BACKEND INSTANCE_RUNTIME_DIR <<EOF
  25 +$(
  26 + PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${CONFIG_PYTHON}" - <<'PY'
  27 +from config.loader import get_app_config
  28 +import os
  29 +
  30 +cfg = get_app_config().services.rerank
  31 +name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  32 +instance = cfg.get_instance(name)
  33 +runtime_dir = instance.runtime_dir or f"./.runtime/reranker/{name}"
  34 +print(instance.host, instance.port, instance.backend, runtime_dir)
  35 +PY
  36 +)
  37 +EOF
  38 +
  39 +RERANKER_HOST="${RERANKER_HOST:-${INSTANCE_HOST:-0.0.0.0}}"
  40 +RERANKER_PORT="${RERANKER_PORT:-${INSTANCE_PORT:-6007}}"
  41 +RERANK_BACKEND="${RERANK_BACKEND:-${INSTANCE_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}}"
20 RERANKER_VENV="${RERANKER_VENV:-$(reranker_backend_venv_dir "${PROJECT_ROOT}" "${RERANK_BACKEND}")}" 42 RERANKER_VENV="${RERANKER_VENV:-$(reranker_backend_venv_dir "${PROJECT_ROOT}" "${RERANK_BACKEND}")}"
21 PYTHON_BIN="${RERANKER_VENV}/bin/python" 43 PYTHON_BIN="${RERANKER_VENV}/bin/python"
22 44
@@ -27,7 +49,10 @@ if [[ ! -x &quot;${PYTHON_BIN}&quot; ]]; then @@ -27,7 +49,10 @@ if [[ ! -x &quot;${PYTHON_BIN}&quot; ]]; then
27 fi 49 fi
28 50
29 # Keep vLLM/triton/torch caches out of system disk. 51 # Keep vLLM/triton/torch caches out of system disk.
30 -RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker}" 52 +RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${INSTANCE_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker/${RERANK_INSTANCE}}}"
  53 +if [[ "${RERANKER_RUNTIME_DIR}" != /* ]]; then
  54 + RERANKER_RUNTIME_DIR="${PROJECT_ROOT}/${RERANKER_RUNTIME_DIR#./}"
  55 +fi
31 mkdir -p "${RERANKER_RUNTIME_DIR}/home" \ 56 mkdir -p "${RERANKER_RUNTIME_DIR}/home" \
32 "${RERANKER_RUNTIME_DIR}/cache" \ 57 "${RERANKER_RUNTIME_DIR}/cache" \
33 "${RERANKER_RUNTIME_DIR}/config" \ 58 "${RERANKER_RUNTIME_DIR}/config" \
@@ -102,6 +127,7 @@ fi @@ -102,6 +127,7 @@ fi
102 echo "========================================" 127 echo "========================================"
103 echo "Starting Reranker Service" 128 echo "Starting Reranker Service"
104 echo "========================================" 129 echo "========================================"
  130 +echo "Instance: ${RERANK_INSTANCE}"
105 echo "Python: ${PYTHON_BIN}" 131 echo "Python: ${PYTHON_BIN}"
106 echo "Host: ${RERANKER_HOST}" 132 echo "Host: ${RERANKER_HOST}"
107 echo "Port: ${RERANKER_PORT}" 133 echo "Port: ${RERANKER_PORT}"
scripts/temp_embed_tenant_image_urls.py 0 → 100644
@@ -0,0 +1,246 @@ @@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Temporary script: scan a tenant's image_url values from ES and batch-call the
image embedding service. Runs 5 worker processes with at most 8 URLs per
request. Logs go to standard output.

Usage:
    source activate.sh  # loads .env, providing ES_HOST / ES_USERNAME / ES_PASSWORD
    python scripts/temp_embed_tenant_image_urls.py

If activate.sh was not sourced, the script also tries to load the project-root .env.
"""

from __future__ import annotations

import json
import multiprocessing as mp
import os
import sys
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlencode

import requests
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

# Even without `source activate.sh`, load the project-root .env so the same
# ES_HOST / ES_USERNAME / ES_PASSWORD variables are available.
try:
    from dotenv import load_dotenv

    _ROOT = Path(__file__).resolve().parents[1]
    load_dotenv(_ROOT / ".env")
except ImportError:
    # python-dotenv is optional; fall back to the ambient environment.
    pass
  37 +
# ---------------------------------------------------------------------------
# Configuration (edit as needed; defaults match the ES_* variables in .env,
# see config/loader.py)
# ---------------------------------------------------------------------------

# Elasticsearch (defaults read from env: ES_HOST, ES_USERNAME, ES_PASSWORD)
ES_HOST: str = os.getenv("ES_HOST", "http://localhost:9200")
ES_USERNAME: Optional[str] = os.getenv("ES_USERNAME") or None
ES_PASSWORD: Optional[str] = os.getenv("ES_PASSWORD") or None
ES_INDEX: str = "search_products_tenant_163"

# Tenant id (keyword field, stored as a string)
TENANT_ID: str = "163"

# Image embedding service (matches integration doc section 7.1.2)
EMBED_BASE_URL: str = "http://localhost:6008"
EMBED_PATH: str = "/embed/image"
EMBED_QUERY: Dict[str, Any] = {
    "normalize": "true",
    "priority": "1",  # same as the doc's curl example; use "0" for offline batch work
}

# Concurrency and batching
WORKER_PROCESSES: int = 5
URLS_PER_REQUEST: int = 8

# HTTP
REQUEST_TIMEOUT_SEC: float = 120.0

# ES scan (elasticsearch-py 8+ / ES 9: `scan(..., query=...)` expands into
# `client.search(**kwargs)`, so pass Search-API parameter names — the top-level
# `query` is the DSL query clause; do not pass a bare `match_all`.)
SCROLL_CHUNK_SIZE: int = 500

# ---------------------------------------------------------------------------
  71 +
  72 +
@dataclass
class BatchResult:
    """Outcome of one embedding request covering up to URLS_PER_REQUEST URLs."""

    batch_index: int  # position of this batch in the task list
    url_count: int  # number of URLs sent in this request
    ok: bool  # True when HTTP 200 and the response is a list of matching length
    status_code: Optional[int]  # HTTP status code; None on transport-level failure
    elapsed_sec: float  # wall-clock duration of the request
    error: Optional[str] = None  # short error description when ok is False
  81 +
  82 +
def _build_embed_url() -> str:
    """Compose the embedding endpoint URL with the configured query string."""
    base = EMBED_BASE_URL.rstrip("/")
    query_string = urlencode(EMBED_QUERY)
    return "".join([base, EMBED_PATH, "?", query_string])
  86 +
  87 +
def _process_batch(payload: Tuple[int, List[str]]) -> BatchResult:
    """POST one batch of image URLs to the embedding service.

    Args:
        payload: ``(batch_index, urls)`` pair produced by ``main()``.

    Returns:
        BatchResult with HTTP status, timing and success/failure details.
        Never raises: transport errors are converted into a failed result.
    """
    batch_index, urls = payload
    if not urls:
        return BatchResult(batch_index, 0, True, None, 0.0, None)

    url = _build_embed_url()
    t0 = time.perf_counter()
    try:
        resp = requests.post(
            url,
            headers={"Content-Type": "application/json"},
            data=json.dumps(urls),
            timeout=REQUEST_TIMEOUT_SEC,
        )
        elapsed = time.perf_counter() - t0
        # BUG FIX: the previous `resp.status_code if resp else None` relied on
        # Response truthiness, which is False for 4xx/5xx — failed requests
        # were logged as `http=None` even though a status code existed.
        # `resp` is always bound here, so use its status code directly.
        status: Optional[int] = resp.status_code
        ok = resp.status_code == 200
        err: Optional[str] = None
        if ok:
            try:
                body = resp.json()
                # The service must return one embedding per input URL.
                if not isinstance(body, list) or len(body) != len(urls):
                    ok = False
                    err = f"response length mismatch or not list: got {type(body).__name__}"
            except Exception as e:
                ok = False
                err = f"json decode: {e}"
        else:
            err = resp.text[:500] if resp.text else f"HTTP {resp.status_code}"

        worker = mp.current_process().name
        ms = elapsed * 1000.0
        if ok:
            print(
                f"[embed] worker={worker} batch={batch_index} urls={len(urls)} "
                f"http={status} elapsed_ms={ms:.2f} ok",
                flush=True,
            )
        else:
            print(
                f"[embed] worker={worker} batch={batch_index} urls={len(urls)} "
                f"http={status} elapsed_ms={ms:.2f} FAIL err={err}",
                flush=True,
            )
        return BatchResult(batch_index, len(urls), ok, status, elapsed, err)
    except Exception as e:
        # Transport-level failure (timeout, connection refused, DNS, ...).
        elapsed = time.perf_counter() - t0
        worker = mp.current_process().name
        print(
            f"[embed] worker={worker} batch={batch_index} urls={len(urls)} "
            f"http=None elapsed_ms={elapsed * 1000.0:.2f} FAIL err={e}",
            flush=True,
        )
        return BatchResult(batch_index, len(urls), False, None, elapsed, str(e))
  142 +
  143 +
def _iter_image_urls(es: Elasticsearch) -> List[str]:
    """Collect every non-empty image_url for TENANT_ID via a scroll scan."""
    # Equivalent search body: {"query": {"term": {"tenant_id": "..."}}}
    search_kwargs: Dict[str, Any] = {
        "query": {"term": {"tenant_id": TENANT_ID}},
        "source_includes": ["image_url"],
    }
    collected: List[str] = []
    hits = scan(es, query=search_kwargs, index=ES_INDEX, size=SCROLL_CHUNK_SIZE)
    for hit in hits:
        source = hit.get("_source") or {}
        raw = source.get("image_url")
        if raw is None:
            continue
        candidate = str(raw).strip()
        if candidate:
            collected.append(candidate)
    return collected
  166 +
  167 +
def main() -> int:
    """Scan tenant image URLs from ES and embed them in parallel batches.

    Returns:
        0 when every batch succeeded, 1 when Elasticsearch is unreachable,
        2 when at least one batch failed.
    """
    t_wall0 = time.perf_counter()

    auth = None
    if ES_USERNAME and ES_PASSWORD:
        auth = (ES_USERNAME, ES_PASSWORD)

    es = Elasticsearch([ES_HOST], basic_auth=auth)
    if not es.ping():
        print("ERROR: Elasticsearch ping failed", file=sys.stderr)
        return 1

    print(
        f"[main] ES={ES_HOST} basic_auth={'yes' if auth else 'no'} "
        f"index={ES_INDEX} tenant_id={TENANT_ID} "
        f"workers={WORKER_PROCESSES} urls_per_req={URLS_PER_REQUEST}",
        flush=True,
    )
    print(f"[main] embed_url={_build_embed_url()}", flush=True)

    # Phase 1: collect all URLs up front so batching and progress are exact.
    t_fetch0 = time.perf_counter()
    all_urls = _iter_image_urls(es)
    fetch_elapsed = time.perf_counter() - t_fetch0
    print(
        f"[main] collected image_url count={len(all_urls)} es_scan_elapsed_sec={fetch_elapsed:.3f}",
        flush=True,
    )

    # Phase 2: split into fixed-size batches of URLS_PER_REQUEST.
    batches: List[List[str]] = []
    for i in range(0, len(all_urls), URLS_PER_REQUEST):
        batches.append(all_urls[i : i + URLS_PER_REQUEST])

    if not batches:
        print("[main] no URLs to process; done.", flush=True)
        return 0

    tasks = [(idx, batch) for idx, batch in enumerate(batches)]
    print(f"[main] batches={len(tasks)} (parallel processes={WORKER_PROCESSES})", flush=True)

    # Phase 3: fan out to worker processes and aggregate per-batch results.
    t_run0 = time.perf_counter()
    total_urls = 0
    success_urls = 0
    failed_urls = 0
    ok_batches = 0
    fail_batches = 0
    sum_req_sec = 0.0

    with mp.Pool(processes=WORKER_PROCESSES) as pool:
        # imap_unordered: consume results as they finish, order irrelevant.
        for res in pool.imap_unordered(_process_batch, tasks, chunksize=1):
            total_urls += res.url_count
            sum_req_sec += res.elapsed_sec
            if res.ok:
                ok_batches += 1
                success_urls += res.url_count
            else:
                fail_batches += 1
                failed_urls += res.url_count

    wall_total = time.perf_counter() - t_wall0
    run_elapsed = time.perf_counter() - t_run0

    print("---------- summary ----------", flush=True)
    print(f"tenant_id: {TENANT_ID}", flush=True)
    print(f"total documents w/ url: {len(all_urls)}", flush=True)
    print(f"total batches: {len(batches)}", flush=True)
    print(f"batches succeeded: {ok_batches}", flush=True)
    print(f"batches failed: {fail_batches}", flush=True)
    print(f"urls (success path): {success_urls}", flush=True)
    print(f"urls (failed path): {failed_urls}", flush=True)
    print(f"ES scan elapsed (s): {fetch_elapsed:.3f}", flush=True)
    print(f"embed phase wall (s): {run_elapsed:.3f}", flush=True)
    print(f"sum request time (s): {sum_req_sec:.3f} (sequential sum, for reference)", flush=True)
    print(f"total wall time (s): {wall_total:.3f}", flush=True)
    print("-----------------------------", flush=True)
    return 0 if fail_batches == 0 else 2
  243 +
  244 +
if __name__ == "__main__":
    sys.exit(main())
search/es_query_builder.py
@@ -12,6 +12,7 @@ from typing import Dict, Any, List, Optional, Tuple @@ -12,6 +12,7 @@ from typing import Dict, Any, List, Optional, Tuple
12 12
13 import numpy as np 13 import numpy as np
14 from config import FunctionScoreConfig 14 from config import FunctionScoreConfig
  15 +from query.keyword_extractor import KEYWORDS_QUERY_BASE_KEY
15 16
16 17
17 class ESQueryBuilder: 18 class ESQueryBuilder:
@@ -29,9 +30,17 @@ class ESQueryBuilder: @@ -29,9 +30,17 @@ class ESQueryBuilder:
29 source_fields: Optional[List[str]] = None, 30 source_fields: Optional[List[str]] = None,
30 function_score_config: Optional[FunctionScoreConfig] = None, 31 function_score_config: Optional[FunctionScoreConfig] = None,
31 default_language: str = "en", 32 default_language: str = "en",
32 - knn_boost: float = 0.25, 33 + knn_text_boost: float = 20.0,
  34 + knn_image_boost: float = 20.0,
  35 + knn_text_k: int = 120,
  36 + knn_text_num_candidates: int = 400,
  37 + knn_text_k_long: int = 160,
  38 + knn_text_num_candidates_long: int = 500,
  39 + knn_image_k: int = 120,
  40 + knn_image_num_candidates: int = 400,
33 base_minimum_should_match: str = "70%", 41 base_minimum_should_match: str = "70%",
34 translation_minimum_should_match: str = "70%", 42 translation_minimum_should_match: str = "70%",
  43 + keywords_minimum_should_match: str = "50%",
35 translation_boost: float = 0.4, 44 translation_boost: float = 0.4,
36 tie_breaker_base_query: float = 0.9, 45 tie_breaker_base_query: float = 0.9,
37 best_fields_boosts: Optional[Dict[str, float]] = None, 46 best_fields_boosts: Optional[Dict[str, float]] = None,
@@ -55,7 +64,8 @@ class ESQueryBuilder: @@ -55,7 +64,8 @@ class ESQueryBuilder:
55 source_fields: Fields to return in search results (_source includes) 64 source_fields: Fields to return in search results (_source includes)
56 function_score_config: Function score configuration 65 function_score_config: Function score configuration
57 default_language: Default language to use when detection fails or returns "unknown" 66 default_language: Default language to use when detection fails or returns "unknown"
58 - knn_boost: Boost value for KNN (embedding recall) 67 + knn_text_boost: Boost for text-embedding KNN clause
  68 + knn_image_boost: Boost for image-embedding KNN clause
59 """ 69 """
60 self.match_fields = match_fields 70 self.match_fields = match_fields
61 self.field_boosts = field_boosts or {} 71 self.field_boosts = field_boosts or {}
@@ -67,9 +77,17 @@ class ESQueryBuilder: @@ -67,9 +77,17 @@ class ESQueryBuilder:
67 self.source_fields = source_fields 77 self.source_fields = source_fields
68 self.function_score_config = function_score_config 78 self.function_score_config = function_score_config
69 self.default_language = default_language 79 self.default_language = default_language
70 - self.knn_boost = knn_boost 80 + self.knn_text_boost = float(knn_text_boost)
  81 + self.knn_image_boost = float(knn_image_boost)
  82 + self.knn_text_k = int(knn_text_k)
  83 + self.knn_text_num_candidates = int(knn_text_num_candidates)
  84 + self.knn_text_k_long = int(knn_text_k_long)
  85 + self.knn_text_num_candidates_long = int(knn_text_num_candidates_long)
  86 + self.knn_image_k = int(knn_image_k)
  87 + self.knn_image_num_candidates = int(knn_image_num_candidates)
71 self.base_minimum_should_match = base_minimum_should_match 88 self.base_minimum_should_match = base_minimum_should_match
72 self.translation_minimum_should_match = translation_minimum_should_match 89 self.translation_minimum_should_match = translation_minimum_should_match
  90 + self.keywords_minimum_should_match = str(keywords_minimum_should_match)
73 self.translation_boost = float(translation_boost) 91 self.translation_boost = float(translation_boost)
74 self.tie_breaker_base_query = float(tie_breaker_base_query) 92 self.tie_breaker_base_query = float(tie_breaker_base_query)
75 default_best_fields = { 93 default_best_fields = {
@@ -171,8 +189,6 @@ class ESQueryBuilder: @@ -171,8 +189,6 @@ class ESQueryBuilder:
171 size: int = 10, 189 size: int = 10,
172 from_: int = 0, 190 from_: int = 0,
173 enable_knn: bool = True, 191 enable_knn: bool = True,
174 - knn_k: int = 50,  
175 - knn_num_candidates: int = 200,  
176 min_score: Optional[float] = None, 192 min_score: Optional[float] = None,
177 parsed_query: Optional[Any] = None, 193 parsed_query: Optional[Any] = None,
178 ) -> Dict[str, Any]: 194 ) -> Dict[str, Any]:
@@ -195,8 +211,6 @@ class ESQueryBuilder: @@ -195,8 +211,6 @@ class ESQueryBuilder:
195 size: Number of results 211 size: Number of results
196 from_: Offset for pagination 212 from_: Offset for pagination
197 enable_knn: Whether to use KNN search 213 enable_knn: Whether to use KNN search
198 - knn_k: K value for KNN  
199 - knn_num_candidates: Number of candidates for KNN  
200 min_score: Minimum score threshold 214 min_score: Minimum score threshold
201 215
202 Returns: 216 Returns:
@@ -234,41 +248,37 @@ class ESQueryBuilder: @@ -234,41 +248,37 @@ class ESQueryBuilder:
234 filter_clauses.append(product_title_exclusion_filter) 248 filter_clauses.append(product_title_exclusion_filter)
235 249
236 # 3. Add KNN search clauses alongside lexical clauses under the same bool.should 250 # 3. Add KNN search clauses alongside lexical clauses under the same bool.should
237 - # Adjust KNN k, num_candidates, boost by query_tokens (short query: less KNN; long: more)  
238 - final_knn_k, final_knn_num_candidates = knn_k, knn_num_candidates 251 + # Text KNN: k / num_candidates from config; long queries use *_long and higher boost
239 if has_embedding: 252 if has_embedding:
240 - knn_boost = self.knn_boost 253 + text_knn_boost = self.knn_text_boost
  254 + final_knn_k = self.knn_text_k
  255 + final_knn_num_candidates = self.knn_text_num_candidates
241 if parsed_query: 256 if parsed_query:
242 query_tokens = getattr(parsed_query, 'query_tokens', None) or [] 257 query_tokens = getattr(parsed_query, 'query_tokens', None) or []
243 token_count = len(query_tokens) 258 token_count = len(query_tokens)
244 if token_count >= 5: 259 if token_count >= 5:
245 - final_knn_k, final_knn_num_candidates = 160, 500  
246 - knn_boost = self.knn_boost * 1.4 # Higher weight for long queries  
247 - else:  
248 - final_knn_k, final_knn_num_candidates = 120, 400  
249 - else:  
250 - final_knn_k, final_knn_num_candidates = 120, 400 260 + final_knn_k = self.knn_text_k_long
  261 + final_knn_num_candidates = self.knn_text_num_candidates_long
  262 + text_knn_boost = self.knn_text_boost * 1.4
251 recall_clauses.append({ 263 recall_clauses.append({
252 "knn": { 264 "knn": {
253 "field": self.text_embedding_field, 265 "field": self.text_embedding_field,
254 "query_vector": query_vector.tolist(), 266 "query_vector": query_vector.tolist(),
255 "k": final_knn_k, 267 "k": final_knn_k,
256 "num_candidates": final_knn_num_candidates, 268 "num_candidates": final_knn_num_candidates,
257 - "boost": knn_boost, 269 + "boost": text_knn_boost,
258 "_name": "knn_query", 270 "_name": "knn_query",
259 } 271 }
260 }) 272 })
261 273
262 if has_image_embedding: 274 if has_image_embedding:
263 - image_knn_k = max(final_knn_k, 120)  
264 - image_knn_num_candidates = max(final_knn_num_candidates, 400)  
265 recall_clauses.append({ 275 recall_clauses.append({
266 "knn": { 276 "knn": {
267 "field": self.image_embedding_field, 277 "field": self.image_embedding_field,
268 "query_vector": image_query_vector.tolist(), 278 "query_vector": image_query_vector.tolist(),
269 - "k": image_knn_k,  
270 - "num_candidates": image_knn_num_candidates,  
271 - "boost": self.knn_boost, 279 + "k": self.knn_image_k,
  280 + "num_candidates": self.knn_image_num_candidates,
  281 + "boost": self.knn_image_boost,
272 "_name": "image_knn_query", 282 "_name": "image_knn_query",
273 } 283 }
274 }) 284 })
@@ -498,6 +508,7 @@ class ESQueryBuilder: @@ -498,6 +508,7 @@ class ESQueryBuilder:
498 clause_name: str, 508 clause_name: str,
499 *, 509 *,
500 is_source: bool, 510 is_source: bool,
  511 + keywords_query: Optional[str] = None,
501 ) -> Optional[Dict[str, Any]]: 512 ) -> Optional[Dict[str, Any]]:
502 combined_fields = self._match_field_strings(lang) 513 combined_fields = self._match_field_strings(lang)
503 if not combined_fields: 514 if not combined_fields:
@@ -505,6 +516,26 @@ class ESQueryBuilder: @@ -505,6 +516,26 @@ class ESQueryBuilder:
505 minimum_should_match = ( 516 minimum_should_match = (
506 self.base_minimum_should_match if is_source else self.translation_minimum_should_match 517 self.base_minimum_should_match if is_source else self.translation_minimum_should_match
507 ) 518 )
  519 + must_clauses: List[Dict[str, Any]] = [
  520 + {
  521 + "combined_fields": {
  522 + "query": lang_query,
  523 + "fields": combined_fields,
  524 + "minimum_should_match": minimum_should_match,
  525 + }
  526 + }
  527 + ]
  528 + kw = (keywords_query or "").strip()
  529 + if kw:
  530 + must_clauses.append(
  531 + {
  532 + "combined_fields": {
  533 + "query": kw,
  534 + "fields": combined_fields,
  535 + "minimum_should_match": self.keywords_minimum_should_match,
  536 + }
  537 + }
  538 + )
508 should_clauses = [ 539 should_clauses = [
509 clause 540 clause
510 for clause in ( 541 for clause in (
@@ -516,15 +547,7 @@ class ESQueryBuilder: @@ -516,15 +547,7 @@ class ESQueryBuilder:
516 clause: Dict[str, Any] = { 547 clause: Dict[str, Any] = {
517 "bool": { 548 "bool": {
518 "_name": clause_name, 549 "_name": clause_name,
519 - "must": [  
520 - {  
521 - "combined_fields": {  
522 - "query": lang_query,  
523 - "fields": combined_fields,  
524 - "minimum_should_match": minimum_should_match,  
525 - }  
526 - }  
527 - ], 550 + "must": must_clauses,
528 } 551 }
529 } 552 }
530 if should_clauses: 553 if should_clauses:
@@ -565,6 +588,11 @@ class ESQueryBuilder: @@ -565,6 +588,11 @@ class ESQueryBuilder:
565 base_query_text = ( 588 base_query_text = (
566 getattr(parsed_query, "rewritten_query", None) if parsed_query else None 589 getattr(parsed_query, "rewritten_query", None) if parsed_query else None
567 ) or query_text 590 ) or query_text
  591 + kw_by_variant: Dict[str, str] = (
  592 + getattr(parsed_query, "keywords_queries", None) or {}
  593 + if parsed_query
  594 + else {}
  595 + )
568 596
569 if base_query_text: 597 if base_query_text:
570 base_clause = self._build_lexical_language_clause( 598 base_clause = self._build_lexical_language_clause(
@@ -572,6 +600,7 @@ class ESQueryBuilder: @@ -572,6 +600,7 @@ class ESQueryBuilder:
572 base_query_text, 600 base_query_text,
573 "base_query", 601 "base_query",
574 is_source=True, 602 is_source=True,
  603 + keywords_query=(kw_by_variant.get(KEYWORDS_QUERY_BASE_KEY) or "").strip(),
575 ) 604 )
576 if base_clause: 605 if base_clause:
577 should_clauses.append(base_clause) 606 should_clauses.append(base_clause)
@@ -583,11 +612,13 @@ class ESQueryBuilder: @@ -583,11 +612,13 @@ class ESQueryBuilder:
583 continue 612 continue
584 if normalized_lang == source_lang and normalized_text == base_query_text: 613 if normalized_lang == source_lang and normalized_text == base_query_text:
585 continue 614 continue
  615 + trans_kw = (kw_by_variant.get(normalized_lang) or "").strip()
586 trans_clause = self._build_lexical_language_clause( 616 trans_clause = self._build_lexical_language_clause(
587 normalized_lang, 617 normalized_lang,
588 normalized_text, 618 normalized_text,
589 f"base_query_trans_{normalized_lang}", 619 f"base_query_trans_{normalized_lang}",
590 is_source=False, 620 is_source=False,
  621 + keywords_query=trans_kw,
591 ) 622 )
592 if trans_clause: 623 if trans_clause:
593 should_clauses.append(trans_clause) 624 should_clauses.append(trans_clause)
search/rerank_client.py
@@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
10 from typing import Dict, Any, List, Optional, Tuple 10 from typing import Dict, Any, List, Optional, Tuple
11 import logging 11 import logging
12 12
13 -from config.schema import RerankFusionConfig 13 +from config.schema import CoarseRankFusionConfig, RerankFusionConfig
14 from providers import create_rerank_provider 14 from providers import create_rerank_provider
15 15
16 logger = logging.getLogger(__name__) 16 logger = logging.getLogger(__name__)
@@ -120,6 +120,7 @@ def call_rerank_service( @@ -120,6 +120,7 @@ def call_rerank_service(
120 docs: List[str], 120 docs: List[str],
121 timeout_sec: float = DEFAULT_TIMEOUT_SEC, 121 timeout_sec: float = DEFAULT_TIMEOUT_SEC,
122 top_n: Optional[int] = None, 122 top_n: Optional[int] = None,
  123 + service_profile: Optional[str] = None,
123 ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]]]: 124 ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]]]:
124 """ 125 """
125 调用重排服务 POST /rerank,返回分数列表与 meta。 126 调用重排服务 POST /rerank,返回分数列表与 meta。
@@ -128,7 +129,7 @@ def call_rerank_service( @@ -128,7 +129,7 @@ def call_rerank_service(
128 if not docs: 129 if not docs:
129 return [], {} 130 return [], {}
130 try: 131 try:
131 - client = create_rerank_provider() 132 + client = create_rerank_provider(service_profile=service_profile)
132 return client.rerank(query=query, docs=docs, timeout_sec=timeout_sec, top_n=top_n) 133 return client.rerank(query=query, docs=docs, timeout_sec=timeout_sec, top_n=top_n)
133 except Exception as e: 134 except Exception as e:
134 logger.warning("Rerank request failed: %s", e, exc_info=True) 135 logger.warning("Rerank request failed: %s", e, exc_info=True)
@@ -240,24 +241,105 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa @@ -240,24 +241,105 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
240 241
241 def _multiply_fusion_factors( 242 def _multiply_fusion_factors(
242 rerank_score: float, 243 rerank_score: float,
  244 + fine_score: Optional[float],
243 text_score: float, 245 text_score: float,
244 knn_score: float, 246 knn_score: float,
245 fusion: RerankFusionConfig, 247 fusion: RerankFusionConfig,
246 -) -> Tuple[float, float, float, float]:  
247 - """(rerank_factor, text_factor, knn_factor, fused_without_style_boost).""" 248 +) -> Tuple[float, float, float, float, float]:
  249 + """(rerank_factor, fine_factor, text_factor, knn_factor, fused_without_style_boost)."""
248 r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent 250 r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent
  251 + if fine_score is None:
  252 + f = 1.0
  253 + else:
  254 + f = (max(fine_score, 0.0) + fusion.fine_bias) ** fusion.fine_exponent
249 t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent 255 t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
250 k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent 256 k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
251 - return r, t, k, r * t * k 257 + return r, f, t, k, r * f * t * k
  258 +
  259 +
  260 +def _multiply_coarse_fusion_factors(
  261 + text_score: float,
  262 + knn_score: float,
  263 + fusion: CoarseRankFusionConfig,
  264 +) -> Tuple[float, float, float]:
  265 + text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
  266 + knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
  267 + return text_factor, knn_factor, text_factor * knn_factor
252 268
253 269
254 def _has_selected_sku(hit: Dict[str, Any]) -> bool: 270 def _has_selected_sku(hit: Dict[str, Any]) -> bool:
255 return bool(str(hit.get("_style_rerank_suffix") or "").strip()) 271 return bool(str(hit.get("_style_rerank_suffix") or "").strip())
256 272
257 273
  274 +def coarse_resort_hits(
  275 + es_hits: List[Dict[str, Any]],
  276 + fusion: Optional[CoarseRankFusionConfig] = None,
  277 + debug: bool = False,
  278 +) -> List[Dict[str, Any]]:
  279 + """Coarse rank with text/knn fusion only."""
  280 + if not es_hits:
  281 + return []
  282 +
  283 + f = fusion or CoarseRankFusionConfig()
  284 + coarse_debug: List[Dict[str, Any]] = [] if debug else []
  285 + for hit in es_hits:
  286 + es_score = _to_score(hit.get("_score"))
  287 + matched_queries = hit.get("matched_queries")
  288 + knn_components = _collect_knn_score_components(matched_queries, f)
  289 + text_components = _collect_text_score_components(matched_queries, es_score)
  290 + text_score = text_components["text_score"]
  291 + knn_score = knn_components["knn_score"]
  292 + text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors(
  293 + text_score=text_score,
  294 + knn_score=knn_score,
  295 + fusion=f,
  296 + )
  297 +
  298 + hit["_text_score"] = text_score
  299 + hit["_knn_score"] = knn_score
  300 + hit["_text_knn_score"] = knn_components["text_knn_score"]
  301 + hit["_image_knn_score"] = knn_components["image_knn_score"]
  302 + hit["_coarse_score"] = coarse_score
  303 +
  304 + if debug:
  305 + coarse_debug.append(
  306 + {
  307 + "doc_id": hit.get("_id"),
  308 + "es_score": es_score,
  309 + "text_score": text_score,
  310 + "text_source_score": text_components["source_score"],
  311 + "text_translation_score": text_components["translation_score"],
  312 + "text_weighted_source_score": text_components["weighted_source_score"],
  313 + "text_weighted_translation_score": text_components["weighted_translation_score"],
  314 + "text_primary_score": text_components["primary_text_score"],
  315 + "text_support_score": text_components["support_text_score"],
  316 + "text_score_fallback_to_es": (
  317 + text_score == es_score
  318 + and text_components["source_score"] <= 0.0
  319 + and text_components["translation_score"] <= 0.0
  320 + ),
  321 + "text_knn_score": knn_components["text_knn_score"],
  322 + "image_knn_score": knn_components["image_knn_score"],
  323 + "weighted_text_knn_score": knn_components["weighted_text_knn_score"],
  324 + "weighted_image_knn_score": knn_components["weighted_image_knn_score"],
  325 + "knn_primary_score": knn_components["primary_knn_score"],
  326 + "knn_support_score": knn_components["support_knn_score"],
  327 + "knn_score": knn_score,
  328 + "coarse_text_factor": text_factor,
  329 + "coarse_knn_factor": knn_factor,
  330 + "coarse_score": coarse_score,
  331 + "matched_queries": matched_queries,
  332 + }
  333 + )
  334 +
  335 + es_hits.sort(key=lambda h: h.get("_coarse_score", h.get("_score", 0.0)), reverse=True)
  336 + return coarse_debug
  337 +
  338 +
258 def fuse_scores_and_resort( 339 def fuse_scores_and_resort(
259 es_hits: List[Dict[str, Any]], 340 es_hits: List[Dict[str, Any]],
260 rerank_scores: List[float], 341 rerank_scores: List[float],
  342 + fine_scores: Optional[List[float]] = None,
261 weight_es: float = DEFAULT_WEIGHT_ES, 343 weight_es: float = DEFAULT_WEIGHT_ES,
262 weight_ai: float = DEFAULT_WEIGHT_AI, 344 weight_ai: float = DEFAULT_WEIGHT_AI,
263 fusion: Optional[RerankFusionConfig] = None, 345 fusion: Optional[RerankFusionConfig] = None,
@@ -290,6 +372,8 @@ def fuse_scores_and_resort( @@ -290,6 +372,8 @@ def fuse_scores_and_resort(
290 n = len(es_hits) 372 n = len(es_hits)
291 if n == 0 or len(rerank_scores) != n: 373 if n == 0 or len(rerank_scores) != n:
292 return [] 374 return []
  375 + if fine_scores is not None and len(fine_scores) != n:
  376 + fine_scores = None
293 377
294 f = fusion or RerankFusionConfig() 378 f = fusion or RerankFusionConfig()
295 fused_debug: List[Dict[str, Any]] = [] if debug else [] 379 fused_debug: List[Dict[str, Any]] = [] if debug else []
@@ -297,13 +381,14 @@ def fuse_scores_and_resort( @@ -297,13 +381,14 @@ def fuse_scores_and_resort(
297 for idx, hit in enumerate(es_hits): 381 for idx, hit in enumerate(es_hits):
298 es_score = _to_score(hit.get("_score")) 382 es_score = _to_score(hit.get("_score"))
299 rerank_score = _to_score(rerank_scores[idx]) 383 rerank_score = _to_score(rerank_scores[idx])
  384 + fine_score = _to_score(fine_scores[idx]) if fine_scores is not None else _to_score(hit.get("_fine_score"))
300 matched_queries = hit.get("matched_queries") 385 matched_queries = hit.get("matched_queries")
301 knn_components = _collect_knn_score_components(matched_queries, f) 386 knn_components = _collect_knn_score_components(matched_queries, f)
302 knn_score = knn_components["knn_score"] 387 knn_score = knn_components["knn_score"]
303 text_components = _collect_text_score_components(matched_queries, es_score) 388 text_components = _collect_text_score_components(matched_queries, es_score)
304 text_score = text_components["text_score"] 389 text_score = text_components["text_score"]
305 - rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(  
306 - rerank_score, text_score, knn_score, f 390 + rerank_factor, fine_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(
  391 + rerank_score, fine_score if fine_scores is not None or "_fine_score" in hit else None, text_score, knn_score, f
307 ) 392 )
308 sku_selected = _has_selected_sku(hit) 393 sku_selected = _has_selected_sku(hit)
309 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 394 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
@@ -311,6 +396,7 @@ def fuse_scores_and_resort( @@ -311,6 +396,7 @@ def fuse_scores_and_resort(
311 396
312 hit["_original_score"] = hit.get("_score") 397 hit["_original_score"] = hit.get("_score")
313 hit["_rerank_score"] = rerank_score 398 hit["_rerank_score"] = rerank_score
  399 + hit["_fine_score"] = fine_score
314 hit["_text_score"] = text_score 400 hit["_text_score"] = text_score
315 hit["_knn_score"] = knn_score 401 hit["_knn_score"] = knn_score
316 hit["_text_knn_score"] = knn_components["text_knn_score"] 402 hit["_text_knn_score"] = knn_components["text_knn_score"]
@@ -330,6 +416,7 @@ def fuse_scores_and_resort( @@ -330,6 +416,7 @@ def fuse_scores_and_resort(
330 "doc_id": hit.get("_id"), 416 "doc_id": hit.get("_id"),
331 "es_score": es_score, 417 "es_score": es_score,
332 "rerank_score": rerank_score, 418 "rerank_score": rerank_score,
  419 + "fine_score": fine_score,
333 "text_score": text_score, 420 "text_score": text_score,
334 "text_source_score": text_components["source_score"], 421 "text_source_score": text_components["source_score"],
335 "text_translation_score": text_components["translation_score"], 422 "text_translation_score": text_components["translation_score"],
@@ -350,6 +437,7 @@ def fuse_scores_and_resort( @@ -350,6 +437,7 @@ def fuse_scores_and_resort(
350 "knn_support_score": knn_components["support_knn_score"], 437 "knn_support_score": knn_components["support_knn_score"],
351 "knn_score": knn_score, 438 "knn_score": knn_score,
352 "rerank_factor": rerank_factor, 439 "rerank_factor": rerank_factor,
  440 + "fine_factor": fine_factor,
353 "text_factor": text_factor, 441 "text_factor": text_factor,
354 "knn_factor": knn_factor, 442 "knn_factor": knn_factor,
355 "style_intent_selected_sku": sku_selected, 443 "style_intent_selected_sku": sku_selected,
@@ -381,6 +469,8 @@ def run_rerank( @@ -381,6 +469,8 @@ def run_rerank(
381 debug: bool = False, 469 debug: bool = False,
382 fusion: Optional[RerankFusionConfig] = None, 470 fusion: Optional[RerankFusionConfig] = None,
383 style_intent_selected_sku_boost: float = 1.2, 471 style_intent_selected_sku_boost: float = 1.2,
  472 + fine_scores: Optional[List[float]] = None,
  473 + service_profile: Optional[str] = None,
384 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: 474 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
385 """ 475 """
386 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 476 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
@@ -404,6 +494,7 @@ def run_rerank( @@ -404,6 +494,7 @@ def run_rerank(
404 docs, 494 docs,
405 timeout_sec=timeout_sec, 495 timeout_sec=timeout_sec,
406 top_n=top_n, 496 top_n=top_n,
  497 + service_profile=service_profile,
407 ) 498 )
408 499
409 if scores is None or len(scores) != len(hits): 500 if scores is None or len(scores) != len(hits):
@@ -412,6 +503,7 @@ def run_rerank( @@ -412,6 +503,7 @@ def run_rerank(
412 fused_debug = fuse_scores_and_resort( 503 fused_debug = fuse_scores_and_resort(
413 hits, 504 hits,
414 scores, 505 scores,
  506 + fine_scores=fine_scores,
415 weight_es=weight_es, 507 weight_es=weight_es,
416 weight_ai=weight_ai, 508 weight_ai=weight_ai,
417 fusion=fusion, 509 fusion=fusion,
@@ -427,3 +519,53 @@ def run_rerank( @@ -427,3 +519,53 @@ def run_rerank(
427 es_response["hits"]["max_score"] = top 519 es_response["hits"]["max_score"] = top
428 520
429 return es_response, meta, fused_debug 521 return es_response, meta, fused_debug
  522 +
  523 +
  524 +def run_lightweight_rerank(
  525 + query: str,
  526 + es_hits: List[Dict[str, Any]],
  527 + language: str = "zh",
  528 + timeout_sec: float = DEFAULT_TIMEOUT_SEC,
  529 + rerank_query_template: str = "{query}",
  530 + rerank_doc_template: str = "{title}",
  531 + top_n: Optional[int] = None,
  532 + debug: bool = False,
  533 + service_profile: Optional[str] = "fine",
  534 +) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
  535 + """Call lightweight reranker and attach scores to hits without final fusion."""
  536 + if not es_hits:
  537 + return [], {}, []
  538 +
  539 + query_text = str(rerank_query_template).format_map({"query": query})
  540 + rerank_debug_rows: Optional[List[Dict[str, Any]]] = [] if debug else None
  541 + docs = build_docs_from_hits(
  542 + es_hits,
  543 + language=language,
  544 + doc_template=rerank_doc_template,
  545 + debug_rows=rerank_debug_rows,
  546 + )
  547 + scores, meta = call_rerank_service(
  548 + query_text,
  549 + docs,
  550 + timeout_sec=timeout_sec,
  551 + top_n=top_n,
  552 + service_profile=service_profile,
  553 + )
  554 + if scores is None or len(scores) != len(es_hits):
  555 + return None, None, []
  556 +
  557 + debug_rows: List[Dict[str, Any]] = [] if debug else []
  558 + for idx, hit in enumerate(es_hits):
  559 + fine_score = _to_score(scores[idx])
  560 + hit["_fine_score"] = fine_score
  561 + if debug:
  562 + row: Dict[str, Any] = {
  563 + "doc_id": hit.get("_id"),
  564 + "fine_score": fine_score,
  565 + }
  566 + if rerank_debug_rows is not None and idx < len(rerank_debug_rows):
  567 + row["rerank_input"] = rerank_debug_rows[idx]
  568 + debug_rows.append(row)
  569 +
  570 + es_hits.sort(key=lambda h: h.get("_fine_score", 0.0), reverse=True)
  571 + return scores, meta, debug_rows
search/searcher.py
@@ -133,7 +133,14 @@ class Searcher: @@ -133,7 +133,14 @@ class Searcher:
133 source_fields=self.source_fields, 133 source_fields=self.source_fields,
134 function_score_config=self.config.function_score, 134 function_score_config=self.config.function_score,
135 default_language=self.config.query_config.default_language, 135 default_language=self.config.query_config.default_language,
136 - knn_boost=self.config.query_config.knn_boost, 136 + knn_text_boost=self.config.query_config.knn_text_boost,
  137 + knn_image_boost=self.config.query_config.knn_image_boost,
  138 + knn_text_k=self.config.query_config.knn_text_k,
  139 + knn_text_num_candidates=self.config.query_config.knn_text_num_candidates,
  140 + knn_text_k_long=self.config.query_config.knn_text_k_long,
  141 + knn_text_num_candidates_long=self.config.query_config.knn_text_num_candidates_long,
  142 + knn_image_k=self.config.query_config.knn_image_k,
  143 + knn_image_num_candidates=self.config.query_config.knn_image_num_candidates,
137 base_minimum_should_match=self.config.query_config.base_minimum_should_match, 144 base_minimum_should_match=self.config.query_config.base_minimum_should_match,
138 translation_minimum_should_match=self.config.query_config.translation_minimum_should_match, 145 translation_minimum_should_match=self.config.query_config.translation_minimum_should_match,
139 translation_boost=self.config.query_config.translation_boost, 146 translation_boost=self.config.query_config.translation_boost,
@@ -244,6 +251,30 @@ class Searcher: @@ -244,6 +251,30 @@ class Searcher:
244 return hits_by_id, int(resp.get("took", 0) or 0) 251 return hits_by_id, int(resp.get("took", 0) or 0)
245 252
246 @staticmethod 253 @staticmethod
  254 + def _restore_hits_in_doc_order(
  255 + doc_ids: List[str],
  256 + hits_by_id: Dict[str, Dict[str, Any]],
  257 + ) -> List[Dict[str, Any]]:
  258 + ordered_hits: List[Dict[str, Any]] = []
  259 + for doc_id in doc_ids:
  260 + hit = hits_by_id.get(str(doc_id))
  261 + if hit is not None:
  262 + ordered_hits.append(hit)
  263 + return ordered_hits
  264 +
  265 + @staticmethod
  266 + def _merge_source_specs(*source_specs: Any) -> Optional[Dict[str, Any]]:
  267 + includes: set[str] = set()
  268 + for source_spec in source_specs:
  269 + if not isinstance(source_spec, dict):
  270 + continue
  271 + for field_name in source_spec.get("includes") or []:
  272 + includes.add(str(field_name))
  273 + if not includes:
  274 + return None
  275 + return {"includes": sorted(includes)}
  276 +
  277 + @staticmethod
247 def _has_style_intent(parsed_query: Optional[ParsedQuery]) -> bool: 278 def _has_style_intent(parsed_query: Optional[ParsedQuery]) -> bool:
248 profile = getattr(parsed_query, "style_intent_profile", None) 279 profile = getattr(parsed_query, "style_intent_profile", None)
249 return bool(getattr(profile, "is_active", False)) 280 return bool(getattr(profile, "is_active", False))
@@ -320,20 +351,33 @@ class Searcher: @@ -320,20 +351,33 @@ class Searcher:
320 index_langs = tenant_cfg.get("index_languages") or [] 351 index_langs = tenant_cfg.get("index_languages") or []
321 enable_translation = len(index_langs) > 0 352 enable_translation = len(index_langs) > 0
322 enable_embedding = self.config.query_config.enable_text_embedding 353 enable_embedding = self.config.query_config.enable_text_embedding
  354 + coarse_cfg = self.config.coarse_rank
  355 + fine_cfg = self.config.fine_rank
323 rc = self.config.rerank 356 rc = self.config.rerank
324 effective_query_template = rerank_query_template or rc.rerank_query_template 357 effective_query_template = rerank_query_template or rc.rerank_query_template
325 effective_doc_template = rerank_doc_template or rc.rerank_doc_template 358 effective_doc_template = rerank_doc_template or rc.rerank_doc_template
  359 + fine_query_template = fine_cfg.rerank_query_template or effective_query_template
  360 + fine_doc_template = fine_cfg.rerank_doc_template or effective_doc_template
326 # 重排开关优先级:请求参数显式传值 > 服务端配置(默认开启) 361 # 重排开关优先级:请求参数显式传值 > 服务端配置(默认开启)
327 rerank_enabled_by_config = bool(rc.enabled) 362 rerank_enabled_by_config = bool(rc.enabled)
328 do_rerank = rerank_enabled_by_config if enable_rerank is None else bool(enable_rerank) 363 do_rerank = rerank_enabled_by_config if enable_rerank is None else bool(enable_rerank)
329 rerank_window = rc.rerank_window 364 rerank_window = rc.rerank_window
  365 + coarse_input_window = max(rerank_window, int(coarse_cfg.input_window))
  366 + coarse_output_window = max(rerank_window, int(coarse_cfg.output_window))
  367 + fine_input_window = max(rerank_window, int(fine_cfg.input_window))
  368 + fine_output_window = max(rerank_window, int(fine_cfg.output_window))
330 # 若开启重排且请求范围在窗口内:从 ES 取前 rerank_window 条、重排后再按 from/size 分页;否则不重排,按原 from/size 查 ES 369 # 若开启重排且请求范围在窗口内:从 ES 取前 rerank_window 条、重排后再按 from/size 分页;否则不重排,按原 from/size 查 ES
331 in_rerank_window = do_rerank and (from_ + size) <= rerank_window 370 in_rerank_window = do_rerank and (from_ + size) <= rerank_window
332 es_fetch_from = 0 if in_rerank_window else from_ 371 es_fetch_from = 0 if in_rerank_window else from_
333 - es_fetch_size = rerank_window if in_rerank_window else size 372 + es_fetch_size = coarse_input_window if in_rerank_window else size
334 373
335 es_score_normalization_factor: Optional[float] = None 374 es_score_normalization_factor: Optional[float] = None
336 initial_ranks_by_doc: Dict[str, int] = {} 375 initial_ranks_by_doc: Dict[str, int] = {}
  376 + coarse_ranks_by_doc: Dict[str, int] = {}
  377 + fine_ranks_by_doc: Dict[str, int] = {}
  378 + rerank_ranks_by_doc: Dict[str, int] = {}
  379 + coarse_debug_info: Optional[Dict[str, Any]] = None
  380 + fine_debug_info: Optional[Dict[str, Any]] = None
337 rerank_debug_info: Optional[Dict[str, Any]] = None 381 rerank_debug_info: Optional[Dict[str, Any]] = None
338 382
339 # Start timing 383 # Start timing
@@ -360,12 +404,19 @@ class Searcher: @@ -360,12 +404,19 @@ class Searcher:
360 'enable_rerank_request': enable_rerank, 404 'enable_rerank_request': enable_rerank,
361 'rerank_query_template': effective_query_template, 405 'rerank_query_template': effective_query_template,
362 'rerank_doc_template': effective_doc_template, 406 'rerank_doc_template': effective_doc_template,
  407 + 'fine_query_template': fine_query_template,
  408 + 'fine_doc_template': fine_doc_template,
363 'filters': filters, 409 'filters': filters,
364 'range_filters': range_filters, 410 'range_filters': range_filters,
365 'facets': facets, 411 'facets': facets,
366 'enable_translation': enable_translation, 412 'enable_translation': enable_translation,
367 'enable_embedding': enable_embedding, 413 'enable_embedding': enable_embedding,
368 'enable_rerank': do_rerank, 414 'enable_rerank': do_rerank,
  415 + 'coarse_input_window': coarse_input_window,
  416 + 'coarse_output_window': coarse_output_window,
  417 + 'fine_input_window': fine_input_window,
  418 + 'fine_output_window': fine_output_window,
  419 + 'rerank_window': rerank_window,
369 'min_score': min_score, 420 'min_score': min_score,
370 'sort_by': sort_by, 421 'sort_by': sort_by,
371 'sort_order': sort_order 422 'sort_order': sort_order
@@ -463,16 +514,12 @@ class Searcher: @@ -463,16 +514,12 @@ class Searcher:
463 # Keep requested response _source semantics for the final response fill. 514 # Keep requested response _source semantics for the final response fill.
464 response_source_spec = es_query.get("_source") 515 response_source_spec = es_query.get("_source")
465 516
466 - # In rerank window, first pass only fetches minimal fields required by rerank template. 517 + # In multi-stage rank window, first pass only needs score signals for coarse rank.
467 es_query_for_fetch = es_query 518 es_query_for_fetch = es_query
468 rerank_prefetch_source = None 519 rerank_prefetch_source = None
469 if in_rerank_window: 520 if in_rerank_window:
470 - rerank_prefetch_source = self._resolve_rerank_source_filter(  
471 - effective_doc_template,  
472 - parsed_query=parsed_query,  
473 - )  
474 es_query_for_fetch = dict(es_query) 521 es_query_for_fetch = dict(es_query)
475 - es_query_for_fetch["_source"] = rerank_prefetch_source 522 + es_query_for_fetch["_source"] = False
476 523
477 # Extract size and from from body for ES client parameters 524 # Extract size and from from body for ES client parameters
478 body_for_es = {k: v for k, v in es_query_for_fetch.items() if k not in ['size', 'from']} 525 body_for_es = {k: v for k, v in es_query_for_fetch.items() if k not in ['size', 'from']}
@@ -580,26 +627,148 @@ class Searcher: @@ -580,26 +627,148 @@ class Searcher:
580 context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY) 627 context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY)
581 628
582 style_intent_decisions: Dict[str, SkuSelectionDecision] = {} 629 style_intent_decisions: Dict[str, SkuSelectionDecision] = {}
583 - if self._has_style_intent(parsed_query) and in_rerank_window:  
584 - style_intent_decisions = self._apply_style_intent_to_hits(  
585 - es_response.get("hits", {}).get("hits") or [],  
586 - parsed_query,  
587 - context=context,  
588 - )  
589 - if style_intent_decisions: 630 + if do_rerank and in_rerank_window:
  631 + from dataclasses import asdict
  632 + from config.services_config import get_rerank_backend_config, get_rerank_service_url
  633 + from .rerank_client import coarse_resort_hits, run_lightweight_rerank, run_rerank
  634 +
  635 + rerank_query = parsed_query.text_for_rerank() if parsed_query else query
  636 + hits = es_response.get("hits", {}).get("hits") or []
  637 +
  638 + context.start_stage(RequestContextStage.COARSE_RANKING)
  639 + try:
  640 + coarse_debug = coarse_resort_hits(
  641 + hits,
  642 + fusion=coarse_cfg.fusion,
  643 + debug=debug,
  644 + )
  645 + hits = hits[:coarse_output_window]
  646 + es_response.setdefault("hits", {})["hits"] = hits
  647 + if debug:
  648 + coarse_ranks_by_doc = {
  649 + str(hit.get("_id")): rank
  650 + for rank, hit in enumerate(hits, 1)
  651 + if hit.get("_id") is not None
  652 + }
  653 + if debug:
  654 + coarse_debug_info = {
  655 + "docs_in": es_fetch_size,
  656 + "docs_out": len(hits),
  657 + "fusion": asdict(coarse_cfg.fusion),
  658 + }
  659 + context.store_intermediate_result("coarse_rank_scores", coarse_debug)
590 context.logger.info( 660 context.logger.info(
591 - "款式意图 SKU 预筛选完成 | hits=%s",  
592 - len(style_intent_decisions), 661 + "粗排完成 | docs_in=%s | docs_out=%s",
  662 + es_fetch_size,
  663 + len(hits),
593 extra={'reqid': context.reqid, 'uid': context.uid} 664 extra={'reqid': context.reqid, 'uid': context.uid}
594 ) 665 )
  666 + finally:
  667 + context.end_stage(RequestContextStage.COARSE_RANKING)
  668 +
  669 + ranking_source_spec = self._merge_source_specs(
  670 + self._resolve_rerank_source_filter(
  671 + fine_doc_template,
  672 + parsed_query=parsed_query,
  673 + ),
  674 + self._resolve_rerank_source_filter(
  675 + effective_doc_template,
  676 + parsed_query=parsed_query,
  677 + ),
  678 + )
  679 + candidate_ids = [str(h.get("_id")) for h in hits if h.get("_id") is not None]
  680 + if candidate_ids:
  681 + details_by_id, fill_took = self._fetch_hits_by_ids(
  682 + index_name=index_name,
  683 + doc_ids=candidate_ids,
  684 + source_spec=ranking_source_spec,
  685 + )
  686 + for hit in hits:
  687 + hid = hit.get("_id")
  688 + if hid is None:
  689 + continue
  690 + detail_hit = details_by_id.get(str(hid))
  691 + if detail_hit is not None and "_source" in detail_hit:
  692 + hit["_source"] = detail_hit.get("_source") or {}
  693 + if fill_took:
  694 + es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
  695 +
  696 + if self._has_style_intent(parsed_query):
  697 + style_intent_decisions = self._apply_style_intent_to_hits(
  698 + es_response.get("hits", {}).get("hits") or [],
  699 + parsed_query,
  700 + context=context,
  701 + )
  702 + if style_intent_decisions:
  703 + context.logger.info(
  704 + "款式意图 SKU 预筛选完成 | hits=%s",
  705 + len(style_intent_decisions),
  706 + extra={'reqid': context.reqid, 'uid': context.uid}
  707 + )
  708 +
  709 + fine_scores: Optional[List[float]] = None
  710 + hits = es_response.get("hits", {}).get("hits") or []
  711 + if fine_cfg.enabled and hits:
  712 + context.start_stage(RequestContextStage.FINE_RANKING)
  713 + try:
  714 + fine_scores, fine_meta, fine_debug_rows = run_lightweight_rerank(
  715 + query=rerank_query,
  716 + es_hits=hits[:fine_input_window],
  717 + language=language,
  718 + timeout_sec=fine_cfg.timeout_sec,
  719 + rerank_query_template=fine_query_template,
  720 + rerank_doc_template=fine_doc_template,
  721 + top_n=fine_output_window,
  722 + debug=debug,
  723 + service_profile=fine_cfg.service_profile,
  724 + )
  725 + if fine_scores is not None:
  726 + hits = hits[:fine_output_window]
  727 + es_response["hits"]["hits"] = hits
  728 + if debug:
  729 + fine_ranks_by_doc = {
  730 + str(hit.get("_id")): rank
  731 + for rank, hit in enumerate(hits, 1)
  732 + if hit.get("_id") is not None
  733 + }
  734 + fine_backend_name, fine_backend_cfg = get_rerank_backend_config(fine_cfg.service_profile)
  735 + fine_debug_info = {
  736 + "service_profile": fine_cfg.service_profile,
  737 + "service_url": get_rerank_service_url(profile=fine_cfg.service_profile),
  738 + "backend": fine_backend_name,
  739 + "model": fine_meta.get("model") if isinstance(fine_meta, dict) else None,
  740 + "backend_model_name": fine_backend_cfg.get("model_name"),
  741 + "query_template": fine_query_template,
  742 + "doc_template": fine_doc_template,
  743 + "query_text": str(fine_query_template).format_map({"query": rerank_query}),
  744 + "docs_in": min(len(fine_scores), fine_input_window),
  745 + "docs_out": len(hits),
  746 + "top_n": fine_output_window,
  747 + "meta": fine_meta,
  748 + }
  749 + context.store_intermediate_result("fine_rank_scores", fine_debug_rows)
  750 + context.logger.info(
  751 + "精排完成 | docs=%s | top_n=%s | meta=%s",
  752 + len(hits),
  753 + fine_output_window,
  754 + fine_meta,
  755 + extra={'reqid': context.reqid, 'uid': context.uid}
  756 + )
  757 + except Exception as e:
  758 + context.add_warning(f"Fine rerank failed: {e}")
  759 + context.logger.warning(
  760 + f"调用精排服务失败 | error: {e}",
  761 + extra={'reqid': context.reqid, 'uid': context.uid},
  762 + exc_info=True,
  763 + )
  764 + finally:
  765 + context.end_stage(RequestContextStage.FINE_RANKING)
595 766
596 - # Optional Step 4.5: AI reranking(仅当请求范围在重排窗口内时执行)  
597 - if do_rerank and in_rerank_window:  
598 context.start_stage(RequestContextStage.RERANKING) 767 context.start_stage(RequestContextStage.RERANKING)
599 try: 768 try:
600 - from .rerank_client import run_rerank  
601 -  
602 - rerank_query = parsed_query.text_for_rerank() if parsed_query else query 769 + final_hits = es_response.get("hits", {}).get("hits") or []
  770 + final_input = final_hits[:rerank_window]
  771 + es_response["hits"]["hits"] = final_input
603 es_response, rerank_meta, fused_debug = run_rerank( 772 es_response, rerank_meta, fused_debug = run_rerank(
604 query=rerank_query, 773 query=rerank_query,
605 es_response=es_response, 774 es_response=es_response,
@@ -612,19 +781,30 @@ class Searcher: @@ -612,19 +781,30 @@ class Searcher:
612 top_n=(from_ + size), 781 top_n=(from_ + size),
613 debug=debug, 782 debug=debug,
614 fusion=rc.fusion, 783 fusion=rc.fusion,
  784 + fine_scores=fine_scores[:len(final_input)] if fine_scores is not None else None,
  785 + service_profile=rc.service_profile,
615 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost, 786 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
616 ) 787 )
617 788
618 if rerank_meta is not None: 789 if rerank_meta is not None:
619 if debug: 790 if debug:
620 - from dataclasses import asdict  
621 - from config.services_config import get_rerank_service_url 791 + rerank_ranks_by_doc = {
  792 + str(hit.get("_id")): rank
  793 + for rank, hit in enumerate(es_response.get("hits", {}).get("hits") or [], 1)
  794 + if hit.get("_id") is not None
  795 + }
  796 + rerank_backend_name, rerank_backend_cfg = get_rerank_backend_config(rc.service_profile)
622 rerank_debug_info = { 797 rerank_debug_info = {
623 - "service_url": get_rerank_service_url(), 798 + "service_profile": rc.service_profile,
  799 + "service_url": get_rerank_service_url(profile=rc.service_profile),
  800 + "backend": rerank_backend_name,
  801 + "model": rerank_meta.get("model") if isinstance(rerank_meta, dict) else None,
  802 + "backend_model_name": rerank_backend_cfg.get("model_name"),
624 "query_template": effective_query_template, 803 "query_template": effective_query_template,
625 "doc_template": effective_doc_template, 804 "doc_template": effective_doc_template,
626 "query_text": str(effective_query_template).format_map({"query": rerank_query}), 805 "query_text": str(effective_query_template).format_map({"query": rerank_query}),
627 - "docs": len(es_response.get("hits", {}).get("hits") or []), 806 + "docs_in": len(final_input),
  807 + "docs_out": len(es_response.get("hits", {}).get("hits") or []),
628 "top_n": from_ + size, 808 "top_n": from_ + size,
629 "meta": rerank_meta, 809 "meta": rerank_meta,
630 "fusion": asdict(rc.fusion), 810 "fusion": asdict(rc.fusion),
@@ -645,15 +825,17 @@ class Searcher: @@ -645,15 +825,17 @@ class Searcher:
645 finally: 825 finally:
646 context.end_stage(RequestContextStage.RERANKING) 826 context.end_stage(RequestContextStage.RERANKING)
647 827
648 - # 当本次请求在重排窗口内时:已从 ES 取了 rerank_window 条并可能已重排,需按请求的 from/size 做分页切片 828 + # 当本次请求在重排窗口内时:已按多阶段排序产出前 rerank_window 条,需按请求的 from/size 做分页切片
649 if in_rerank_window: 829 if in_rerank_window:
650 hits = es_response.get("hits", {}).get("hits") or [] 830 hits = es_response.get("hits", {}).get("hits") or []
651 sliced = hits[from_ : from_ + size] 831 sliced = hits[from_ : from_ + size]
652 es_response.setdefault("hits", {})["hits"] = sliced 832 es_response.setdefault("hits", {})["hits"] = sliced
653 if sliced: 833 if sliced:
654 - # 对于启用重排的结果,优先使用 _fused_score 计算 max_score;否则退回原始 _score  
655 slice_max = max( 834 slice_max = max(
656 - (h.get("_fused_score", h.get("_score", 0.0)) for h in sliced), 835 + (
  836 + h.get("_fused_score", h.get("_fine_score", h.get("_coarse_score", h.get("_score", 0.0))))
  837 + for h in sliced
  838 + ),
657 default=0.0, 839 default=0.0,
658 ) 840 )
659 try: 841 try:
@@ -663,7 +845,6 @@ class Searcher: @@ -663,7 +845,6 @@ class Searcher:
663 else: 845 else:
664 es_response["hits"]["max_score"] = 0.0 846 es_response["hits"]["max_score"] = 0.0
665 847
666 - # Page fill: fetch detailed fields only for final page hits.  
667 if sliced: 848 if sliced:
668 if response_source_spec is False: 849 if response_source_spec is False:
669 for hit in sliced: 850 for hit in sliced:
@@ -747,6 +928,26 @@ class Searcher: @@ -747,6 +928,26 @@ class Searcher:
747 if doc_id is None: 928 if doc_id is None:
748 continue 929 continue
749 rerank_debug_by_doc[str(doc_id)] = item 930 rerank_debug_by_doc[str(doc_id)] = item
  931 + coarse_debug_raw = context.get_intermediate_result('coarse_rank_scores', None)
  932 + coarse_debug_by_doc: Dict[str, Dict[str, Any]] = {}
  933 + if isinstance(coarse_debug_raw, list):
  934 + for item in coarse_debug_raw:
  935 + if not isinstance(item, dict):
  936 + continue
  937 + doc_id = item.get("doc_id")
  938 + if doc_id is None:
  939 + continue
  940 + coarse_debug_by_doc[str(doc_id)] = item
  941 + fine_debug_raw = context.get_intermediate_result('fine_rank_scores', None)
  942 + fine_debug_by_doc: Dict[str, Dict[str, Any]] = {}
  943 + if isinstance(fine_debug_raw, list):
  944 + for item in fine_debug_raw:
  945 + if not isinstance(item, dict):
  946 + continue
  947 + doc_id = item.get("doc_id")
  948 + if doc_id is None:
  949 + continue
  950 + fine_debug_by_doc[str(doc_id)] = item
750 951
751 if self._has_style_intent(parsed_query): 952 if self._has_style_intent(parsed_query):
752 if style_intent_decisions: 953 if style_intent_decisions:
@@ -777,6 +978,12 @@ class Searcher: @@ -777,6 +978,12 @@ class Searcher:
777 rerank_debug = None 978 rerank_debug = None
778 if doc_id is not None: 979 if doc_id is not None:
779 rerank_debug = rerank_debug_by_doc.get(str(doc_id)) 980 rerank_debug = rerank_debug_by_doc.get(str(doc_id))
  981 + coarse_debug = None
  982 + if doc_id is not None:
  983 + coarse_debug = coarse_debug_by_doc.get(str(doc_id))
  984 + fine_debug = None
  985 + if doc_id is not None:
  986 + fine_debug = fine_debug_by_doc.get(str(doc_id))
780 style_intent_debug = None 987 style_intent_debug = None
781 if doc_id is not None and style_intent_decisions: 988 if doc_id is not None and style_intent_decisions:
782 decision = style_intent_decisions.get(str(doc_id)) 989 decision = style_intent_decisions.get(str(doc_id))
@@ -811,11 +1018,17 @@ class Searcher: @@ -811,11 +1018,17 @@ class Searcher:
811 "vendor_multilingual": vendor_multilingual, 1018 "vendor_multilingual": vendor_multilingual,
812 } 1019 }
813 1020
  1021 + if coarse_debug:
  1022 + debug_entry["coarse_score"] = coarse_debug.get("coarse_score")
  1023 + debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor")
  1024 + debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor")
  1025 +
814 # 若存在重排调试信息,则补充 doc 级别的融合分数信息 1026 # 若存在重排调试信息,则补充 doc 级别的融合分数信息
815 if rerank_debug: 1027 if rerank_debug:
816 debug_entry["doc_id"] = rerank_debug.get("doc_id") 1028 debug_entry["doc_id"] = rerank_debug.get("doc_id")
817 # 与 rerank_client 中字段保持一致,便于前端直接使用 1029 # 与 rerank_client 中字段保持一致,便于前端直接使用
818 debug_entry["rerank_score"] = rerank_debug.get("rerank_score") 1030 debug_entry["rerank_score"] = rerank_debug.get("rerank_score")
  1031 + debug_entry["fine_score"] = rerank_debug.get("fine_score")
819 debug_entry["text_score"] = rerank_debug.get("text_score") 1032 debug_entry["text_score"] = rerank_debug.get("text_score")
820 debug_entry["text_source_score"] = rerank_debug.get("text_source_score") 1033 debug_entry["text_source_score"] = rerank_debug.get("text_source_score")
821 debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score") 1034 debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score")
@@ -826,11 +1039,70 @@ class Searcher: @@ -826,11 +1039,70 @@ class Searcher:
826 debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es") 1039 debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es")
827 debug_entry["knn_score"] = rerank_debug.get("knn_score") 1040 debug_entry["knn_score"] = rerank_debug.get("knn_score")
828 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") 1041 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
  1042 + debug_entry["fine_factor"] = rerank_debug.get("fine_factor")
829 debug_entry["text_factor"] = rerank_debug.get("text_factor") 1043 debug_entry["text_factor"] = rerank_debug.get("text_factor")
830 debug_entry["knn_factor"] = rerank_debug.get("knn_factor") 1044 debug_entry["knn_factor"] = rerank_debug.get("knn_factor")
831 debug_entry["fused_score"] = rerank_debug.get("fused_score") 1045 debug_entry["fused_score"] = rerank_debug.get("fused_score")
832 debug_entry["rerank_input"] = rerank_debug.get("rerank_input") 1046 debug_entry["rerank_input"] = rerank_debug.get("rerank_input")
833 debug_entry["matched_queries"] = rerank_debug.get("matched_queries") 1047 debug_entry["matched_queries"] = rerank_debug.get("matched_queries")
  1048 + elif fine_debug:
  1049 + debug_entry["doc_id"] = fine_debug.get("doc_id")
  1050 + debug_entry["fine_score"] = fine_debug.get("fine_score")
  1051 + debug_entry["rerank_input"] = fine_debug.get("rerank_input")
  1052 +
  1053 + initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1054 + coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1055 + fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1056 + rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1057 + final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1058 +
  1059 + def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]:
  1060 + if previous_rank is None or current_rank is None:
  1061 + return None
  1062 + return previous_rank - current_rank
  1063 +
  1064 + debug_entry["ranking_funnel"] = {
  1065 + "es_recall": {
  1066 + "rank": initial_rank,
  1067 + "score": es_score,
  1068 + "normalized_score": normalized,
  1069 + "matched_queries": hit.get("matched_queries"),
  1070 + },
  1071 + "coarse_rank": {
  1072 + "rank": coarse_rank,
  1073 + "rank_change": _rank_change(initial_rank, coarse_rank),
  1074 + "score": coarse_debug.get("coarse_score") if coarse_debug else None,
  1075 + "text_score": coarse_debug.get("text_score") if coarse_debug else None,
  1076 + "knn_score": coarse_debug.get("knn_score") if coarse_debug else None,
  1077 + "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None,
  1078 + "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None,
  1079 + "signals": coarse_debug,
  1080 + },
  1081 + "fine_rank": {
  1082 + "rank": fine_rank,
  1083 + "rank_change": _rank_change(coarse_rank, fine_rank),
  1084 + "score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"),
  1085 + "rerank_input": fine_debug.get("rerank_input") if fine_debug else None,
  1086 + },
  1087 + "rerank": {
  1088 + "rank": rerank_rank,
  1089 + "rank_change": _rank_change(fine_rank, rerank_rank),
  1090 + "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"),
  1091 + "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"),
  1092 + "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"),
  1093 + "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"),
  1094 + "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"),
  1095 + "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None,
  1096 + "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None,
  1097 + "text_factor": rerank_debug.get("text_factor") if rerank_debug else None,
  1098 + "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None,
  1099 + "signals": rerank_debug,
  1100 + },
  1101 + "final_page": {
  1102 + "rank": final_rank,
  1103 + "rank_change": _rank_change(rerank_rank, final_rank),
  1104 + },
  1105 + }
834 1106
835 if style_intent_debug: 1107 if style_intent_debug:
836 debug_entry["style_intent_sku"] = style_intent_debug 1108 debug_entry["style_intent_sku"] = style_intent_debug
@@ -901,7 +1173,18 @@ class Searcher: @@ -901,7 +1173,18 @@ class Searcher:
901 "shards": es_response.get('_shards', {}), 1173 "shards": es_response.get('_shards', {}),
902 "es_score_normalization_factor": es_score_normalization_factor, 1174 "es_score_normalization_factor": es_score_normalization_factor,
903 }, 1175 },
  1176 + "coarse_rank": coarse_debug_info,
  1177 + "fine_rank": fine_debug_info,
904 "rerank": rerank_debug_info, 1178 "rerank": rerank_debug_info,
  1179 + "ranking_funnel": {
  1180 + "es_recall": {
  1181 + "docs_out": es_fetch_size,
  1182 + "score_normalization_factor": es_score_normalization_factor,
  1183 + },
  1184 + "coarse_rank": coarse_debug_info,
  1185 + "fine_rank": fine_debug_info,
  1186 + "rerank": rerank_debug_info,
  1187 + },
905 "feature_flags": context.metadata.get('feature_flags', {}), 1188 "feature_flags": context.metadata.get('feature_flags', {}),
906 "stage_timings": { 1189 "stage_timings": {
907 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() 1190 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items()
tests/queries.txt 0 → 100644
@@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
  1 +白色oversized T-shirt
  2 +falda negra oficina
  3 +red fitted tee
  4 +黒いミディ丈スカート
  5 +黑色中长半身裙
  6 +فستان أسود متوسط الطول
  7 +чёрное летнее платье
  8 +修身牛仔裤
  9 +date night dress
  10 +vacation outfit dress
  11 +minimalist top
  12 +streetwear t-shirt
  13 +office casual blouse
  14 +街头风T恤
  15 +宽松T恤
  16 +复古印花T恤
  17 +Y2K上衣
  18 +情侣T恤
  19 +美式复古T恤
  20 +重磅棉T恤
  21 +修身打底衫
  22 +辣妹风短袖
  23 +纯欲上衣
  24 +正肩白T恤
  25 +波西米亚花朵衬衫
  26 +泡泡袖短袖
  27 +扎染字母T恤
  28 +T-shirt Dress
  29 +Crop Top
  30 +Lace Undershirt
  31 +Leopard Print Ripped T-shirt
  32 +Breton Stripe T-shirt
  33 +V-Neck Cotton T-shirt
  34 +Sweet & Cool Bow T-shirt
  35 +Vacation Style T-shirt
  36 +Commuter Casual Top
  37 +Minimalist Solid T-shirt
  38 +Band T-shirt
  39 +Athletic Gym T-shirt
  40 +Plus Size Loose T-shirt
  41 +Korean Style Slim T-shirt
  42 +Basic Layering Top
  43 +
tests/test_es_query_builder.py
@@ -119,9 +119,12 @@ def test_text_query_skips_duplicate_translation_same_as_base(): @@ -119,9 +119,12 @@ def test_text_query_skips_duplicate_translation_same_as_base():
119 enable_knn=False, 119 enable_knn=False,
120 ) 120 )
121 121
122 - root = _recall_root(q)  
123 - assert root["bool"]["_name"] == "base_query"  
124 - assert [clause["multi_match"]["type"] for clause in root["bool"]["should"]] == ["best_fields", "phrase"] 122 + query_root = q["query"]
  123 + if "function_score" in query_root:
  124 + query_root = query_root["function_score"]["query"]
  125 + base_bool = query_root["bool"]
  126 + assert base_bool["_name"] == "base_query"
  127 + assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"]
125 128
126 129
127 def test_product_title_exclusion_filter_is_applied_once_on_outer_query(): 130 def test_product_title_exclusion_filter_is_applied_once_on_outer_query():
tests/test_es_query_builder_text_recall_languages.py
@@ -11,6 +11,7 @@ from typing import Any, Dict, List @@ -11,6 +11,7 @@ from typing import Any, Dict, List
11 11
12 import numpy as np 12 import numpy as np
13 13
  14 +from query.keyword_extractor import KEYWORDS_QUERY_BASE_KEY
14 from search.es_query_builder import ESQueryBuilder 15 from search.es_query_builder import ESQueryBuilder
15 16
16 17
@@ -129,6 +130,29 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): @@ -129,6 +130,29 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en():
129 assert "title.en" in _title_fields(idx["base_query_trans_en"]) 130 assert "title.en" in _title_fields(idx["base_query_trans_en"])
130 131
131 132
def test_keywords_combined_fields_second_must_same_fields_and_50pct():
    """When ParsedQuery.keywords_queries is set, must includes a second combined_fields."""
    builder = _builder_multilingual_title_only(default_language="en")
    parsed_query = SimpleNamespace(
        rewritten_query="连衣裙",
        detected_language="zh",
        translations={"en": "red dress"},
        keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣 裙", "en": "dress"},
    )
    built = builder.build_query(query_text="连衣裙", parsed_query=parsed_query, enable_knn=False)
    clauses = _clauses_index(built)

    # Base-language clause: original query first, keywords variant second.
    base_must = clauses["base_query"]["must"]
    assert len(base_must) == 2
    first_cf = base_must[0]["combined_fields"]
    second_cf = base_must[1]["combined_fields"]
    assert first_cf["query"] == "连衣裙"
    assert second_cf["query"] == "连衣 裙"
    assert second_cf["minimum_should_match"] == "50%"
    assert second_cf["fields"] == first_cf["fields"]

    # Translated clause gets its own keywords variant with the same 50% rule.
    trans_must = clauses["base_query_trans_en"]["must"]
    assert len(trans_must) == 2
    assert trans_must[1]["combined_fields"]["query"] == "dress"
    assert trans_must[1]["combined_fields"]["minimum_should_match"] == "50%"
  154 +
  155 +
132 def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): 156 def test_en_query_index_zh_en_includes_base_en_and_trans_zh():
133 qb = _builder_multilingual_title_only(default_language="en") 157 qb = _builder_multilingual_title_only(default_language="en")
134 q = _build( 158 q = _build(
@@ -351,7 +375,10 @@ def test_text_clauses_present_alongside_knn(): @@ -351,7 +375,10 @@ def test_text_clauses_present_alongside_knn():
351 parsed_query=parsed, 375 parsed_query=parsed,
352 enable_knn=True, 376 enable_knn=True,
353 ) 377 )
354 - assert "knn" in q 378 + qr = q["query"]
  379 + if "function_score" in qr:
  380 + qr = qr["function_score"]["query"]
  381 + assert any("knn" in c for c in qr["bool"]["should"])
355 idx = _clauses_index(q) 382 idx = _clauses_index(q)
356 assert set(idx) == {"base_query", "base_query_trans_zh"} 383 assert set(idx) == {"base_query", "base_query_trans_zh"}
357 384
tests/test_keywords_query.py 0 → 100644
@@ -0,0 +1,115 @@ @@ -0,0 +1,115 @@
  1 +import hanlp
  2 +from typing import List, Tuple, Dict, Any
  3 +
class KeywordExtractor:
    """HanLP-based noun keyword extractor.

    Tokenizes the input with a span-aware fine-grained CTB tokenizer,
    POS-tags the tokens, and keeps nouns of length >= 2.  Kept words that
    were adjacent in the original text are concatenated directly; a single
    space is inserted wherever the kept words were not contiguous.
    """

    # Words dropped even when tagged as nouns; frozenset gives O(1) lookup
    # and is shared across calls instead of being rebuilt per query.
    IGNORE_KEYWORDS = frozenset({"玩具"})

    def __init__(self, ignore_keywords=None):
        """Load the tokenizer/POS models.

        Args:
            ignore_keywords: optional iterable of words to drop instead of
                the class-level default (backward-compatible extension).
        """
        # Fine-grained tokenizer with character offsets enabled.
        self.tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF)
        self.tok.config.output_spans = True  # emit [word, start, end]

        # POS tagger operating on the pre-tokenized word list.
        self.pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)

        if ignore_keywords is not None:
            # Shadow the class default on this instance only.
            self.IGNORE_KEYWORDS = frozenset(ignore_keywords)

    def extract_keywords(self, query: str) -> str:
        """Extract noun keywords (length >= 2) from *query*.

        Args:
            query: input text.

        Returns:
            The kept words joined in original order; a space separates
            words that were not adjacent in the source text.
        """
        query = query.strip()
        # Tokenization with positions: [[word, start, end], ...]
        spans = self.tok(query)
        words = [item[0] for item in spans]
        tags = self.pos_tag(words)

        parts: List[str] = []
        last_end = 0
        for (word, start, end), tag in zip(spans, tags):
            # Keep multi-char nouns only (CTB noun tags start with 'N').
            if len(word) < 2 or not tag.startswith("N"):
                continue
            if word in self.IGNORE_KEYWORDS:
                continue
            # Gap relative to the previous kept word -> insert a space.
            if start != last_end and parts:
                parts.append(" ")
            parts.append(word)
            last_end = end

        return "".join(parts).strip()
  53 +
  54 +
# Manual smoke test: run this module directly to eyeball extraction quality.
if __name__ == "__main__":
    demo = KeywordExtractor()

    sample_queries = [
        # Chinese (nine representative queries kept)
        "2.4G遥控大蛇",
        "充气的篮球",
        "遥控 塑料 飞船 汽车 ",
        "亚克力相框",
        "8寸 搪胶蘑菇钉",
        "7寸娃娃",
        "太空沙套装",
        "脚蹬工程车",
        "捏捏乐钥匙扣",

        # English (new additions)
        "plastic toy car",
        "remote control helicopter",
        "inflatable beach ball",
        "music keychain",
        "sand play set",
        # Common product searches
        "plastic dinosaur toy",
        "wireless bluetooth speaker",
        "4K action camera",
        "stainless steel water bottle",
        "baby stroller with cup holder",

        # Question-style / natural language
        "what is the best smartphone under 500 dollars",
        "how to clean a laptop screen",
        "where can I buy organic coffee beans",

        # With digits / special characters
        "USB-C to HDMI adapter 4K",
        "LED strip lights 16.4ft",
        "Nintendo Switch OLED model",
        "iPhone 15 Pro Max case",

        # Short phrases
        "gaming mouse",
        "mechanical keyboard",
        "wireless earbuds",

        # Long-tail queries
        "rechargeable AA batteries with charger",
        "foldable picnic blanket waterproof",

        # Attribute combinations
        "women's running shoes size 8",
        "men's cotton t-shirt crew neck",

        # Other languages (kept verbatim for multilingual testing)
        "свет USB с пультом дистанционного управления красочные",  # Russian
    ]

    for raw in sample_queries:
        extracted = demo.extract_keywords(raw)
        print(f"{raw:30} => {extracted}")
tests/test_search_rerank_window.py
@@ -299,6 +299,73 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): @@ -299,6 +299,73 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
299 assert loaded.rerank.enabled is True 299 assert loaded.rerank.enabled is True
300 300
301 301
def test_config_loader_parses_named_rerank_instances(tmp_path: Path):
    """Named rerank instances (ports, backends) survive a config round-trip."""
    from config.loader import AppConfigLoader

    translation_section = {
        "service_url": "http://localhost:6005",
        "timeout_sec": 3.0,
        "default_model": "dummy-model",
        "default_scene": "general",
        "cache": {"ttl_seconds": 60, "sliding_expiration": True},
        "capabilities": {
            "dummy-model": {
                "enabled": True,
                "backend": "llm",
                "model": "dummy-model",
                "base_url": "http://localhost:6005/v1",
                "timeout_sec": 3.0,
                "use_cache": True,
            }
        },
    }
    embedding_section = {
        "provider": "http",
        "providers": {"http": {"text_base_url": "http://localhost:6005", "image_base_url": "http://localhost:6008"}},
        "backend": "tei",
        "backends": {"tei": {"base_url": "http://localhost:8080", "model_id": "dummy-embedding-model"}},
    }
    rerank_section = {
        "provider": "http",
        "providers": {
            "http": {
                "instances": {
                    "default": {"service_url": "http://localhost:6007/rerank"},
                    "fine": {"service_url": "http://localhost:6009/rerank"},
                }
            }
        },
        "default_instance": "default",
        "instances": {
            "default": {"port": 6007, "backend": "qwen3_vllm_score"},
            "fine": {"port": 6009, "backend": "bge"},
        },
        "backends": {
            "bge": {"model_name": "BAAI/bge-reranker-v2-m3"},
            "qwen3_vllm_score": {"model_name": "Qwen/Qwen3-Reranker-0.6B"},
        },
    }
    raw_config = {
        "es_index_name": "test_products",
        "field_boosts": {"title.en": 3.0},
        "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}],
        "query_config": {"supported_languages": ["en"], "default_language": "en"},
        "services": {
            "translation": translation_section,
            "embedding": embedding_section,
            "rerank": rerank_section,
        },
        "spu_config": {"enabled": False},
        "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []},
    }
    cfg_file = tmp_path / "config.yaml"
    cfg_file.write_text(yaml.safe_dump(raw_config), encoding="utf-8")

    app_config = AppConfigLoader(config_file=cfg_file).load(validate=False)

    assert app_config.services.rerank.default_instance == "default"
    fine_instance = app_config.services.rerank.get_instance("fine")
    assert fine_instance.port == 6009
    assert fine_instance.backend == "bge"
  368 +
302 def test_searcher_reranks_top_window_by_default(monkeypatch): 369 def test_searcher_reranks_top_window_by_default(monkeypatch):
303 es_client = _FakeESClient() 370 es_client = _FakeESClient()
304 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client) 371 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client)
@@ -311,11 +378,18 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): @@ -311,11 +378,18 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
311 378
312 called: Dict[str, Any] = {"count": 0, "docs": 0} 379 called: Dict[str, Any] = {"count": 0, "docs": 0}
313 380
  381 + def _fake_run_lightweight_rerank(**kwargs):
  382 + hits = kwargs["es_hits"]
  383 + for idx, hit in enumerate(hits):
  384 + hit["_fine_score"] = float(len(hits) - idx)
  385 + return [hit["_fine_score"] for hit in hits], {"stage": "fine"}, []
  386 +
314 def _fake_run_rerank(**kwargs): 387 def _fake_run_rerank(**kwargs):
315 called["count"] += 1 388 called["count"] += 1
316 called["docs"] = len(kwargs["es_response"]["hits"]["hits"]) 389 called["docs"] = len(kwargs["es_response"]["hits"]["hits"])
317 return kwargs["es_response"], None, [] 390 return kwargs["es_response"], None, []
318 391
  392 + monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _fake_run_lightweight_rerank)
319 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank) 393 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
320 394
321 result = searcher.search( 395 result = searcher.search(
@@ -328,22 +402,101 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): @@ -328,22 +402,101 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
328 ) 402 )
329 403
330 assert called["count"] == 1 404 assert called["count"] == 1
331 - # 应当对配置的 rerank_window 条文档做重排预取  
332 - window = searcher.config.rerank.rerank_window  
333 - assert called["docs"] == window 405 + assert called["docs"] == searcher.config.rerank.rerank_window
334 assert es_client.calls[0]["from_"] == 0 406 assert es_client.calls[0]["from_"] == 0
335 - assert es_client.calls[0]["size"] == window 407 + assert es_client.calls[0]["size"] == searcher.config.coarse_rank.input_window
336 assert es_client.calls[0]["include_named_queries_score"] is True 408 assert es_client.calls[0]["include_named_queries_score"] is True
337 - assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]}  
338 - assert len(es_client.calls) == 2  
339 - assert es_client.calls[1]["size"] == 10 409 + assert es_client.calls[0]["body"]["_source"] is False
  410 + assert len(es_client.calls) == 3
  411 + assert es_client.calls[1]["size"] == max(
  412 + searcher.config.coarse_rank.output_window,
  413 + searcher.config.rerank.rerank_window,
  414 + )
340 assert es_client.calls[1]["from_"] == 0 415 assert es_client.calls[1]["from_"] == 0
341 - assert es_client.calls[1]["body"]["query"]["ids"]["values"] == [str(i) for i in range(20, 30)] 416 + assert es_client.calls[2]["size"] == 10
  417 + assert es_client.calls[2]["from_"] == 0
  418 + assert es_client.calls[2]["body"]["query"]["ids"]["values"] == [str(i) for i in range(20, 30)]
342 assert len(result.results) == 10 419 assert len(result.results) == 10
343 assert result.results[0].spu_id == "20" 420 assert result.results[0].spu_id == "20"
344 assert result.results[0].brief == "brief-20" 421 assert result.results[0].brief == "brief-20"
345 422
346 423
def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch):
    """Debug output should expose per-stage ranking-funnel data end to end."""
    fake_es = _FakeESClient(total_hits=120)
    searcher = _build_searcher(_build_search_config(rerank_enabled=True, rerank_window=20), fake_es)
    ctx = create_request_context(reqid="t-debug", uid="u-debug")

    monkeypatch.setattr(
        "search.searcher.get_tenant_config_loader",
        lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
    )

    def _stub_lightweight_rerank(**kwargs):
        # Assign descending fine scores by input order, then resort by them.
        candidates = kwargs["es_hits"]
        total = len(candidates)
        fine_scores = []
        fine_debug = []
        for position, hit in enumerate(candidates):
            fine = float(total - position)
            hit["_fine_score"] = fine
            fine_scores.append(fine)
            fine_debug.append(
                {
                    "doc_id": hit["_id"],
                    "fine_score": fine,
                    "rerank_input": {"doc_preview": f"product-{hit['_id']}"},
                }
            )
        candidates.sort(key=lambda item: item["_fine_score"], reverse=True)
        return fine_scores, {"model": "fine-bge"}, fine_debug

    def _stub_rerank(**kwargs):
        # Stamp fixed rerank/fused scores and emit matching debug rows.
        response = kwargs["es_response"]
        reranked = response["hits"]["hits"]
        fused_rows = []
        for position, hit in enumerate(reranked):
            hit["_rerank_score"] = 10.0 - position
            hit["_fused_score"] = 100.0 - position
            hit["_text_score"] = hit.get("_score", 0.0)
            hit["_knn_score"] = 0.0
            fused_rows.append(
                {
                    "doc_id": hit["_id"],
                    "rerank_score": hit["_rerank_score"],
                    "fine_score": hit.get("_fine_score"),
                    "text_score": hit["_text_score"],
                    "knn_score": 0.0,
                    "rerank_factor": 1.0,
                    "fine_factor": 1.0,
                    "text_factor": 1.0,
                    "knn_factor": 1.0,
                    "fused_score": hit["_fused_score"],
                    "matched_queries": {},
                    "rerank_input": {"doc_preview": f"product-{hit['_id']}"},
                }
            )
        return response, {"model": "final-reranker"}, fused_rows

    monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _stub_lightweight_rerank)
    monkeypatch.setattr("search.rerank_client.run_rerank", _stub_rerank)

    result = searcher.search(
        query="toy",
        tenant_id="162",
        from_=0,
        size=5,
        context=ctx,
        enable_rerank=True,
        debug=True,
    )

    funnel_summary = result.debug_info["ranking_funnel"]
    assert funnel_summary["fine_rank"]["docs_out"] == 80
    assert funnel_summary["rerank"]["docs_out"] == 20
    top_funnel = result.debug_info["per_result"][0]["ranking_funnel"]
    assert top_funnel["es_recall"]["rank"] is not None
    assert top_funnel["coarse_rank"]["score"] is not None
    assert top_funnel["fine_rank"]["score"] is not None
    assert top_funnel["rerank"]["rerank_score"] is not None
  499 +
347 def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch): 500 def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch):
348 es_client = _FakeESClient() 501 es_client = _FakeESClient()
349 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client) 502 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client)
@@ -353,6 +506,10 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch): @@ -353,6 +506,10 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch):
353 "search.searcher.get_tenant_config_loader", 506 "search.searcher.get_tenant_config_loader",
354 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), 507 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
355 ) 508 )
  509 + monkeypatch.setattr(
  510 + "search.rerank_client.run_lightweight_rerank",
  511 + lambda **kwargs: ([1.0] * len(kwargs["es_hits"]), {"stage": "fine"}, []),
  512 + )
356 monkeypatch.setattr("search.rerank_client.run_rerank", lambda **kwargs: (kwargs["es_response"], None, [])) 513 monkeypatch.setattr("search.rerank_client.run_rerank", lambda **kwargs: (kwargs["es_response"], None, []))
357 514
358 searcher.search( 515 searcher.search(
@@ -365,7 +522,8 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch): @@ -365,7 +522,8 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch):
365 rerank_doc_template="{title} {vendor} {brief}", 522 rerank_doc_template="{title} {vendor} {brief}",
366 ) 523 )
367 524
368 - assert es_client.calls[0]["body"]["_source"] == {"includes": ["brief", "title", "vendor"]} 525 + assert es_client.calls[0]["body"]["_source"] is False
  526 + assert es_client.calls[1]["body"]["_source"] == {"includes": ["brief", "title", "vendor"]}
369 527
370 528
371 def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_active(monkeypatch): 529 def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_active(monkeypatch):
@@ -378,6 +536,10 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a @@ -378,6 +536,10 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a
378 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), 536 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
379 ) 537 )
380 monkeypatch.setattr( 538 monkeypatch.setattr(
  539 + "search.rerank_client.run_lightweight_rerank",
  540 + lambda **kwargs: ([1.0] * len(kwargs["es_hits"]), {"stage": "fine"}, []),
  541 + )
  542 + monkeypatch.setattr(
381 "search.rerank_client.run_rerank", 543 "search.rerank_client.run_rerank",
382 lambda **kwargs: (kwargs["es_response"], None, []), 544 lambda **kwargs: (kwargs["es_response"], None, []),
383 ) 545 )
@@ -414,7 +576,8 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a @@ -414,7 +576,8 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a
414 enable_rerank=None, 576 enable_rerank=None,
415 ) 577 )
416 578
417 - assert es_client.calls[0]["body"]["_source"] == { 579 + assert es_client.calls[0]["body"]["_source"] is False
  580 + assert es_client.calls[1]["body"]["_source"] == {
418 "includes": ["option1_name", "option2_name", "option3_name", "skus", "title"] 581 "includes": ["option1_name", "option2_name", "option3_name", "skus", "title"]
419 } 582 }
420 583