Commit 8c8b9d840a72ba73e1ab1d219cbf1bca1e2ad8ce

Authored by tangwang
1 parent ceaf6d03

ES 拉取 coarse_rank.input_window 条 -> 粗排按 text/knn 融合裁到

coarse_rank.output_window -> 再做 SKU 选择和 title suffix ->
精排调用轻量 reranker 裁到 fine_rank.output_window -> 最终重排调用现有
reranker,并在最终融合里加入 fine_score。同时把 reranker client/provider
改成了按 service_profile 选不同 service_url,这样 fine/final
可以共用同一套服务代码,只起不同实例。
config/__init__.py
@@ -4,6 +4,9 @@ from config.config_loader import ConfigLoader, ConfigurationError @@ -4,6 +4,9 @@ from config.config_loader import ConfigLoader, ConfigurationError
4 from config.loader import AppConfigLoader, get_app_config, reload_app_config 4 from config.loader import AppConfigLoader, get_app_config, reload_app_config
5 from config.schema import ( 5 from config.schema import (
6 AppConfig, 6 AppConfig,
  7 + CoarseRankConfig,
  8 + CoarseRankFusionConfig,
  9 + FineRankConfig,
7 FunctionScoreConfig, 10 FunctionScoreConfig,
8 IndexConfig, 11 IndexConfig,
9 QueryConfig, 12 QueryConfig,
@@ -31,8 +34,11 @@ from config.utils import get_domain_fields, get_match_fields_for_index @@ -31,8 +34,11 @@ from config.utils import get_domain_fields, get_match_fields_for_index
31 __all__ = [ 34 __all__ = [
32 "AppConfig", 35 "AppConfig",
33 "AppConfigLoader", 36 "AppConfigLoader",
  37 + "CoarseRankConfig",
  38 + "CoarseRankFusionConfig",
34 "ConfigLoader", 39 "ConfigLoader",
35 "ConfigurationError", 40 "ConfigurationError",
  41 + "FineRankConfig",
36 "FunctionScoreConfig", 42 "FunctionScoreConfig",
37 "IndexConfig", 43 "IndexConfig",
38 "QueryConfig", 44 "QueryConfig",
config/config.yaml
@@ -228,15 +228,40 @@ function_score: @@ -228,15 +228,40 @@ function_score:
228 boost_mode: "multiply" 228 boost_mode: "multiply"
229 functions: [] 229 functions: []
230 230
  231 +# 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
  232 +coarse_rank:
  233 + enabled: true
  234 + input_window: 700
  235 + output_window: 240
  236 + fusion:
  237 + text_bias: 0.1
  238 + text_exponent: 0.35
  239 + knn_text_weight: 1.0
  240 + knn_image_weight: 1.0
  241 + knn_tie_breaker: 0.1
  242 + knn_bias: 0.6
  243 + knn_exponent: 0.0
  244 +
  245 +# 精排配置(轻量 reranker)
  246 +fine_rank:
  247 + enabled: true
  248 + input_window: 240
  249 + output_window: 80
  250 + timeout_sec: 10.0
  251 + rerank_query_template: "{query}"
  252 + rerank_doc_template: "{title}"
  253 + service_profile: "fine"
  254 +
231 # 重排配置(provider/URL 在 services.rerank) 255 # 重排配置(provider/URL 在 services.rerank)
232 rerank: 256 rerank:
233 enabled: true 257 enabled: true
234 - rerank_window: 400 258 + rerank_window: 80
235 timeout_sec: 15.0 259 timeout_sec: 15.0
236 weight_es: 0.4 260 weight_es: 0.4
237 weight_ai: 0.6 261 weight_ai: 0.6
238 rerank_query_template: "{query}" 262 rerank_query_template: "{query}"
239 rerank_doc_template: "{title}" 263 rerank_doc_template: "{title}"
  264 + service_profile: "default"
240 # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项) 265 # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项)
241 # 其中 knn_score 先做一层 dis_max: 266 # 其中 knn_score 先做一层 dis_max:
242 # max(knn_text_weight * text_knn, knn_image_weight * image_knn) 267 # max(knn_text_weight * text_knn, knn_image_weight * image_knn)
@@ -244,6 +269,8 @@ rerank: @@ -244,6 +269,8 @@ rerank:
244 fusion: 269 fusion:
245 rerank_bias: 0.00001 270 rerank_bias: 0.00001
246 rerank_exponent: 1.0 271 rerank_exponent: 1.0
  272 + fine_bias: 0.00001
  273 + fine_exponent: 1.0
247 text_bias: 0.1 274 text_bias: 0.1
248 text_exponent: 0.35 275 text_exponent: 0.35
249 knn_text_weight: 1.0 276 knn_text_weight: 1.0
@@ -399,6 +426,9 @@ services: @@ -399,6 +426,9 @@ services:
399 http: 426 http:
400 base_url: "http://127.0.0.1:6007" 427 base_url: "http://127.0.0.1:6007"
401 service_url: "http://127.0.0.1:6007/rerank" 428 service_url: "http://127.0.0.1:6007/rerank"
  429 + service_urls:
  430 + default: "http://127.0.0.1:6007/rerank"
  431 + fine: "http://127.0.0.1:6009/rerank"
402 request: 432 request:
403 max_docs: 1000 433 max_docs: 1000
404 normalize: true 434 normalize: true
@@ -27,10 +27,13 @@ except Exception: # pragma: no cover @@ -27,10 +27,13 @@ except Exception: # pragma: no cover
27 from config.schema import ( 27 from config.schema import (
28 AppConfig, 28 AppConfig,
29 AssetsConfig, 29 AssetsConfig,
  30 + CoarseRankConfig,
  31 + CoarseRankFusionConfig,
30 ConfigMetadata, 32 ConfigMetadata,
31 DatabaseSettings, 33 DatabaseSettings,
32 ElasticsearchSettings, 34 ElasticsearchSettings,
33 EmbeddingServiceConfig, 35 EmbeddingServiceConfig,
  36 + FineRankConfig,
34 FunctionScoreConfig, 37 FunctionScoreConfig,
35 IndexConfig, 38 IndexConfig,
36 InfrastructureConfig, 39 InfrastructureConfig,
@@ -464,6 +467,11 @@ class AppConfigLoader: @@ -464,6 +467,11 @@ class AppConfigLoader:
464 ) 467 )
465 468
466 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {} 469 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {}
  470 + coarse_rank_cfg = raw.get("coarse_rank") if isinstance(raw.get("coarse_rank"), dict) else {}
  471 + coarse_fusion_raw = (
  472 + coarse_rank_cfg.get("fusion") if isinstance(coarse_rank_cfg.get("fusion"), dict) else {}
  473 + )
  474 + fine_rank_cfg = raw.get("fine_rank") if isinstance(raw.get("fine_rank"), dict) else {}
467 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {} 475 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {}
468 fusion_raw = rerank_cfg.get("fusion") if isinstance(rerank_cfg.get("fusion"), dict) else {} 476 fusion_raw = rerank_cfg.get("fusion") if isinstance(rerank_cfg.get("fusion"), dict) else {}
469 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {} 477 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {}
@@ -477,6 +485,33 @@ class AppConfigLoader: @@ -477,6 +485,33 @@ class AppConfigLoader:
477 boost_mode=str(function_score_cfg.get("boost_mode") or "multiply"), 485 boost_mode=str(function_score_cfg.get("boost_mode") or "multiply"),
478 functions=list(function_score_cfg.get("functions") or []), 486 functions=list(function_score_cfg.get("functions") or []),
479 ), 487 ),
  488 + coarse_rank=CoarseRankConfig(
  489 + enabled=bool(coarse_rank_cfg.get("enabled", True)),
  490 + input_window=int(coarse_rank_cfg.get("input_window", 700)),
  491 + output_window=int(coarse_rank_cfg.get("output_window", 240)),
  492 + fusion=CoarseRankFusionConfig(
  493 + text_bias=float(coarse_fusion_raw.get("text_bias", 0.1)),
  494 + text_exponent=float(coarse_fusion_raw.get("text_exponent", 0.35)),
  495 + knn_text_weight=float(coarse_fusion_raw.get("knn_text_weight", 1.0)),
  496 + knn_image_weight=float(coarse_fusion_raw.get("knn_image_weight", 1.0)),
  497 + knn_tie_breaker=float(coarse_fusion_raw.get("knn_tie_breaker", 0.0)),
  498 + knn_bias=float(coarse_fusion_raw.get("knn_bias", 0.6)),
  499 + knn_exponent=float(coarse_fusion_raw.get("knn_exponent", 0.2)),
  500 + ),
  501 + ),
  502 + fine_rank=FineRankConfig(
  503 + enabled=bool(fine_rank_cfg.get("enabled", True)),
  504 + input_window=int(fine_rank_cfg.get("input_window", 240)),
  505 + output_window=int(fine_rank_cfg.get("output_window", 80)),
  506 + timeout_sec=float(fine_rank_cfg.get("timeout_sec", 10.0)),
  507 + rerank_query_template=str(fine_rank_cfg.get("rerank_query_template") or "{query}"),
  508 + rerank_doc_template=str(fine_rank_cfg.get("rerank_doc_template") or "{title}"),
  509 + service_profile=(
  510 + str(v)
  511 + if (v := fine_rank_cfg.get("service_profile")) not in (None, "")
  512 + else "fine"
  513 + ),
  514 + ),
480 rerank=RerankConfig( 515 rerank=RerankConfig(
481 enabled=bool(rerank_cfg.get("enabled", True)), 516 enabled=bool(rerank_cfg.get("enabled", True)),
482 rerank_window=int(rerank_cfg.get("rerank_window", 384)), 517 rerank_window=int(rerank_cfg.get("rerank_window", 384)),
@@ -485,6 +520,11 @@ class AppConfigLoader: @@ -485,6 +520,11 @@ class AppConfigLoader:
485 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)), 520 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)),
486 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"), 521 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"),
487 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"), 522 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"),
  523 + service_profile=(
  524 + str(v)
  525 + if (v := rerank_cfg.get("service_profile")) not in (None, "")
  526 + else None
  527 + ),
488 fusion=RerankFusionConfig( 528 fusion=RerankFusionConfig(
489 rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)), 529 rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)),
490 rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)), 530 rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)),
@@ -495,6 +535,8 @@ class AppConfigLoader: @@ -495,6 +535,8 @@ class AppConfigLoader:
495 knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)), 535 knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)),
496 knn_bias=float(fusion_raw.get("knn_bias", 0.6)), 536 knn_bias=float(fusion_raw.get("knn_bias", 0.6)),
497 knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)), 537 knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)),
  538 + fine_bias=float(fusion_raw.get("fine_bias", 0.00001)),
  539 + fine_exponent=float(fusion_raw.get("fine_exponent", 1.0)),
498 ), 540 ),
499 ), 541 ),
500 spu_config=SPUConfig( 542 spu_config=SPUConfig(
@@ -117,6 +117,48 @@ class RerankFusionConfig: @@ -117,6 +117,48 @@ class RerankFusionConfig:
117 knn_tie_breaker: float = 0.0 117 knn_tie_breaker: float = 0.0
118 knn_bias: float = 0.6 118 knn_bias: float = 0.6
119 knn_exponent: float = 0.2 119 knn_exponent: float = 0.2
  120 + fine_bias: float = 0.00001
  121 + fine_exponent: float = 1.0
  122 +
  123 +
  124 +@dataclass(frozen=True)
  125 +class CoarseRankFusionConfig:
  126 + """
  127 + Multiplicative fusion without model score:
  128 + fused = (max(text, 0) + text_bias) ** text_exponent
  129 + * (max(knn, 0) + knn_bias) ** knn_exponent
  130 + """
  131 +
  132 + text_bias: float = 0.1
  133 + text_exponent: float = 0.35
  134 + knn_text_weight: float = 1.0
  135 + knn_image_weight: float = 1.0
  136 + knn_tie_breaker: float = 0.0
  137 + knn_bias: float = 0.6
  138 + knn_exponent: float = 0.2
  139 +
  140 +
  141 +@dataclass(frozen=True)
  142 +class CoarseRankConfig:
  143 + """Search-time coarse ranking configuration."""
  144 +
  145 + enabled: bool = True
  146 + input_window: int = 700
  147 + output_window: int = 240
  148 + fusion: CoarseRankFusionConfig = field(default_factory=CoarseRankFusionConfig)
  149 +
  150 +
  151 +@dataclass(frozen=True)
  152 +class FineRankConfig:
  153 + """Search-time lightweight rerank configuration."""
  154 +
  155 + enabled: bool = True
  156 + input_window: int = 240
  157 + output_window: int = 80
  158 + timeout_sec: float = 10.0
  159 + rerank_query_template: str = "{query}"
  160 + rerank_doc_template: str = "{title}"
  161 + service_profile: Optional[str] = "fine"
120 162
121 163
122 @dataclass(frozen=True) 164 @dataclass(frozen=True)
@@ -130,6 +172,7 @@ class RerankConfig: @@ -130,6 +172,7 @@ class RerankConfig:
130 weight_ai: float = 0.6 172 weight_ai: float = 0.6
131 rerank_query_template: str = "{query}" 173 rerank_query_template: str = "{query}"
132 rerank_doc_template: str = "{title}" 174 rerank_doc_template: str = "{title}"
  175 + service_profile: Optional[str] = None
133 fusion: RerankFusionConfig = field(default_factory=RerankFusionConfig) 176 fusion: RerankFusionConfig = field(default_factory=RerankFusionConfig)
134 177
135 178
@@ -141,6 +184,8 @@ class SearchConfig: @@ -141,6 +184,8 @@ class SearchConfig:
141 indexes: List[IndexConfig] = field(default_factory=list) 184 indexes: List[IndexConfig] = field(default_factory=list)
142 query_config: QueryConfig = field(default_factory=QueryConfig) 185 query_config: QueryConfig = field(default_factory=QueryConfig)
143 function_score: FunctionScoreConfig = field(default_factory=FunctionScoreConfig) 186 function_score: FunctionScoreConfig = field(default_factory=FunctionScoreConfig)
  187 + coarse_rank: CoarseRankConfig = field(default_factory=CoarseRankConfig)
  188 + fine_rank: FineRankConfig = field(default_factory=FineRankConfig)
144 rerank: RerankConfig = field(default_factory=RerankConfig) 189 rerank: RerankConfig = field(default_factory=RerankConfig)
145 spu_config: SPUConfig = field(default_factory=SPUConfig) 190 spu_config: SPUConfig = field(default_factory=SPUConfig)
146 es_index_name: str = "search_products" 191 es_index_name: str = "search_products"
config/services_config.py
@@ -71,13 +71,20 @@ def get_rerank_backend_config() -> Tuple[str, Dict[str, Any]]: @@ -71,13 +71,20 @@ def get_rerank_backend_config() -> Tuple[str, Dict[str, Any]]:
71 return cfg.backend, cfg.get_backend_config() 71 return cfg.backend, cfg.get_backend_config()
72 72
73 73
74 -def get_rerank_base_url() -> str: 74 +def get_rerank_base_url(profile: str | None = None) -> str:
75 provider_cfg = get_app_config().services.rerank.get_provider_config() 75 provider_cfg = get_app_config().services.rerank.get_provider_config()
76 - base = provider_cfg.get("service_url") or provider_cfg.get("base_url") 76 + base = None
  77 + profile_name = str(profile).strip() if profile else ""
  78 + if profile_name:
  79 + service_urls = provider_cfg.get("service_urls")
  80 + if isinstance(service_urls, dict):
  81 + base = service_urls.get(profile_name)
  82 + if not base:
  83 + base = provider_cfg.get("service_url") or provider_cfg.get("base_url")
77 if not base: 84 if not base:
78 raise ValueError("Rerank service URL is not configured") 85 raise ValueError("Rerank service URL is not configured")
79 return str(base).rstrip("/") 86 return str(base).rstrip("/")
80 87
81 88
82 -def get_rerank_service_url() -> str:  
83 - return get_rerank_base_url() 89 +def get_rerank_service_url(profile: str | None = None) -> str:
  90 + return get_rerank_base_url(profile=profile)
context/request_context.py
@@ -26,6 +26,8 @@ class RequestContextStage(Enum): @@ -26,6 +26,8 @@ class RequestContextStage(Enum):
26 # ES 按 ID 回源分页详情回填 26 # ES 按 ID 回源分页详情回填
27 ELASTICSEARCH_PAGE_FILL = "elasticsearch_page_fill" 27 ELASTICSEARCH_PAGE_FILL = "elasticsearch_page_fill"
28 RESULT_PROCESSING = "result_processing" 28 RESULT_PROCESSING = "result_processing"
  29 + COARSE_RANKING = "coarse_ranking"
  30 + FINE_RANKING = "fine_ranking"
29 RERANKING = "reranking" 31 RERANKING = "reranking"
30 # 款式意图 SKU 预筛选(StyleSkuSelector.prepare_hits) 32 # 款式意图 SKU 预筛选(StyleSkuSelector.prepare_hits)
31 STYLE_SKU_PREPARE_HITS = "style_sku_prepare_hits" 33 STYLE_SKU_PREPARE_HITS = "style_sku_prepare_hits"
@@ -407,4 +409,4 @@ def clear_current_request_context() -> None: @@ -407,4 +409,4 @@ def clear_current_request_context() -> None:
407 reset_request_log_context(tokens) 409 reset_request_log_context(tokens)
408 delattr(threading.current_thread(), 'request_log_tokens') 410 delattr(threading.current_thread(), 'request_log_tokens')
409 if hasattr(threading.current_thread(), 'request_context'): 411 if hasattr(threading.current_thread(), 'request_context'):
410 - delattr(threading.current_thread(), 'request_context')  
411 \ No newline at end of file 412 \ No newline at end of file
  413 + delattr(threading.current_thread(), 'request_context')
docs/TODO-ES能力提升.md renamed to docs/issue-2026-03-21-ES能力提升.md
docs/TODO-意图判断-done.md renamed to docs/issue-2026-03-21-意图判断-done03-24.md
docs/issue-2026-03-26-ES文本搜索-补充多模态knn放入should-done-0327.md 0 → 100644
@@ -0,0 +1,72 @@ @@ -0,0 +1,72 @@
  1 +目前knn跟query里面是并列的层级,如下:
  2 +{
  3 + "size": 400,
  4 + "from": 0,
  5 + "query": {
  6 + "bool": {
  7 + "must": [...
  8 + ],
  9 + }
  10 + },
  11 + "knn": {
  12 + "field": "title_embedding",
  13 + "query_vector": [...],
  14 + "k": 120,
  15 + "num_candidates": 400,
  16 + "boost": 2,
  17 + "_name": "knn_query"
  18 + },
  19 +其中query的结构是这样的:
  20 +"query": {
  21 + "bool": {
  22 + "should": [
  23 + {
  24 + "bool": {
  25 + "_name": "base_query",
  26 +\# 原始query
  27 + }
  28 + },
  29 + {
  30 + "bool": {
  31 + "_name": "base_query_trans_zh",
  32 +\# 翻译query。有可能是base_query_trans_en,也有可能两者都有
  33 + "boost": 0.75
  34 + }
  35 + }
  36 + ],
  37 + "minimum_should_match": 1
  38 + }
  39 + },
  40 +我想把knn放到should里面,和base_query、base_query_trans_zh并列。
  41 +另外,现在过滤是在knn里面单独加了一遍:
  42 + "knn": {
  43 + "field": "title_embedding",
  44 + "query_vector": [...],
  45 + "k": 120,
  46 + "num_candidates": 400,
  47 + "boost": 2,
  48 + "_name": "knn_query",
  49 + "filter": {
  50 + "range": {
  51 + "min_price": {
  52 + "gte": 100,
  53 + "lt": 200
  54 + }
  55 + }
  56 + }
  57 + }
  58 +现在不需要了。因为knn在query的内层了。共用过滤。
  59 +
  60 +另外:
  61 +我需要再增加一个knn。
  62 +需要参考文本embedding获得的逻辑,
  63 +通过
  64 +curl -X POST "http://localhost:6008/embed/clip_text?normalize=true&priority=1" \
  65 + -H "Content-Type: application/json" \
  66 + -d '["纯棉短袖", "street tee"]'
  67 +(用 POST /embed/clip_text 生成多模态文本向量。和文本embedding获取方法类似。注意思考代码如何精简,不要冗余。)
  68 +得到文本的多模态embedding。
  69 +然后在这里补充一个多模态embedding,寻找图片相似的结果,对应的商品图片字段为image_embedding.vector。
  70 +重排融合:之前有knn的配置bias和exponent。现在,文本和图片的embedding相似度需要融合,融合方式是dis_max,因此需要配置:
  71 +1)各自的权重和tie_breaker
  72 +2)整个向量方面的权重(bias和exponent)
0 \ No newline at end of file 73 \ No newline at end of file
docs/TODO-keywords限定-done.txt renamed to docs/issue-2026-03-27-keywords限定-done-0327.txt
docs/TODO.md renamed to docs/issue.md
docs/TODO.txt renamed to docs/issue.txt
providers/rerank.py
@@ -57,7 +57,7 @@ class HttpRerankProvider: @@ -57,7 +57,7 @@ class HttpRerankProvider:
57 return None, None 57 return None, None
58 58
59 59
60 -def create_rerank_provider() -> HttpRerankProvider: 60 +def create_rerank_provider(service_profile: Optional[str] = None) -> HttpRerankProvider:
61 """Create rerank provider from services config.""" 61 """Create rerank provider from services config."""
62 cfg = get_rerank_config() 62 cfg = get_rerank_config()
63 provider = (cfg.provider or "http").strip().lower() 63 provider = (cfg.provider or "http").strip().lower()
@@ -65,5 +65,5 @@ def create_rerank_provider() -> HttpRerankProvider: @@ -65,5 +65,5 @@ def create_rerank_provider() -> HttpRerankProvider:
65 if provider != "http": 65 if provider != "http":
66 raise ValueError(f"Unsupported rerank provider: {provider}") 66 raise ValueError(f"Unsupported rerank provider: {provider}")
67 67
68 - url = get_rerank_service_url() 68 + url = get_rerank_service_url(profile=service_profile)
69 return HttpRerankProvider(service_url=url) 69 return HttpRerankProvider(service_url=url)
scripts/experiments/english_query_bucketing_demo.py 0 → 100644
@@ -0,0 +1,554 @@ @@ -0,0 +1,554 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Offline experiment: English query bucketing (intersection / boost / drop).
  4 +
  5 +Scheme A: spaCy noun_chunks + head + lemma + rule buckets
  6 +Scheme B: spaCy NP candidates + KeyBERT rerank → intersection vs boost
  7 +Scheme C: YAKE + spaCy noun/POS filter
  8 +
  9 +Run (after deps): python scripts/experiments/english_query_bucketing_demo.py
  10 +Optional: pip install -r scripts/experiments/requirements_query_bucketing_experiments.txt
  11 +"""
  12 +
  13 +from __future__ import annotations
  14 +
  15 +import argparse
  16 +import json
  17 +import re
  18 +import sys
  19 +from dataclasses import dataclass, field
  20 +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
  21 +
  22 +
  23 +# --- shared -----------------------------------------------------------------
  24 +
  25 +_POSSESSIVE_RE = re.compile(r"(['’]s)\b", re.IGNORECASE)
  26 +
  27 +
  28 +def normalize_query(s: str) -> str:
  29 + s = (s or "").strip()
  30 + s = _POSSESSIVE_RE.sub("", s)
  31 + return s
  32 +
  33 +
  34 +@dataclass
  35 +class BucketResult:
  36 + intersection_terms: List[str] = field(default_factory=list)
  37 + boost_terms: List[str] = field(default_factory=list)
  38 + drop_terms: List[str] = field(default_factory=list)
  39 +
  40 + def to_dict(self) -> Dict[str, Any]:
  41 + return {
  42 + "intersection_terms": self.intersection_terms,
  43 + "boost_terms": self.boost_terms,
  44 + "drop_terms": self.drop_terms,
  45 + }
  46 +
  47 +
  48 +def _dedupe_preserve(seq: Sequence[str]) -> List[str]:
  49 + seen: Set[str] = set()
  50 + out: List[str] = []
  51 + for x in seq:
  52 + k = x.strip().lower()
  53 + if not k or k in seen:
  54 + continue
  55 + seen.add(k)
  56 + out.append(x.strip())
  57 + return out
  58 +
  59 +
  60 +# --- Scheme A: spaCy + rules -------------------------------------------------
  61 +
  62 +WEAK_BOOST_ADJS = frozenset(
  63 + {
  64 + "best",
  65 + "good",
  66 + "great",
  67 + "new",
  68 + "free",
  69 + "cheap",
  70 + "top",
  71 + "fine",
  72 + "real",
  73 + }
  74 +)
  75 +
  76 +FUNCTIONAL_DEP = frozenset(
  77 + {
  78 + "det",
  79 + "aux",
  80 + "auxpass",
  81 + "prep",
  82 + "mark",
  83 + "expl",
  84 + "cc",
  85 + "punct",
  86 + "case",
  87 + }
  88 +)
  89 +
  90 +# Second pobj under list-like INTJ roots often encodes audience/size (boost, not must-match).
  91 +_DEMOGRAPHIC_NOUNS = frozenset(
  92 + {
  93 + "women",
  94 + "woman",
  95 + "men",
  96 + "man",
  97 + "kids",
  98 + "kid",
  99 + "boys",
  100 + "boy",
  101 + "girls",
  102 + "girl",
  103 + "baby",
  104 + "babies",
  105 + "toddler",
  106 + "adult",
  107 + "adults",
  108 + }
  109 +)
  110 +
  111 +
  112 +def _lemma_lower(t) -> str:
  113 + return ((t.lemma_ or t.text) or "").lower().strip()
  114 +
  115 +
  116 +def _surface_lower(t) -> str:
  117 + """Lowercased surface form (keeps plural 'headphones' vs lemma 'headphone')."""
  118 + return (t.text or "").lower().strip()
  119 +
  120 +
  121 +_PRICE_PREP_LEMMAS = frozenset({"under", "over", "below", "above", "within", "between", "near"})
  122 +
  123 +
  124 +def bucket_scheme_a_spacy(query: str, nlp) -> BucketResult:
  125 + """
  126 + Dependency-first bucketing: noun_chunks alone mis-parse verbal queries like
  127 + "noise cancelling headphones" (ROOT verb). Prefer dobj / ROOT product nouns,
  128 + purpose PP (for …), and brand INTJ/PROPN.
  129 + """
  130 + import spacy # noqa: F401
  131 +
  132 + # Do not strip possessives ('s) before spaCy: it changes the parse tree
  133 + # (e.g. "women's running shoes size 8" vs "women running shoes size 8").
  134 + text = (query or "").strip()
  135 + doc = nlp(text)
  136 + intersection: Set[str] = set()
  137 + boost: Set[str] = set()
  138 + drop: Set[str] = set()
  139 +
  140 + stops = nlp.Defaults.stop_words | WEAK_BOOST_ADJS
  141 +
  142 + def mark_drop(t) -> None:
  143 + if not t.is_space and not t.is_punct:
  144 + drop.add(t.text.lower())
  145 +
  146 + # --- Drops: function words / question words ---
  147 + for token in doc:
  148 + if token.is_space or token.is_punct:
  149 + continue
  150 + lem = _lemma_lower(token)
  151 + if token.pos_ in ("DET", "PRON", "AUX", "ADP", "PART", "SCONJ", "CCONJ"):
  152 + mark_drop(token)
  153 + continue
  154 + if token.dep_ in FUNCTIONAL_DEP:
  155 + mark_drop(token)
  156 + continue
  157 + if token.pos_ == "ADV" and lem in {"where", "how", "when", "why", "what", "which"}:
  158 + mark_drop(token)
  159 + continue
  160 + if token.text.lower() in ("'s", "’s"):
  161 + mark_drop(token)
  162 + continue
  163 + if lem in stops and token.pos_ != "PROPN":
  164 + mark_drop(token)
  165 +
  166 + pobj_heads_to_demote: Set[int] = set()
  167 +
  168 + # Purpose / context: "for airplane travel" → boost phrase; demote bare head from intersection
  169 + for token in doc:
  170 + if token.dep_ == "prep" and token.text.lower() == "for":
  171 + for c in token.children:
  172 + if c.dep_ == "pobj" and c.pos_ in ("NOUN", "PROPN"):
  173 + span = doc[c.left_edge.i : c.right_edge.i + 1]
  174 + phrase = span.text.strip().lower()
  175 + if phrase:
  176 + boost.add(phrase)
  177 + pobj_heads_to_demote.add(c.i)
  178 +
  179 + # Price / range: "under 500 dollars" → boost only
  180 + for token in doc:
  181 + if token.dep_ != "prep" or _lemma_lower(token) not in _PRICE_PREP_LEMMAS:
  182 + continue
  183 + for c in token.children:
  184 + if c.dep_ == "pobj" and c.pos_ in ("NOUN", "PROPN"):
  185 + span = doc[c.left_edge.i : c.right_edge.i + 1]
  186 + phrase = span.text.strip().lower()
  187 + if phrase:
  188 + boost.add(phrase)
  189 + pobj_heads_to_demote.add(c.i)
  190 +
  191 + # Direct object product nouns (handles "noise cancelling … headphones")
  192 + for token in doc:
  193 + if token.dep_ == "dobj" and token.pos_ in ("NOUN", "PROPN"):
  194 + if token.i in pobj_heads_to_demote:
  195 + continue
  196 + intersection.add(_surface_lower(token))
  197 +
  198 + # Copular questions / definitions: "what is the best smartphone …"
  199 + for token in doc:
  200 + if token.dep_ != "nsubj" or token.pos_ not in ("NOUN", "PROPN"):
  201 + continue
  202 + h = token.head
  203 + if h.pos_ == "AUX" and h.dep_ == "ROOT":
  204 + intersection.add(_surface_lower(token))
  205 +
  206 + # Verbal ROOT: modifiers left of dobj → boost phrase (e.g. "noise cancelling")
  207 + roots = [t for t in doc if t.dep_ == "ROOT"]
  208 + if roots and roots[0].pos_ == "VERB":
  209 + root_v = roots[0]
  210 + for t in doc:
  211 + if t.dep_ != "dobj" or t.pos_ not in ("NOUN", "PROPN"):
  212 + continue
  213 + if t.i in pobj_heads_to_demote:
  214 + continue
  215 + parts: List[str] = []
  216 + for x in doc[: t.i]:
  217 + if x.is_punct or x.is_space:
  218 + continue
  219 + if x.pos_ in ("DET", "ADP", "PRON"):
  220 + continue
  221 + xl = _lemma_lower(x)
  222 + if xl in stops:
  223 + continue
  224 + parts.append(x.text.lower())
  225 + if len(parts) >= 1:
  226 + boost.add(" ".join(parts))
  227 +
  228 + # Brand / query lead: INTJ/PROPN ROOT (e.g. Nike …)
  229 + for token in doc:
  230 + if token.dep_ == "ROOT" and token.pos_ in ("INTJ", "PROPN"):
  231 + intersection.add(_surface_lower(token))
  232 + if token.pos_ == "PROPN":
  233 + intersection.add(_surface_lower(token))
  234 +
  235 + _DIMENSION_ROOTS = frozenset({"size", "width", "length", "height", "weight"})
  236 +
  237 + # "women's running shoes size 8" → shoes ∩, "size 8" boost (not size alone)
  238 + for token in doc:
  239 + if token.dep_ != "ROOT" or token.pos_ != "NOUN":
  240 + continue
  241 + if _lemma_lower(token) not in _DIMENSION_ROOTS:
  242 + continue
  243 + for c in token.children:
  244 + if c.dep_ == "nsubj" and c.pos_ in ("NOUN", "PROPN"):
  245 + intersection.add(_surface_lower(c))
  246 + for ch in c.children:
  247 + if ch.dep_ == "compound" and ch.pos_ in ("NOUN", "VERB", "ADJ"):
  248 + boost.add(_surface_lower(ch))
  249 + # Only the dimension head + numbers (not full subtree: left_edge/right_edge is huge)
  250 + dim_parts = [token.text.lower()]
  251 + for ch in token.children:
  252 + if ch.dep_ == "nummod":
  253 + dim_parts.append(ch.text.lower())
  254 + boost.add(" ".join(dim_parts))
  255 +
  256 + # ROOT noun product (e.g. "plastic toy car")
  257 + for token in doc:
  258 + if token.dep_ == "ROOT" and token.pos_ in ("NOUN", "PROPN"):
  259 + if _lemma_lower(token) in _DIMENSION_ROOTS and any(
  260 + c.dep_ == "nsubj" and c.pos_ in ("NOUN", "PROPN") for c in token.children
  261 + ):
  262 + continue
  263 + intersection.add(_surface_lower(token))
  264 + for c in token.children:
  265 + if c.dep_ == "compound" and c.pos_ == "NOUN":
  266 + boost.add(c.text.lower())
  267 + if token.i - token.left_edge.i >= 1:
  268 + comps = [x.text.lower() for x in doc[token.left_edge.i : token.i] if x.dep_ == "compound"]
  269 + if len(comps) >= 2:
  270 + boost.add(" ".join(comps))
  271 +
  272 + # List-like INTJ head with multiple pobj: first pobj = product head, rest often demographic
  273 + for token in doc:
  274 + if token.dep_ != "ROOT" or token.pos_ not in ("INTJ", "VERB", "NOUN"):
  275 + continue
  276 + pobjs = sorted(
  277 + [c for c in token.children if c.dep_ == "pobj" and c.pos_ in ("NOUN", "PROPN")],
  278 + key=lambda x: x.i,
  279 + )
  280 + if len(pobjs) >= 2 and token.pos_ == "INTJ":
  281 + intersection.add(_surface_lower(pobjs[0]))
  282 + for extra in pobjs[1:]:
  283 + if _lemma_lower(extra) in _DEMOGRAPHIC_NOUNS:
  284 + boost.add(_surface_lower(extra))
  285 + else:
  286 + intersection.add(_surface_lower(extra))
  287 + elif len(pobjs) == 1 and token.pos_ == "INTJ":
  288 + intersection.add(_surface_lower(pobjs[0]))
  289 +
  290 + # amod under pobj (running → shoes)
  291 + for token in doc:
  292 + if token.dep_ == "amod" and token.head.pos_ in ("NOUN", "PROPN"):
  293 + if token.pos_ == "VERB":
  294 + boost.add(_surface_lower(token))
  295 + elif token.pos_ == "ADJ":
  296 + boost.add(_lemma_lower(token))
  297 +
  298 + # Genitive possessor (women's shoes → women boost)
  299 + for token in doc:
  300 + if token.dep_ == "poss" and token.head.pos_ in ("NOUN", "PROPN"):
  301 + boost.add(_surface_lower(token))
  302 +
  303 + # noun_chunks fallback when no dobj/ROOT intersection yet
  304 + if not intersection:
  305 + for chunk in doc.noun_chunks:
  306 + head = chunk.root
  307 + if head.pos_ not in ("NOUN", "PROPN"):
  308 + continue
  309 + # Price / range: "under 500 dollars" → boost, not a product head
  310 + if head.dep_ == "pobj" and head.head.dep_ == "prep":
  311 + prep = head.head
  312 + if _lemma_lower(prep) in _PRICE_PREP_LEMMAS:
  313 + boost.add(chunk.text.strip().lower())
  314 + continue
  315 + hl = _surface_lower(head)
  316 + if hl:
  317 + intersection.add(hl)
  318 + for t in chunk:
  319 + if t == head or t.pos_ != "PROPN":
  320 + continue
  321 + intersection.add(_surface_lower(t))
  322 + for t in chunk:
  323 + if t == head:
  324 + continue
  325 + if t.pos_ == "ADJ" or (t.pos_ == "NOUN" and t.dep_ == "compound"):
  326 + boost.add(_lemma_lower(t))
  327 +
  328 + # Remove demoted pobj heads from intersection (purpose / price clause)
  329 + for i in pobj_heads_to_demote:
  330 + t = doc[i]
  331 + intersection.discard(_lemma_lower(t))
  332 + intersection.discard(_surface_lower(t))
  333 +
  334 + boost -= intersection
  335 + boost = {b for b in boost if b.lower() not in stops and b.strip()}
  336 +
  337 + return BucketResult(
  338 + intersection_terms=_dedupe_preserve(sorted(intersection)),
  339 + boost_terms=_dedupe_preserve(sorted(boost)),
  340 + drop_terms=_dedupe_preserve(sorted(drop)),
  341 + )
  342 +
  343 +
  344 +# --- Scheme B: spaCy candidates + KeyBERT -----------------------------------
  345 +
  346 +def _spacy_np_candidates(doc) -> List[str]:
  347 + phrases: List[str] = []
  348 + for chunk in doc.noun_chunks:
  349 + t = chunk.text.strip()
  350 + if len(t) < 2:
  351 + continue
  352 + root = chunk.root
  353 + if root.pos_ not in ("NOUN", "PROPN"):
  354 + continue
  355 + phrases.append(t)
  356 + return phrases
  357 +
  358 +
  359 +def bucket_scheme_b_keybert(query: str, nlp, kw_model) -> BucketResult:
  360 + text = (query or "").strip()
  361 + doc = nlp(text)
  362 + candidates = _spacy_np_candidates(doc)
  363 + if not candidates:
  364 + candidates = [text]
  365 +
  366 + # KeyBERT API: candidate_keywords=... (sentence-transformers backend)
  367 + try:
  368 + keywords = kw_model.extract_keywords(
  369 + text,
  370 + candidates=candidates,
  371 + top_n=min(8, max(4, len(candidates) + 2)),
  372 + )
  373 + except TypeError:
  374 + keywords = kw_model.extract_keywords(
  375 + text,
  376 + candidate_keywords=candidates,
  377 + top_n=min(8, max(4, len(candidates) + 2)),
  378 + )
  379 + ranked = [k[0].lower().strip() for k in (keywords or []) if k and k[0].strip()]
  380 +
  381 + intersection: List[str] = []
  382 + boost: List[str] = []
  383 + if ranked:
  384 + intersection.append(ranked[0])
  385 + if len(ranked) > 1:
  386 + boost.extend(ranked[1:])
  387 + # Add remaining spaCy heads not in lists
  388 + heads: List[str] = []
  389 + for ch in doc.noun_chunks:
  390 + h = ch.root
  391 + if h.pos_ in ("NOUN", "PROPN"):
  392 + heads.append(_surface_lower(h))
  393 + for h in heads:
  394 + if h and h not in intersection and h not in boost:
  395 + boost.append(h)
  396 + if not intersection and heads:
  397 + intersection.append(heads[0])
  398 + boost = [x for x in boost if x != heads[0]]
  399 +
  400 + drop_tokens: Set[str] = set()
  401 + stops = nlp.Defaults.stop_words | WEAK_BOOST_ADJS
  402 + for token in doc:
  403 + if token.is_punct:
  404 + continue
  405 + lem = (token.lemma_ or token.text).lower()
  406 + if token.pos_ in ("DET", "ADP", "PART", "PRON", "AUX") or lem in stops:
  407 + drop_tokens.add(token.text.lower())
  408 +
  409 + return BucketResult(
  410 + intersection_terms=_dedupe_preserve(intersection),
  411 + boost_terms=_dedupe_preserve(boost),
  412 + drop_terms=sorted(drop_tokens),
  413 + )
  414 +
  415 +
  416 +# --- Scheme C: YAKE + noun filter --------------------------------------------
  417 +
  418 +def bucket_scheme_c_yake(query: str, nlp, yake_extractor) -> BucketResult:
  419 + text = (query or "").strip()
  420 + doc = nlp(text)
  421 +
  422 + kws = yake_extractor.extract_keywords(text)  # usually List[Tuple[str, float]]; newer yake versions may differ
  423 +
  424 + scored: List[Tuple[str, float]] = []
  425 + if kws and isinstance(kws[0], (list, tuple)) and len(kws[0]) >= 2:
  426 + scored = [(str(a).strip(), float(b)) for a, b in kws]
  427 + else:
  428 + # fallback: some yake versions return bare keyword strings without scores
  429 + scored = [(str(x).strip(), 0.0) for x in kws]
  430 +
  431 + boost: List[str] = []
  432 + intersection: List[str] = []
  433 + for phrase, _score in sorted(scored, key=lambda x: x[1]): # lower score = more important in YAKE
  434 + phrase = phrase.lower().strip()
  435 + if not phrase or len(phrase) < 2:
  436 + continue
  437 + sub = nlp(phrase)
  438 + keep = False
  439 + head_noun = False
  440 + for t in sub:
  441 + if t.is_punct or t.is_space:
  442 + continue
  443 + if t.pos_ in ("NOUN", "PROPN"):
  444 + keep = True
  445 + if t.dep_ == "ROOT" or t == sub[-1]:
  446 + head_noun = True
  447 + if not keep:
  448 + continue
  449 + # top 1–2 important → intersection (very small)
  450 + if len(intersection) < 2 and head_noun and len(phrase.split()) <= 2:
  451 + intersection.append(phrase)
  452 + else:
  453 + boost.append(phrase)
  454 +
  455 + drop: Set[str] = set()
  456 + stops = nlp.Defaults.stop_words | WEAK_BOOST_ADJS
  457 + for token in doc:
  458 + if token.is_punct:
  459 + continue
  460 + lem = (token.lemma_ or token.text).lower()
  461 + if token.pos_ in ("DET", "ADP", "PART", "PRON", "AUX") or lem in stops:
  462 + drop.add(token.text.lower())
  463 +
  464 + return BucketResult(
  465 + intersection_terms=_dedupe_preserve(intersection),
  466 + boost_terms=_dedupe_preserve(boost),
  467 + drop_terms=sorted(drop),
  468 + )
  469 +
  470 +
  471 +# --- CLI ---------------------------------------------------------------------
  472 +
  473 +DEFAULT_QUERIES = [
  474 + "best noise cancelling headphones for airplane travel",
  475 + "nike running shoes women",
  476 + "plastic toy car",
  477 + "what is the best smartphone under 500 dollars",
  478 + "women's running shoes size 8",
  479 +]
  480 +
  481 +
  482 +def _load_spacy():
  483 + import spacy
  484 +
  485 + try:
  486 + return spacy.load("en_core_web_sm")
  487 + except OSError:
  488 + print(
  489 + "Missing model: run: python -m spacy download en_core_web_sm",
  490 + file=sys.stderr,
  491 + )
  492 + raise
  493 +
  494 +
  495 +def _load_keybert():
  496 + from keybert import KeyBERT
  497 +
  498 + # small & fast for demo; swap for larger if needed
  499 + return KeyBERT(model="paraphrase-MiniLM-L6-v2")
  500 +
  501 +
  502 +def _load_yake():
  503 + import yake
  504 +
  505 + return yake.KeywordExtractor(
  506 + lan="en",
  507 + n=3,
  508 + dedupLim=0.9,
  509 + top=20,
  510 + features=None,
  511 + )
  512 +
  513 +
  514 +def main() -> None:
  515 + parser = argparse.ArgumentParser(description="English query bucketing experiments")
  516 + parser.add_argument(
  517 + "--queries",
  518 + nargs="*",
  519 + default=DEFAULT_QUERIES,
  520 + help="Queries to run (default: built-in examples)",
  521 + )
  522 + parser.add_argument(
  523 + "--scheme",
  524 + choices=("a", "b", "c", "all"),
  525 + default="all",
  526 + )
  527 + args = parser.parse_args()
  528 +
  529 + nlp = _load_spacy()
  530 + kb = None
  531 + yk = None
  532 + if args.scheme in ("b", "all"):
  533 + kb = _load_keybert()
  534 + if args.scheme in ("c", "all"):
  535 + yk = _load_yake()
  536 +
  537 + for q in args.queries:
  538 + print("=" * 72)
  539 + print("QUERY:", q)
  540 + print("-" * 72)
  541 + if args.scheme in ("a", "all"):
  542 + ra = bucket_scheme_a_spacy(q, nlp)
  543 + print("A spaCy+rules:", json.dumps(ra.to_dict(), ensure_ascii=False))
  544 + if args.scheme in ("b", "all") and kb is not None:
  545 + rb = bucket_scheme_b_keybert(q, nlp, kb)
  546 + print("B spaCy+KeyBERT:", json.dumps(rb.to_dict(), ensure_ascii=False))
  547 + if args.scheme in ("c", "all") and yk is not None:
  548 + rc = bucket_scheme_c_yake(q, nlp, yk)
  549 + print("C YAKE+noun filter:", json.dumps(rc.to_dict(), ensure_ascii=False))
  550 + print()
  551 +
  552 +
  553 +if __name__ == "__main__":
  554 + main()
scripts/experiments/requirements_query_bucketing_experiments.txt 0 → 100644
@@ -0,0 +1,6 @@ @@ -0,0 +1,6 @@
  1 +# Optional: English query bucketing experiments
  2 +# After install: python -m spacy download en_core_web_sm
  3 +spacy>=3.7.0
  4 +keybert>=0.8.0
  5 +sentence-transformers>=2.2.0
  6 +yake>=0.4.8
scripts/temp_embed_tenant_image_urls.py 0 → 100644
@@ -0,0 +1,246 @@ @@ -0,0 +1,246 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +临时脚本:从 ES 遍历指定租户的 image_url,批量调用图片 embedding 服务。
  4 +5 进程并发,每请求最多 8 条 URL。日志打印到标准输出。
  5 +
  6 +用法:
  7 + source activate.sh # 会加载 .env,提供 ES_HOST / ES_USERNAME / ES_PASSWORD
  8 + python scripts/temp_embed_tenant_image_urls.py
  9 +
  10 +未 source 时脚本也会尝试加载项目根目录 .env。
  11 +"""
  12 +
  13 +from __future__ import annotations
  14 +
  15 +import json
  16 +import multiprocessing as mp
  17 +import os
  18 +import sys
  19 +import time
  20 +from dataclasses import dataclass
  21 +from pathlib import Path
  22 +from typing import Any, Dict, List, Optional, Tuple
  23 +from urllib.parse import urlencode
  24 +
  25 +import requests
  26 +from elasticsearch import Elasticsearch
  27 +from elasticsearch.helpers import scan
  28 +
  29 +# 未 source activate.sh 时仍可从项目根 .env 加载(与 ES_HOST / ES_USERNAME / ES_PASSWORD 一致)
  30 +try:
  31 + from dotenv import load_dotenv
  32 +
  33 + _ROOT = Path(__file__).resolve().parents[1]
  34 + load_dotenv(_ROOT / ".env")
  35 +except ImportError:
  36 + pass
  37 +
  38 +# ---------------------------------------------------------------------------
  39 +# 配置(可按需修改;默认与 .env 中 ES_* 一致,见 config/loader.py)
  40 +# ---------------------------------------------------------------------------
  41 +
  42 +# Elasticsearch(默认读环境变量:ES_HOST、ES_USERNAME、ES_PASSWORD)
  43 +ES_HOST: str = os.getenv("ES_HOST", "http://localhost:9200")
  44 +ES_USERNAME: Optional[str] = os.getenv("ES_USERNAME") or None
  45 +ES_PASSWORD: Optional[str] = os.getenv("ES_PASSWORD") or None
  46 +ES_INDEX: str = "search_products_tenant_163"
  47 +
  48 +# 租户(keyword 字段,字符串)
  49 +TENANT_ID: str = "163"
  50 +
  51 +# 图片 embedding 服务(与文档 7.1.2 一致)
  52 +EMBED_BASE_URL: str = "http://localhost:6008"
  53 +EMBED_PATH: str = "/embed/image"
  54 +EMBED_QUERY: Dict[str, Any] = {
  55 + "normalize": "true",
  56 + "priority": "1", # 与对接文档 curl 一致;批量离线可改为 "0"
  57 +}
  58 +
  59 +# 并发与批量
  60 +WORKER_PROCESSES: int = 5
  61 +URLS_PER_REQUEST: int = 8
  62 +
  63 +# HTTP
  64 +REQUEST_TIMEOUT_SEC: float = 120.0
  65 +
  66 +# ES scan(elasticsearch-py 8+/ES 9:`scan(..., query=...)` 会展开为 `client.search(**kwargs)`,
  67 +# 必须传与 Search API 一致的参数名,例如顶层 `query` = DSL 的 query 子句,不要用裸 `match_all`。)
  68 +SCROLL_CHUNK_SIZE: int = 500
  69 +
  70 +# ---------------------------------------------------------------------------
  71 +
  72 +
  73 +@dataclass
  74 +class BatchResult:
  75 + batch_index: int
  76 + url_count: int
  77 + ok: bool
  78 + status_code: Optional[int]
  79 + elapsed_sec: float
  80 + error: Optional[str] = None
  81 +
  82 +
  83 +def _build_embed_url() -> str:
  84 + q = urlencode(EMBED_QUERY)
  85 + return f"{EMBED_BASE_URL.rstrip('/')}{EMBED_PATH}?{q}"
  86 +
  87 +
  88 +def _process_batch(payload: Tuple[int, List[str]]) -> BatchResult:
  89 + batch_index, urls = payload
  90 + if not urls:
  91 + return BatchResult(batch_index, 0, True, None, 0.0, None)
  92 +
  93 + url = _build_embed_url()
  94 + t0 = time.perf_counter()
  95 + try:
  96 + resp = requests.post(
  97 + url,
  98 + headers={"Content-Type": "application/json"},
  99 + data=json.dumps(urls),
  100 + timeout=REQUEST_TIMEOUT_SEC,
  101 + )
  102 + elapsed = time.perf_counter() - t0
  103 + ok = resp.status_code == 200
  104 + err: Optional[str] = None
  105 + if ok:
  106 + try:
  107 + body = resp.json()
  108 + if not isinstance(body, list) or len(body) != len(urls):
  109 + ok = False
  110 + err = f"response length mismatch or not list: got {type(body).__name__}"
  111 + except Exception as e:
  112 + ok = False
  113 + err = f"json decode: {e}"
  114 + else:
  115 + err = resp.text[:500] if resp.text else f"HTTP {resp.status_code}"
  116 +
  117 + worker = mp.current_process().name
  118 + status = resp.status_code  # NOTE(review): `if resp` is False for 4xx/5xx (Response truthiness); resp is always bound here
  119 + ms = elapsed * 1000.0
  120 + if ok:
  121 + print(
  122 + f"[embed] worker={worker} batch={batch_index} urls={len(urls)} "
  123 + f"http={status} elapsed_ms={ms:.2f} ok",
  124 + flush=True,
  125 + )
  126 + else:
  127 + print(
  128 + f"[embed] worker={worker} batch={batch_index} urls={len(urls)} "
  129 + f"http={status} elapsed_ms={ms:.2f} FAIL err={err}",
  130 + flush=True,
  131 + )
  132 + return BatchResult(batch_index, len(urls), ok, status, elapsed, err)
  133 + except Exception as e:
  134 + elapsed = time.perf_counter() - t0
  135 + worker = mp.current_process().name
  136 + print(
  137 + f"[embed] worker={worker} batch={batch_index} urls={len(urls)} "
  138 + f"http=None elapsed_ms={elapsed * 1000.0:.2f} FAIL err={e}",
  139 + flush=True,
  140 + )
  141 + return BatchResult(batch_index, len(urls), False, None, elapsed, str(e))
  142 +
  143 +
  144 +def _iter_image_urls(es: Elasticsearch) -> List[str]:
  145 + # 对应 search body: { "query": { "term": { "tenant_id": "..." } } }
  146 + search_kw: Dict[str, Any] = {
  147 + "query": {"term": {"tenant_id": TENANT_ID}},
  148 + "source_includes": ["image_url"],
  149 + }
  150 + urls: List[str] = []
  151 + for hit in scan(
  152 + es,
  153 + query=search_kw,
  154 + index=ES_INDEX,
  155 + size=SCROLL_CHUNK_SIZE,
  156 + ):
  157 + src = hit.get("_source") or {}
  158 + u = src.get("image_url")
  159 + if u is None:
  160 + continue
  161 + s = str(u).strip()
  162 + if not s:
  163 + continue
  164 + urls.append(s)
  165 + return urls
  166 +
  167 +
  168 +def main() -> int:
  169 + t_wall0 = time.perf_counter()
  170 +
  171 + auth = None
  172 + if ES_USERNAME and ES_PASSWORD:
  173 + auth = (ES_USERNAME, ES_PASSWORD)
  174 +
  175 + es = Elasticsearch([ES_HOST], basic_auth=auth)
  176 + if not es.ping():
  177 + print("ERROR: Elasticsearch ping failed", file=sys.stderr)
  178 + return 1
  179 +
  180 + print(
  181 + f"[main] ES={ES_HOST} basic_auth={'yes' if auth else 'no'} "
  182 + f"index={ES_INDEX} tenant_id={TENANT_ID} "
  183 + f"workers={WORKER_PROCESSES} urls_per_req={URLS_PER_REQUEST}",
  184 + flush=True,
  185 + )
  186 + print(f"[main] embed_url={_build_embed_url()}", flush=True)
  187 +
  188 + t_fetch0 = time.perf_counter()
  189 + all_urls = _iter_image_urls(es)
  190 + fetch_elapsed = time.perf_counter() - t_fetch0
  191 + print(
  192 + f"[main] collected image_url count={len(all_urls)} es_scan_elapsed_sec={fetch_elapsed:.3f}",
  193 + flush=True,
  194 + )
  195 +
  196 + batches: List[List[str]] = []
  197 + for i in range(0, len(all_urls), URLS_PER_REQUEST):
  198 + batches.append(all_urls[i : i + URLS_PER_REQUEST])
  199 +
  200 + if not batches:
  201 + print("[main] no URLs to process; done.", flush=True)
  202 + return 0
  203 +
  204 + tasks = [(idx, batch) for idx, batch in enumerate(batches)]
  205 + print(f"[main] batches={len(tasks)} (parallel processes={WORKER_PROCESSES})", flush=True)
  206 +
  207 + t_run0 = time.perf_counter()
  208 + total_urls = 0
  209 + success_urls = 0
  210 + failed_urls = 0
  211 + ok_batches = 0
  212 + fail_batches = 0
  213 + sum_req_sec = 0.0
  214 +
  215 + with mp.Pool(processes=WORKER_PROCESSES) as pool:
  216 + for res in pool.imap_unordered(_process_batch, tasks, chunksize=1):
  217 + total_urls += res.url_count
  218 + sum_req_sec += res.elapsed_sec
  219 + if res.ok:
  220 + ok_batches += 1
  221 + success_urls += res.url_count
  222 + else:
  223 + fail_batches += 1
  224 + failed_urls += res.url_count
  225 +
  226 + wall_total = time.perf_counter() - t_wall0
  227 + run_elapsed = time.perf_counter() - t_run0
  228 +
  229 + print("---------- summary ----------", flush=True)
  230 + print(f"tenant_id: {TENANT_ID}", flush=True)
  231 + print(f"total documents w/ url: {len(all_urls)}", flush=True)
  232 + print(f"total batches: {len(batches)}", flush=True)
  233 + print(f"batches succeeded: {ok_batches}", flush=True)
  234 + print(f"batches failed: {fail_batches}", flush=True)
  235 + print(f"urls (success path): {success_urls}", flush=True)
  236 + print(f"urls (failed path): {failed_urls}", flush=True)
  237 + print(f"ES scan elapsed (s): {fetch_elapsed:.3f}", flush=True)
  238 + print(f"embed phase wall (s): {run_elapsed:.3f}", flush=True)
  239 + print(f"sum request time (s): {sum_req_sec:.3f} (sequential sum, for reference)", flush=True)
  240 + print(f"total wall time (s): {wall_total:.3f}", flush=True)
  241 + print("-----------------------------", flush=True)
  242 + return 0 if fail_batches == 0 else 2
  243 +
  244 +
  245 +if __name__ == "__main__":
  246 + raise SystemExit(main())
search/rerank_client.py
@@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
10 from typing import Dict, Any, List, Optional, Tuple 10 from typing import Dict, Any, List, Optional, Tuple
11 import logging 11 import logging
12 12
13 -from config.schema import RerankFusionConfig 13 +from config.schema import CoarseRankFusionConfig, RerankFusionConfig
14 from providers import create_rerank_provider 14 from providers import create_rerank_provider
15 15
16 logger = logging.getLogger(__name__) 16 logger = logging.getLogger(__name__)
@@ -120,6 +120,7 @@ def call_rerank_service( @@ -120,6 +120,7 @@ def call_rerank_service(
120 docs: List[str], 120 docs: List[str],
121 timeout_sec: float = DEFAULT_TIMEOUT_SEC, 121 timeout_sec: float = DEFAULT_TIMEOUT_SEC,
122 top_n: Optional[int] = None, 122 top_n: Optional[int] = None,
  123 + service_profile: Optional[str] = None,
123 ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]]]: 124 ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]]]:
124 """ 125 """
125 调用重排服务 POST /rerank,返回分数列表与 meta。 126 调用重排服务 POST /rerank,返回分数列表与 meta。
@@ -128,7 +129,7 @@ def call_rerank_service( @@ -128,7 +129,7 @@ def call_rerank_service(
128 if not docs: 129 if not docs:
129 return [], {} 130 return [], {}
130 try: 131 try:
131 - client = create_rerank_provider() 132 + client = create_rerank_provider(service_profile=service_profile)
132 return client.rerank(query=query, docs=docs, timeout_sec=timeout_sec, top_n=top_n) 133 return client.rerank(query=query, docs=docs, timeout_sec=timeout_sec, top_n=top_n)
133 except Exception as e: 134 except Exception as e:
134 logger.warning("Rerank request failed: %s", e, exc_info=True) 135 logger.warning("Rerank request failed: %s", e, exc_info=True)
@@ -240,24 +241,105 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa @@ -240,24 +241,105 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
240 241
241 def _multiply_fusion_factors( 242 def _multiply_fusion_factors(
242 rerank_score: float, 243 rerank_score: float,
  244 + fine_score: Optional[float],
243 text_score: float, 245 text_score: float,
244 knn_score: float, 246 knn_score: float,
245 fusion: RerankFusionConfig, 247 fusion: RerankFusionConfig,
246 -) -> Tuple[float, float, float, float]:  
247 - """(rerank_factor, text_factor, knn_factor, fused_without_style_boost).""" 248 +) -> Tuple[float, float, float, float, float]:
  249 + """(rerank_factor, fine_factor, text_factor, knn_factor, fused_without_style_boost)."""
248 r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent 250 r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent
  251 + if fine_score is None:
  252 + f = 1.0
  253 + else:
  254 + f = (max(fine_score, 0.0) + fusion.fine_bias) ** fusion.fine_exponent
249 t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent 255 t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
250 k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent 256 k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
251 - return r, t, k, r * t * k 257 + return r, f, t, k, r * f * t * k
  258 +
  259 +
  260 +def _multiply_coarse_fusion_factors(
  261 + text_score: float,
  262 + knn_score: float,
  263 + fusion: CoarseRankFusionConfig,
  264 +) -> Tuple[float, float, float]:
  265 + text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
  266 + knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
  267 + return text_factor, knn_factor, text_factor * knn_factor
252 268
253 269
254 def _has_selected_sku(hit: Dict[str, Any]) -> bool: 270 def _has_selected_sku(hit: Dict[str, Any]) -> bool:
255 return bool(str(hit.get("_style_rerank_suffix") or "").strip()) 271 return bool(str(hit.get("_style_rerank_suffix") or "").strip())
256 272
257 273
  274 +def coarse_resort_hits(
  275 + es_hits: List[Dict[str, Any]],
  276 + fusion: Optional[CoarseRankFusionConfig] = None,
  277 + debug: bool = False,
  278 +) -> List[Dict[str, Any]]:
  279 + """Coarse rank with text/knn fusion only."""
  280 + if not es_hits:
  281 + return []
  282 +
  283 + f = fusion or CoarseRankFusionConfig()
  284 + coarse_debug: List[Dict[str, Any]] = []  # collected only when debug is True (old `[] if debug else []` was a no-op conditional)
  285 + for hit in es_hits:
  286 + es_score = _to_score(hit.get("_score"))
  287 + matched_queries = hit.get("matched_queries")
  288 + knn_components = _collect_knn_score_components(matched_queries, f)
  289 + text_components = _collect_text_score_components(matched_queries, es_score)
  290 + text_score = text_components["text_score"]
  291 + knn_score = knn_components["knn_score"]
  292 + text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors(
  293 + text_score=text_score,
  294 + knn_score=knn_score,
  295 + fusion=f,
  296 + )
  297 +
  298 + hit["_text_score"] = text_score
  299 + hit["_knn_score"] = knn_score
  300 + hit["_text_knn_score"] = knn_components["text_knn_score"]
  301 + hit["_image_knn_score"] = knn_components["image_knn_score"]
  302 + hit["_coarse_score"] = coarse_score
  303 +
  304 + if debug:
  305 + coarse_debug.append(
  306 + {
  307 + "doc_id": hit.get("_id"),
  308 + "es_score": es_score,
  309 + "text_score": text_score,
  310 + "text_source_score": text_components["source_score"],
  311 + "text_translation_score": text_components["translation_score"],
  312 + "text_weighted_source_score": text_components["weighted_source_score"],
  313 + "text_weighted_translation_score": text_components["weighted_translation_score"],
  314 + "text_primary_score": text_components["primary_text_score"],
  315 + "text_support_score": text_components["support_text_score"],
  316 + "text_score_fallback_to_es": (
  317 + text_score == es_score
  318 + and text_components["source_score"] <= 0.0
  319 + and text_components["translation_score"] <= 0.0
  320 + ),
  321 + "text_knn_score": knn_components["text_knn_score"],
  322 + "image_knn_score": knn_components["image_knn_score"],
  323 + "weighted_text_knn_score": knn_components["weighted_text_knn_score"],
  324 + "weighted_image_knn_score": knn_components["weighted_image_knn_score"],
  325 + "knn_primary_score": knn_components["primary_knn_score"],
  326 + "knn_support_score": knn_components["support_knn_score"],
  327 + "knn_score": knn_score,
  328 + "coarse_text_factor": text_factor,
  329 + "coarse_knn_factor": knn_factor,
  330 + "coarse_score": coarse_score,
  331 + "matched_queries": matched_queries,
  332 + }
  333 + )
  334 +
  335 + es_hits.sort(key=lambda h: h.get("_coarse_score", h.get("_score", 0.0)), reverse=True)
  336 + return coarse_debug
  337 +
  338 +
258 def fuse_scores_and_resort( 339 def fuse_scores_and_resort(
259 es_hits: List[Dict[str, Any]], 340 es_hits: List[Dict[str, Any]],
260 rerank_scores: List[float], 341 rerank_scores: List[float],
  342 + fine_scores: Optional[List[float]] = None,
261 weight_es: float = DEFAULT_WEIGHT_ES, 343 weight_es: float = DEFAULT_WEIGHT_ES,
262 weight_ai: float = DEFAULT_WEIGHT_AI, 344 weight_ai: float = DEFAULT_WEIGHT_AI,
263 fusion: Optional[RerankFusionConfig] = None, 345 fusion: Optional[RerankFusionConfig] = None,
@@ -290,6 +372,8 @@ def fuse_scores_and_resort( @@ -290,6 +372,8 @@ def fuse_scores_and_resort(
290 n = len(es_hits) 372 n = len(es_hits)
291 if n == 0 or len(rerank_scores) != n: 373 if n == 0 or len(rerank_scores) != n:
292 return [] 374 return []
  375 + if fine_scores is not None and len(fine_scores) != n:
  376 + fine_scores = None
293 377
294 f = fusion or RerankFusionConfig() 378 f = fusion or RerankFusionConfig()
295 fused_debug: List[Dict[str, Any]] = [] if debug else [] 379 fused_debug: List[Dict[str, Any]] = [] if debug else []
@@ -297,13 +381,14 @@ def fuse_scores_and_resort( @@ -297,13 +381,14 @@ def fuse_scores_and_resort(
297 for idx, hit in enumerate(es_hits): 381 for idx, hit in enumerate(es_hits):
298 es_score = _to_score(hit.get("_score")) 382 es_score = _to_score(hit.get("_score"))
299 rerank_score = _to_score(rerank_scores[idx]) 383 rerank_score = _to_score(rerank_scores[idx])
  384 + fine_score = _to_score(fine_scores[idx]) if fine_scores is not None else _to_score(hit.get("_fine_score"))
300 matched_queries = hit.get("matched_queries") 385 matched_queries = hit.get("matched_queries")
301 knn_components = _collect_knn_score_components(matched_queries, f) 386 knn_components = _collect_knn_score_components(matched_queries, f)
302 knn_score = knn_components["knn_score"] 387 knn_score = knn_components["knn_score"]
303 text_components = _collect_text_score_components(matched_queries, es_score) 388 text_components = _collect_text_score_components(matched_queries, es_score)
304 text_score = text_components["text_score"] 389 text_score = text_components["text_score"]
305 - rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(  
306 - rerank_score, text_score, knn_score, f 390 + rerank_factor, fine_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(
  391 + rerank_score, (fine_score if (fine_scores is not None or "_fine_score" in hit) else None), text_score, knn_score, f
307 ) 392 )
308 sku_selected = _has_selected_sku(hit) 393 sku_selected = _has_selected_sku(hit)
309 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 394 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
@@ -311,6 +396,7 @@ def fuse_scores_and_resort( @@ -311,6 +396,7 @@ def fuse_scores_and_resort(
311 396
312 hit["_original_score"] = hit.get("_score") 397 hit["_original_score"] = hit.get("_score")
313 hit["_rerank_score"] = rerank_score 398 hit["_rerank_score"] = rerank_score
  399 + hit["_fine_score"] = fine_score
314 hit["_text_score"] = text_score 400 hit["_text_score"] = text_score
315 hit["_knn_score"] = knn_score 401 hit["_knn_score"] = knn_score
316 hit["_text_knn_score"] = knn_components["text_knn_score"] 402 hit["_text_knn_score"] = knn_components["text_knn_score"]
@@ -330,6 +416,7 @@ def fuse_scores_and_resort( @@ -330,6 +416,7 @@ def fuse_scores_and_resort(
330 "doc_id": hit.get("_id"), 416 "doc_id": hit.get("_id"),
331 "es_score": es_score, 417 "es_score": es_score,
332 "rerank_score": rerank_score, 418 "rerank_score": rerank_score,
  419 + "fine_score": fine_score,
333 "text_score": text_score, 420 "text_score": text_score,
334 "text_source_score": text_components["source_score"], 421 "text_source_score": text_components["source_score"],
335 "text_translation_score": text_components["translation_score"], 422 "text_translation_score": text_components["translation_score"],
@@ -350,6 +437,7 @@ def fuse_scores_and_resort( @@ -350,6 +437,7 @@ def fuse_scores_and_resort(
350 "knn_support_score": knn_components["support_knn_score"], 437 "knn_support_score": knn_components["support_knn_score"],
351 "knn_score": knn_score, 438 "knn_score": knn_score,
352 "rerank_factor": rerank_factor, 439 "rerank_factor": rerank_factor,
  440 + "fine_factor": fine_factor,
353 "text_factor": text_factor, 441 "text_factor": text_factor,
354 "knn_factor": knn_factor, 442 "knn_factor": knn_factor,
355 "style_intent_selected_sku": sku_selected, 443 "style_intent_selected_sku": sku_selected,
@@ -381,6 +469,8 @@ def run_rerank( @@ -381,6 +469,8 @@ def run_rerank(
381 debug: bool = False, 469 debug: bool = False,
382 fusion: Optional[RerankFusionConfig] = None, 470 fusion: Optional[RerankFusionConfig] = None,
383 style_intent_selected_sku_boost: float = 1.2, 471 style_intent_selected_sku_boost: float = 1.2,
  472 + fine_scores: Optional[List[float]] = None,
  473 + service_profile: Optional[str] = None,
384 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: 474 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
385 """ 475 """
386 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 476 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
@@ -404,6 +494,7 @@ def run_rerank( @@ -404,6 +494,7 @@ def run_rerank(
404 docs, 494 docs,
405 timeout_sec=timeout_sec, 495 timeout_sec=timeout_sec,
406 top_n=top_n, 496 top_n=top_n,
  497 + service_profile=service_profile,
407 ) 498 )
408 499
409 if scores is None or len(scores) != len(hits): 500 if scores is None or len(scores) != len(hits):
@@ -412,6 +503,7 @@ def run_rerank( @@ -412,6 +503,7 @@ def run_rerank(
412 fused_debug = fuse_scores_and_resort( 503 fused_debug = fuse_scores_and_resort(
413 hits, 504 hits,
414 scores, 505 scores,
  506 + fine_scores=fine_scores,
415 weight_es=weight_es, 507 weight_es=weight_es,
416 weight_ai=weight_ai, 508 weight_ai=weight_ai,
417 fusion=fusion, 509 fusion=fusion,
@@ -427,3 +519,53 @@ def run_rerank( @@ -427,3 +519,53 @@ def run_rerank(
427 es_response["hits"]["max_score"] = top 519 es_response["hits"]["max_score"] = top
428 520
429 return es_response, meta, fused_debug 521 return es_response, meta, fused_debug
  522 +
  523 +
  524 +def run_lightweight_rerank(
  525 + query: str,
  526 + es_hits: List[Dict[str, Any]],
  527 + language: str = "zh",
  528 + timeout_sec: float = DEFAULT_TIMEOUT_SEC,
  529 + rerank_query_template: str = "{query}",
  530 + rerank_doc_template: str = "{title}",
  531 + top_n: Optional[int] = None,
  532 + debug: bool = False,
  533 + service_profile: Optional[str] = "fine",
  534 +) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
  535 + """Call lightweight reranker and attach scores to hits without final fusion."""
  536 + if not es_hits:
  537 + return [], {}, []
  538 +
  539 + query_text = str(rerank_query_template).format_map({"query": query})
  540 + rerank_debug_rows: Optional[List[Dict[str, Any]]] = [] if debug else None
  541 + docs = build_docs_from_hits(
  542 + es_hits,
  543 + language=language,
  544 + doc_template=rerank_doc_template,
  545 + debug_rows=rerank_debug_rows,
  546 + )
  547 + scores, meta = call_rerank_service(
  548 + query_text,
  549 + docs,
  550 + timeout_sec=timeout_sec,
  551 + top_n=top_n,
  552 + service_profile=service_profile,
  553 + )
  554 + if scores is None or len(scores) != len(es_hits):
  555 + return None, None, []
  556 +
  557 + debug_rows: List[Dict[str, Any]] = []  # populated only when debug is True (both branches of the old conditional were identical)
  558 + for idx, hit in enumerate(es_hits):
  559 + fine_score = _to_score(scores[idx])
  560 + hit["_fine_score"] = fine_score
  561 + if debug:
  562 + row: Dict[str, Any] = {
  563 + "doc_id": hit.get("_id"),
  564 + "fine_score": fine_score,
  565 + }
  566 + if rerank_debug_rows is not None and idx < len(rerank_debug_rows):
  567 + row["rerank_input"] = rerank_debug_rows[idx]
  568 + debug_rows.append(row)
  569 +
  570 + es_hits.sort(key=lambda h: h.get("_fine_score", 0.0), reverse=True)
  571 + return scores, meta, debug_rows
search/searcher.py
@@ -251,6 +251,30 @@ class Searcher: @@ -251,6 +251,30 @@ class Searcher:
251 return hits_by_id, int(resp.get("took", 0) or 0) 251 return hits_by_id, int(resp.get("took", 0) or 0)
252 252
253 @staticmethod 253 @staticmethod
  254 + def _restore_hits_in_doc_order(
  255 + doc_ids: List[str],
  256 + hits_by_id: Dict[str, Dict[str, Any]],
  257 + ) -> List[Dict[str, Any]]:
  258 + ordered_hits: List[Dict[str, Any]] = []
  259 + for doc_id in doc_ids:
  260 + hit = hits_by_id.get(str(doc_id))
  261 + if hit is not None:
  262 + ordered_hits.append(hit)
  263 + return ordered_hits
  264 +
  265 + @staticmethod
  266 + def _merge_source_specs(*source_specs: Any) -> Optional[Dict[str, Any]]:
  267 + includes: set[str] = set()
  268 + for source_spec in source_specs:
  269 + if not isinstance(source_spec, dict):
  270 + continue
  271 + for field_name in source_spec.get("includes") or []:
  272 + includes.add(str(field_name))
  273 + if not includes:
  274 + return None
  275 + return {"includes": sorted(includes)}
  276 +
  277 + @staticmethod
254 def _has_style_intent(parsed_query: Optional[ParsedQuery]) -> bool: 278 def _has_style_intent(parsed_query: Optional[ParsedQuery]) -> bool:
255 profile = getattr(parsed_query, "style_intent_profile", None) 279 profile = getattr(parsed_query, "style_intent_profile", None)
256 return bool(getattr(profile, "is_active", False)) 280 return bool(getattr(profile, "is_active", False))
@@ -327,20 +351,30 @@ class Searcher: @@ -327,20 +351,30 @@ class Searcher:
327 index_langs = tenant_cfg.get("index_languages") or [] 351 index_langs = tenant_cfg.get("index_languages") or []
328 enable_translation = len(index_langs) > 0 352 enable_translation = len(index_langs) > 0
329 enable_embedding = self.config.query_config.enable_text_embedding 353 enable_embedding = self.config.query_config.enable_text_embedding
  354 + coarse_cfg = self.config.coarse_rank
  355 + fine_cfg = self.config.fine_rank
330 rc = self.config.rerank 356 rc = self.config.rerank
331 effective_query_template = rerank_query_template or rc.rerank_query_template 357 effective_query_template = rerank_query_template or rc.rerank_query_template
332 effective_doc_template = rerank_doc_template or rc.rerank_doc_template 358 effective_doc_template = rerank_doc_template or rc.rerank_doc_template
  359 + fine_query_template = fine_cfg.rerank_query_template or effective_query_template
  360 + fine_doc_template = fine_cfg.rerank_doc_template or effective_doc_template
333 # 重排开关优先级:请求参数显式传值 > 服务端配置(默认开启) 361 # 重排开关优先级:请求参数显式传值 > 服务端配置(默认开启)
334 rerank_enabled_by_config = bool(rc.enabled) 362 rerank_enabled_by_config = bool(rc.enabled)
335 do_rerank = rerank_enabled_by_config if enable_rerank is None else bool(enable_rerank) 363 do_rerank = rerank_enabled_by_config if enable_rerank is None else bool(enable_rerank)
336 rerank_window = rc.rerank_window 364 rerank_window = rc.rerank_window
  365 + coarse_input_window = max(rerank_window, int(coarse_cfg.input_window))
  366 + coarse_output_window = max(rerank_window, int(coarse_cfg.output_window))
  367 + fine_input_window = max(rerank_window, int(fine_cfg.input_window))
  368 + fine_output_window = max(rerank_window, int(fine_cfg.output_window))
337 # 若开启重排且请求范围在窗口内:从 ES 取前 rerank_window 条、重排后再按 from/size 分页;否则不重排,按原 from/size 查 ES 369 # 若开启重排且请求范围在窗口内:从 ES 取前 rerank_window 条、重排后再按 from/size 分页;否则不重排,按原 from/size 查 ES
338 in_rerank_window = do_rerank and (from_ + size) <= rerank_window 370 in_rerank_window = do_rerank and (from_ + size) <= rerank_window
339 es_fetch_from = 0 if in_rerank_window else from_ 371 es_fetch_from = 0 if in_rerank_window else from_
340 - es_fetch_size = rerank_window if in_rerank_window else size 372 + es_fetch_size = coarse_input_window if in_rerank_window else size
341 373
342 es_score_normalization_factor: Optional[float] = None 374 es_score_normalization_factor: Optional[float] = None
343 initial_ranks_by_doc: Dict[str, int] = {} 375 initial_ranks_by_doc: Dict[str, int] = {}
  376 + coarse_debug_info: Optional[Dict[str, Any]] = None
  377 + fine_debug_info: Optional[Dict[str, Any]] = None
344 rerank_debug_info: Optional[Dict[str, Any]] = None 378 rerank_debug_info: Optional[Dict[str, Any]] = None
345 379
346 # Start timing 380 # Start timing
@@ -367,12 +401,19 @@ class Searcher: @@ -367,12 +401,19 @@ class Searcher:
367 'enable_rerank_request': enable_rerank, 401 'enable_rerank_request': enable_rerank,
368 'rerank_query_template': effective_query_template, 402 'rerank_query_template': effective_query_template,
369 'rerank_doc_template': effective_doc_template, 403 'rerank_doc_template': effective_doc_template,
  404 + 'fine_query_template': fine_query_template,
  405 + 'fine_doc_template': fine_doc_template,
370 'filters': filters, 406 'filters': filters,
371 'range_filters': range_filters, 407 'range_filters': range_filters,
372 'facets': facets, 408 'facets': facets,
373 'enable_translation': enable_translation, 409 'enable_translation': enable_translation,
374 'enable_embedding': enable_embedding, 410 'enable_embedding': enable_embedding,
375 'enable_rerank': do_rerank, 411 'enable_rerank': do_rerank,
  412 + 'coarse_input_window': coarse_input_window,
  413 + 'coarse_output_window': coarse_output_window,
  414 + 'fine_input_window': fine_input_window,
  415 + 'fine_output_window': fine_output_window,
  416 + 'rerank_window': rerank_window,
376 'min_score': min_score, 417 'min_score': min_score,
377 'sort_by': sort_by, 418 'sort_by': sort_by,
378 'sort_order': sort_order 419 'sort_order': sort_order
@@ -470,16 +511,12 @@ class Searcher: @@ -470,16 +511,12 @@ class Searcher:
470 # Keep requested response _source semantics for the final response fill. 511 # Keep requested response _source semantics for the final response fill.
471 response_source_spec = es_query.get("_source") 512 response_source_spec = es_query.get("_source")
472 513
473 - # In rerank window, first pass only fetches minimal fields required by rerank template. 514 + # In multi-stage rank window, first pass only needs score signals for coarse rank.
474 es_query_for_fetch = es_query 515 es_query_for_fetch = es_query
475 rerank_prefetch_source = None 516 rerank_prefetch_source = None
476 if in_rerank_window: 517 if in_rerank_window:
477 - rerank_prefetch_source = self._resolve_rerank_source_filter(  
478 - effective_doc_template,  
479 - parsed_query=parsed_query,  
480 - )  
481 es_query_for_fetch = dict(es_query) 518 es_query_for_fetch = dict(es_query)
482 - es_query_for_fetch["_source"] = rerank_prefetch_source 519 + es_query_for_fetch["_source"] = False
483 520
484 # Extract size and from from body for ES client parameters 521 # Extract size and from from body for ES client parameters
485 body_for_es = {k: v for k, v in es_query_for_fetch.items() if k not in ['size', 'from']} 522 body_for_es = {k: v for k, v in es_query_for_fetch.items() if k not in ['size', 'from']}
@@ -587,26 +624,131 @@ class Searcher: @@ -587,26 +624,131 @@ class Searcher:
587 context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY) 624 context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY)
588 625
589 style_intent_decisions: Dict[str, SkuSelectionDecision] = {} 626 style_intent_decisions: Dict[str, SkuSelectionDecision] = {}
590 - if self._has_style_intent(parsed_query) and in_rerank_window:  
591 - style_intent_decisions = self._apply_style_intent_to_hits(  
592 - es_response.get("hits", {}).get("hits") or [],  
593 - parsed_query,  
594 - context=context,  
595 - )  
596 - if style_intent_decisions: 627 + if do_rerank and in_rerank_window:
  628 + from dataclasses import asdict
  629 + from config.services_config import get_rerank_service_url
  630 + from .rerank_client import coarse_resort_hits, run_lightweight_rerank, run_rerank
  631 +
  632 + rerank_query = parsed_query.text_for_rerank() if parsed_query else query
  633 + hits = es_response.get("hits", {}).get("hits") or []
  634 +
  635 + context.start_stage(RequestContextStage.COARSE_RANKING)
  636 + try:
  637 + coarse_debug = coarse_resort_hits(
  638 + hits,
  639 + fusion=coarse_cfg.fusion,
  640 + debug=debug,
  641 + )
  642 + hits = hits[:coarse_output_window]
  643 + es_response.setdefault("hits", {})["hits"] = hits
  644 + if debug:
  645 + coarse_debug_info = {
  646 + "docs_in": es_fetch_size,
  647 + "docs_out": len(hits),
  648 + "fusion": asdict(coarse_cfg.fusion),
  649 + }
  650 + context.store_intermediate_result("coarse_rank_scores", coarse_debug)
597 context.logger.info( 651 context.logger.info(
598 - "款式意图 SKU 预筛选完成 | hits=%s",  
599 - len(style_intent_decisions), 652 + "粗排完成 | docs_in=%s | docs_out=%s",
  653 + es_fetch_size,
  654 + len(hits),
600 extra={'reqid': context.reqid, 'uid': context.uid} 655 extra={'reqid': context.reqid, 'uid': context.uid}
601 ) 656 )
  657 + finally:
  658 + context.end_stage(RequestContextStage.COARSE_RANKING)
  659 +
  660 + ranking_source_spec = self._merge_source_specs(
  661 + self._resolve_rerank_source_filter(
  662 + fine_doc_template,
  663 + parsed_query=parsed_query,
  664 + ),
  665 + self._resolve_rerank_source_filter(
  666 + effective_doc_template,
  667 + parsed_query=parsed_query,
  668 + ),
  669 + )
  670 + candidate_ids = [str(h.get("_id")) for h in hits if h.get("_id") is not None]
  671 + if candidate_ids:
  672 + details_by_id, fill_took = self._fetch_hits_by_ids(
  673 + index_name=index_name,
  674 + doc_ids=candidate_ids,
  675 + source_spec=ranking_source_spec,
  676 + )
  677 + for hit in hits:
  678 + hid = hit.get("_id")
  679 + if hid is None:
  680 + continue
  681 + detail_hit = details_by_id.get(str(hid))
  682 + if detail_hit is not None and "_source" in detail_hit:
  683 + hit["_source"] = detail_hit.get("_source") or {}
  684 + if fill_took:
  685 + es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
  686 +
  687 + if self._has_style_intent(parsed_query):
  688 + style_intent_decisions = self._apply_style_intent_to_hits(
  689 + es_response.get("hits", {}).get("hits") or [],
  690 + parsed_query,
  691 + context=context,
  692 + )
  693 + if style_intent_decisions:
  694 + context.logger.info(
  695 + "款式意图 SKU 预筛选完成 | hits=%s",
  696 + len(style_intent_decisions),
  697 + extra={'reqid': context.reqid, 'uid': context.uid}
  698 + )
  699 +
  700 + fine_scores: Optional[List[float]] = None
  701 + hits = es_response.get("hits", {}).get("hits") or []
  702 + if fine_cfg.enabled and hits:
  703 + context.start_stage(RequestContextStage.FINE_RANKING)
  704 + try:
  705 + fine_scores, fine_meta, fine_debug_rows = run_lightweight_rerank(
  706 + query=rerank_query,
  707 + es_hits=hits[:fine_input_window],
  708 + language=language,
  709 + timeout_sec=fine_cfg.timeout_sec,
  710 + rerank_query_template=fine_query_template,
  711 + rerank_doc_template=fine_doc_template,
  712 + top_n=fine_output_window,
  713 + debug=debug,
  714 + service_profile=fine_cfg.service_profile,
  715 + )
  716 + if fine_scores is not None:
  717 + hits = hits[:fine_output_window]
  718 + es_response["hits"]["hits"] = hits
  719 + if debug:
  720 + fine_debug_info = {
  721 + "service_url": get_rerank_service_url(profile=fine_cfg.service_profile),
  722 + "query_template": fine_query_template,
  723 + "doc_template": fine_doc_template,
  724 + "query_text": str(fine_query_template).format_map({"query": rerank_query}),
  725 + "docs": len(hits),
  726 + "top_n": fine_output_window,
  727 + "meta": fine_meta,
  728 + }
  729 + context.store_intermediate_result("fine_rank_scores", fine_debug_rows)
  730 + context.logger.info(
  731 + "精排完成 | docs=%s | top_n=%s | meta=%s",
  732 + len(hits),
  733 + fine_output_window,
  734 + fine_meta,
  735 + extra={'reqid': context.reqid, 'uid': context.uid}
  736 + )
  737 + except Exception as e:
  738 + context.add_warning(f"Fine rerank failed: {e}")
  739 + context.logger.warning(
  740 + f"调用精排服务失败 | error: {e}",
  741 + extra={'reqid': context.reqid, 'uid': context.uid},
  742 + exc_info=True,
  743 + )
  744 + finally:
  745 + context.end_stage(RequestContextStage.FINE_RANKING)
602 746
603 - # Optional Step 4.5: AI reranking(仅当请求范围在重排窗口内时执行)  
604 - if do_rerank and in_rerank_window:  
605 context.start_stage(RequestContextStage.RERANKING) 747 context.start_stage(RequestContextStage.RERANKING)
606 try: 748 try:
607 - from .rerank_client import run_rerank  
608 -  
609 - rerank_query = parsed_query.text_for_rerank() if parsed_query else query 749 + final_hits = es_response.get("hits", {}).get("hits") or []
  750 + final_input = final_hits[:rerank_window]
  751 + es_response["hits"]["hits"] = final_input
610 es_response, rerank_meta, fused_debug = run_rerank( 752 es_response, rerank_meta, fused_debug = run_rerank(
611 query=rerank_query, 753 query=rerank_query,
612 es_response=es_response, 754 es_response=es_response,
@@ -619,15 +761,15 @@ class Searcher: @@ -619,15 +761,15 @@ class Searcher:
619 top_n=(from_ + size), 761 top_n=(from_ + size),
620 debug=debug, 762 debug=debug,
621 fusion=rc.fusion, 763 fusion=rc.fusion,
  764 + fine_scores=fine_scores[:len(final_input)] if fine_scores is not None else None,
  765 + service_profile=rc.service_profile,
622 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost, 766 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
623 ) 767 )
624 768
625 if rerank_meta is not None: 769 if rerank_meta is not None:
626 if debug: 770 if debug:
627 - from dataclasses import asdict  
628 - from config.services_config import get_rerank_service_url  
629 rerank_debug_info = { 771 rerank_debug_info = {
630 - "service_url": get_rerank_service_url(), 772 + "service_url": get_rerank_service_url(profile=rc.service_profile),
631 "query_template": effective_query_template, 773 "query_template": effective_query_template,
632 "doc_template": effective_doc_template, 774 "doc_template": effective_doc_template,
633 "query_text": str(effective_query_template).format_map({"query": rerank_query}), 775 "query_text": str(effective_query_template).format_map({"query": rerank_query}),
@@ -652,15 +794,17 @@ class Searcher: @@ -652,15 +794,17 @@ class Searcher:
652 finally: 794 finally:
653 context.end_stage(RequestContextStage.RERANKING) 795 context.end_stage(RequestContextStage.RERANKING)
654 796
655 - # 当本次请求在重排窗口内时:已从 ES 取了 rerank_window 条并可能已重排,需按请求的 from/size 做分页切片 797 + # 当本次请求在重排窗口内时:已按多阶段排序产出前 rerank_window 条,需按请求的 from/size 做分页切片
656 if in_rerank_window: 798 if in_rerank_window:
657 hits = es_response.get("hits", {}).get("hits") or [] 799 hits = es_response.get("hits", {}).get("hits") or []
658 sliced = hits[from_ : from_ + size] 800 sliced = hits[from_ : from_ + size]
659 es_response.setdefault("hits", {})["hits"] = sliced 801 es_response.setdefault("hits", {})["hits"] = sliced
660 if sliced: 802 if sliced:
661 - # 对于启用重排的结果,优先使用 _fused_score 计算 max_score;否则退回原始 _score  
662 slice_max = max( 803 slice_max = max(
663 - (h.get("_fused_score", h.get("_score", 0.0)) for h in sliced), 804 + (
  805 + h.get("_fused_score", h.get("_fine_score", h.get("_coarse_score", h.get("_score", 0.0))))
  806 + for h in sliced
  807 + ),
664 default=0.0, 808 default=0.0,
665 ) 809 )
666 try: 810 try:
@@ -670,7 +814,6 @@ class Searcher: @@ -670,7 +814,6 @@ class Searcher:
670 else: 814 else:
671 es_response["hits"]["max_score"] = 0.0 815 es_response["hits"]["max_score"] = 0.0
672 816
673 - # Page fill: fetch detailed fields only for final page hits.  
674 if sliced: 817 if sliced:
675 if response_source_spec is False: 818 if response_source_spec is False:
676 for hit in sliced: 819 for hit in sliced:
@@ -754,6 +897,16 @@ class Searcher: @@ -754,6 +897,16 @@ class Searcher:
754 if doc_id is None: 897 if doc_id is None:
755 continue 898 continue
756 rerank_debug_by_doc[str(doc_id)] = item 899 rerank_debug_by_doc[str(doc_id)] = item
  900 + fine_debug_raw = context.get_intermediate_result('fine_rank_scores', None)
  901 + fine_debug_by_doc: Dict[str, Dict[str, Any]] = {}
  902 + if isinstance(fine_debug_raw, list):
  903 + for item in fine_debug_raw:
  904 + if not isinstance(item, dict):
  905 + continue
  906 + doc_id = item.get("doc_id")
  907 + if doc_id is None:
  908 + continue
  909 + fine_debug_by_doc[str(doc_id)] = item
757 910
758 if self._has_style_intent(parsed_query): 911 if self._has_style_intent(parsed_query):
759 if style_intent_decisions: 912 if style_intent_decisions:
@@ -784,6 +937,9 @@ class Searcher: @@ -784,6 +937,9 @@ class Searcher:
784 rerank_debug = None 937 rerank_debug = None
785 if doc_id is not None: 938 if doc_id is not None:
786 rerank_debug = rerank_debug_by_doc.get(str(doc_id)) 939 rerank_debug = rerank_debug_by_doc.get(str(doc_id))
  940 + fine_debug = None
  941 + if doc_id is not None:
  942 + fine_debug = fine_debug_by_doc.get(str(doc_id))
787 style_intent_debug = None 943 style_intent_debug = None
788 if doc_id is not None and style_intent_decisions: 944 if doc_id is not None and style_intent_decisions:
789 decision = style_intent_decisions.get(str(doc_id)) 945 decision = style_intent_decisions.get(str(doc_id))
@@ -823,6 +979,7 @@ class Searcher: @@ -823,6 +979,7 @@ class Searcher:
823 debug_entry["doc_id"] = rerank_debug.get("doc_id") 979 debug_entry["doc_id"] = rerank_debug.get("doc_id")
824 # 与 rerank_client 中字段保持一致,便于前端直接使用 980 # 与 rerank_client 中字段保持一致,便于前端直接使用
825 debug_entry["rerank_score"] = rerank_debug.get("rerank_score") 981 debug_entry["rerank_score"] = rerank_debug.get("rerank_score")
  982 + debug_entry["fine_score"] = rerank_debug.get("fine_score")
826 debug_entry["text_score"] = rerank_debug.get("text_score") 983 debug_entry["text_score"] = rerank_debug.get("text_score")
827 debug_entry["text_source_score"] = rerank_debug.get("text_source_score") 984 debug_entry["text_source_score"] = rerank_debug.get("text_source_score")
828 debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score") 985 debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score")
@@ -833,11 +990,16 @@ class Searcher: @@ -833,11 +990,16 @@ class Searcher:
833 debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es") 990 debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es")
834 debug_entry["knn_score"] = rerank_debug.get("knn_score") 991 debug_entry["knn_score"] = rerank_debug.get("knn_score")
835 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") 992 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
  993 + debug_entry["fine_factor"] = rerank_debug.get("fine_factor")
836 debug_entry["text_factor"] = rerank_debug.get("text_factor") 994 debug_entry["text_factor"] = rerank_debug.get("text_factor")
837 debug_entry["knn_factor"] = rerank_debug.get("knn_factor") 995 debug_entry["knn_factor"] = rerank_debug.get("knn_factor")
838 debug_entry["fused_score"] = rerank_debug.get("fused_score") 996 debug_entry["fused_score"] = rerank_debug.get("fused_score")
839 debug_entry["rerank_input"] = rerank_debug.get("rerank_input") 997 debug_entry["rerank_input"] = rerank_debug.get("rerank_input")
840 debug_entry["matched_queries"] = rerank_debug.get("matched_queries") 998 debug_entry["matched_queries"] = rerank_debug.get("matched_queries")
  999 + elif fine_debug:
  1000 + debug_entry["doc_id"] = fine_debug.get("doc_id")
  1001 + debug_entry["fine_score"] = fine_debug.get("fine_score")
  1002 + debug_entry["rerank_input"] = fine_debug.get("rerank_input")
841 1003
842 if style_intent_debug: 1004 if style_intent_debug:
843 debug_entry["style_intent_sku"] = style_intent_debug 1005 debug_entry["style_intent_sku"] = style_intent_debug
@@ -908,6 +1070,8 @@ class Searcher: @@ -908,6 +1070,8 @@ class Searcher:
908 "shards": es_response.get('_shards', {}), 1070 "shards": es_response.get('_shards', {}),
909 "es_score_normalization_factor": es_score_normalization_factor, 1071 "es_score_normalization_factor": es_score_normalization_factor,
910 }, 1072 },
  1073 + "coarse_rank": coarse_debug_info,
  1074 + "fine_rank": fine_debug_info,
911 "rerank": rerank_debug_info, 1075 "rerank": rerank_debug_info,
912 "feature_flags": context.metadata.get('feature_flags', {}), 1076 "feature_flags": context.metadata.get('feature_flags', {}),
913 "stage_timings": { 1077 "stage_timings": {
tests/queries.txt 0 → 100644
@@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
  1 +白色oversized T-shirt
  2 +falda negra oficina
  3 +red fitted tee
  4 +黒いミディ丈スカート
  5 +黑色中长半身裙
  6 +فستان أسود متوسط الطول
  7 +чёрное летнее платье
  8 +修身牛仔裤
  9 +date night dress
  10 +vacation outfit dress
  11 +minimalist top
  12 +streetwear t-shirt
  13 +office casual blouse
  14 +街头风T恤
  15 +宽松T恤
  16 +复古印花T恤
  17 +Y2K上衣
  18 +情侣T恤
  19 +美式复古T恤
  20 +重磅棉T恤
  21 +修身打底衫
  22 +辣妹风短袖
  23 +纯欲上衣
  24 +正肩白T恤
  25 +波西米亚花朵衬衫
  26 +泡泡袖短袖
  27 +扎染字母T恤
  28 +T-shirt Dress
  29 +Crop Top
  30 +Lace Undershirt
  31 +Leopard Print Ripped T-shirt
  32 +Breton Stripe T-shirt
  33 +V-Neck Cotton T-shirt
  34 +Sweet & Cool Bow T-shirt
  35 +Vacation Style T-shirt
  36 +Commuter Casual Top
  37 +Minimalist Solid T-shirt
  38 +Band T-shirt
  39 +Athletic Gym T-shirt
  40 +Plus Size Loose T-shirt
  41 +Korean Style Slim T-shirt
  42 +Basic Layering Top
  43 +
tests/test_search_rerank_window.py
@@ -311,11 +311,18 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): @@ -311,11 +311,18 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
311 311
312 called: Dict[str, Any] = {"count": 0, "docs": 0} 312 called: Dict[str, Any] = {"count": 0, "docs": 0}
313 313
  314 + def _fake_run_lightweight_rerank(**kwargs):
  315 + hits = kwargs["es_hits"]
  316 + for idx, hit in enumerate(hits):
  317 + hit["_fine_score"] = float(len(hits) - idx)
  318 + return [hit["_fine_score"] for hit in hits], {"stage": "fine"}, []
  319 +
314 def _fake_run_rerank(**kwargs): 320 def _fake_run_rerank(**kwargs):
315 called["count"] += 1 321 called["count"] += 1
316 called["docs"] = len(kwargs["es_response"]["hits"]["hits"]) 322 called["docs"] = len(kwargs["es_response"]["hits"]["hits"])
317 return kwargs["es_response"], None, [] 323 return kwargs["es_response"], None, []
318 324
  325 + monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _fake_run_lightweight_rerank)
319 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank) 326 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
320 327
321 result = searcher.search( 328 result = searcher.search(
@@ -328,17 +335,20 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): @@ -328,17 +335,20 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
328 ) 335 )
329 336
330 assert called["count"] == 1 337 assert called["count"] == 1
331 - # 应当对配置的 rerank_window 条文档做重排预取  
332 - window = searcher.config.rerank.rerank_window  
333 - assert called["docs"] == window 338 + assert called["docs"] == searcher.config.rerank.rerank_window
334 assert es_client.calls[0]["from_"] == 0 339 assert es_client.calls[0]["from_"] == 0
335 - assert es_client.calls[0]["size"] == window 340 + assert es_client.calls[0]["size"] == searcher.config.coarse_rank.input_window
336 assert es_client.calls[0]["include_named_queries_score"] is True 341 assert es_client.calls[0]["include_named_queries_score"] is True
337 - assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]}  
338 - assert len(es_client.calls) == 2  
339 - assert es_client.calls[1]["size"] == 10 342 + assert es_client.calls[0]["body"]["_source"] is False
  343 + assert len(es_client.calls) == 3
  344 + assert es_client.calls[1]["size"] == max(
  345 + searcher.config.coarse_rank.output_window,
  346 + searcher.config.rerank.rerank_window,
  347 + )
340 assert es_client.calls[1]["from_"] == 0 348 assert es_client.calls[1]["from_"] == 0
341 - assert es_client.calls[1]["body"]["query"]["ids"]["values"] == [str(i) for i in range(20, 30)] 349 + assert es_client.calls[2]["size"] == 10
  350 + assert es_client.calls[2]["from_"] == 0
  351 + assert es_client.calls[2]["body"]["query"]["ids"]["values"] == [str(i) for i in range(20, 30)]
342 assert len(result.results) == 10 352 assert len(result.results) == 10
343 assert result.results[0].spu_id == "20" 353 assert result.results[0].spu_id == "20"
344 assert result.results[0].brief == "brief-20" 354 assert result.results[0].brief == "brief-20"
@@ -353,6 +363,10 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch): @@ -353,6 +363,10 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch):
353 "search.searcher.get_tenant_config_loader", 363 "search.searcher.get_tenant_config_loader",
354 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), 364 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
355 ) 365 )
  366 + monkeypatch.setattr(
  367 + "search.rerank_client.run_lightweight_rerank",
  368 + lambda **kwargs: ([1.0] * len(kwargs["es_hits"]), {"stage": "fine"}, []),
  369 + )
356 monkeypatch.setattr("search.rerank_client.run_rerank", lambda **kwargs: (kwargs["es_response"], None, [])) 370 monkeypatch.setattr("search.rerank_client.run_rerank", lambda **kwargs: (kwargs["es_response"], None, []))
357 371
358 searcher.search( 372 searcher.search(
@@ -365,7 +379,8 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch): @@ -365,7 +379,8 @@ def test_searcher_rerank_prefetch_source_follows_doc_template(monkeypatch):
365 rerank_doc_template="{title} {vendor} {brief}", 379 rerank_doc_template="{title} {vendor} {brief}",
366 ) 380 )
367 381
368 - assert es_client.calls[0]["body"]["_source"] == {"includes": ["brief", "title", "vendor"]} 382 + assert es_client.calls[0]["body"]["_source"] is False
  383 + assert es_client.calls[1]["body"]["_source"] == {"includes": ["brief", "title", "vendor"]}
369 384
370 385
371 def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_active(monkeypatch): 386 def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_active(monkeypatch):
@@ -378,6 +393,10 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a @@ -378,6 +393,10 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a
378 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), 393 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
379 ) 394 )
380 monkeypatch.setattr( 395 monkeypatch.setattr(
  396 + "search.rerank_client.run_lightweight_rerank",
  397 + lambda **kwargs: ([1.0] * len(kwargs["es_hits"]), {"stage": "fine"}, []),
  398 + )
  399 + monkeypatch.setattr(
381 "search.rerank_client.run_rerank", 400 "search.rerank_client.run_rerank",
382 lambda **kwargs: (kwargs["es_response"], None, []), 401 lambda **kwargs: (kwargs["es_response"], None, []),
383 ) 402 )
@@ -414,7 +433,8 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a @@ -414,7 +433,8 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a
414 enable_rerank=None, 433 enable_rerank=None,
415 ) 434 )
416 435
417 - assert es_client.calls[0]["body"]["_source"] == { 436 + assert es_client.calls[0]["body"]["_source"] is False
  437 + assert es_client.calls[1]["body"]["_source"] == {
418 "includes": ["option1_name", "option2_name", "option3_name", "skus", "title"] 438 "includes": ["option1_name", "option2_name", "option3_name", "skus", "title"]
419 } 439 }
420 440