Compare View

Commits (5)
  • …nt.py), [search/searcher.py](/data/saas-search/search/searcher.py),
    [frontend/static/js/app.js](/data/saas-search/frontend/static/js/app.js), and
    [tests/test_rerank_client.py](/data/saas-search/tests/test_rerank_client.py).
    
    Key fixes:
    - Fine ranking now orders hits by the fusion-stage score instead of the raw `fine_score` alone.
    - The final rerank no longer depends on a separate `fine_scores` array (which
      could drift out of sync after the fine-rank sort); it reads the
      `_fine_score` attached to each hit directly.
    - Fine rank and final rerank now produce fusion debug info through the same
      computation path that determines the actual ordering, so the logging logic
      stays consistent with the production logic.
    - The debug payload is clearer: both the fine-rank and final-rerank stages
      expose the fusion inputs/factors and a canonical `fusion_summary`, which the
      frontend UI now renders.
    
    The main problem was duplicated stage logic spread across parallel data channels: one channel computed the ordering, another assembled the debug fields, and a third carried auxiliary arrays, creating a risk of divergence. This refactor reduces that risk by making the stage score the single source of truth and having the debug/frontend layers consume its output directly rather than reconstructing it after the fact.
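A minimal sketch of the "compute once, log from the same factors" pattern described above — the function name, weights, and summary format here are hypothetical; the real formula lives in rerank_client.py:

```python
def fuse_stage_score(fine_score, text_score, knn_score,
                     fine_bias=0.1, text_bias=0.1, knn_bias=0.1):
    """Compute the fused stage score once and derive the debug summary
    from the very same factors, so logging can never drift from ranking."""
    factors = {
        "fine": fine_score + fine_bias,
        "text": text_score + text_bias,
        "knn": knn_score + knn_bias,
    }
    fused = factors["fine"] * factors["text"] * factors["knn"]
    summary = " * ".join(f"{k}={v:.4f}" for k, v in factors.items())
    return fused, f"{summary} -> {fused:.4f}"
```

Because the summary string is built from the same `factors` dict that produced the score, there is no second code path for debug output to fall out of sync with.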
    
    Verification:
    - `./.venv/bin/python -m pytest -q tests/test_rerank_client.py
      tests/test_search_rerank_window.py`
    - `./.venv/bin/python -m py_compile search/rerank_client.py
      search/searcher.py`
    
    Result: `22 passed`.
    
    The current main flow:
    
    1. Query parsing
    2. ES recall
    3. Coarse ranking: ES-internal text/KNN signals only
    4. Style SKU selection + title suffix
    5. Fine ranking: lightweight reranker + text/KNN fusion
    6. Final rerank: heavy reranker + fine score + text/KNN fusion
    7. Pagination, field backfill, response formatting
    
    The main control flow lives in [searcher.py](/data/saas-search/search/searcher.py),
    scoring and rerank details are in
    [rerank_client.py](/data/saas-search/search/rerank_client.py), and the configuration
    is defined in [schema.py](/data/saas-search/config/schema.py) and
    [config.yaml](/data/saas-search/config/config.yaml).
    
    **How the entry point decides which path to take**
    Starting at [searcher.py:348](/data/saas-search/search/searcher.py#L348),
    `search()` first reads the tenant language, feature switches, and window sizes.
    The key check spans [searcher.py:364](/data/saas-search/search/searcher.py#L364)
    to [searcher.py:372](/data/saas-search/search/searcher.py#L372):
    
    - `rerank_window` is now 80, see
      [config.yaml:256](/data/saas-search/config/config.yaml#L256)
    - `coarse_rank.input_window` is 700 and `output_window` is 240, see
      [config.yaml:231](/data/saas-search/config/config.yaml#L231)
    - `fine_rank.input_window` is 240 and `output_window` is 80, see
      [config.yaml:245](/data/saas-search/config/config.yaml#L245)
    
    So if a request satisfies `from_ + size <= rerank_window`, it enters the full funnel:
    - ES actually fetches the top `700`
    - coarse ranking keeps `240`
    - fine ranking keeps `80`
    - the final rerank also processes only those `80`
    - pagination slicing happens last
    
    If the requested page goes beyond 80, the multi-stage funnel is skipped and
    results are returned by the original ES logic.
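The window gating can be sketched as follows — an illustrative model only, with the window constants taken from the config lines cited above and a made-up function name:

```python
RERANK_WINDOW = 80                  # rerank_window (config.yaml:256)
COARSE_IN, COARSE_OUT = 700, 240    # coarse_rank input/output windows
FINE_IN, FINE_OUT = 240, 80         # fine_rank input/output windows

def funnel_windows(from_, size):
    """Return the stage windows a request flows through, or None when
    the requested page falls outside the rerank window and the request
    falls back to plain ES ordering."""
    if from_ + size > RERANK_WINDOW:
        return None  # skip the multi-stage funnel entirely
    # ES fetch -> coarse keep -> fine keep -> final rerank window
    return [COARSE_IN, COARSE_OUT, FINE_OUT, RERANK_WINDOW]
```

For example, a first page of 20 results passes through the full 700 → 240 → 80 funnel, while a request starting at offset 80 bypasses it.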
    tangwang
     
  • tangwang
     
  • tangwang
     
  • tangwang
     
  • 1. Make the tags field multilingual:
    The spu table's tags field should follow the same translation logic as title,
    filling in the original language plus zh and en.
    
    Check that the following fields all go through the same translation logic as title:
    title
    keywords
    tags
    brief
    description
    vendor
    category_path
    category_name_text
    
    2. Changes to the /indexer/enrich-content endpoint:
    1. Remove the language request parameter: the returned content maps directly
       onto the index structure, so no further processing is needed on your side
       and no language has to be specified, reducing coupling.
    2. Return three fields, enriched_attributes, enriched_tags, and qanchors,
       filled with the original content.
    3. enriched_tags is new in this change; note that it is distinct from the
       tags field. tags comes from the MySQL spu table, while enriched_tags is
       returned by this endpoint.
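Given the requirements above, a single item in the enrich-content response might be shaped like this — an illustrative sketch only; the values and exact nesting are invented, not taken from the real API:

```python
# Hypothetical shape of one element of the enrich-content response.
example_result_item = {
    "spu_id": "spu-001",
    # Filled with original content; the endpoint no longer takes a
    # `language` parameter, so language handling stays internal.
    "qanchors": {"en": "slim fit tee, casual basic", "zh": "修身T恤"},
    "enriched_attributes": [
        {"name": "material", "value": "cotton"},  # invented example
    ],
    # New in this change; distinct from the MySQL-sourced `tags` column.
    "enriched_tags": {"en": ["casual"], "zh": ["休闲"]},
}
```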
    
    3. The value of specifications also needs translation, filled in both Chinese and English:
    {
      "specifications": [
        {
          "sku_id": "sku-red-s",
          "name": "color",
          "value_keyword": "красный",
          "value_text": {
            "zh": "红色",
            "en": "red"
          }
        }
      ]
    }
    tangwang
     
api/routes/indexer.py
@@ -80,7 +80,7 @@ class BuildDocsFromDbRequest(BaseModel):
 class EnrichContentItem(BaseModel):
     """单条待生成内容理解字段的商品。"""
     spu_id: str = Field(..., description="SPU ID")
-    title: str = Field(..., description="商品标题,用于 LLM 分析生成 qanchors / tags 等")
+    title: str = Field(..., description="商品标题,用于 LLM 分析生成 qanchors / enriched_tags 等")
     image_url: Optional[str] = Field(None, description="商品主图 URL(预留给多模态/内容理解扩展)")
     brief: Optional[str] = Field(None, description="商品简介/短描述")
     description: Optional[str] = Field(None, description="商品详情/长描述")
@@ -88,15 +88,11 @@ class EnrichContentItem(BaseModel):
 
 class EnrichContentRequest(BaseModel):
     """
-    内容理解字段生成请求:根据商品标题批量生成 qanchors、semantic_attributes、tags。
+    内容理解字段生成请求:根据商品标题批量生成 qanchors、enriched_attributes、tags。
     供外部 indexer 在自行组织 doc 时调用,与翻译、向量化等微服务并列。
     """
     tenant_id: str = Field(..., description="租户 ID,用于请求路由与结果归属,不参与缓存键")
     items: List[EnrichContentItem] = Field(..., description="待分析的 SPU 列表(spu_id + title,可附带 brief/description/image_url)")
-    languages: List[str] = Field(
-        default_factory=lambda: ["zh", "en"],
-        description="目标语言列表,需在支持范围内(zh/en/de/ru/fr),默认 zh, en",
-    )
 
 
 @router.post("/reindex")
@@ -444,103 +440,36 @@ async def build_docs_from_db(request: BuildDocsFromDbRequest):
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 
-def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]], languages: List[str]) -> List[Dict[str, Any]]:
+def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]]) -> List[Dict[str, Any]]:
     """
-    同步执行内容理解:调用 product_enrich.analyze_products,按语言批量跑 LLM,
-    再聚合成每 SPU 的 qanchors、semantic_attributes、tags。供 run_in_executor 调用。
+    同步执行内容理解,返回与 ES mapping 对齐的字段结构。
+    语言策略由 product_enrich 内部统一决定,路由层不参与。
     """
-    from indexer.product_enrich import analyze_products, split_multi_value_field
+    from indexer.product_enrich import build_index_content_fields
 
-    llm_langs = list(dict.fromkeys(languages)) or ["en"]
-
-    products = [
+    results = build_index_content_fields(items=items, tenant_id=tenant_id)
+    return [
         {
-            "id": it["spu_id"],
-            "title": (it.get("title") or "").strip(),
-            "brief": (it.get("brief") or "").strip(),
-            "description": (it.get("description") or "").strip(),
-            "image_url": (it.get("image_url") or "").strip(),
+            "spu_id": item["id"],
+            "qanchors": item["qanchors"],
+            "enriched_attributes": item["enriched_attributes"],
+            "enriched_tags": item["enriched_tags"],
+            **({"error": item["error"]} if item.get("error") else {}),
        }
-        for it in items
-    ]
-    dim_keys = [
-        "tags",
-        "target_audience",
-        "usage_scene",
-        "season",
-        "key_attributes",
-        "material",
-        "features",
+        for item in results
    ]
 
-    # 按 spu_id 聚合:qanchors[lang], semantic_attributes[], tags[]
-    by_spu: Dict[str, Dict[str, Any]] = {}
-    for it in items:
-        sid = str(it["spu_id"])
-        by_spu[sid] = {"qanchors": {}, "semantic_attributes": [], "tags": []}
-
-    for lang in llm_langs:
-        try:
-            rows = analyze_products(
-                products=products,
-                target_lang=lang,
-                batch_size=20,
-                tenant_id=tenant_id,
-            )
-        except Exception as e:
-            logger.warning("enrich-content analyze_products failed for lang=%s: %s", lang, e)
-            for it in items:
-                sid = str(it["spu_id"])
-                if "error" not in by_spu[sid]:
-                    by_spu[sid]["error"] = str(e)
-            continue
-
-        for row in rows:
-            spu_id = str(row.get("id") or "")
-            if spu_id not in by_spu:
-                continue
-            rec = by_spu[spu_id]
-            if row.get("error"):
-                rec["error"] = row["error"]
-                continue
-            anchor_text = str(row.get("anchor_text") or "").strip()
-            if anchor_text:
-                rec["qanchors"][lang] = anchor_text
-            for name in dim_keys:
-                raw = row.get(name)
-                if not raw:
-                    continue
-                for value in split_multi_value_field(str(raw)):
-                    rec["semantic_attributes"].append({"lang": lang, "name": name, "value": value})
-                    if name == "tags":
-                        rec["tags"].append(value)
-
-    # 去重 tags(保持顺序)
-    out = []
-    for it in items:
-        sid = str(it["spu_id"])
-        rec = by_spu[sid]
-        tags = list(dict.fromkeys(rec["tags"]))
-        out.append({
-            "spu_id": sid,
-            "qanchors": rec["qanchors"],
-            "semantic_attributes": rec["semantic_attributes"],
-            "tags": tags,
-            **({"error": rec["error"]} if rec.get("error") else {}),
-        })
-    return out
-
 
 @router.post("/enrich-content")
 async def enrich_content(request: EnrichContentRequest):
     """
-    内容理解字段生成接口:根据商品标题批量生成 qanchors、semantic_attributes、tags。
+    内容理解字段生成接口:根据商品标题批量生成 qanchors、enriched_attributes、tags。
 
     使用场景:
     - 外部 indexer 采用「微服务组合」方式自己组织 doc 时,可调用本接口获取 LLM 生成的
       锚文本与语义属性,再与翻译、向量化结果合并写入 ES。
     - 与 /indexer/build-docs 解耦,避免 build-docs 因 LLM 耗时过长而阻塞;调用方可
-      先拿不含 qanchors/tags 的 doc,再异步或离线补齐本接口结果后更新 ES。
+      先拿不含 qanchors/enriched_tags 的 doc,再异步或离线补齐本接口结果后更新 ES。
 
     实现逻辑与 indexer.product_enrich.analyze_products 一致,支持多语言与 Redis 缓存。
     """
@@ -568,8 +497,7 @@ async def enrich_content(request: EnrichContentRequest):
         None,
         lambda: _run_enrich_content(
             tenant_id=request.tenant_id,
-            items=items_payload,
-            languages=request.languages or ["zh", "en"],
+            items=items_payload
         ),
     )
     return {
config/config.yaml
@@ -502,7 +502,7 @@ services:
      engine: "vllm"
      max_model_len: 172
      tensor_parallel_size: 1
-     gpu_memory_utilization: 0.24
+     gpu_memory_utilization: 0.15
      dtype: "float16"
      enable_prefix_caching: true
      enforce_eager: false
docs/issue-2026-03-27-添加粗排精排-第2轮-done-0327.txt
@@ -20,7 +20,7 @@ config.yaml
 现在链路是:
 ES 拉取 coarse_rank.input_window 条 -> 粗排按 text/knn 融合裁到 coarse_rank.output_window -> 再做 SKU 选择和 title suffix -> 精排调用轻量 reranker 裁到 fine_rank.output_window -> 最终重排调用现有 reranker,并在最终融合里加入 fine_score。同时把 reranker client/provider 改成了按 service_profile 选不同 service_url,这样 fine/final 可以共用同一套服务代码,只起不同实例。
 
-你用了./.venv/bin/pytest -q tests/test_rerank_client.py tests/test_search_rerank_window.py 进行验证,结果是 18 passed。也按要求尝试了 ./scripts/service_ctl.sh restart reranker backend 做联调,但当前环境里只有 6007 的现有 reranker,配置里的 fine 端口 6009 还没对应实例,而且服务重启后进程状态不太稳定,没能完成完整线上链路验证。当前最关键的落地前置是把轻量 bge 实例补起来,再用同样脚本重启联调。
+你用了./.venv/bin/pytest -q tests/test_rerank_client.py tests/test_search_rerank_window.py 进行验证,结果是 18 passed。也按要求尝试了 ./scripts/service_ctl.sh restart reranker backend 做联调,但当前环境里只有 6007 的现有 reranker,配置里的 fine 端口 6009 还没对应实例,而且服务重启后进程状态不太稳定,没能完成完整线上链路验证。当前最关键的落地前置是把轻量 bge 实例补起来,再用同样脚本重启联调。
 
 因此,需要回顾这个漏斗的代码,进行进一步的优化:
 1. Fine Rank环节的信息都是N/A。fine 实例也需要在config.yaml中体现,并且需要被server_ctl.sh管理,参考现在是怎么管理reranker的。fine和reranker分别调用哪个模型(两者代码功能,但是只是启动不同的实例),请你深度思考后决定。这里是需要参考当前代码、根据实际需要(原来只有一个reranker、现在多了一个精排,也要调用一个重排模型,因此带来的必要的部分重构。
docs/issue-2026-03-27-添加粗排精排-第3轮-done-0327.txt 0 → 100644
@@ -0,0 +1,98 @@
  1 +这是上一轮检索效果优化的需求:
  2 +参考
  3 +searcher.py
  4 +rerank_client.py
  5 +schema.py
  6 +es_query_builder.py
  7 +config.yaml
  8 +相关性检索优化说明.md
  9 +
  10 +在ES返回到rerank期间增加一轮粗排+一轮精排。
  11 +1. ES召回,600
  12 +2. 粗排:600->240。配置文件增加粗排相关配置,包括输入条数(配置为700,ES拉取的条数改为粗排输入条数),然后增加粗排的融合公式配置,参考现有的reranker融合公式即可、只是去掉其中的重排模型项。
  13 +3. 现在的sku选择、为reranker生成title后缀这一套逻辑,是放在粗排后,因为精排也是一个reranker模型(只不过是一个轻量级的,bge-reranker),需要用这个title后缀。
  14 +4. 精排:240-80,使用bge-reranker,但是,因为reranker只能选一个backend,考虑如何重构。现在,精排也是一个独立的进程、独立提供端口,服务。但是,因为跟重排逻辑是一致的(即使有部分不一致也应该分离不一致的点进行配置化),所以共用代码,只是根据需要启动两个实例,避免代码冗余。
  15 +5. 重排:80,也是用当前的重排代码,调用单独的实例(即现在使用的实例),返回后,经过融合公式,到分页,也参考现在的融合公式,但是,加入一项精排模型打分。
  16 +测试时,使用跟我同样的环境./scripts/service_ctl.sh reranker backend 重启相关服务进行测试
  17 +
  18 +你已经完成了一般修改,已把三段排序链路接上了,主改动在 search/searcher.py、search/rerank_client.py、config/schema.py、config/loader.py、config/services_config.py 和 config/config.yaml。
  19 +
  20 +现在链路是:
  21 +ES 拉取 coarse_rank.input_window 条 -> 粗排按 text/knn 融合裁到 coarse_rank.output_window -> 再做 SKU 选择和 title suffix -> 精排调用轻量 reranker 裁到 fine_rank.output_window -> 最终重排调用现有 reranker,并在最终融合里加入 fine_score。同时把 reranker client/provider 改成了按 service_profile 选不同 service_url,这样 fine/final 可以共用同一套服务代码,只起不同实例。
  22 +
  23 +并且,你对调试展示进行了重构。你已经把结果卡片和全局调试面板都改成按漏斗阶段取值和展示,在 app.js 里把 ES 召回、粗排、精排、最终 rerank 分开渲染了。
  24 +现在每条结果的 debug 会按阶段展示:
  25 +ES 召回:rank、ES score、norm score、matched queries。
  26 +粗排:rank/rank_change、coarse_score、text/knn 输入、text_source/text_translation/text_primary/text_support、text_knn/image_knn、factor。
  27 +精排:rank/rank_change、fine_score、fine input。
  28 +最终 rerank:rank/rank_change、rerank_score、text/knn score、各 factor、fused_score,以及完整 signals。
  29 +
  30 +请你仔细阅读漏斗环节的这些代码,特别是关于打分、重排序、debug信息记录方面的。
  31 +
  32 +
  33 +现在,请注意,需要优化的是:
  34 +1. Fine Rank环节似乎没有进行融合公式的计算、继而进行重排序,请修复。
  35 +2.从软件工程的视角review代码:
  36 +因为增加了多重排序漏斗,数据的记录、传递,交互的接口,是否设计足够合理,存在哪些问题。
  37 +请从软件工程的角度审视这些逻辑,是否有需要梳理、清理和重写的地方。
  38 +3. Fine Rank和Final Rerank环节信息记录优化:
  39 +这两个环节都要体现融合公式的输入、关键因子、以及融合公式的得分。为了避免代码膨胀,Fine Rank和Final Rerank
  40 +都可以采用一个字符串记录这些关键信息,字符串内包括融合公式各项的名称和具体数值,以及最终结果。你也可以继续沿用当前的记录方式,需要你对比一下哪种代码量更少、更清晰简洁。
  41 +也要仔细思考当前的代码,真实的计算和信息的记录,是否存在分离的情况,是否存在冗余和分叉。这种情况是不允许的,存在隐藏的风险,以后改了正式逻辑而没有改调试信息,将导致不一致。
  42 +务必注意,当前已经有相关的信息记录逻辑,注意不要叠补丁,可以适当修改、或者清理重写,而不是新增,要使得代码更简洁和干净,并保证信息记录与真实逻辑一致。
  43 +
  44 +
  45 +涉及代码较多,请耐心阅读,以上都是一些需要深度思考的任务,慢慢来,留足够多的时间来review和重新设计。
  46 +
  47 +
  48 +
  49 +
  50 +
  51 +
  52 +因为增加了两个环节,多了很多变量。
  53 +以这些为效果评估的测试集,调试参数。这次的调整范围是,融合公式中的各个
  54 +falda negra oficina
  55 +red fitted tee
  56 +黒いミディ丈スカート
  57 +黑色中长半身裙
  58 +чёрное летнее платье
  59 +修身牛仔裤
  60 +date night dress
  61 +vacation outfit dress
  62 +minimalist top
  63 +
  64 +仔细思考这些漏斗中重要的信息如何呈现。对应的修改前端代码。
  65 +注意包括整体漏斗信息的呈现,以及每条结构中独自的信息。
  66 +我需要这些信息,辅助各环节融合公式的调参,根据我的需求,深度思考该如何设计,要呈现哪些信息,如何呈现。
  67 +可以对现有的逻辑做适当的重构,重新整理。
  68 +
  69 +
  70 +
  71 +
  72 +fine 实例也需要在config.yaml中体现,并且需要被server_ctl.sh管理,参考现在是怎么管理reranker的。fine和reranker分别调用哪个模型(两者代码功能,但是只是启动不同的实例),请你深度思考后决定。这里是需要参考当前代码、根据实际需要(原来只有一个reranker、现在多了一个精排,也要调用一个重排模型,因此带来的必要的部分重构。
  73 +
  74 +1. Fine Rank环节的信息都是N/A,是没有配置吗。fine rank是使用bge-reranker,复用当前reranker模型的代码,但是需要单独起一个服务、单独加载一个模型。
  75 +2. Ranking Funnel、Fusion Factors、Signal Breakdown
  76 +这些是不是整合起来、按漏斗收集、整理信息、以及进行呈现比较好。
  77 +ES 召回的环节,展示Matched Queries各项打分、ES的总分、norm后打分、排序位置,等等关键信息
  78 +粗排:粗排融合公式的各项输入、重要中间结果和参数、最后得分,排序位置以及上升/下降了多少。等等关键信息。
  79 +精排:同样例举关键的输入、中间过程、输出、排序和位置变化等。
  80 +reranker:类似
  81 +
  82 +因为涉及的环节较多,非常要注意的一个点是:不要每次修改都在原来的基础上,为实现目标而打补丁,应该观察一下所涉及的代码现在是怎么做的,务必注意如何适当的清理掉现有逻辑,该如何对其进行修改,来达到目的,以达到代码的精简,避免冗余、分叉。
  83 +
  84 +
  85 +
  86 +
  87 +
  88 +1. Fine Rank 这个环节没有体现融合公式的输入、关键因子、以及融合公式的得分。为了避免代码膨胀,Fine Rank和Final Rerank
  89 +都可以采用一个字符串记录这些关键信息,字符串内包括融合公式各项的名称和具体数值,以及最终结果。
  90 +
  91 +
  92 +
  93 +
  94 +
  95 +
  96 +
  97 +
  98 +
docs/issue-2026-03-28-添加粗排精排-第4轮-done-0328.txt 0 → 100644
@@ -0,0 +1,314 @@
  1 +这是上一轮检索质量优化的需求说明:
  2 +
  3 +参考文件:
  4 +`searcher.py`
  5 +`rerank_client.py`
  6 +`schema.py`
  7 +`es_query_builder.py`
  8 +`config.yaml`
  9 +`相关性检索优化说明.md`
  10 +
  11 +在ES返回结果与重排之间增加一个粗排阶段和一个精排阶段。
  12 +
  13 +1. ES召回:600条
  14 +2. 粗排阶段:600 -> 240条。
  15 + 在配置文件中增加粗排相关配置,包括输入大小(设置为700,即从ES获取的数量应改为粗排的输入大小)。
  16 + 然后增加粗排的融合公式配置。可参考现有的重排融合公式,但需要去掉重排模型那一项。
  17 +3. 目前重排的SKU选择和标题后缀生成逻辑,应放在粗排之后,因为精排也是一个重排模型(轻量级模型,`bge-reranker`),它也需要这个标题后缀。
  18 +4. 精排阶段:240 -> 80条。
  19 + 使用`bge-reranker`,但由于目前重排只能选择一个后端,请考虑如何重构。
  20 + 现在精排也应该是一个独立的进程和服务,拥有自己的端口。
  21 + 但由于其逻辑与重排逻辑一致(即便存在差异,这些差异也应抽离并配置化),代码应该共享。按需启动两个实例即可,避免代码重复。
  22 +5. 最终重排:80条。
  23 + 仍使用当前重排代码,调用独立的实例(即当前在用的那个)。
  24 + 返回后,应用融合公式,再进行分页。
  25 + 这里也应参考当前的融合公式,但需增加一项:精排模型得分。
  26 +
  27 +测试时,请使用与我相同的环境,并使用以下命令重启相关服务:
  28 +`./scripts/service_ctl.sh reranker backend`
  29 +
  30 +你已经完成了整体修改,并将三级排序流水线串联起来了。
  31 +主要改动在:
  32 +`search/searcher.py`
  33 +`search/rerank_client.py`
  34 +`config/schema.py`
  35 +`config/loader.py`
  36 +`config/services_config.py`
  37 +以及 `config/config.yaml`。
  38 +
  39 +现在的流程是:
  40 +
  41 +ES获取 `coarse_rank.input_window` 条 ->
  42 +粗排通过文本/KNN融合裁剪至 `coarse_rank.output_window` ->
  43 +然后进行SKU选择和标题后缀处理 ->
  44 +精排调用轻量重排,裁剪至 `fine_rank.output_window` ->
  45 +最终重排调用现有重排,最终融合时也加入了 `fine_score`。
  46 +
  47 +同时,重排客户端/提供者已改为通过 `service_profile` 选择不同的 `service_url`,因此精排和最终重排可以共享同一套服务代码,仅以不同实例运行。
  48 +
  49 +你还重构了调试展示。
  50 +你修改了结果卡片和全局调试面板,使其按漏斗阶段读取并渲染数值,在 `app.js` 中,你现在分别渲染ES召回、粗排、精排和最终重排。
  51 +
  52 +现在,每个结果的调试信息按阶段展示:
  53 +
  54 +* ES召回:`rank`、ES得分、归一化得分、匹配查询
  55 +* 粗排:`rank` / `rank_change`、`coarse_score`、文本/KNN输入、`text_source` / `text_translation` / `text_primary` / `text_support`、`text_knn` / `image_knn`、`factor`
  56 +* 精排:`rank` / `rank_change`、`fine_score`、`fine input`
  57 +* 最终重排:`rank` / `rank_change`、`rerank_score`、文本/KNN得分、各因子、`fused_score` 以及完整信号
  58 +
  59 +请仔细阅读这些漏斗阶段的代码,特别是涉及打分、重排和调试信息记录的部分。
  60 +
  61 +现在,请注意需要优化的部分:
  62 +
  63 +1. 精排阶段似乎没有计算融合公式并据此重排。请修复此问题。
  64 +2. 从软件工程的角度审视代码:
  65 + 既然引入了多级排序漏斗,数据记录、传递和交互接口的设计是否足够合理?存在哪些问题?
  66 + 请从软件工程角度审视这一逻辑,判断是否有需要重新组织、清理或重写的部分。
  67 +3. 优化精排和最终重排阶段的信息记录:
  68 + 这两个阶段都应体现融合公式的输入、关键因子以及融合公式计算出的得分。
  69 + 为避免代码臃肿,精排和最终重排都可以使用一个字符串来记录这些关键信息。该字符串可以包含融合公式中各项的名称和值,以及最终结果。
  70 + 你也可以继续使用当前的记录方式;请对比哪种方式代码更少、更清晰简洁。
  71 + 同时请仔细思考当前代码:实际的计算过程和记录的信息是否分离?是否存在冗余或分歧?
  72 + 这是不可取的,因为会引入潜在风险:如果后续修改了生产逻辑但未更新调试信息,就会导致不一致。
  73 +
  74 +请特别注意:现在已经存在相关的信息记录逻辑。不要只是层层打补丁。
  75 +你可以适当修改,或者清理重写,而不仅仅是增加代码。
  76 +目标是让代码更简单、更干净,同时确保记录的信息始终与实际逻辑保持一致。
  77 +
  78 +涉及代码较多,请耐心阅读。
  79 +以上所有任务都需要深入思考。请慢慢来,为全面的审查和重新设计留出足够空间。
  80 +
  81 +
  82 +
  83 +
  84 +
  85 +
  86 +**整体图**
  87 +这个 pipeline 现在可以理解成一条“先广召回,再逐层收窄、逐层加贵信号”的漏斗:
  88 +
  89 +1. Query 解析
  90 +2. ES 召回
  91 +3. 粗排:只用 ES 内部文本/KNN 信号
  92 +4. 款式 SKU 选择 + title suffix
  93 +5. 精排:轻量 reranker + 文本/KNN 融合
  94 +6. 最终 rerank:重 reranker + fine score + 文本/KNN 融合
  95 +7. 分页、补全字段、格式化返回
  96 +
  97 +主控代码在 [searcher.py](/data/saas-search/search/searcher.py),打分与 rerank 细节在 [rerank_client.py](/data/saas-search/search/rerank_client.py),配置定义在 [schema.py](/data/saas-search/config/schema.py) 和 [config.yaml](/data/saas-search/config/config.yaml)。
  98 +
  99 +**先看入口怎么决定走哪条路**
  100 +在 [searcher.py:348](/data/saas-search/search/searcher.py#L348) 开始,`search()` 先读租户语言、开关、窗口大小。
  101 +关键判断在 [searcher.py:364](/data/saas-search/search/searcher.py#L364) 到 [searcher.py:372](/data/saas-search/search/searcher.py#L372):
  102 +
  103 +- `rerank_window` 现在是 80,见 [config.yaml:256](/data/saas-search/config/config.yaml#L256)
  104 +- `coarse_rank.input_window` 是 700,`output_window` 是 240,见 [config.yaml:231](/data/saas-search/config/config.yaml#L231)
  105 +- `fine_rank.input_window` 是 240,`output_window` 是 80,见 [config.yaml:245](/data/saas-search/config/config.yaml#L245)
  106 +
  107 +所以如果请求满足 `from_ + size <= rerank_window`,就进入完整漏斗:
  108 +- ES 实际取前 `700`
  109 +- 粗排后留 `240`
  110 +- 精排后留 `80`
  111 +- 最终 rerank 也只处理这 `80`
  112 +- 最后再做分页切片
  113 +
  114 +如果请求页超出 80,就不走后面的多阶段漏斗,直接按 ES 原逻辑返回。
  115 +
  116 +这点非常重要,因为它决定了“贵模型只服务头部结果”。
  117 +
  118 +**Step 1:Query 解析阶段**
  119 +在 [searcher.py:432](/data/saas-search/search/searcher.py#L432) 到 [searcher.py:469](/data/saas-search/search/searcher.py#L469):
  120 +`query_parser.parse()` 做几件事:
  121 +
  122 +- 规范化 query
  123 +- 检测语言
  124 +- 可能做 rewrite
  125 +- 生成文本向量
  126 +- 如果有图搜,还会带图片向量
  127 +- 生成翻译结果
  128 +- 识别 style intent
  129 +
  130 +这一步的结果存在 `parsed_query` 里,后面 ES 查询、style SKU 选择、fine/final rerank 全都依赖它。
  131 +
  132 +**Step 2:ES Query 构建**
  133 +ES DSL 在 [searcher.py:471](/data/saas-search/search/searcher.py#L471) 开始,通过 [es_query_builder.py:181](/data/saas-search/search/es_query_builder.py#L181) 的 `build_query()` 生成。
  134 +
  135 +这里的核心结构是:
  136 +- 文本召回 clause
  137 +- 文本向量 KNN clause
  138 +- 图片向量 KNN clause
  139 +- 它们一起放进 `bool.should`
  140 +- 过滤条件放进 `filter`
  141 +- facet 的多选条件走 `post_filter`
  142 +
  143 +KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builder.py#L250) 之后:
  144 +- 文本向量 clause 名字固定叫 `knn_query`
  145 +- 图片向量 clause 名字固定叫 `image_knn_query`
  146 +
  147 +而文本召回那边,后续 fusion 代码约定会去读:
  148 +- 原始 query 的 named query:`base_query`
  149 +- 翻译 query 的 named query:`base_query_trans_*`
  150 +
  151 +也就是说,后面的粗排/精排/最终 rerank,并不是重新理解 ES score,而是从 `matched_queries` 里把这些命名子信号拆出来自己重算。
  152 +
  153 +**Step 3:ES 召回**
  154 +在 [searcher.py:579](/data/saas-search/search/searcher.py#L579) 到 [searcher.py:627](/data/saas-search/search/searcher.py#L627)。
  155 +
  156 +这里有个很关键的工程优化:
  157 +如果在 rerank window 内,第一次 ES 拉取时会把 `_source` 关掉,只取排序必需信号,见 [searcher.py:517](/data/saas-search/search/searcher.py#L517) 到 [searcher.py:523](/data/saas-search/search/searcher.py#L523)。
  158 +
  159 +原因是:
  160 +- 粗排先只需要 `_score` 和 `matched_queries`
  161 +- 不需要一上来把 700 条完整商品详情都拉回来
  162 +- 等粗排收窄后,再补 fine/final rerank 需要的字段
  163 +
  164 +这是现在这条 pipeline 很核心的性能设计点。
  165 +
  166 +**Step 4:粗排**
  167 +粗排入口在 [searcher.py:638](/data/saas-search/search/searcher.py#L638),真正的打分在 [rerank_client.py:348](/data/saas-search/search/rerank_client.py#L348) 的 `coarse_resort_hits()`。
  168 +
  169 +粗排只看两类信号:
  170 +- `text_score`
  171 +- `knn_score`
  172 +
  173 +它们先都从统一 helper `_build_hit_signal_bundle()` 里拿,见 [rerank_client.py:246](/data/saas-search/search/rerank_client.py#L246)。
  174 +
  175 +文本分怎么来,见 [rerank_client.py:200](/data/saas-search/search/rerank_client.py#L200):
  176 +- `source_score = matched_queries["base_query"]`
  177 +- `translation_score = max(base_query_trans_*)`
  178 +- `weighted_translation = 0.8 * translation_score`
  179 +- `primary_text = max(source, weighted_translation)`
  180 +- `support_text = 另一路`
  181 +- `text_score = primary_text + 0.25 * support_text`
  182 +
  183 +这就是一个 text dismax 思路:
  184 +原 query 是主路,翻译 query 是辅助路,但不是简单相加。
  185 +
  186 +向量分怎么来,见 [rerank_client.py:156](/data/saas-search/search/rerank_client.py#L156):
  187 +- `text_knn_score`
  188 +- `image_knn_score`
  189 +- 分别乘自己的 weight
  190 +- 取强的一路做主路
  191 +- 弱的一路按 `knn_tie_breaker` 做辅助
  192 +
  193 +然后粗排融合公式在 [rerank_client.py:334](/data/saas-search/search/rerank_client.py#L334):
  194 +- `coarse_score = (text_score + text_bias)^text_exponent * (knn_score + knn_bias)^knn_exponent`
  195 +
  196 +配置定义在 [schema.py:124](/data/saas-search/config/schema.py#L124) 和 [config.yaml:231](/data/saas-search/config/config.yaml#L231)。
  197 +
  198 +算完后:
  199 +- 写入 `hit["_coarse_score"]`
  200 +- 按 `_coarse_score` 排序
  201 +- 留前 240,见 [searcher.py:645](/data/saas-search/search/searcher.py#L645)
  202 +
  203 +**Step 5:粗排后补字段 + SKU 选择**
  204 +粗排完以后,`searcher` 会按 doc template 反推 fine/final rerank 需要哪些 `_source` 字段,然后只补这些字段,见 [searcher.py:669](/data/saas-search/search/searcher.py#L669)。
  205 +
  206 +之后才做 style SKU 选择,见 [searcher.py:696](/data/saas-search/search/searcher.py#L696)。
  207 +
  208 +为什么放这里?
  209 +因为现在 fine rank 也是 reranker,它也要吃 title suffix。
  210 +而 suffix 是 SKU 选择之后写到 hit 上的 `_style_rerank_suffix`。
  211 +真正把 suffix 拼进 doc 文本的地方在 [rerank_client.py:65](/data/saas-search/search/rerank_client.py#L65) 到 [rerank_client.py:74](/data/saas-search/search/rerank_client.py#L74)。
  212 +
  213 +所以顺序必须是:
  214 +- 先粗排
  215 +- 再选 SKU
  216 +- 再用带 suffix 的 title 去跑 fine/final rerank
  217 +
  218 +**Step 6:精排**
  219 +入口在 [searcher.py:711](/data/saas-search/search/searcher.py#L711),实现是 [rerank_client.py:603](/data/saas-search/search/rerank_client.py#L603) 的 `run_lightweight_rerank()`。
  220 +
  221 +它会做三件事:
  222 +
  223 +1. 用 `build_docs_from_hits()` 把每条商品变成 reranker 输入文本
  224 +2. 用 `service_profile="fine"` 调轻量服务
  225 +3. 不再只按 `fine_score` 排,而是按融合后的 `_fine_fused_score` 排
  226 +
  227 +精排融合公式现在是:
  228 +- `fine_stage_score = fine_factor * text_factor * knn_factor * style_boost`
  229 +
  230 +具体公共计算在 [rerank_client.py:286](/data/saas-search/search/rerank_client.py#L286) 的 `_compute_multiplicative_fusion()`:
  231 +- `fine_factor = (fine_score + fine_bias)^fine_exponent`
  232 +- `text_factor = (text_score + text_bias)^text_exponent`
  233 +- `knn_factor = (knn_score + knn_bias)^knn_exponent`
  234 +- 如果命中了 selected SKU,再乘 style boost
  235 +
  236 +写回 hit 的字段见 [rerank_client.py:655](/data/saas-search/search/rerank_client.py#L655):
  237 +- `_fine_score`
  238 +- `_fine_fused_score`
  239 +- `_text_score`
  240 +- `_knn_score`
  241 +
  242 +排序逻辑在 [rerank_client.py:683](/data/saas-search/search/rerank_client.py#L683):
  243 +按 `_fine_fused_score` 降序排,然后留前 80,见 [searcher.py:727](/data/saas-search/search/searcher.py#L727)。
  244 +
  245 +这就是你这次特别关心的点:现在 fine rank 已经不是“模型裸分排序”,而是“模型分 + ES 文本/KNN 信号融合后排序”。
  246 +
  247 +**Step 7:最终 rerank**
  248 +入口在 [searcher.py:767](/data/saas-search/search/searcher.py#L767),实现是 [rerank_client.py:538](/data/saas-search/search/rerank_client.py#L538) 的 `run_rerank()`。
  249 +
  250 +它和 fine rank 很像,但多了一个更重的模型分 `rerank_score`。
  251 +最终公式是:
  252 +
  253 +- `final_score = rerank_factor * fine_factor * text_factor * knn_factor * style_boost`
  254 +
  255 +也就是:
  256 +- fine rank 产生的 `fine_score` 不会丢
  257 +- 到最终 rerank 时,它会继续作为一个乘法项参与最终融合
  258 +
  259 +这个逻辑在 [rerank_client.py:468](/data/saas-search/search/rerank_client.py#L468) 到 [rerank_client.py:476](/data/saas-search/search/rerank_client.py#L476)。
  260 +
  261 +算完后写入:
  262 +- `_rerank_score`
  263 +- `_fused_score`
  264 +
  265 +然后按 `_fused_score` 排序,见 [rerank_client.py:531](/data/saas-search/search/rerank_client.py#L531)。
  266 +
  267 +这里你可以把它理解成:
  268 +- fine rank 负责“轻量快速筛一遍,把 240 缩成 80”
  269 +- 最终 rerank 负责“用更贵模型做最终拍板”
  270 +- 但最终拍板时,不会忽略 fine rank 结果,而是把 fine score 当成一个先验信号保留进去
  271 +
  272 +**Step 8:分页与字段补全**
  273 +多阶段排序只在头部窗口内完成。
  274 +真正返回给用户前,在 [searcher.py:828](/data/saas-search/search/searcher.py#L828) 之后还会做两件事:
  275 +
  276 +- 先按 `from_:from_+size` 对最终 80 条切片
  277 +- 再按用户原始 `_source` 需求补回页面真正要显示的字段,见 [searcher.py:859](/data/saas-search/search/searcher.py#L859)
  278 +
  279 +所以这条链路是“三次不同目的的数据访问”:
  280 +
  281 +- 第一次 ES:只要排序信号
  282 +- 第二次按 id 回填:只要 fine/final rerank 需要字段
  283 +- 第三次按页面 ids 回填:只要最终页面显示字段
  284 +
  285 +这也是为什么它性能上比“一次全量拉 700 条完整文档”更合理。
  286 +
  287 +**Step 9:结果格式化与 debug funnel**
  288 +最后在 [searcher.py:906](/data/saas-search/search/searcher.py#L906) 进入结果处理。
  289 +这里会把每个商品的阶段信息组装成 `ranking_funnel`,见 [searcher.py:1068](/data/saas-search/search/searcher.py#L1068):
  290 +
  291 +- `es_recall`
  292 +- `coarse_rank`
  293 +- `fine_rank`
  294 +- `rerank`
  295 +- `final_page`
  296 +
  297 +其中:
  298 +- coarse stage 主要保留 text/translation/knn 的拆分信号
  299 +- fine/rerank stage 现在都保留 `fusion_inputs`、`fusion_factors`、`fusion_summary`
  300 +- `fusion_summary` 来自真实计算过程本身,见 [rerank_client.py:265](/data/saas-search/search/rerank_client.py#L265)
  301 +
  302 +这点很重要,因为现在“实际排序逻辑”和“debug 展示逻辑”是同源的,不是两套各写一份。
  303 +
  304 +**一句话总结这条 pipeline**
  305 +这条 pipeline 的本质是:
  306 +
  307 +- ES 负责便宜的大范围召回
  308 +- 粗排负责只靠 ES 内置信号先做一次结构化筛选
  309 +- style SKU 选择负责把商品文本改造成更适合 reranker 理解的输入
  310 +- fine rank 负责用轻模型把候选进一步压缩
  311 +- final rerank 负责用重模型做最终判定
  312 +- 每一层都尽量复用前一层信号,而不是推翻重来
  313 +
  314 +如果你愿意,我下一步可以继续按“一个具体 query 的真实流转样例”来讲,比如假设用户搜 `black dress`,我把它从 `parsed_query`、ES named queries、coarse/fine/final 的每个分数怎么出来,完整手推一遍。
\ No newline at end of file
docs/suggestion索引构建.md
@@ -169,7 +169,7 @@
 
 ##### 4.1 从商品索引收集 title / qanchors / tags(Step 1)
 
-- 遍历店铺的所有商品:获取每个商品的 `"spu_id"`, `"title"`, `"qanchors"`, `"tags"`(按 `spu_id`、`id.keyword` 升序,便于 `search_after` 稳定分页)
+- 遍历店铺的所有商品:获取每个商品的 `"spu_id"`, `"title"`, `"qanchors"`, `"enriched_tags"`(按 `spu_id`、`id.keyword` 升序,便于 `search_after` 稳定分页)
 
 - 对每个商品文档:
 
@@ -207,7 +207,7 @@
 - **qanchors 处理**:
   - `qanchors` 字段同样为多语言对象:
     ```json
-    "qanchors": { "en": "...", "zh": "..." }
+    "qanchors": { "en": ["slim fit", "sporty casual"], "zh": ["修身", "显瘦"] }
     ```
   - 取 `q_raw = qanchors[lang]`
   - 通过 `_split_qanchors(q_raw)` 拆分为若干字符串:
@@ -217,10 +217,14 @@
 - `text_norm = _normalize_text(q_text)`,再用 `_looks_noise` 过滤
 - 同样按 `(lang, text_norm)` 合并为 `SuggestionCandidate`,调用 `add_product("qanchor", spu_id=product_id)`。
 
-4. **tags 处理**(与 `index_languages` 循环并列,每个商品只做一次):
-   - `tags` 可为字符串数组,或逗号等分隔的单个字符串;经 `_iter_product_tags` 展开为若干条。
-   - 每条 tag **无语言字段**:使用 `query.query_parser.detect_text_language_for_suggestions`(与 `QueryParser` 相同的 `LanguageDetector`)判定语言,并约束在租户的 `index_languages` 内。
-   - 通过 `_looks_noise` 后按 `(detected_lang, text_norm)` 合并,调用 `add_product("tag", spu_id=product_id)`。
+4. **enriched_tags 处理**(与 `index_languages` 循环并列):
+   - `enriched_tags` 现为多语言对象,例如:
+     ```json
+     "enriched_tags": { "en": ["Classic", "ribbed neckline"], "zh": ["辣妹风"] }
+     ```
+   - 优先读取 `enriched_tags[lang]`,每个值可为字符串数组,或逗号等分隔的单个字符串;经 `_iter_product_tags` 展开为若干条。
+   - 对历史旧数据,若 `enriched_tags` 仍是单层字符串 / 数组,则继续走语言检测兜底,并约束在租户的 `index_languages` 内。
+   - 通过 `_looks_noise` 后按 `(lang, text_norm)` 合并,调用 `add_product("tag", spu_id=product_id)`。
 
 ##### 4.2 从查询日志收集用户 query(Step 2)
 
docs/工作总结-微服务性能优化与架构.md
@@ -95,10 +95,10 @@ instruction: "Given a shopping query, rank product titles by relevance"
 
 ### 5. 内容理解字段(支撑 Suggest)
 
-**能力**:支持根据商品标题批量生成 **qanchors**(锚文本)、**semantic_attributes**、**tags**,供索引与 suggest 使用。
+**能力**:支持根据商品标题批量生成 **qanchors**(锚文本)、**enriched_attributes**、**tags**,供索引与 suggest 使用。
 
 **具体内容**:
-- **接口**:`POST /indexer/enrich-content`(Indexer 服务端口 **6004**)。请求体为 `items` 数组,每项含 `spu_id`、`title`(必填)及可选多语言标题等;单次请求最多 **50 条**,建议批量调用。响应 `results` 与 `items` 一一对应,每项含 `spu_id`、`qanchors`(按语言键,如 `qanchors.zh`、`qanchors.en`,逗号分隔短语)、`semantic_attributes`、`tags`。
+- **接口**:`POST /indexer/enrich-content`(Indexer 服务端口 **6004**)。请求体为 `items` 数组,每项含 `spu_id`、`title`(必填)及可选多语言标题等;单次请求最多 **50 条**,建议批量调用。响应 `results` 与 `items` 一一对应,每项含 `spu_id`、`qanchors`(按语言键,如 `qanchors.zh`、`qanchors.en`,逗号分隔短语)、`enriched_attributes`、`tags`。
 - **索引侧**:微服务组合方式下,调用方先拿不含 qanchors/tags 的 doc,再调用本接口补齐后写入 ES 的 `qanchors.{lang}` 等字段;索引 transformer(`indexer/document_transformer.py`、`indexer/product_enrich.py`)内也可在构建 doc 时调用内容理解逻辑,写入 `qanchors.{lang}`。
 - **Suggest 侧**:`suggestion/builder.py` 从 ES 商品索引读取 `_source: ["id", "spu_id", "title", "qanchors"]`,对 `qanchors.{lang}` 用 `_split_qanchors` 拆成词条,以 `source="qanchor"` 加入候选,排序时 `qanchor` 权重大于纯 title(`add_product("qanchor", ...)`);suggest 配置中 `sources: ["query_log", "qanchor"]` 表示候选来源包含 qanchor。
 - **实现与依赖**:内容理解内部使用大模型(需 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存(如 `product_anchors`);逻辑与 `indexer/product_enrich` 一致。
docs/常用查询 - ES.md
## Elasticsearch Troubleshooting Flow

### 1. Cluster health

```bash
# Overall cluster health (green / yellow / red)
curl -s -u 'saas:4hOaLaf41y2VuI8y' 'http://127.0.0.1:9200/_cluster/health?pretty'
```

### 2. Index overview

```bash
# Status and size of all tenant indices
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/_cat/indices/search_products_tenant_*?v'

# Or list every index
curl -s -u 'saas:4hOaLaf41y2VuI8y' 'http://127.0.0.1:9200/_cat/indices?v'
```

### 3. Shard distribution

```bash
# Shard placement across nodes
curl -s -u 'saas:4hOaLaf41y2VuI8y' 'http://127.0.0.1:9200/_cat/shards?v'
```

### 4. Allocation diagnosis (when something is off)

```bash
# When health is not green or shard states look abnormal, pinpoint the cause
curl -s -u 'saas:4hOaLaf41y2VuI8y' -X POST 'http://127.0.0.1:9200/_cluster/allocation/explain?pretty' \
  -H 'Content-Type: application/json' \
  -d '{"index":"search_products_tenant_163","shard":0,"primary":true}'
```

> Typical verdict: `disk_threshold` — disk usage is above the high watermark, so new shard allocation is blocked.

### 5. System-level checks

```bash
# Service status
sudo systemctl status elasticsearch

# Disk space
df -h

# ES data directory usage
du -sh /var/lib/elasticsearch/
```

### 6. Config and logs

```bash
# Config file
cat /etc/elasticsearch/elasticsearch.yml

# Live logs
journalctl -u elasticsearch -f
```

---

### Quick triage path

```
_cluster/health              → confirm cluster status (green/yellow/red)
        ↓
_cat/indices                 → check index size and status
        ↓
_cat/shards                  → inspect shard distribution
        ↓
_cluster/allocation/explain  → pinpoint allocation issues (if needed)
        ↓
systemctl / df / logs        → verify at the system level
```

---

# Elasticsearch Query Collection

## Tenant queries

> **Note**: indices are split per tenant as `search_products_tenant_{tenant_id}`, so queries normally don't need an additional `tenant_id` filter (optionally keep one for troubleshooting).

---

### 1. Query by tenant_id / spu_id

#### Fetch a product by spu_id (return title)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 11,
  "_source": ["title"],
  "query": {
    "bool": {
      "filter": [
        { "term": {"spu_id" : 206150} }
      ]
    }
  }
}'
```

#### Fetch all products (return title)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 100,
  "_source": ["title"],
  "query": {
    "match_all": {}
  }
}'
```

#### Fetch a product by spu_id (return title, keywords, tags)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 5,
  "_source": ["title", "keywords", "tags"],
  "query": {
    "bool": {
      "filter": [
        { "term": { "spu_id": "223167" } }
      ]
    }
  }
}'
```

#### Combined query: match title + filter by tags
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 1,
  "_source": ["title", "keywords", "tags"],
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title.en": {
              "query": "Floerns Women Gothic Graphic Ribbed Strapless Tube Top Asymmetrical Ruched Bandeau Tops"
            }
          }
        }
      ],
      "filter": [
        { "terms": { "tags": ["女装", "派对"] } }
      ]
    }
  }
}'
```

#### Combined query: match title + filter by tenant (redundant example)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 1,
  "_source": ["title"],
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title.en": {
              "query": "Floerns Women Gothic Graphic Ribbed Strapless Tube Top Asymmetrical Ruched Bandeau Tops"
            }
          }
        }
      ],
      "filter": [
        { "term": { "tenant_id": "170" } }
      ]
    }
  }
}'
```

---

### 2. Analyzer tests

#### Test the index_ik analyzer
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
  "analyzer": "index_ik",
  "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
}'
```

#### Test the query_ik analyzer
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
  "analyzer": "query_ik",
  "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
}'
```

---

### 3. Multi-field search + aggregations (combined facet example)

#### Multi-field match + aggregations (category1, color, size, material)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 1,
  "from": 0,
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "_name": "base_query",
            "fields": [
              "title.zh^3.0",
              "brief.zh^1.5",
              "description.zh",
              "vendor.zh^1.5",
              "tags",
              "category_path.zh^1.5",
              "category_name_text.zh^1.5",
              "option1_values^0.5"
            ],
            "minimum_should_match": "75%",
            "operator": "AND",
            "query": "裙",
            "tie_breaker": 0.9
          }
        }
      ],
      "filter": [
        { "match_all": {} }
      ]
    }
  },
  "aggs": {
    "category1_name_facet": {
      "terms": {
        "field": "category1_name",
        "size": 15,
        "order": { "_count": "desc" }
      }
    },
    "specifications_color_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filter_by_name": {
          "filter": { "term": { "specifications.name": "color" } },
          "aggs": {
            "value_counts": {
              "terms": {
                "field": "specifications.value",
                "size": 20,
                "order": { "_count": "desc" }
              }
            }
          }
        }
      }
    },
    "specifications_size_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filter_by_name": {
          "filter": { "term": { "specifications.name": "size" } },
          "aggs": {
            "value_counts": {
              "terms": {
                "field": "specifications.value",
                "size": 15,
                "order": { "_count": "desc" }
              }
            }
          }
        }
      }
    },
    "specifications_material_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filter_by_name": {
          "filter": { "term": { "specifications.name": "material" } },
          "aggs": {
            "value_counts": {
              "terms": {
                "field": "specifications.value",
                "size": 10,
                "order": { "_count": "desc" }
              }
            }
          }
        }
      }
    }
  }
}'
```

---

### 4. Generic queries (generic index examples)

#### Match all
```bash
GET /search_products_tenant_2/_search
{
  "query": {
    "match_all": {}
  }
}
```

#### Query by spu_id (generic index)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  "size": 5,
  "query": {
    "bool": {
      "filter": [
        { "term": { "spu_id": "74123" } }
      ]
    }
  }
}'
```

---

### 5. Count a tenant's total documents

```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "match_all": {}
  }
}'
```

---

## Facet-data diagnostics

### 1. Check facet fields on ES documents

#### 1.1 Query one tenant product, showing the facet-related fields
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "term": { "tenant_id": "162" }
  },
  "size": 1,
  "_source": [
    "spu_id", "title", "category1_name", "category2_name",
    "category3_name", "specifications", "option1_name",
    "option2_name", "option3_name"
  ]
}'
```

#### 1.2 Verify the category1_name field has data
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "term": { "tenant_id": "162" } },
        { "exists": { "field": "category1_name" } }
      ]
    }
  },
  "size": 0
}'
```

#### 1.3 Verify the specifications field has data
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "term": { "tenant_id": "162" } },
        { "exists": { "field": "specifications" } }
      ]
    }
  },
  "size": 0
}'
```

---

### 2. Facet aggregation queries

#### 2.1 category1_name facet
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": { "match_all": {} },
  "size": 0,
  "aggs": {
    "category1_name_facet": {
      "terms": { "field": "category1_name", "size": 50 }
    }
  }
}'
```

#### 2.2 specifications.color facet
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": { "match_all": {} },
  "size": 0,
  "aggs": {
    "specifications_color_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filtered": {
          "filter": { "term": { "specifications.name": "color" } },
          "aggs": {
            "values": { "terms": { "field": "specifications.value", "size": 50 } }
          }
        }
      }
    }
  }
}'
```

#### 2.3 specifications.size facet
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": { "match_all": {} },
  "size": 0,
  "aggs": {
    "specifications_size_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filtered": {
          "filter": { "term": { "specifications.name": "size" } },
          "aggs": {
            "values": { "terms": { "field": "specifications.value", "size": 50 } }
          }
        }
      }
    }
  }
}'
```

#### 2.4 specifications.material facet
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": { "match_all": {} },
  "size": 0,
  "aggs": {
    "specifications_material_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filtered": {
          "filter": { "term": { "specifications.name": "material" } },
          "aggs": {
            "values": { "terms": { "field": "specifications.value", "size": 50 } }
          }
        }
      }
    }
  }
}'
```

#### 2.5 Combined facets (category + color + size + material)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": { "match_all": {} },
  "size": 0,
  "aggs": {
    "category1_name_facet": { "terms": { "field": "category1_name", "size": 50 } },
    "specifications_color_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filtered": {
          "filter": { "term": { "specifications.name": "color" } },
          "aggs": { "values": { "terms": { "field": "specifications.value", "size": 50 } } }
        }
      }
    },
    "specifications_size_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filtered": {
          "filter": { "term": { "specifications.name": "size" } },
          "aggs": { "values": { "terms": { "field": "specifications.value", "size": 50 } } }
        }
      }
    },
    "specifications_material_facet": {
      "nested": { "path": "specifications" },
      "aggs": {
        "filtered": {
          "filter": { "term": { "specifications.name": "material" } },
          "aggs": { "values": { "terms": { "field": "specifications.value", "size": 50 } } }
        }
      }
    }
  }
}'
```

---

### 3. Inspect the nested specifications structure

#### 3.1 List the distinct values of specifications.name
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": { "term": { "tenant_id": "162" } },
  "size": 0,
  "aggs": {
    "specifications_names": {
      "nested": { "path": "specifications" },
      "aggs": {
        "name_values": { "terms": { "field": "specifications.name", "size": 20 } }
      }
    }
  }
}'
```

#### 3.2 View one product's full specifications
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "term": { "tenant_id": "162" } },
        { "exists": { "field": "specifications" } }
      ]
    }
  },
  "size": 1,
  "_source": ["spu_id", "title", "specifications"]
}'
```

---

### 4. Count queries

#### 4.1 Count documents that have category1_name
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "exists": { "field": "category1_name" } }
      ]
    }
  }
}'
```

#### 4.2 Count documents that have specifications
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "exists": { "field": "specifications" } }
      ]
    }
  }
}'
```

---

### 5. Diagnosing problem scenarios

#### 5.1 Find documents without category1_name but with a category (data in MySQL, missing in ES)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "term": { "tenant_id": "162" } }
      ],
      "must_not": [
        { "exists": { "field": "category1_name" } }
      ]
    }
  },
  "size": 10,
  "_source": ["spu_id", "title", "category_name_text", "category_path"]
}'
```

#### 5.2 Find documents with options but no specifications (data-transformation issue)
```bash
curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  "query": {
    "bool": {
      "filter": [
        { "term": { "tenant_id": "162" } },
        { "exists": { "field": "option1_name" } }
      ],
      "must_not": [
        { "exists": { "field": "specifications" } }
      ]
    }
  },
  "size": 10,
  "_source": ["spu_id", "title", "option1_name", "option2_name", "option3_name", "specifications"]
}'
```

---
633 620
  621 +## 重排序示例
634 622
635 -重排序: 623 +```bash
636 GET /search_products_tenant_170/_search 624 GET /search_products_tenant_170/_search
637 { 625 {
638 "query": { 626 "query": {
639 - "match": { 627 + "match": {
640 "title.en": { 628 "title.en": {
641 "query": "quick brown fox", 629 "query": "quick brown fox",
642 "minimum_should_match": "90%" 630 "minimum_should_match": "90%"
@@ -644,31 +632,52 @@ GET /search_products_tenant_170/_search @@ -644,31 +632,52 @@ GET /search_products_tenant_170/_search
644 } 632 }
645 }, 633 },
646 "rescore": { 634 "rescore": {
647 - "window_size": 50,  
648 - "query": { 635 + "window_size": 50,
  636 + "query": {
649 "rescore_query": { 637 "rescore_query": {
650 "match_phrase": { 638 "match_phrase": {
651 "title.en": { 639 "title.en": {
652 "query": "quick brown fox", 640 "query": "quick brown fox",
653 - "slop": 50 641 + "slop": 50
654 } 642 }
655 } 643 }
656 } 644 }
657 } 645 }
658 } 646 }
659 } 647 }
  648 +```
  649 +
  650 +---
660 651
  652 +## 检查字段是否存在
661 653
662 -检查某个字段是否存在 654 +```bash
663 curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ 655 curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \
664 - 'http://localhost:9200/search_products_tenant_163/_count' \  
665 - -H 'Content-Type: application/json' \  
666 - -d '{ 656 +'http://localhost:9200/search_products_tenant_163/_count' \
  657 +-H 'Content-Type: application/json' \
  658 +-d '{
667 "query": { 659 "query": {
668 - "bool": {  
669 - "filter": [  
670 - { "exists": { "field": "title_embedding" } }  
671 - ]  
672 - } 660 + "bool": {
  661 + "filter": [
  662 + { "exists": { "field": "image_embedding" } }
  663 + ]
  664 + }
673 } 665 }
674 - }'  
675 \ No newline at end of file 666 \ No newline at end of file
  667 +}'
  668 +
  669 +curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \
  670 +'http://localhost:9200/search_products_tenant_163/_count' \
  671 +-H 'Content-Type: application/json' \
  672 +-d '{
  673 +"query": {
  674 +"nested": {
  675 + "path": "image_embedding",
  676 + "query": {
  677 + "exists": {
  678 + "field": "image_embedding.vector"
  679 + }
  680 + }
  681 +}
  682 +}
  683 +}'
  684 +```
676 \ No newline at end of file 685 \ No newline at end of file
docs/搜索API对接指南-00-总览与快速开始.md
@@ -90,7 +90,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \
 | 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) |
 | 构建ES文档(正式对接) | POST | `/indexer/build-docs` | 基于上游提供的 MySQL 行数据构建 ES doc,不写入 ES,供 Java 等调用后自行写入 |
 | 构建ES文档(测试用) | POST | `/indexer/build-docs-from-db` | 仅在测试/调试时使用,根据 `tenant_id + spu_ids` 内部查库并构建 ES doc |
-| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、semantic_attributes、tags,供微服务组合方式使用 |
+| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags,供微服务组合方式使用 |
 | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 |
 | 健康检查 | GET | `/admin/health` | 服务健康检查 |
 | 获取配置 | GET | `/admin/config` | 获取租户配置 |
docs/搜索API对接指南-05-索引接口(Indexer).md
@@ -13,7 +13,7 @@
 | 查询文档 | POST | `/indexer/documents` | 按 SPU ID 列表查询 ES 文档,不写入 ES |
 | 构建 ES 文档(正式) | POST | `/indexer/build-docs` | 由上游提供 MySQL 行数据,返回 ES-ready 文档,不写 ES |
 | 构建 ES 文档(测试) | POST | `/indexer/build-docs-from-db` | 由本服务查库并构建文档,仅测试/调试用 |
-| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、semantic_attributes、tags(供微服务组合方式使用) |
+| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags(供微服务组合方式使用) |
 | 索引健康检查 | GET | `/indexer/health` | 检查索引服务与数据库连接状态 |
 
 #### 5.0 支撑外部 indexer 的三种方式
@@ -510,7 +510,6 @@ curl -X POST "http://localhost:6004/indexer/build-docs" \
 {
   "spu": {
     "id": 10001,
-    "tenant_id": "162",
     "title": "测试T恤 纯棉短袖",
     "brief": "舒适纯棉,多色可选",
     "description": "这是一款适合日常穿着的纯棉T恤,透气吸汗。",
@@ -521,7 +520,7 @@ curl -X POST "http://localhost:6004/indexer/build-docs" \
     "category_path": "服装/上衣/T恤",
     "fake_sales": 1280,
     "image_src": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg",
-    "tags": "T恤,纯棉,短袖,夏季",
+    "enriched_tags": ["T恤", "纯棉"],
     "create_time": "2024-01-01T00:00:00Z",
     "update_time": "2024-01-01T00:00:00Z"
   },
@@ -570,7 +569,7 @@ curl -X POST "http://localhost:6004/indexer/build-docs" \
     "tenant_id": "170",
     "spu_id": "223167",
     "title": { "en": "...", "zh": "..." },
-    "tags": ["Floerns", "Clothing", "Shoes & Jewelry"],
+    "enriched_tags": ["Floerns", "Clothing", "Shoes & Jewelry"],
     "skus": [
       {
         "sku_id": "3988393",
@@ -649,7 +648,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 ### 5.8 内容理解字段生成接口
 
 - **端点**: `POST /indexer/enrich-content`
-- **描述**: 根据商品内容信息批量生成 **qanchors**(锚文本)、**semantic_attributes**(语义属性)、**tags**(细分标签),供外部 indexer 在「微服务组合」方式下自行拼装 doc 时使用。请求以 `items[]` 传入商品内容字段(必填/可选见下表)。内部逻辑与 `indexer.product_enrich` 一致,支持多语言与 Redis 缓存;单次请求在线程池中执行,避免阻塞其他接口。
+- **描述**: 根据商品内容信息批量生成 **qanchors**(锚文本)、**enriched_attributes**(语义属性)、**enriched_tags**(细分标签),供外部 indexer 在「微服务组合」方式下自行拼装 doc 时使用。请求以 `items[]` 传入商品内容字段(必填/可选见下表)。接口只暴露商品内容输入,语言选择、分析维度与最终字段结构统一由 `indexer.product_enrich` 内部决定;当前返回结果与 `search_products` mapping 保持一致。单次请求在线程池中执行,避免阻塞其他接口。
 
 #### 请求参数
 
@@ -669,8 +668,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
       "title": "12PCS Dolls with Bottles",
       "image_url": "https://example.com/images/223168.jpg"
     }
-  ],
-  "languages": ["zh", "en"]
+  ]
 }
 ```
 
@@ -678,7 +676,6 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 |------|------|------|--------|------|
 | `tenant_id` | string | Y | - | 租户 ID。目前仅用于记录日志,不产生实际作用|
 | `items` | array | Y | - | 待分析列表;**单次最多 50 条** |
-| `languages` | array[string] | N | `["zh", "en"]` | 目标语言,需在支持范围内:`zh`、`en`、`de`、`ru`、`fr` |
 
 `items[]` 字段说明:
 
@@ -696,6 +693,12 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 - `tenant_id`、`spu_id` 只用于请求归属与结果回填,不参与缓存键。
 - 因此,输入内容不变时可跨请求直接命中缓存;任一输入字段变化时,会自然落到新的缓存 key。
 
+语言说明:
+
+- 接口不接受语言控制参数。
+- 返回哪些语言、返回哪些语义维度,统一由 `indexer.product_enrich` 内部逻辑决定。
+- 当前为了与 `search_products` mapping 对齐,返回结果只包含核心索引语言 `zh`、`en`。
+
 批量请求建议:
 - **全量**:强烈建议 尽可能 **20 个 SPU/doc** 攒成一个批次后再请求一次。
 - **增量**:可按时效要求设置时间窗口(例如 **5 分钟**),在窗口内尽可能攒到 **20 个**;达到 20 或窗口到期就发送一次请求。
@@ -711,21 +714,28 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
     {
       "spu_id": "223167",
       "qanchors": {
-        "zh": "短袖T恤,纯棉,男装,夏季",
-        "en": "cotton t-shirt, short sleeve, men, summer"
+        "zh": ["短袖T恤", "纯棉", "男装", "夏季"],
+        "en": ["cotton t-shirt", "short sleeve", "men", "summer"]
       },
-      "semantic_attributes": [
-        { "lang": "zh", "name": "tags", "value": "纯棉" },
-        { "lang": "zh", "name": "usage_scene", "value": "日常" },
-        { "lang": "en", "name": "tags", "value": "cotton" }
-      ],
-      "tags": ["纯棉", "短袖", "男装", "cotton", "short sleeve"]
+      "enriched_tags": {
+        "zh": ["纯棉", "短袖", "男装"],
+        "en": ["cotton", "short sleeve", "men"]
+      },
+      "enriched_attributes": [
+        { "name": "enriched_tags", "value": { "zh": "纯棉" } },
+        { "name": "usage_scene", "value": { "zh": "日常" } },
+        { "name": "enriched_tags", "value": { "en": "cotton" } }
+      ]
     },
     {
      "spu_id": "223168",
-      "qanchors": { "en": "dolls, toys, 12pcs" },
-      "semantic_attributes": [],
-      "tags": ["dolls", "toys"]
+      "qanchors": {
+        "en": ["dolls", "toys", "12pcs"]
+      },
+      "enriched_tags": {
+        "en": ["dolls", "toys"]
+      },
+      "enriched_attributes": []
     }
   ]
 }
@@ -733,10 +743,10 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 
 | 字段 | 类型 | 说明 |
 |------|------|------|
-| `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`semantic_attributes`、`tags` |
-| `results[].qanchors` | object | 按语言键的锚文本(逗号分隔短语),可写入 ES 文档的 `qanchors.{lang}` |
-| `results[].semantic_attributes` | array | 语义属性列表,每项为 `{ "lang", "name", "value" }`,可写入 ES 的 `semantic_attributes` nested 字段 |
-| `results[].tags` | array | 从语义属性中抽取的 `name=tags` 的 value 集合,可与业务原有 `tags` 合并后写入 ES 的 `tags` 字段 |
+| `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`enriched_attributes`、`enriched_tags` |
+| `results[].qanchors` | object | 与 ES `qanchors` 字段同结构,按语言键返回短语数组 |
+| `results[].enriched_tags` | object | 与 ES `enriched_tags` 字段同结构,按语言键返回标签数组 |
+| `results[].enriched_attributes` | array | 与 ES `enriched_attributes` nested 字段同结构,每项为 `{ "name", "value": { "zh"?: "...", "en"?: "..." } }` |
 | `results[].error` | string | 若该条处理失败(如 LLM 异常),会在此字段返回错误信息 |
 
 **错误响应**:
@@ -753,13 +763,12 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \
   "items": [
     {
       "spu_id": "223167",
-      "title": "纯棉短袖T恤 夏季男装",
+      "title": "纯棉短袖T恤 夏季男装夏季男装",
       "brief": "夏季透气纯棉短袖,舒适亲肤",
       "description": "100%棉,圆领版型,适合日常通勤与休闲穿搭。",
       "image_url": "https://example.com/images/223167.jpg"
     }
-  ],
-  "languages": ["zh", "en"]
+  ]
 }'
 ```
 
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
@@ -444,7 +444,7 @@ curl "http://localhost:6006/health"
 
 - **Base URL**: Indexer 服务地址,如 `http://localhost:6004`
 - **路径**: `POST /indexer/enrich-content`
-- **说明**: 根据商品标题批量生成 `qanchors`、`semantic_attributes`、`tags`,用于拼装 ES 文档。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。
+- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`tags`,用于拼装 ES 文档。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。
 
 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。
 
docs/相关性检索优化说明.md
@@ -260,6 +260,238 @@ python ./scripts/eval_search_quality.py
 4. 非 `zh/en` 语种字段动态拼接(如 `de/fr/es`)
 
 
## 搜索 pipeline

**整体图**

这条 pipeline 可以理解成一条"先广召回,再逐层收窄、逐层引入更昂贵信号"的漏斗:

1. Query 解析
2. ES 召回
3. 粗排:只用 ES 内部文本/KNN 信号
4. 款式 SKU 选择 + title suffix
5. 精排:轻量 reranker + 文本/KNN 融合
6. 最终 rerank:重 reranker + fine score + 文本/KNN 融合
7. 分页、补全字段、格式化返回

主控代码在 [searcher.py](/data/saas-search/search/searcher.py),打分与 rerank 细节在 [rerank_client.py](/data/saas-search/search/rerank_client.py),配置定义在 [schema.py](/data/saas-search/config/schema.py) 和 [config.yaml](/data/saas-search/config/config.yaml)。

**先看入口怎么决定走哪条路**
`search()` 从 [searcher.py:348](/data/saas-search/search/searcher.py#L348) 开始,先读租户语言、开关、窗口大小。
关键判断在 [searcher.py:364](/data/saas-search/search/searcher.py#L364) 到 [searcher.py:372](/data/saas-search/search/searcher.py#L372):

- `rerank_window` 现在是 80,见 [config.yaml:256](/data/saas-search/config/config.yaml#L256)
- `coarse_rank.input_window` 是 700,`output_window` 是 240,见 [config.yaml:231](/data/saas-search/config/config.yaml#L231)
- `fine_rank.input_window` 是 240,`output_window` 是 80,见 [config.yaml:245](/data/saas-search/config/config.yaml#L245)

所以如果请求满足 `from_ + size <= rerank_window`,就进入完整漏斗:
- ES 实际取前 `700`
- 粗排后留 `240`
- 精排后留 `80`
- 最终 rerank 也只处理这 `80`
- 最后再做分页切片

如果请求页超出 80,就不走后面的多阶段漏斗,直接按 ES 原逻辑返回。

这点非常重要,因为它决定了"昂贵模型只服务头部结果"。
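
上面的入口分流可以用一个极简 Python 草图表达(窗口数值取自 config.yaml,函数与常量名仅为示意,并非仓库真实代码):

```python
# 入口分流的极简示意:数值取自 config.yaml,命名为示意(非真实实现)
COARSE_INPUT_WINDOW = 700   # ES 实际召回条数
COARSE_OUTPUT_WINDOW = 240  # 粗排保留条数
FINE_OUTPUT_WINDOW = 80     # 精排保留条数
RERANK_WINDOW = 80          # 多阶段漏斗只服务前 80 条


def should_use_rerank_funnel(from_: int, size: int) -> bool:
    """请求页完全落在 rerank 窗口内时,才走完整的多阶段漏斗。"""
    return from_ + size <= RERANK_WINDOW


# 第 1 页(0..19)走漏斗;深翻页(from_=120)直接按 ES 原逻辑返回
assert should_use_rerank_funnel(0, 20) is True
assert should_use_rerank_funnel(120, 20) is False
```

这个判断也解释了为什么深翻页的结果顺序可能与头部页的排序风格不同:它们根本没有经过后面的粗排/精排/最终 rerank。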

**Step 1:Query 解析阶段**
在 [searcher.py:432](/data/saas-search/search/searcher.py#L432) 到 [searcher.py:469](/data/saas-search/search/searcher.py#L469):
`query_parser.parse()` 做几件事:

- 规范化 query
- 检测语言
- 可能做 rewrite
- 生成文本向量
- 如果有图搜,还会带图片向量
- 生成翻译结果
- 识别 style intent

这一步的结果存在 `parsed_query` 里,后面的 ES 查询、style SKU 选择、fine/final rerank 全都依赖它。

**Step 2:ES Query 构建**
ES DSL 从 [searcher.py:471](/data/saas-search/search/searcher.py#L471) 开始,通过 [es_query_builder.py:181](/data/saas-search/search/es_query_builder.py#L181) 的 `build_query()` 生成。

这里的核心结构是:
- 文本召回 clause
- 文本向量 KNN clause
- 图片向量 KNN clause
- 它们一起放进 `bool.should`
- 过滤条件放进 `filter`
- facet 的多选条件走 `post_filter`

KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builder.py#L250) 之后:
- 文本向量 clause 名字固定叫 `knn_query`
- 图片向量 clause 名字固定叫 `image_knn_query`

而文本召回那边,后续 fusion 代码约定会去读:
- 原始 query 的 named query:`base_query`
- 翻译 query 的 named query:`base_query_trans_*`

也就是说,后面的粗排/精排/最终 rerank 并不是重新解释 ES 的总分,而是从 `matched_queries` 里把这些命名子信号拆出来自己重算。
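
命名子查询的组织方式可以用一个简化的 DSL 草图说明(字段、向量与权重均为示意,真实 DSL 由 `build_query()` 生成):

```python
# 简化的 ES 请求体草图:展示 named query 与 KNN clause 的组织方式(参数为示意)
query_vector = [0.1] * 8  # 真实向量由 query_parser 生成

es_body = {
    "query": {
        "bool": {
            "should": [
                # 文本召回:原始 query 与翻译 query 各自命名,
                # 便于后续从 matched_queries 里按名字拆出子信号
                {"multi_match": {"query": "black dress", "fields": ["title.en^3"],
                                 "_name": "base_query"}},
                {"multi_match": {"query": "robe noire", "fields": ["title.fr^3"],
                                 "_name": "base_query_trans_fr"}},
            ],
            "filter": [{"term": {"tenant_id": "170"}}],
        }
    },
    # 文本/图片向量各一路 KNN,clause 名字固定
    "knn": [
        {"field": "title_embedding", "query_vector": query_vector,
         "k": 700, "num_candidates": 1000, "_name": "knn_query"},
        {"field": "image_embedding.vector", "query_vector": query_vector,
         "k": 700, "num_candidates": 1000, "_name": "image_knn_query"},
    ],
}

named = [c["multi_match"]["_name"] for c in es_body["query"]["bool"]["should"]]
assert named == ["base_query", "base_query_trans_fr"]
```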

**Step 3:ES 召回**
在 [searcher.py:579](/data/saas-search/search/searcher.py#L579) 到 [searcher.py:627](/data/saas-search/search/searcher.py#L627)。

这里有个很关键的工程优化:
如果在 rerank window 内,第一次 ES 拉取时会把 `_source` 关掉,只取排序必需信号,见 [searcher.py:517](/data/saas-search/search/searcher.py#L517) 到 [searcher.py:523](/data/saas-search/search/searcher.py#L523)。

原因是:
- 粗排先只需要 `_score` 和 `matched_queries`
- 不需要一上来把 700 条完整商品详情都拉回来
- 等粗排收窄后,再补 fine/final rerank 需要的字段

这是这条 pipeline 很核心的性能设计点。

**Step 4:粗排**
粗排入口在 [searcher.py:638](/data/saas-search/search/searcher.py#L638),真正的打分在 [rerank_client.py:348](/data/saas-search/search/rerank_client.py#L348) 的 `coarse_resort_hits()`。

粗排只看两类信号:
- `text_score`
- `knn_score`

它们都先从统一 helper `_build_hit_signal_bundle()` 里拿,见 [rerank_client.py:246](/data/saas-search/search/rerank_client.py#L246)。

文本分怎么来,见 [rerank_client.py:200](/data/saas-search/search/rerank_client.py#L200):
- `source_score = matched_queries["base_query"]`
- `translation_score = max(base_query_trans_*)`
- `weighted_translation = 0.8 * translation_score`
- `primary_text = max(source_score, weighted_translation)`
- `support_text = 两者中较弱的一路`
- `text_score = primary_text + 0.25 * support_text`

这就是一个 text dismax 思路:
原 query 是主路,翻译 query 是辅助路,但不是简单相加。
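
这段文本分计算可以写成如下草图(0.8 与 0.25 两个系数取自上文,函数名与默认参数为示意):

```python
# 文本分的 dismax 式融合草图:主路取强、辅路弱加权(系数取自上文,命名为示意)
def fuse_text_score(source_score: float, translation_scores: list[float],
                    translation_weight: float = 0.8,
                    support_weight: float = 0.25) -> float:
    translation_score = max(translation_scores, default=0.0)
    weighted_translation = translation_weight * translation_score
    primary = max(source_score, weighted_translation)   # 主路:两路中较强的一路
    support = min(source_score, weighted_translation)   # 辅路:较弱的一路
    return primary + support_weight * support


# 原 query 命中 12.0,最佳翻译命中 10.0:主路 12.0,辅路 0.8*10=8.0
score = fuse_text_score(12.0, [10.0, 6.5])
assert score == 12.0 + 0.25 * 8.0  # 14.0
```

和简单相加相比,这种写法保证翻译路再强也不会盖过原 query 命中,只能在主路之上做有限加成。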

向量分怎么来,见 [rerank_client.py:156](/data/saas-search/search/rerank_client.py#L156):
- `text_knn_score`
- `image_knn_score`
- 分别乘自己的 weight
- 取强的一路做主路
- 弱的一路按 `knn_tie_breaker` 做辅助

然后粗排融合公式在 [rerank_client.py:334](/data/saas-search/search/rerank_client.py#L334):
- `coarse_score = (text_score + text_bias)^text_exponent * (knn_score + knn_bias)^knn_exponent`

配置定义在 [schema.py:124](/data/saas-search/config/schema.py#L124) 和 [config.yaml:231](/data/saas-search/config/config.yaml#L231)。

算完后:
- 写入 `hit["_coarse_score"]`
- 按 `_coarse_score` 排序
- 留前 240,见 [searcher.py:645](/data/saas-search/search/searcher.py#L645)
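
KNN 双路融合与粗排乘法公式合起来,大致如下(bias/exponent/weight 的真实数值在 config.yaml,这里的默认值仅为示意):

```python
# 粗排打分草图:KNN 双路取强 + 乘法融合(默认参数为示意,真实值在 config.yaml)
def fuse_knn_score(text_knn: float, image_knn: float,
                   text_weight: float = 1.0, image_weight: float = 1.0,
                   knn_tie_breaker: float = 0.3) -> float:
    a = text_weight * text_knn
    b = image_weight * image_knn
    return max(a, b) + knn_tie_breaker * min(a, b)  # 强路为主,弱路做 tie breaker


def coarse_score(text_score: float, knn_score: float,
                 text_bias: float = 1.0, text_exponent: float = 1.0,
                 knn_bias: float = 1.0, knn_exponent: float = 1.0) -> float:
    return ((text_score + text_bias) ** text_exponent
            * (knn_score + knn_bias) ** knn_exponent)


hits = [
    {"id": "a", "text": 14.0, "knn": 0.82, "img": 0.10},
    {"id": "b", "text": 9.0, "knn": 0.91, "img": 0.40},
]
for h in hits:
    h["_coarse_score"] = coarse_score(h["text"], fuse_knn_score(h["knn"], h["img"]))
hits.sort(key=lambda h: h["_coarse_score"], reverse=True)  # 真实流程在排序后留前 240
assert hits[0]["id"] == "a"  # 文本信号强的 a 排到前面
```

乘法形式意味着任何一路信号接近 0 都会显著拉低总分,bias 项则避免某一路缺失时直接把分数乘成 0。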

**Step 5:粗排后补字段 + SKU 选择**
粗排完以后,`searcher` 会按 doc template 反推 fine/final rerank 需要哪些 `_source` 字段,然后只补这些字段,见 [searcher.py:669](/data/saas-search/search/searcher.py#L669)。

之后才做 style SKU 选择,见 [searcher.py:696](/data/saas-search/search/searcher.py#L696)。

为什么放在这里?
因为现在 fine rank 也是 reranker,它同样要消费 title suffix。
而 suffix 是 SKU 选择之后写到 hit 上的 `_style_rerank_suffix`。
真正把 suffix 拼进 doc 文本的地方在 [rerank_client.py:65](/data/saas-search/search/rerank_client.py#L65) 到 [rerank_client.py:74](/data/saas-search/search/rerank_client.py#L74)。

所以顺序必须是:
- 先粗排
- 再选 SKU
- 再用带 suffix 的 title 去跑 fine/final rerank

**Step 6:精排**
入口在 [searcher.py:711](/data/saas-search/search/searcher.py#L711),实现是 [rerank_client.py:603](/data/saas-search/search/rerank_client.py#L603) 的 `run_lightweight_rerank()`。

它会做三件事:

1. 用 `build_docs_from_hits()` 把每条商品变成 reranker 输入文本
2. 用 `service_profile="fine"` 调轻量服务
3. 不再只按 `fine_score` 排,而是按融合后的 `_fine_fused_score` 排

精排融合公式现在是:
- `fine_stage_score = fine_factor * text_factor * knn_factor * style_boost`

具体公共计算在 [rerank_client.py:286](/data/saas-search/search/rerank_client.py#L286) 的 `_compute_multiplicative_fusion()`:
- `fine_factor = (fine_score + fine_bias)^fine_exponent`
- `text_factor = (text_score + text_bias)^text_exponent`
- `knn_factor = (knn_score + knn_bias)^knn_exponent`
- 如果命中了 selected SKU,再乘 style boost
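
精排与最终重排共用的乘法融合可以草拟成一个通用函数(真实实现是 `_compute_multiplicative_fusion()`,这里的签名、参数与返回形态均为示意):

```python
# 乘法融合草图:每路信号先加 bias 再取幂,逐项相乘(签名与默认参数为示意)
def multiplicative_fusion(signals: dict[str, float],
                          params: dict[str, tuple[float, float]],
                          style_boost: float = 1.0) -> tuple[float, dict[str, float]]:
    """signals: {"fine": ..., "text": ..., "knn": ...}
    params:  每路信号对应的 (bias, exponent)
    返回 (融合分, 各路 factor);factor 同时可进 debug 载荷,保证展示与排序同源。"""
    factors: dict[str, float] = {}
    score = style_boost
    for name, value in signals.items():
        bias, exponent = params[name]
        factor = (value + bias) ** exponent
        factors[f"{name}_factor"] = factor
        score *= factor
    return score, factors


params = {"fine": (1.0, 1.0), "text": (1.0, 0.5), "knn": (1.0, 1.0)}
score, factors = multiplicative_fusion(
    {"fine": 0.9, "text": 15.0, "knn": 0.8}, params, style_boost=1.2)
assert abs(factors["text_factor"] - 16.0 ** 0.5) < 1e-9  # (15+1)^0.5 = 4.0
```

最终 rerank 只是在 `signals` 里多加一路 `rerank`,同一个计算路径就能同时覆盖两个阶段。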

写回 hit 的字段见 [rerank_client.py:655](/data/saas-search/search/rerank_client.py#L655):
- `_fine_score`
- `_fine_fused_score`
- `_text_score`
- `_knn_score`

排序逻辑在 [rerank_client.py:683](/data/saas-search/search/rerank_client.py#L683):
按 `_fine_fused_score` 降序排,然后留前 80,见 [searcher.py:727](/data/saas-search/search/searcher.py#L727)。

这是本次最关键的变化:现在 fine rank 已经不是"模型裸分排序",而是"模型分 + ES 文本/KNN 信号融合后排序"。

**Step 7:最终 rerank**
入口在 [searcher.py:767](/data/saas-search/search/searcher.py#L767),实现是 [rerank_client.py:538](/data/saas-search/search/rerank_client.py#L538) 的 `run_rerank()`。

它和 fine rank 很像,但多了一个更重的模型分 `rerank_score`。
最终公式是:

- `final_score = rerank_factor * fine_factor * text_factor * knn_factor * style_boost`

也就是:
- fine rank 产生的 `fine_score` 不会丢
- 到最终 rerank 时,它会继续作为一个乘法项参与最终融合

这个逻辑在 [rerank_client.py:468](/data/saas-search/search/rerank_client.py#L468) 到 [rerank_client.py:476](/data/saas-search/search/rerank_client.py#L476)。

算完后写入:
- `_rerank_score`
- `_fused_score`

然后按 `_fused_score` 排序,见 [rerank_client.py:531](/data/saas-search/search/rerank_client.py#L531)。

可以这样理解分工:
- fine rank 负责"轻量快速筛一遍,把 240 缩成 80"
- 最终 rerank 负责"用更昂贵的模型做最终拍板"
- 但最终拍板时不会忽略 fine rank 的结果,而是把 fine score 当成一个先验信号保留进去

**Step 8:分页与字段补全**
多阶段排序只在头部窗口内完成。
真正返回给用户前,在 [searcher.py:828](/data/saas-search/search/searcher.py#L828) 之后还会做两件事:

- 先按 `from_:from_+size` 对最终 80 条切片
- 再按用户原始 `_source` 需求补回页面真正要显示的字段,见 [searcher.py:859](/data/saas-search/search/searcher.py#L859)

所以这条链路是"三次不同目的的数据访问":

- 第一次 ES 查询:只要排序信号
- 第二次按 id 回填:只要 fine/final rerank 需要的字段
- 第三次按页面 ids 回填:只要最终页面显示的字段

这也是为什么它在性能上比"一次性拉全 700 条完整文档"更合理。
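
三次数据访问各自的取数姿态可以用请求草图对比(`_source` 字段列表为示意,并非真实字段清单):

```python
# 三次数据访问的请求形态对比(字段列表为示意)
first_pass = {           # 第一次:只要 _score / matched_queries 等排序信号,不拉正文
    "_source": False,
    "size": 700,
}
rerank_backfill = {      # 第二次:按粗排后保留的 id 回填 fine/final rerank 所需字段
    "ids": ["spu_1", "spu_2"],
    "_source": ["title", "qanchors", "enriched_tags"],
}
page_backfill = {        # 第三次:按当前页 ids 回填页面真正展示的字段
    "ids": ["spu_1"],
    "_source": ["title", "image_src"],
}

# 第一次请求确实不携带任何文档正文
assert first_pass["_source"] is False
```

三份请求体的差异直观体现了"每一步只取这一步需要的数据"这个设计点。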

**Step 9:结果格式化与 debug funnel**
最后在 [searcher.py:906](/data/saas-search/search/searcher.py#L906) 进入结果处理。
这里会把每个商品的阶段信息组装成 `ranking_funnel`,见 [searcher.py:1068](/data/saas-search/search/searcher.py#L1068):

- `es_recall`
- `coarse_rank`
- `fine_rank`
- `rerank`
- `final_page`

其中:
- coarse stage 主要保留 text/translation/knn 的拆分信号
- fine/rerank stage 现在都保留 `fusion_inputs`、`fusion_factors`、`fusion_summary`
- `fusion_summary` 来自真实计算过程本身,见 [rerank_client.py:265](/data/saas-search/search/rerank_client.py#L265)

这点很重要,因为现在"实际排序逻辑"和"debug 展示逻辑"是同源的,不是两套各写一份。
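
一个商品的 `ranking_funnel` 载荷形态大致如下(所有数值为虚构示例,仅示意结构,字段名以真实返回为准):

```python
# ranking_funnel 载荷结构示意(数值为虚构示例)
ranking_funnel = {
    "es_recall": {"rank": 37, "score": 21.4},
    "coarse_rank": {"rank": 12, "text_score": 14.0, "knn_score": 0.85},
    "fine_rank": {
        "rank": 5,
        "rank_change": 7,                 # 相对上一阶段的名次变化
        "fine_score": 0.91,
        "fusion_inputs": {"fine": 0.91, "text": 14.0, "knn": 0.85},
        "fusion_factors": {"fine_factor": 1.91, "text_factor": 3.87,
                           "knn_factor": 1.85},
        "fusion_summary": "fine(1.91) * text(3.87) * knn(1.85)",
    },
    "rerank": {"rank": 2, "rerank_score": 0.97, "fused_score": 26.3},
    "final_page": {"rank": 2},
}

# 五个阶段按漏斗顺序排列,前端按此顺序渲染阶段卡片
assert list(ranking_funnel) == [
    "es_recall", "coarse_rank", "fine_rank", "rerank", "final_page"]
```

前端(app.js 的 `buildStageCard`)直接消费这些字段,不需要自己重算任何分数。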

**一句话总结这条 pipeline**
这条 pipeline 的本质是:

- ES 负责便宜的大范围召回
- 粗排负责只靠 ES 内置信号先做一次结构化筛选
- style SKU 选择负责把商品文本改造成更适合 reranker 理解的输入
- fine rank 负责用轻模型把候选进一步压缩
- final rerank 负责用重模型做最终判定
- 每一层都尽量复用前一层信号,而不是推翻重来
 
## reranker方面:
BAAI/bge-reranker-v2-m3的一个严重badcase:
frontend/static/js/app.js
@@ -546,22 +546,25 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) {
       ${buildStageCard('Fine Rank', 'Lightweight reranker output', [
         { label: 'rank', value: fineStage.rank ?? 'N/A' },
         { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' },
-        { label: 'fine_score', value: formatDebugNumber(fineStage.score ?? debug.fine_score) },
-      ], renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, false))}
+        { label: 'stage_score', value: formatDebugNumber(fineStage.score ?? debug.score) },
+        { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score ?? debug.fine_score) },
+        { label: 'text_score', value: formatDebugNumber(fineStage.text_score ?? debug.text_score) },
+        { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score ?? debug.knn_score) },
+      ], `${renderJsonDetails('Fine Fusion', fineStage.fusion_summary || debug.fusion_summary || fineStage.fusion_factors, false)}${renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, false)}`)}
       ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
         { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' },
         { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' },
+        { label: 'stage_score', value: formatDebugNumber(rerankStage.score ?? rerankStage.fused_score ?? debug.score) },
         { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) },
+        { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score ?? debug.fine_score) },
         { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) },
         { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) },
-        { label: 'text_source', value: formatDebugNumber(rerankStage.signals?.text_source_score ?? debug.text_source_score) },
-        { label: 'text_translation', value: formatDebugNumber(rerankStage.signals?.text_translation_score ?? debug.text_translation_score) },
         { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) },
         { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) },
         { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) },
         { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) },
         { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) },
-      ], renderJsonDetails('Rerank Signals', rerankStage.signals, false))}
+      ], `${renderJsonDetails('Final Fusion', rerankStage.fusion_summary || debug.fusion_summary || rerankStage.fusion_factors, false)}${renderJsonDetails('Rerank Signals', rerankStage.signals, false)}`)}
     </div>
   `;
 
indexer/ANCHORS_AND_SEMANTIC_ATTRIBUTES.md
1 -## qanchors 与 semantic_attributes 设计与索引逻辑说明 1 +## qanchors 与 enriched_attributes 设计与索引逻辑说明
2 2
3 本文档详细说明: 3 本文档详细说明:
4 4
5 - **锚文本字段 `qanchors.{lang}` 的作用与来源** 5 - **锚文本字段 `qanchors.{lang}` 的作用与来源**
6 -- **语义属性字段 `semantic_attributes` 的结构、用途与写入流程** 6 +- **语义属性字段 `enriched_attributes` 的结构、用途与写入流程**
7 - **多语言支持策略(zh / en / de / ru / fr)** 7 - **多语言支持策略(zh / en / de / ru / fr)**
8 - **索引阶段与 LLM 调用的集成方式** 8 - **索引阶段与 LLM 调用的集成方式**
9 9
@@ -43,13 +43,13 @@ @@ -43,13 +43,13 @@
43 - 主搜索:作为额外的全文字段参与 BM25 召回与打分(可在 `search/query_config.py` 中给一定权重); 43 - 主搜索:作为额外的全文字段参与 BM25 召回与打分(可在 `search/query_config.py` 中给一定权重);
44 - Suggestion:`suggestion/builder.py` 会从 `qanchors.{lang}` 中拆分词条作为候选(`source="qanchor"`,权重大于 `title`)。 44 - Suggestion:`suggestion/builder.py` 会从 `qanchors.{lang}` 中拆分词条作为候选(`source="qanchor"`,权重大于 `title`)。
45 45
46 -#### 1.2 `semantic_attributes`:面向过滤/分面的通用语义属性 46 +#### 1.2 `enriched_attributes`:面向过滤/分面的通用语义属性
47 47
48 - **Mapping 位置**:`mappings/search_products.json`,追加的 nested 字段。 48 - **Mapping 位置**:`mappings/search_products.json`,追加的 nested 字段。
49 - **结构**: 49 - **结构**:
50 50
51 ```1392:1410:/home/tw/saas-search/mappings/search_products.json 51 ```1392:1410:/home/tw/saas-search/mappings/search_products.json
52 -"semantic_attributes": { 52 +"enriched_attributes": {
53 "type": "nested", 53 "type": "nested",
54 "properties": { 54 "properties": {
55 "lang": { "type": "keyword" }, // 语言:zh / en / de / ru / fr 55 "lang": { "type": "keyword" }, // 语言:zh / en / de / ru / fr
@@ -199,7 +199,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st @@ -199,7 +199,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st
199 self._fill_llm_attributes(doc, spu_row) 199 self._fill_llm_attributes(doc, spu_row)
200 ``` 200 ```
201 201
202 -也就是说,**每个 SPU 文档默认会尝试补充 qanchors 与 semantic_attributes**。 202 +也就是说,**每个 SPU 文档默认会尝试补充 qanchors 与 enriched_attributes**。
203 203
204 #### 3.2 语言选择策略 204 #### 3.2 语言选择策略
205 205
@@ -237,7 +237,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st @@ -237,7 +237,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st
237 if not spu_id or not title: 237 if not spu_id or not title:
238 return 238 return
239 239
240 - semantic_list = doc.get("semantic_attributes") or [] 240 + semantic_list = doc.get("enriched_attributes") or []
241 qanchors_obj = doc.get("qanchors") or {} 241 qanchors_obj = doc.get("qanchors") or {}
242 242
243 dim_keys = [ 243 dim_keys = [
@@ -291,7 +291,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st @@ -291,7 +291,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st
291 if qanchors_obj: 291 if qanchors_obj:
292 doc["qanchors"] = qanchors_obj 292 doc["qanchors"] = qanchors_obj
293 if semantic_list: 293 if semantic_list:
294 - doc["semantic_attributes"] = semantic_list 294 + doc["enriched_attributes"] = semantic_list
295 ``` 295 ```
296 296
297 要点: 297 要点:
@@ -307,7 +307,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st @@ -307,7 +307,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = &quot;zh&quot;) -&gt; st
307 - 没有 `title`; 307 - 没有 `title`;
308 - 或者 `tenant_config.index_languages` 与 `SUPPORTED_LANGS` 没有交集; 308 - 或者 `tenant_config.index_languages` 与 `SUPPORTED_LANGS` 没有交集;
309 - 或 `DASHSCOPE_API_KEY` 未配置 / LLM 请求报错; 309 - 或 `DASHSCOPE_API_KEY` 未配置 / LLM 请求报错;
310 -- 则 `_fill_llm_attributes` 会在日志中输出 `warning`,**不会抛异常**,索引流程继续,只是该 SPU 在这一轮不会得到 `qanchors` / `semantic_attributes`。 310 +- 则 `_fill_llm_attributes` 会在日志中输出 `warning`,**不会抛异常**,索引流程继续,只是该 SPU 在这一轮不会得到 `qanchors` / `enriched_attributes`。
311 311
312 这保证了整个索引服务在 LLM 不可用时表现为一个普通的“传统索引”,而不会中断。 312 这保证了整个索引服务在 LLM 不可用时表现为一个普通的“传统索引”,而不会中断。
313 313
@@ -344,13 +344,13 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st
344 ```json 344 ```json
345 { 345 {
346 "nested": { 346 "nested": {
347 - "path": "semantic_attributes", 347 + "path": "enriched_attributes",
348 "query": { 348 "query": {
349 "bool": { 349 "bool": {
350 "must": [ 350 "must": [
351 - { "term": { "semantic_attributes.lang": "zh" } },  
352 - { "term": { "semantic_attributes.name": "usage_scene" } },  
353 - { "term": { "semantic_attributes.value": "通勤" } } 351 + { "term": { "enriched_attributes.lang": "zh" } },
  352 + { "term": { "enriched_attributes.name": "usage_scene" } },
  353 + { "term": { "enriched_attributes.value": "通勤" } }
354 ] 354 ]
355 } 355 }
356 } 356 }
@@ -400,7 +400,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st
400 400
401 1. **功能定位**: 401 1. **功能定位**:
402 - `qanchors.{lang}`:更好地贴近用户真实查询词,用于召回与 suggestion; 402 - `qanchors.{lang}`:更好地贴近用户真实查询词,用于召回与 suggestion;
403 - - `semantic_attributes`:以结构化形式承载 LLM 抽取的语义维度,用于 filter / facet。 403 + - `enriched_attributes`:以结构化形式承载 LLM 抽取的语义维度,用于 filter / facet。
404 2. **多语言对齐**: 404 2. **多语言对齐**:
405 - 完全复用租户级 `index_languages` 配置; 405 - 完全复用租户级 `index_languages` 配置;
406 - 对每种语言单独生成锚文本与语义属性,不互相混用。 406 - 对每种语言单独生成锚文本与语义属性,不互相混用。
@@ -409,7 +409,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st
409 - 当 LLM/配置异常时,只是“缺少增强特征”,不影响基础搜索能力。 409 - 当 LLM/配置异常时,只是“缺少增强特征”,不影响基础搜索能力。
410 4. **未来扩展**: 410 4. **未来扩展**:
411 - 可以在 `dim_keys` 中新增维度名(如 `style`, `benefit` 等),只要在 prompt 与解析逻辑中增加对应列即可; 411 - 可以在 `dim_keys` 中新增维度名(如 `style`, `benefit` 等),只要在 prompt 与解析逻辑中增加对应列即可;
412 - - 可以为 `semantic_attributes` 增加额外字段(如 `confidence`、`source`),用于更精细的控制(当前 mapping 为简单版)。 412 + - 可以为 `enriched_attributes` 增加额外字段(如 `confidence`、`source`),用于更精细的控制(当前 mapping 为简单版)。
413 413
414 -如需在查询层面增加基于 `semantic_attributes` 的统一 DSL(类似 `specifications` 的过滤/分面规则),推荐在 `docs/搜索API对接指南.md` 中新增一节,并在 `search/es_query_builder.py` 里封装构造逻辑,避免前端直接拼 nested 查询。 414 +如需在查询层面增加基于 `enriched_attributes` 的统一 DSL(类似 `specifications` 的过滤/分面规则),推荐在 `docs/搜索API对接指南.md` 中新增一节,并在 `search/es_query_builder.py` 里封装构造逻辑,避免前端直接拼 nested 查询。
415 415
indexer/document_transformer.py
@@ -11,9 +11,8 @@ SPU文档转换器 - 公共转换逻辑。
11 import pandas as pd 11 import pandas as pd
12 import numpy as np 12 import numpy as np
13 import logging 13 import logging
14 -import re  
15 from typing import Dict, Any, Optional, List 14 from typing import Dict, Any, Optional, List
16 -from indexer.product_enrich import analyze_products, split_multi_value_field 15 +from indexer.product_enrich import build_index_content_fields
17 16
18 logger = logging.getLogger(__name__) 17 logger = logging.getLogger(__name__)
19 18
@@ -75,6 +74,39 @@ class SPUDocumentTransformer:
75 ) 74 )
76 return translations 75 return translations
77 76
  77 + def _build_core_language_text_object(
  78 + self,
  79 + text: Optional[str],
  80 + source_lang: str,
  81 + scene: str = "general",
  82 + ) -> Dict[str, str]:
  83 + """
  84 + 构建与 mapping 中 core_language_text(_with_keyword) 对齐的对象。
  85 + 当前核心语言固定为 zh/en。
  86 + """
  87 + if not text or not str(text).strip():
  88 + return {}
  89 +
  90 + source_text = str(text).strip()
  91 + obj: Dict[str, str] = {}
  92 +
  93 + if source_lang in CORE_INDEX_LANGUAGES:
  94 + obj[source_lang] = source_text
  95 +
  96 + if self.translator:
  97 + translations = self._translate_index_languages(
  98 + text=source_text,
  99 + source_lang=source_lang,
  100 + index_languages=CORE_INDEX_LANGUAGES,
  101 + scene=scene,
  102 + )
  103 + for lang in CORE_INDEX_LANGUAGES:
  104 + val = translations.get(lang)
  105 + if val and str(val).strip():
  106 + obj[lang] = str(val).strip()
  107 +
  108 + return obj
  109 +
78 def transform_spu_to_doc( 110 def transform_spu_to_doc(
79 self, 111 self,
80 tenant_id: str, 112 tenant_id: str,
@@ -118,10 +150,16 @@ class SPUDocumentTransformer:
118 if self.enable_title_embedding and self.encoder: 150 if self.enable_title_embedding and self.encoder:
119 self._fill_title_embedding(doc) 151 self._fill_title_embedding(doc)
120 152
121 - # Tags 153 + # Tags:统一转成与 mapping 一致的 core-language object
122 if pd.notna(spu_row.get('tags')): 154 if pd.notna(spu_row.get('tags')):
123 tags_str = str(spu_row['tags']) 155 tags_str = str(spu_row['tags'])
124 - doc['tags'] = split_multi_value_field(tags_str) 156 + tags_obj = self._build_core_language_text_object(
  157 + tags_str,
  158 + source_lang=primary_lang,
  159 + scene="general",
  160 + )
  161 + if tags_obj:
  162 + doc['tags'] = tags_obj
125 163
126 # Category相关字段 164 # Category相关字段
127 self._fill_category_fields(doc, spu_row) 165 self._fill_category_fields(doc, spu_row)
@@ -202,7 +240,8 @@ class SPUDocumentTransformer:
202 """ 240 """
203 批量调用 LLM,为一批 doc 填充: 241 批量调用 LLM,为一批 doc 填充:
204 - qanchors.{lang} 242 - qanchors.{lang}
205 - - semantic_attributes (lang/name/value) 243 + - tags.{lang}
  244 + - enriched_attributes[].value.{lang}
206 245
207 设计目标: 246 设计目标:
208 - 尽可能攒批调用 LLM; 247 - 尽可能攒批调用 LLM;
@@ -211,16 +250,8 @@ class SPUDocumentTransformer:
211 if not docs or not spu_rows or len(docs) != len(spu_rows): 250 if not docs or not spu_rows or len(docs) != len(spu_rows):
212 return 251 return
213 252
214 - try:  
215 - index_langs = self.tenant_config.get("index_languages") or ["en", "zh"]  
216 - except Exception:  
217 - index_langs = ["en", "zh"]  
218 - # 不再限制为固定 SUPPORTED_LANGS,直接按照租户配置的 index_languages 调用  
219 - llm_langs = list(dict.fromkeys(index_langs)) # 去重并保持顺序  
220 -  
221 - # 只对有 title 的 SPU 参与 LLM;其余跳过  
222 id_to_idx: Dict[str, int] = {} 253 id_to_idx: Dict[str, int] = {}
223 - products: List[Dict[str, str]] = [] 254 + items: List[Dict[str, str]] = []
224 for i, row in enumerate(spu_rows): 255 for i, row in enumerate(spu_rows):
225 raw_id = row.get("id") 256 raw_id = row.get("id")
226 spu_id = "" if raw_id is None else str(raw_id).strip() 257 spu_id = "" if raw_id is None else str(raw_id).strip()
@@ -228,69 +259,45 @@ class SPUDocumentTransformer:
228 if not spu_id or not title: 259 if not spu_id or not title:
229 continue 260 continue
230 id_to_idx[spu_id] = i 261 id_to_idx[spu_id] = i
231 - products.append({"id": spu_id, "title": title})  
232 - if not products: 262 + items.append(
  263 + {
  264 + "id": spu_id,
  265 + "title": title,
  266 + "brief": str(row.get("brief") or "").strip(),
  267 + "description": str(row.get("description") or "").strip(),
  268 + "image_url": str(row.get("image_src") or "").strip(),
  269 + }
  270 + )
  271 + if not items:
233 return 272 return
234 273
235 tenant_id = str(docs[0].get("tenant_id") or "").strip() or None 274 tenant_id = str(docs[0].get("tenant_id") or "").strip() or None
  275 + try:
  276 + results = build_index_content_fields(items=items, tenant_id=tenant_id)
  277 + except Exception as e:
  278 + logger.warning("LLM batch attribute fill failed: %s", e)
  279 + return
236 280
237 - dim_keys = [  
238 - "tags",  
239 - "target_audience",  
240 - "usage_scene",  
241 - "season",  
242 - "key_attributes",  
243 - "material",  
244 - "features",  
245 - ]  
246 -  
247 - for lang in llm_langs:  
248 - try:  
249 - rows = analyze_products(  
250 - products=products,  
251 - target_lang=lang,  
252 - batch_size=20,  
253 - tenant_id=tenant_id,  
254 - )  
255 - except Exception as e:  
256 - logger.warning("LLM batch attribute fill failed (lang=%s): %s", lang, e) 281 + for result in results:
  282 + spu_id = str(result.get("id") or "").strip()
  283 + if not spu_id:
257 continue 284 continue
  285 + idx = id_to_idx.get(spu_id)
  286 + if idx is None:
  287 + continue
  288 + self._apply_content_enrichment(docs[idx], result)
258 289
259 - for row in rows or []:  
260 - spu_id = str(row.get("id") or "").strip()  
261 - if not spu_id:  
262 - continue  
263 - idx = id_to_idx.get(spu_id)  
264 - if idx is None:  
265 - continue  
266 - self._apply_llm_row(docs[idx], row=row, lang=lang, dim_keys=dim_keys)  
267 -  
268 - def _apply_llm_row(self, doc: Dict[str, Any], row: Dict[str, Any], lang: str, dim_keys: List[str]) -> None:  
269 - """将单条 LLM 输出 row 按既定结构写入 doc(不抛异常)。""" 290 + def _apply_content_enrichment(self, doc: Dict[str, Any], enrichment: Dict[str, Any]) -> None:
  291 + """将 product_enrich 产出的 ES-ready 内容字段写入 doc。"""
270 try: 292 try:
271 - if row.get("error"):  
272 - return  
273 -  
274 - semantic_list = doc.get("semantic_attributes") or []  
275 - qanchors_obj = doc.get("qanchors") or {}  
276 -  
277 - anchor_text = str(row.get("anchor_text") or "").strip()  
278 - if anchor_text:  
279 - qanchors_obj[lang] = anchor_text  
280 -  
281 - for name in dim_keys:  
282 - raw = row.get(name)  
283 - if not raw:  
284 - continue  
285 - for value in split_multi_value_field(str(raw)):  
286 - semantic_list.append({"lang": lang, "name": name, "value": value})  
287 -  
288 - if qanchors_obj:  
289 - doc["qanchors"] = qanchors_obj  
290 - if semantic_list:  
291 - doc["semantic_attributes"] = semantic_list 293 + if enrichment.get("qanchors"):
  294 + doc["qanchors"] = enrichment["qanchors"]
  295 + if enrichment.get("tags"):
  296 + doc["tags"] = enrichment["tags"]
  297 + if enrichment.get("enriched_attributes"):
  298 + doc["enriched_attributes"] = enrichment["enriched_attributes"]
292 except Exception as e: 299 except Exception as e:
293 - logger.warning("Failed to apply LLM row to doc (spu_id=%s, lang=%s): %s", doc.get("spu_id"), lang, e) 300 + logger.warning("Failed to apply enrichment to doc (spu_id=%s): %s", doc.get("spu_id"), e)
294 301
295 def _fill_text_fields( 302 def _fill_text_fields(
296 self, 303 self,
@@ -544,6 +551,23 @@ class SPUDocumentTransformer:
544 if pd.notna(position) and pd.notna(name): 551 if pd.notna(position) and pd.notna(name):
545 option_name_map[int(position)] = str(name) 552 option_name_map[int(position)] = str(name)
546 553
  554 + primary_lang = self.tenant_config.get('primary_language', 'en')
  555 +
  556 + def _build_specification(name: str, raw_value: Any, sku_id: str) -> Optional[Dict[str, Any]]:
  557 + value = "" if raw_value is None else str(raw_value).strip()
  558 + if not value:
  559 + return None
  560 + return {
  561 + 'sku_id': sku_id,
  562 + 'name': name,
  563 + 'value_keyword': value,
  564 + 'value_text': self._build_core_language_text_object(
  565 + value,
  566 + source_lang=primary_lang,
  567 + scene="general",
  568 + ) or normalize_core_text_field_value(value, primary_lang),
  569 + }
  570 +
547 for _, sku_row in skus.iterrows(): 571 for _, sku_row in skus.iterrows():
548 sku_data = self._transform_sku_row(sku_row, option_name_map) 572 sku_data = self._transform_sku_row(sku_row, option_name_map)
549 if sku_data: 573 if sku_data:
@@ -584,23 +608,17 @@ class SPUDocumentTransformer:
584 # 构建specifications(从SKU的option值和option表的name) 608 # 构建specifications(从SKU的option值和option表的name)
585 sku_id = str(sku_row['id']) 609 sku_id = str(sku_row['id'])
586 if pd.notna(sku_row.get('option1')) and 1 in option_name_map: 610 if pd.notna(sku_row.get('option1')) and 1 in option_name_map:
587 - specifications.append({  
588 - 'sku_id': sku_id,  
589 - 'name': option_name_map[1],  
590 - 'value': str(sku_row['option1'])  
591 - }) 611 + spec = _build_specification(option_name_map[1], sku_row['option1'], sku_id)
  612 + if spec:
  613 + specifications.append(spec)
592 if pd.notna(sku_row.get('option2')) and 2 in option_name_map: 614 if pd.notna(sku_row.get('option2')) and 2 in option_name_map:
593 - specifications.append({  
594 - 'sku_id': sku_id,  
595 - 'name': option_name_map[2],  
596 - 'value': str(sku_row['option2'])  
597 - }) 615 + spec = _build_specification(option_name_map[2], sku_row['option2'], sku_id)
  616 + if spec:
  617 + specifications.append(spec)
598 if pd.notna(sku_row.get('option3')) and 3 in option_name_map: 618 if pd.notna(sku_row.get('option3')) and 3 in option_name_map:
599 - specifications.append({  
600 - 'sku_id': sku_id,  
601 - 'name': option_name_map[3],  
602 - 'value': str(sku_row['option3'])  
603 - }) 619 + spec = _build_specification(option_name_map[3], sku_row['option3'], sku_id)
  620 + if spec:
  621 + specifications.append(spec)
604 622
605 return skus_list, prices, compare_prices, sku_prices, sku_weights, sku_weight_units, total_inventory, specifications 623 return skus_list, prices, compare_prices, sku_prices, sku_weights, sku_weight_units, total_inventory, specifications
606 624
@@ -636,82 +654,36 @@
636 654
637 def _fill_llm_attributes(self, doc: Dict[str, Any], spu_row: pd.Series) -> None: 655 def _fill_llm_attributes(self, doc: Dict[str, Any], spu_row: pd.Series) -> None:
638 """ 656 """
639 - 调用 indexer.product_enrich.analyze_products,为当前 SPU 填充: 657 + 调用 indexer.product_enrich 的高层内容理解入口,为当前 SPU 填充:
640 - qanchors.{lang} 658 - qanchors.{lang}
641 - - semantic_attributes (lang/name/value) 659 + - tags.{lang}
  660 + - enriched_attributes[].value.{lang}
642 """ 661 """
643 - try:  
644 - index_langs = self.tenant_config.get("index_languages") or ["en", "zh"]  
645 - except Exception:  
646 - index_langs = ["en", "zh"]  
647 -  
648 - # 不再限制为固定 SUPPORTED_LANGS,直接按照租户配置的 index_languages 调用  
649 - llm_langs = list(dict.fromkeys(index_langs)) # 去重并保持顺序  
650 -  
651 spu_id = str(spu_row.get("id") or "").strip() 662 spu_id = str(spu_row.get("id") or "").strip()
652 title = str(spu_row.get("title") or "").strip() 663 title = str(spu_row.get("title") or "").strip()
653 if not spu_id or not title: 664 if not spu_id or not title:
654 return 665 return
655 666
656 - semantic_list = doc.get("semantic_attributes") or []  
657 - qanchors_obj = doc.get("qanchors") or {}  
658 -  
659 - dim_keys = [  
660 - "tags",  
661 - "target_audience",  
662 - "usage_scene",  
663 - "season",  
664 - "key_attributes",  
665 - "material",  
666 - "features",  
667 - ]  
668 -  
669 tenant_id = doc.get("tenant_id") 667 tenant_id = doc.get("tenant_id")
  668 + try:
  669 + results = build_index_content_fields(
  670 + items=[
  671 + {
  672 + "id": spu_id,
  673 + "title": title,
  674 + "brief": str(spu_row.get("brief") or "").strip(),
  675 + "description": str(spu_row.get("description") or "").strip(),
  676 + "image_url": str(spu_row.get("image_src") or "").strip(),
  677 + }
  678 + ],
  679 + tenant_id=str(tenant_id),
  680 + )
  681 + except Exception as e:
  682 + logger.warning("LLM attribute fill failed for SPU %s: %s", spu_id, e)
  683 + return
670 684
671 - for lang in llm_langs:  
672 - try:  
673 - rows = analyze_products(  
674 - products=[{"id": spu_id, "title": title}],  
675 - target_lang=lang,  
676 - batch_size=1,  
677 - tenant_id=str(tenant_id),  
678 - )  
679 - except Exception as e:  
680 - logger.warning(  
681 - "LLM attribute fill failed for SPU %s, lang=%s: %s",  
682 - spu_id,  
683 - lang,  
684 - e,  
685 - )  
686 - continue  
687 -  
688 - if not rows:  
689 - continue  
690 - row = rows[0] or {}  
691 -  
692 - # qanchors.{lang}  
693 - anchor_text = str(row.get("anchor_text") or "").strip()  
694 - if anchor_text:  
695 - qanchors_obj[lang] = anchor_text  
696 -  
697 - # 语义属性:按各维度拆分为短语  
698 - for name in dim_keys:  
699 - raw = row.get(name)  
700 - if not raw:  
701 - continue  
702 - for value in split_multi_value_field(str(raw)):  
703 - semantic_list.append(  
704 - {  
705 - "lang": lang,  
706 - "name": name,  
707 - "value": value,  
708 - }  
709 - )  
710 -  
711 - if qanchors_obj:  
712 - doc["qanchors"] = qanchors_obj  
713 - if semantic_list:  
714 - doc["semantic_attributes"] = semantic_list 685 + if results:
  686 + self._apply_content_enrichment(doc, results[0])
715 687
716 def _transform_sku_row(self, sku_row: pd.Series, option_name_map: Dict[int, str] = None) -> Optional[Dict[str, Any]]: 688 def _transform_sku_row(self, sku_row: pd.Series, option_name_map: Dict[int, str] = None) -> Optional[Dict[str, Any]]:
717 """ 689 """
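上面 `_build_core_language_text_object` 的行为可以用一个独立的最小示例来理解(示意性质:`translate` 回调是假设的翻译接口,并非仓库中 translator 的真实签名):

```python
from typing import Callable, Dict, Optional

CORE_INDEX_LANGUAGES = ("zh", "en")


def build_core_language_text_object(
    text: Optional[str],
    source_lang: str,
    translate: Optional[Callable[[str, str], Optional[str]]] = None,
) -> Dict[str, str]:
    """构建 {lang: text} 对象,只保留核心索引语言 zh/en。

    translate(text, target_lang) 是假设的翻译回调,
    对应仓库中 self.translator 的角色。
    """
    if not text or not str(text).strip():
        return {}
    source_text = str(text).strip()
    obj: Dict[str, str] = {}
    # 原文语言本身是核心语言时直接写入
    if source_lang in CORE_INDEX_LANGUAGES:
        obj[source_lang] = source_text
    # 缺失的核心语言通过翻译补齐
    if translate is not None:
        for lang in CORE_INDEX_LANGUAGES:
            if lang in obj:
                continue
            val = translate(source_text, lang)
            if val and str(val).strip():
                obj[lang] = str(val).strip()
    return obj
```

与 diff 中的实现略有不同:这里不会用译文覆盖原文语言槽位;空文本返回 `{}`,调用方据此决定是否跳过该字段。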
indexer/product_enrich.py
@@ -146,6 +146,16 @@ if _missing_prompt_langs:
146 146
147 # 多值字段分隔:英文逗号、中文逗号、顿号,及历史约定的 ; | / 与空白 147 # 多值字段分隔:英文逗号、中文逗号、顿号,及历史约定的 ; | / 与空白
148 _MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+") 148 _MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+")
  149 +_CORE_INDEX_LANGUAGES = ("zh", "en")
  150 +_ENRICHED_ATTRIBUTE_DIMENSIONS = (
  151 + "enriched_tags",
  152 + "target_audience",
  153 + "usage_scene",
  154 + "season",
  155 + "key_attributes",
  156 + "material",
  157 + "features",
  158 +)
149 159
150 160
151 def split_multi_value_field(text: Optional[str]) -> List[str]: 161 def split_multi_value_field(text: Optional[str]) -> List[str]:
@@ -158,6 +168,124 @@ def split_multi_value_field(text: Optional[str]) -> List[str]:
158 return [p.strip() for p in _MULTI_VALUE_FIELD_SPLIT_RE.split(s) if p.strip()] 168 return [p.strip() for p in _MULTI_VALUE_FIELD_SPLIT_RE.split(s) if p.strip()]
159 169
160 170
  171 +def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value: Any) -> None:
  172 + parts = split_multi_value_field(raw_value)
  173 + if not parts:
  174 + return
  175 + existing = target.get(lang) or []
  176 + merged = list(dict.fromkeys([str(x).strip() for x in existing if str(x).strip()] + parts))
  177 + if merged:
  178 + target[lang] = merged
  179 +
  180 +
  181 +def _append_enriched_attribute(
  182 + target: List[Dict[str, Any]],
  183 + name: str,
  184 + lang: str,
  185 + raw_value: Any,
  186 +) -> None:
  187 + for value in split_multi_value_field(raw_value):
  188 + if any(
  189 + item.get("name") == name
  190 + and isinstance(item.get("value"), dict)
  191 + and item["value"].get(lang) == value
  192 + for item in target
  193 + ):
  194 + continue
  195 + target.append({"name": name, "value": {lang: value}})
  196 +
  197 +
  198 +def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang: str) -> None:
  199 + if not row or row.get("error"):
  200 + return
  201 +
  202 + anchor_text = str(row.get("anchor_text") or "").strip()
  203 + if anchor_text:
  204 + _append_lang_phrase_map(result["qanchors"], lang=lang, raw_value=anchor_text)
  205 +
  206 + for name in _ENRICHED_ATTRIBUTE_DIMENSIONS:
  207 + raw = row.get(name)
  208 + if not raw:
  209 + continue
  210 + _append_enriched_attribute(result["enriched_attributes"], name=name, lang=lang, raw_value=raw)
  211 + if name == "enriched_tags":
  212 + _append_lang_phrase_map(result["enriched_tags"], lang=lang, raw_value=raw)
  213 +
  214 +
  215 +def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]:
  216 + item_id = str(item.get("id") or item.get("spu_id") or "").strip()
  217 + return {
  218 + "id": item_id,
  219 + "title": str(item.get("title") or "").strip(),
  220 + "brief": str(item.get("brief") or "").strip(),
  221 + "description": str(item.get("description") or "").strip(),
  222 + "image_url": str(item.get("image_url") or "").strip(),
  223 + }
  224 +
  225 +
  226 +def build_index_content_fields(
  227 + items: List[Dict[str, Any]],
  228 + tenant_id: Optional[str] = None,
  229 +) -> List[Dict[str, Any]]:
  230 + """
  231 + 高层入口:生成与 ES mapping 对齐的内容理解字段。
  232 +
  233 + 输入项需包含:
  234 + - `id` 或 `spu_id`
  235 + - `title`
  236 + - 可选 `brief` / `description` / `image_url`
  237 +
  238 + 返回项结构:
  239 + - `id`
  240 + - `qanchors`
  241 + - `enriched_tags`
  242 + - `enriched_attributes`
  243 + - 可选 `error`
  244 +
  245 + 其中:
  246 + - `qanchors.{lang}` 为短语数组
  247 + - `enriched_tags.{lang}` 为标签数组
  248 + """
  249 + normalized_items = [_normalize_index_content_item(item) for item in items]
  250 + if not normalized_items:
  251 + return []
  252 +
  253 + results_by_id: Dict[str, Dict[str, Any]] = {
  254 + item["id"]: {
  255 + "id": item["id"],
  256 + "qanchors": {},
  257 + "enriched_tags": {},
  258 + "enriched_attributes": [],
  259 + }
  260 + for item in normalized_items
  261 + }
  262 +
  263 + for lang in _CORE_INDEX_LANGUAGES:
  264 + try:
  265 + rows = analyze_products(
  266 + products=normalized_items,
  267 + target_lang=lang,
  268 + batch_size=BATCH_SIZE,
  269 + tenant_id=tenant_id,
  270 + )
  271 + except Exception as e:
  272 + logger.warning("build_index_content_fields failed for lang=%s: %s", lang, e)
  273 + for item in normalized_items:
  274 + results_by_id[item["id"]].setdefault("error", str(e))
  275 + continue
  276 +
  277 + for row in rows or []:
  278 + item_id = str(row.get("id") or "").strip()
  279 + if not item_id or item_id not in results_by_id:
  280 + continue
  281 + if row.get("error"):
  282 + results_by_id[item_id].setdefault("error", row["error"])
  283 + continue
  284 + _apply_index_content_row(results_by_id[item_id], row=row, lang=lang)
  285 +
  286 + return [results_by_id[item["id"]] for item in normalized_items]
  287 +
  288 +
161 def _normalize_space(text: str) -> str: 289 def _normalize_space(text: str) -> str:
162 return re.sub(r"\s+", " ", (text or "").strip()) 290 return re.sub(r"\s+", " ", (text or "").strip())
163 291
@@ -526,7 +654,7 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]:
526 "seq_no": parts[0], 654 "seq_no": parts[0],
527 "title": parts[1], # 商品标题(按目标语言) 655 "title": parts[1], # 商品标题(按目标语言)
528 "category_path": parts[2] if len(parts) > 2 else "", # 品类路径 656 "category_path": parts[2] if len(parts) > 2 else "", # 品类路径
529 - "tags": parts[3] if len(parts) > 3 else "", # 细分标签 657 + "enriched_tags": parts[3] if len(parts) > 3 else "", # 细分标签
530 "target_audience": parts[4] if len(parts) > 4 else "", # 适用人群 658 "target_audience": parts[4] if len(parts) > 4 else "", # 适用人群
531 "usage_scene": parts[5] if len(parts) > 5 else "", # 使用场景 659 "usage_scene": parts[5] if len(parts) > 5 else "", # 使用场景
532 "season": parts[6] if len(parts) > 6 else "", # 适用季节 660 "season": parts[6] if len(parts) > 6 else "", # 适用季节
@@ -603,7 +731,7 @@ def process_batch(
603 "title_input": item.get("title", ""), 731 "title_input": item.get("title", ""),
604 "title": "", 732 "title": "",
605 "category_path": "", 733 "category_path": "",
606 - "tags": "", 734 + "enriched_tags": "",
607 "target_audience": "", 735 "target_audience": "",
608 "usage_scene": "", 736 "usage_scene": "",
609 "season": "", 737 "season": "",
@@ -643,7 +771,7 @@ def process_batch(
643 "title_input": batch_data[i]["title"], # 原始输入标题 771 "title_input": batch_data[i]["title"], # 原始输入标题
644 "title": parsed_item.get("title", ""), # 模型生成的标题 772 "title": parsed_item.get("title", ""), # 模型生成的标题
645 "category_path": parsed_item.get("category_path", ""), # 品类路径 773 "category_path": parsed_item.get("category_path", ""), # 品类路径
646 - "tags": parsed_item.get("tags", ""), # 细分标签 774 + "enriched_tags": parsed_item.get("enriched_tags", ""), # 细分标签
647 "target_audience": parsed_item.get("target_audience", ""), # 适用人群 775 "target_audience": parsed_item.get("target_audience", ""), # 适用人群
648 "usage_scene": parsed_item.get("usage_scene", ""), # 使用场景 776 "usage_scene": parsed_item.get("usage_scene", ""), # 使用场景
649 "season": parsed_item.get("season", ""), # 适用季节 777 "season": parsed_item.get("season", ""), # 适用季节
@@ -686,7 +814,7 @@ def process_batch(
686 "title_input": item["title"], 814 "title_input": item["title"],
687 "title": "", 815 "title": "",
688 "category_path": "", 816 "category_path": "",
689 - "tags": "", 817 + "enriched_tags": "",
690 "target_audience": "", 818 "target_audience": "",
691 "usage_scene": "", 819 "usage_scene": "",
692 "season": "", 820 "season": "",
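`build_index_content_fields` 对各语言的 LLM 输出做按 (name, lang, value) 的去重合并,核心逻辑可以用下面的独立片段示意(分隔正则与上文 `_MULTI_VALUE_FIELD_SPLIT_RE` 相同,函数为简化重写,并非直接引用仓库实现):

```python
import re
from typing import Any, Dict, List

# 多值字段分隔:英文/中文逗号、顿号,及 ; | / 与空白
_MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+")


def split_multi_value_field(text: Any) -> List[str]:
    if text is None:
        return []
    return [p.strip() for p in _MULTI_VALUE_FIELD_SPLIT_RE.split(str(text)) if p.strip()]


def append_enriched_attribute(
    target: List[Dict[str, Any]], name: str, lang: str, raw_value: Any
) -> None:
    # 按 (name, lang, value) 去重:同名同语言同值的条目只保留一份
    for value in split_multi_value_field(raw_value):
        exists = any(
            item.get("name") == name
            and isinstance(item.get("value"), dict)
            and item["value"].get(lang) == value
            for item in target
        )
        if not exists:
            target.append({"name": name, "value": {lang: value}})
```

注意每个条目的 `value` 只含单一语言键:同一维度在 zh 与 en 下会产生两条独立条目,而不是合并成一个多语言 value。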
mappings/README.md
@@ -2,32 +2,285 @@
2 2
3 ## 概述 3 ## 概述
4 4
5 -所有租户共享同一个ES mapping结构,直接使用手写的JSON文件,无需通过config.yaml生成 5 +所有租户共享同一个 Elasticsearch mapping 结构
6 6
7 -## Mapping文件 7 +当前目录采用“声明式 Python 规格 + 字段模板 + 最终 JSON 产物”的方式维护 `search_products` 的索引定义:
8 8
9 -- `search_products.json`: 完整的ES索引配置,包括settings和mappings 9 +- `generate_search_products_mapping.py`: 唯一的生成源,包含字段模板、语言列表、分析器配置和递归生成逻辑
  10 +- `search_products.json`: 由脚本生成的完整 ES 索引配置,包括 `settings` 和 `mappings`
  11 +- `search_suggestions.json`: 搜索建议索引配置
10 12
11 -## 使用方式 13 +默认应修改生成脚本中的规格定义,而不是手工编辑 `search_products.json`。
12 14
13 -### 创建索引 15 +## 字段抽象
  16 +
  17 +脚本从业务语义上抽象出 4 类文本模板:
  18 +
  19 +- `all_language_text`: 全语言字段,不带 `keyword`
  20 +- `all_language_text_with_keyword`: 全语言字段,所有受支持语言都带 `keyword`
  21 +- `core_language_text`: 核心索引语言字段,不带 `keyword`
  22 +- `core_language_text_with_keyword`: 核心索引语言字段,核心语言都带 `keyword`
  23 +
  24 +这里的“核心索引语言”不是因为系统只支持两种语言,而是因为所有店铺、所有商品都必须至少产出这两种语言的索引内容。目前核心索引语言固定为:
  25 +
  26 +- `zh`
  27 +- `en`
  28 +
  29 +“全语言”表示 mapping 为原始商品语言预留了更多语言槽位。商品实际灌入时,不要求每个字段把所有语言都填满,只要求:
  30 +
  31 +- 核心索引语言字段必须填充 `zh` 和 `en`
  32 +- 全语言字段必须填充 `zh` 和 `en`
  33 +- 如果商品原始语言属于受支持语言,还应额外填充对应的原始语言字段,例如 `ru`
  34 +
  35 +当前字段大致分为几类:
  36 +
  37 +- 全语言字段:`title`、`keywords`、`brief`、`description`、`vendor`、`category_path`、`category_name_text`
  38 +- 核心索引语言字段:`qanchors`、`enriched_tags`、`option1_values`、`option2_values`、`option3_values`、`enriched_attributes.value`
  39 +- 复合嵌套字段:`image_embedding`、`specifications`、`enriched_attributes`、`skus`
  40 +- 其他标量字段:`tenant_id`、`spu_id`、价格、库存、类目等
  41 +
  42 +生成规则里的几个基础约束:
  43 +
  44 +- 中文字段使用 `index_ik`,并额外设置 `search_analyzer: query_ik`
  45 +- 非中文语言使用各自的 Elasticsearch 内置 analyzer
  46 +- 带 `with_keyword` 的模板会为对应语言增加 `.keyword`
  47 +- `settings.analysis`、`normalizer`、`similarity` 也属于生成结果的一部分,不能只维护 `mappings.properties`
  48 +
  49 +## 索引灌入指引
  50 +
  51 +### 基本原则
  52 +
  53 +1. 所有商品都必须生成核心索引语言版本,也就是 `zh` 和 `en`。
  54 +2. 全语言字段除了必须有 `zh` 和 `en`,还应尽量保留商品原始语言版本。
  55 +3. 如果商品原始语言本身就是 `zh` 或 `en`,则原文直接写入对应字段,另一种核心语言通过翻译补齐。
  56 +4. 如果商品原始语言是 `ru` 这类受支持的非核心语言,则应同时写入原始语言字段和 `zh/en` 翻译结果。
  57 +5. 如果某个值为空,不应写入伪造内容;应在上游清洗后决定是否跳过该字段。
  58 +
  59 +### 核心索引语言字段
  60 +
  61 +这类字段的目标是保证所有商品都至少能被中文和英文检索到。无论商品原始语言是什么,都应通过翻译或标准化得到 `zh` 和 `en` 两份结果。
  62 +
  63 +典型字段:
  64 +
  65 +- `qanchors`
  66 +- `enriched_tags`
  67 +- `option1_values`
  68 +- `option2_values`
  69 +- `option3_values`
  70 +- `enriched_attributes.value`
  71 +- `specifications.value_text`
  72 +
  73 +以 `category_path` 和 `option*_values` 为例,核心语言灌入结果应至少包含:
  74 +
  75 +- `category_path.zh`
  76 +- `category_path.en`
  77 +- `option1_values.zh`
  78 +- `option1_values.en`
  79 +- `option2_values.zh`
  80 +- `option2_values.en`
  81 +- `option3_values.zh`
  82 +- `option3_values.en`
  83 +
  84 +示例:原始商品语言为俄语,原始 `option1_values` 为 `красный, синий`
  85 +
  86 +```json
  87 +{
  88 + "option1_values": {
  89 + "zh": "红色, 蓝色",
  90 + "en": "red, blue"
  91 + }
  92 +}
  93 +```
  94 +
  95 +示例:原始商品语言为俄语,类目路径为 `Одежда > Женская одежда > Куртки`
  96 +
  97 +```json
  98 +{
  99 + "category_path": {
  100 + "zh": "服饰 > 女装 > 夹克",
  101 + "en": "Apparel > Women's Clothing > Jackets",
  102 + "ru": "Одежда > Женская одежда > Куртки"
  103 + }
  104 +}
  105 +```
  106 +
  107 +注意:`category_path` 在 mapping 上属于全语言字段,但在灌入规范上依然要求 `zh/en` 必填。
  108 +
  109 +### 全语言字段
  110 +
  111 +这类字段既要保证 `zh/en` 两个核心索引语言可用,也要尽量保留商品原始语言,以便原语种召回和更自然的检索。
  112 +
  113 +典型字段:
  114 +
  115 +- `title`
  116 +- `keywords`
  117 +- `brief`
  118 +- `description`
  119 +- `vendor`
  120 +- `category_path`
  121 +- `category_name_text`
  122 +
  123 +灌入规则:
  124 +
  125 +1. 找到商品原始语言,例如 `ru`
  126 +2. 原文写入对应语言字段,例如 `title.ru`
  127 +3. 将原文翻译成 `zh` 和 `en`
  128 +4. 分别写入 `title.zh` 和 `title.en`
  129 +
  130 +示例:原始商品语言为俄语,标题为 `Женская зимняя куртка`
  131 +
  132 +```json
  133 +{
  134 + "title": {
  135 + "zh": "女士冬季夹克",
  136 + "en": "Women's winter jacket",
  137 + "ru": "Женская зимняя куртка"
  138 + }
  139 +}
  140 +```
  141 +
  142 +示例:原始商品语言为俄语,类目名称为 `Женские куртки`
  143 +
  144 +```json
  145 +{
  146 + "category_name_text": {
  147 + "zh": "女式夹克",
  148 + "en": "Women's jackets",
  149 + "ru": "Женские куртки"
  150 + }
  151 +}
  152 +```
  153 +
  154 +示例:规格值 `specifications.value_text` / `specifications.value_keyword`
  155 +
  156 +```json
  157 +{
  158 + "specifications": [
  159 + {
  160 + "sku_id": "sku-red-s",
  161 + "name": "color",
  162 + "value_keyword": "красный",
  163 + "value_text": {
  164 + "zh": "红色",
  165 + "en": "red"
  166 + }
  167 + }
  168 + ]
  169 +}
  170 +```
  171 +
  172 +其中:
  173 +
  174 +- `specifications.value_keyword` 保存原始规格值,用于精确过滤 / 分面
  175 +- `specifications.value_text` 保存 `zh/en` 两个核心索引语言版本,用于检索召回
  176 +
  177 +### 原始语言为中文或英文时
  178 +
  179 +如果原始语言就是核心索引语言之一,不需要额外再写第三份语言字段。
  180 +
  181 +示例:原始语言为中文
  182 +
  183 +```json
  184 +{
  185 + "title": {
  186 + "zh": "女士冬季夹克",
  187 + "en": "Women's winter jacket"
  188 + },
  189 + "option1_values": {
  190 + "zh": "红色, 蓝色",
  191 + "en": "red, blue"
  192 + }
  193 +}
  194 +```
  195 +
  196 +示例:原始语言为英文
  197 +
  198 +```json
  199 +{
  200 + "title": {
  201 + "zh": "女士冬季夹克",
  202 + "en": "Women's winter jacket"
  203 + },
  204 + "vendor": {
  205 + "zh": "北境服饰",
  206 + "en": "Northern Apparel"
  207 + }
  208 +}
  209 +```
  210 +
  211 +### 不同字段的灌入方式
  212 +
  213 +可以按下面的方式理解和实现:
  214 +
  215 +- 标量字段:直接写固定值,例如 `tenant_id`、`spu_id`、`min_price`
  216 +- 核心索引语言字段:只生成 `zh/en`
  217 +- 全语言字段:生成 `zh/en`,再按原始语言补一个对应语种字段
  218 +- 嵌套字段:对每个元素内部重复应用同样规则,例如 `specifications[].value_text`、`enriched_attributes[].value`
  219 +
  220 +### 推荐灌入流程
  221 +
  222 +1. 识别商品原始语言
  223 +2. 提取原文标题、描述、类目、规格、属性、选项值等字段
  224 +3. 生成 `zh` 和 `en` 两份核心索引语言内容
  225 +4. 对全语言字段,如果原始语言受支持,则额外写入原始语言字段
  226 +5. 组装最终 ES 文档并写入索引
  227 +
  228 +## 生成 Mapping
  229 +
  230 +在仓库根目录执行:
  231 +
  232 +```bash
  233 +source activate.sh
  234 +python mappings/generate_search_products_mapping.py > mappings/search_products.json
  235 +```
  236 +
  237 +如果只想查看输出而不覆盖文件:
  238 +
  239 +```bash
  240 +source activate.sh
  241 +python mappings/generate_search_products_mapping.py
  242 +```
  243 +
  244 +如果想先生成到临时文件:
  245 +
  246 +```bash
  247 +source activate.sh
  248 +python mappings/generate_search_products_mapping.py > mappings/search_products.generated.json
  249 +```
  250 +
  251 +## 校验 Mapping
  252 +
  253 +确认当前 `search_products.json` 是否与生成规则完全一致:
  254 +
  255 +```bash
  256 +source activate.sh
  257 +python mappings/generate_search_products_mapping.py --check mappings/search_products.json
  258 +```
  259 +
  260 +## 创建索引
14 261
15 ```python 262 ```python
16 from indexer.mapping_generator import load_mapping, create_index_if_not_exists 263 from indexer.mapping_generator import load_mapping, create_index_if_not_exists
17 from utils.es_client import ESClient 264 from utils.es_client import ESClient
18 265
19 es_client = ESClient(hosts=["http://localhost:9200"]) 266 es_client = ESClient(hosts=["http://localhost:9200"])
20 -mapping = load_mapping() # loaded from mappings/search_products.json 267 +mapping = load_mapping()
21 create_index_if_not_exists(es_client, "search_products", mapping) 268 create_index_if_not_exists(es_client, "search_products", mapping)
22 ``` 269 ```
23 270
24 -### Modifying Mapping 271 +## Modifying the Mapping
  272 +
  273 +Recommended flow:
  274 +
  275 +1. Edit `mappings/generate_search_products_mapping.py`
  276 +2. Regenerate `mappings/search_products.json`
  277 +3. Use `--check` or a diff to confirm the change matches expectations
  278 +4. Recreate the index and re-import the data
25 279
26 -Edit the `mappings/search_products.json` file directly, then recreate the index. 280 +Note: Elasticsearch does not support changing the mapping type of an existing field in place; only new fields can be added. To change a field type, you must:
27 281
28 -Note: ES does not support changing the mapping type of an existing field; it only allows adding new fields. To change a field type, you must:
29 1. Delete the old index 282 1. Delete the old index
30 -2. Create the index with the new mapping 283 +2. Create the index with the new mapping
31 3. Re-import the data 284 3. Re-import the data
32 285
33 ## Field Reference 286 ## Field Reference
mappings/generate_search_products_mapping.py 0 → 100644
@@ -0,0 +1,355 @@ @@ -0,0 +1,355 @@
  1 +#!/usr/bin/env python3
  2 +from __future__ import annotations
  3 +
  4 +import argparse
  5 +import json
  6 +from pathlib import Path
  7 +from typing import Any
  8 +
  9 +ALL_LANGUAGE_CODES = [
  10 + "zh",
  11 + "en",
  12 + "ar",
  13 + "hy",
  14 + "eu",
  15 + "pt_br",
  16 + "bg",
  17 + "ca",
  18 + "cjk",
  19 + "cs",
  20 + "da",
  21 + "nl",
  22 + "fi",
  23 + "fr",
  24 + "gl",
  25 + "de",
  26 + "el",
  27 + "hi",
  28 + "hu",
  29 + "id",
  30 + "it",
  31 + "no",
  32 + "fa",
  33 + "pt",
  34 + "ro",
  35 + "ru",
  36 + "es",
  37 + "sv",
  38 + "tr",
  39 + "th",
  40 +]
  41 +
  42 +CORE_INDEX_LANGUAGES = ["zh", "en"]
  43 +
  44 +LANGUAGE_GROUPS = {
  45 + "all": ALL_LANGUAGE_CODES,
  46 + "core": CORE_INDEX_LANGUAGES,
  47 +}
  48 +
  49 +ANALYZERS = {
  50 + "zh": "index_ik",
  51 + "en": "english",
  52 + "ar": "arabic",
  53 + "hy": "armenian",
  54 + "eu": "basque",
  55 + "pt_br": "brazilian",
  56 + "bg": "bulgarian",
  57 + "ca": "catalan",
  58 + "cjk": "cjk",
  59 + "cs": "czech",
  60 + "da": "danish",
  61 + "nl": "dutch",
  62 + "fi": "finnish",
  63 + "fr": "french",
  64 + "gl": "galician",
  65 + "de": "german",
  66 + "el": "greek",
  67 + "hi": "hindi",
  68 + "hu": "hungarian",
  69 + "id": "indonesian",
  70 + "it": "italian",
  71 + "no": "norwegian",
  72 + "fa": "persian",
  73 + "pt": "portuguese",
  74 + "ro": "romanian",
  75 + "ru": "russian",
  76 + "es": "spanish",
  77 + "sv": "swedish",
  78 + "tr": "turkish",
  79 + "th": "thai",
  80 +}
  81 +
  82 +SETTINGS = {
  83 + "number_of_shards": 1,
  84 + "number_of_replicas": 0,
  85 + "refresh_interval": "30s",
  86 + "analysis": {
  87 + "analyzer": {
  88 + "index_ik": {
  89 + "type": "custom",
  90 + "tokenizer": "ik_max_word",
  91 + "filter": ["lowercase", "asciifolding"],
  92 + },
  93 + "query_ik": {
  94 + "type": "custom",
  95 + "tokenizer": "ik_smart",
  96 + "filter": ["lowercase", "asciifolding"],
  97 + },
  98 + },
  99 + "normalizer": {
  100 + "lowercase": {
  101 + "type": "custom",
  102 + "filter": ["lowercase"],
  103 + }
  104 + },
  105 + },
  106 + "similarity": {
  107 + "default": {
  108 + "type": "BM25",
  109 + "b": 0.0,
  110 + "k1": 0.0,
  111 + }
  112 + },
  113 +}
  114 +
  115 +TEXT_FIELD_TEMPLATES = {
  116 + "all_language_text": {
  117 + "language_group": "all",
  118 + "with_keyword": False,
  119 + },
  120 + "all_language_text_with_keyword": {
  121 + "language_group": "all",
  122 + "with_keyword": True,
  123 + },
  124 + "core_language_text": {
  125 + "language_group": "core",
  126 + "with_keyword": False,
  127 + },
  128 + "core_language_text_with_keyword": {
  129 + "language_group": "core",
  130 + "with_keyword": True,
  131 + },
  132 +}
  133 +
  134 +
  135 +def scalar_field(name: str, field_type: str, **extra: Any) -> dict[str, Any]:
  136 + spec = {
  137 + "name": name,
  138 + "kind": "scalar",
  139 + "type": field_type,
  140 + }
  141 + if extra:
  142 + spec["extra"] = extra
  143 + return spec
  144 +
  145 +
  146 +def text_field(name: str, template: str) -> dict[str, Any]:
  147 + return {
  148 + "name": name,
  149 + "kind": "text",
  150 + "template": template,
  151 + }
  152 +
  153 +
  154 +def nested_field(name: str, *fields: dict[str, Any]) -> dict[str, Any]:
  155 + return {
  156 + "name": name,
  157 + "kind": "nested",
  158 + "fields": list(fields),
  159 + }
  160 +
  161 +TEXT_EMBEDDING_SIZE = 1024
  162 +IMAGE_EMBEDDING_SIZE = 768
  163 +
  164 +FIELD_SPECS = [
  165 + scalar_field("spu_id", "keyword"),
  166 + scalar_field("create_time", "date"),
  167 + scalar_field("update_time", "date"),
  168 + text_field("title", "all_language_text"),
  169 + text_field("keywords", "all_language_text_with_keyword"),
  170 + text_field("brief", "all_language_text"),
  171 + text_field("description", "all_language_text"),
  172 + text_field("vendor", "all_language_text_with_keyword"),
  173 + scalar_field("image_url", "keyword", index=False),
  174 + scalar_field(
  175 + "title_embedding",
  176 + "dense_vector",
  177 + dims=TEXT_EMBEDDING_SIZE,
  178 + index=True,
  179 + similarity="dot_product",
  180 + element_type="bfloat16",
  181 + ),
  182 + nested_field(
  183 + "image_embedding",
  184 + scalar_field(
  185 + "vector",
  186 + "dense_vector",
  187 + dims=IMAGE_EMBEDDING_SIZE,
  188 + index=True,
  189 + similarity="dot_product",
  190 + element_type="bfloat16",
  191 + ),
  192 + scalar_field("url", "text"),
  193 + ),
  194 + text_field("category_path", "all_language_text_with_keyword"),
  195 + text_field("category_name_text", "all_language_text_with_keyword"),
  196 + text_field("tags", "all_language_text_with_keyword"),
  197 + scalar_field("category_id", "keyword"),
  198 + scalar_field("category_name", "keyword"),
  199 + scalar_field("category_level", "integer"),
  200 + scalar_field("category1_name", "keyword"),
  201 + scalar_field("category2_name", "keyword"),
  202 + scalar_field("category3_name", "keyword"),
  203 + nested_field(
  204 + "specifications",
  205 + scalar_field("sku_id", "keyword"),
  206 + scalar_field("name", "keyword"),
  207 + scalar_field("value_keyword", "keyword"),
  208 + text_field("value_text", "core_language_text_with_keyword"),
  209 + ),
  210 + text_field("qanchors", "core_language_text"),
  211 + text_field("enriched_tags", "core_language_text_with_keyword"),
  212 + nested_field(
  213 + "enriched_attributes",
  214 + scalar_field("name", "keyword"),
  215 + text_field("value", "core_language_text_with_keyword"),
  216 + ),
  217 + scalar_field("option1_name", "keyword"),
  218 + scalar_field("option2_name", "keyword"),
  219 + scalar_field("option3_name", "keyword"),
  220 + text_field("option1_values", "core_language_text_with_keyword"),
  221 + text_field("option2_values", "core_language_text_with_keyword"),
  222 + text_field("option3_values", "core_language_text_with_keyword"),
  223 + scalar_field("min_price", "float"),
  224 + scalar_field("max_price", "float"),
  225 + scalar_field("compare_at_price", "float"),
  226 + scalar_field("sku_prices", "float"),
  227 + scalar_field("sku_weights", "long"),
  228 + scalar_field("sku_weight_units", "keyword"),
  229 + scalar_field("total_inventory", "long"),
  230 + scalar_field("sales", "long"),
  231 + nested_field(
  232 + "skus",
  233 + scalar_field("sku_id", "keyword"),
  234 + scalar_field("price", "float"),
  235 + scalar_field("compare_at_price", "float"),
  236 + scalar_field("sku_code", "keyword"),
  237 + scalar_field("stock", "long"),
  238 + scalar_field("weight", "float"),
  239 + scalar_field("weight_unit", "keyword"),
  240 + scalar_field("option1_value", "keyword"),
  241 + scalar_field("option2_value", "keyword"),
  242 + scalar_field("option3_value", "keyword"),
  243 + scalar_field("image_src", "keyword", index=False),
  244 + ),
  245 +]
  246 +
  247 +
  248 +def build_keyword_fields() -> dict[str, Any]:
  249 + return {
  250 + "keyword": {
  251 + "type": "keyword",
  252 + "normalizer": "lowercase",
  253 + }
  254 + }
  255 +
  256 +
  257 +def build_text_field(language: str, *, add_keyword: bool) -> dict[str, Any]:
  258 + field = {
  259 + "type": "text",
  260 + "analyzer": ANALYZERS[language],
  261 + }
  262 + if language == "zh":
  263 + field["search_analyzer"] = "query_ik"
  264 + if add_keyword:
  265 + field["fields"] = build_keyword_fields()
  266 + return field
  267 +
  268 +
  269 +def render_field(spec: dict[str, Any]) -> dict[str, Any]:
  270 + kind = spec["kind"]
  271 +
  272 + if kind == "scalar":
  273 + rendered = {"type": spec["type"]}
  274 + rendered.update(spec.get("extra", {}))
  275 + return rendered
  276 +
  277 + if kind == "text":
  278 + template = TEXT_FIELD_TEMPLATES[spec["template"]]
  279 + languages = LANGUAGE_GROUPS[template["language_group"]]
  280 + properties = {}
  281 + for language in languages:
  282 + properties[language] = build_text_field(
  283 + language,
  284 + add_keyword=template["with_keyword"],
  285 + )
  286 + return {
  287 + "type": "object",
  288 + "properties": properties,
  289 + }
  290 +
  291 + if kind == "nested":
  292 + properties = {}
  293 + for child in spec["fields"]:
  294 + properties[child["name"]] = render_field(child)
  295 + return {
  296 + "type": "nested",
  297 + "properties": properties,
  298 + }
  299 +
  300 + raise ValueError(f"Unknown field kind: {kind}")
  301 +
  302 +
  303 +def build_mapping() -> dict[str, Any]:
  304 + properties = {}
  305 + for spec in FIELD_SPECS:
  306 + properties[spec["name"]] = render_field(spec)
  307 +
  308 + return {
  309 + "settings": SETTINGS,
  310 + "mappings": {
  311 + "properties": properties,
  312 + },
  313 + }
  314 +
  315 +
  316 +def render_mapping() -> str:
  317 + return json.dumps(build_mapping(), indent=2, ensure_ascii=False)
  318 +
  319 +
  320 +def main() -> int:
  321 + parser = argparse.ArgumentParser(
  322 + description="Generate mappings/search_products.json from a compact Python spec.",
  323 + )
  324 + parser.add_argument(
  325 + "-o",
  326 + "--output",
  327 + type=Path,
  328 + help="Write the generated mapping to this file. Defaults to stdout.",
  329 + )
  330 + parser.add_argument(
  331 + "--check",
  332 + type=Path,
  333 + help="Fail if the generated output does not exactly match this file.",
  334 + )
  335 + args = parser.parse_args()
  336 +
  337 + rendered = render_mapping()
  338 +
  339 + if args.check is not None:
  340 + existing = args.check.read_text(encoding="utf-8")
  341 + if existing != rendered:
  342 + print(f"Generated mapping does not match {args.check}")
  343 + return 1
  344 + print(f"Generated mapping matches {args.check}")
  345 +
  346 + if args.output is not None:
  347 + args.output.write_text(rendered, encoding="utf-8")
  348 + elif args.check is None:
  349 + print(rendered, end="")
  350 +
  351 + return 0
  352 +
  353 +
  354 +if __name__ == "__main__":
  355 + raise SystemExit(main())
mappings/search_products.json
@@ -41,9 +41,6 @@ @@ -41,9 +41,6 @@
41 }, 41 },
42 "mappings": { 42 "mappings": {
43 "properties": { 43 "properties": {
44 - "tenant_id": {  
45 - "type": "keyword"  
46 - },  
47 "spu_id": { 44 "spu_id": {
48 "type": "keyword" 45 "type": "keyword"
49 }, 46 },
@@ -179,133 +176,313 @@ @@ -179,133 +176,313 @@
179 } 176 }
180 } 177 }
181 }, 178 },
182 - "qanchors": { 179 + "keywords": {
183 "type": "object", 180 "type": "object",
184 "properties": { 181 "properties": {
185 "zh": { 182 "zh": {
186 "type": "text", 183 "type": "text",
187 "analyzer": "index_ik", 184 "analyzer": "index_ik",
188 - "search_analyzer": "query_ik" 185 + "search_analyzer": "query_ik",
  186 + "fields": {
  187 + "keyword": {
  188 + "type": "keyword",
  189 + "normalizer": "lowercase"
  190 + }
  191 + }
189 }, 192 },
190 "en": { 193 "en": {
191 "type": "text", 194 "type": "text",
192 - "analyzer": "english" 195 + "analyzer": "english",
  196 + "fields": {
  197 + "keyword": {
  198 + "type": "keyword",
  199 + "normalizer": "lowercase"
  200 + }
  201 + }
193 }, 202 },
194 "ar": { 203 "ar": {
195 "type": "text", 204 "type": "text",
196 - "analyzer": "arabic" 205 + "analyzer": "arabic",
  206 + "fields": {
  207 + "keyword": {
  208 + "type": "keyword",
  209 + "normalizer": "lowercase"
  210 + }
  211 + }
197 }, 212 },
198 "hy": { 213 "hy": {
199 "type": "text", 214 "type": "text",
200 - "analyzer": "armenian" 215 + "analyzer": "armenian",
  216 + "fields": {
  217 + "keyword": {
  218 + "type": "keyword",
  219 + "normalizer": "lowercase"
  220 + }
  221 + }
201 }, 222 },
202 "eu": { 223 "eu": {
203 "type": "text", 224 "type": "text",
204 - "analyzer": "basque" 225 + "analyzer": "basque",
  226 + "fields": {
  227 + "keyword": {
  228 + "type": "keyword",
  229 + "normalizer": "lowercase"
  230 + }
  231 + }
205 }, 232 },
206 "pt_br": { 233 "pt_br": {
207 "type": "text", 234 "type": "text",
208 - "analyzer": "brazilian" 235 + "analyzer": "brazilian",
  236 + "fields": {
  237 + "keyword": {
  238 + "type": "keyword",
  239 + "normalizer": "lowercase"
  240 + }
  241 + }
209 }, 242 },
210 "bg": { 243 "bg": {
211 "type": "text", 244 "type": "text",
212 - "analyzer": "bulgarian" 245 + "analyzer": "bulgarian",
  246 + "fields": {
  247 + "keyword": {
  248 + "type": "keyword",
  249 + "normalizer": "lowercase"
  250 + }
  251 + }
213 }, 252 },
214 "ca": { 253 "ca": {
215 "type": "text", 254 "type": "text",
216 - "analyzer": "catalan" 255 + "analyzer": "catalan",
  256 + "fields": {
  257 + "keyword": {
  258 + "type": "keyword",
  259 + "normalizer": "lowercase"
  260 + }
  261 + }
217 }, 262 },
218 "cjk": { 263 "cjk": {
219 "type": "text", 264 "type": "text",
220 - "analyzer": "cjk" 265 + "analyzer": "cjk",
  266 + "fields": {
  267 + "keyword": {
  268 + "type": "keyword",
  269 + "normalizer": "lowercase"
  270 + }
  271 + }
221 }, 272 },
222 "cs": { 273 "cs": {
223 "type": "text", 274 "type": "text",
224 - "analyzer": "czech" 275 + "analyzer": "czech",
  276 + "fields": {
  277 + "keyword": {
  278 + "type": "keyword",
  279 + "normalizer": "lowercase"
  280 + }
  281 + }
225 }, 282 },
226 "da": { 283 "da": {
227 "type": "text", 284 "type": "text",
228 - "analyzer": "danish" 285 + "analyzer": "danish",
  286 + "fields": {
  287 + "keyword": {
  288 + "type": "keyword",
  289 + "normalizer": "lowercase"
  290 + }
  291 + }
229 }, 292 },
230 "nl": { 293 "nl": {
231 "type": "text", 294 "type": "text",
232 - "analyzer": "dutch" 295 + "analyzer": "dutch",
  296 + "fields": {
  297 + "keyword": {
  298 + "type": "keyword",
  299 + "normalizer": "lowercase"
  300 + }
  301 + }
233 }, 302 },
234 "fi": { 303 "fi": {
235 "type": "text", 304 "type": "text",
236 - "analyzer": "finnish" 305 + "analyzer": "finnish",
  306 + "fields": {
  307 + "keyword": {
  308 + "type": "keyword",
  309 + "normalizer": "lowercase"
  310 + }
  311 + }
237 }, 312 },
238 "fr": { 313 "fr": {
239 "type": "text", 314 "type": "text",
240 - "analyzer": "french" 315 + "analyzer": "french",
  316 + "fields": {
  317 + "keyword": {
  318 + "type": "keyword",
  319 + "normalizer": "lowercase"
  320 + }
  321 + }
241 }, 322 },
242 "gl": { 323 "gl": {
243 "type": "text", 324 "type": "text",
244 - "analyzer": "galician" 325 + "analyzer": "galician",
  326 + "fields": {
  327 + "keyword": {
  328 + "type": "keyword",
  329 + "normalizer": "lowercase"
  330 + }
  331 + }
245 }, 332 },
246 "de": { 333 "de": {
247 "type": "text", 334 "type": "text",
248 - "analyzer": "german" 335 + "analyzer": "german",
  336 + "fields": {
  337 + "keyword": {
  338 + "type": "keyword",
  339 + "normalizer": "lowercase"
  340 + }
  341 + }
249 }, 342 },
250 "el": { 343 "el": {
251 "type": "text", 344 "type": "text",
252 - "analyzer": "greek" 345 + "analyzer": "greek",
  346 + "fields": {
  347 + "keyword": {
  348 + "type": "keyword",
  349 + "normalizer": "lowercase"
  350 + }
  351 + }
253 }, 352 },
254 "hi": { 353 "hi": {
255 "type": "text", 354 "type": "text",
256 - "analyzer": "hindi" 355 + "analyzer": "hindi",
  356 + "fields": {
  357 + "keyword": {
  358 + "type": "keyword",
  359 + "normalizer": "lowercase"
  360 + }
  361 + }
257 }, 362 },
258 "hu": { 363 "hu": {
259 "type": "text", 364 "type": "text",
260 - "analyzer": "hungarian" 365 + "analyzer": "hungarian",
  366 + "fields": {
  367 + "keyword": {
  368 + "type": "keyword",
  369 + "normalizer": "lowercase"
  370 + }
  371 + }
261 }, 372 },
262 "id": { 373 "id": {
263 "type": "text", 374 "type": "text",
264 - "analyzer": "indonesian" 375 + "analyzer": "indonesian",
  376 + "fields": {
  377 + "keyword": {
  378 + "type": "keyword",
  379 + "normalizer": "lowercase"
  380 + }
  381 + }
265 }, 382 },
266 "it": { 383 "it": {
267 "type": "text", 384 "type": "text",
268 - "analyzer": "italian" 385 + "analyzer": "italian",
  386 + "fields": {
  387 + "keyword": {
  388 + "type": "keyword",
  389 + "normalizer": "lowercase"
  390 + }
  391 + }
269 }, 392 },
270 "no": { 393 "no": {
271 "type": "text", 394 "type": "text",
272 - "analyzer": "norwegian" 395 + "analyzer": "norwegian",
  396 + "fields": {
  397 + "keyword": {
  398 + "type": "keyword",
  399 + "normalizer": "lowercase"
  400 + }
  401 + }
273 }, 402 },
274 "fa": { 403 "fa": {
275 "type": "text", 404 "type": "text",
276 - "analyzer": "persian" 405 + "analyzer": "persian",
  406 + "fields": {
  407 + "keyword": {
  408 + "type": "keyword",
  409 + "normalizer": "lowercase"
  410 + }
  411 + }
277 }, 412 },
278 "pt": { 413 "pt": {
279 "type": "text", 414 "type": "text",
280 - "analyzer": "portuguese" 415 + "analyzer": "portuguese",
  416 + "fields": {
  417 + "keyword": {
  418 + "type": "keyword",
  419 + "normalizer": "lowercase"
  420 + }
  421 + }
281 }, 422 },
282 "ro": { 423 "ro": {
283 "type": "text", 424 "type": "text",
284 - "analyzer": "romanian" 425 + "analyzer": "romanian",
  426 + "fields": {
  427 + "keyword": {
  428 + "type": "keyword",
  429 + "normalizer": "lowercase"
  430 + }
  431 + }
285 }, 432 },
286 "ru": { 433 "ru": {
287 "type": "text", 434 "type": "text",
288 - "analyzer": "russian" 435 + "analyzer": "russian",
  436 + "fields": {
  437 + "keyword": {
  438 + "type": "keyword",
  439 + "normalizer": "lowercase"
  440 + }
  441 + }
289 }, 442 },
290 "es": { 443 "es": {
291 "type": "text", 444 "type": "text",
292 - "analyzer": "spanish" 445 + "analyzer": "spanish",
  446 + "fields": {
  447 + "keyword": {
  448 + "type": "keyword",
  449 + "normalizer": "lowercase"
  450 + }
  451 + }
293 }, 452 },
294 "sv": { 453 "sv": {
295 "type": "text", 454 "type": "text",
296 - "analyzer": "swedish" 455 + "analyzer": "swedish",
  456 + "fields": {
  457 + "keyword": {
  458 + "type": "keyword",
  459 + "normalizer": "lowercase"
  460 + }
  461 + }
297 }, 462 },
298 "tr": { 463 "tr": {
299 "type": "text", 464 "type": "text",
300 - "analyzer": "turkish" 465 + "analyzer": "turkish",
  466 + "fields": {
  467 + "keyword": {
  468 + "type": "keyword",
  469 + "normalizer": "lowercase"
  470 + }
  471 + }
301 }, 472 },
302 "th": { 473 "th": {
303 "type": "text", 474 "type": "text",
304 - "analyzer": "thai" 475 + "analyzer": "thai",
  476 + "fields": {
  477 + "keyword": {
  478 + "type": "keyword",
  479 + "normalizer": "lowercase"
  480 + }
  481 + }
305 } 482 }
306 } 483 }
307 }, 484 },
308 - "keywords": { 485 + "brief": {
309 "type": "object", 486 "type": "object",
310 "properties": { 487 "properties": {
311 "zh": { 488 "zh": {
@@ -431,7 +608,7 @@ @@ -431,7 +608,7 @@
431 } 608 }
432 } 609 }
433 }, 610 },
434 - "brief": { 611 + "description": {
435 "type": "object", 612 "type": "object",
436 "properties": { 613 "properties": {
437 "zh": { 614 "zh": {
@@ -557,139 +734,19 @@ @@ -557,139 +734,19 @@
557 } 734 }
558 } 735 }
559 }, 736 },
560 - "description": { 737 + "vendor": {
561 "type": "object", 738 "type": "object",
562 "properties": { 739 "properties": {
563 "zh": { 740 "zh": {
564 "type": "text", 741 "type": "text",
565 "analyzer": "index_ik", 742 "analyzer": "index_ik",
566 - "search_analyzer": "query_ik"  
567 - },  
568 - "en": {  
569 - "type": "text",  
570 - "analyzer": "english"  
571 - },  
572 - "ar": {  
573 - "type": "text",  
574 - "analyzer": "arabic"  
575 - },  
576 - "hy": {  
577 - "type": "text",  
578 - "analyzer": "armenian"  
579 - },  
580 - "eu": {  
581 - "type": "text",  
582 - "analyzer": "basque"  
583 - },  
584 - "pt_br": {  
585 - "type": "text",  
586 - "analyzer": "brazilian"  
587 - },  
588 - "bg": {  
589 - "type": "text",  
590 - "analyzer": "bulgarian"  
591 - },  
592 - "ca": {  
593 - "type": "text",  
594 - "analyzer": "catalan"  
595 - },  
596 - "cjk": {  
597 - "type": "text",  
598 - "analyzer": "cjk"  
599 - },  
600 - "cs": {  
601 - "type": "text",  
602 - "analyzer": "czech"  
603 - },  
604 - "da": {  
605 - "type": "text",  
606 - "analyzer": "danish"  
607 - },  
608 - "nl": {  
609 - "type": "text",  
610 - "analyzer": "dutch"  
611 - },  
612 - "fi": {  
613 - "type": "text",  
614 - "analyzer": "finnish"  
615 - },  
616 - "fr": {  
617 - "type": "text",  
618 - "analyzer": "french"  
619 - },  
620 - "gl": {  
621 - "type": "text",  
622 - "analyzer": "galician"  
623 - },  
624 - "de": {  
625 - "type": "text",  
626 - "analyzer": "german"  
627 - },  
628 - "el": {  
629 - "type": "text",  
630 - "analyzer": "greek"  
631 - },  
632 - "hi": {  
633 - "type": "text",  
634 - "analyzer": "hindi"  
635 - },  
636 - "hu": {  
637 - "type": "text",  
638 - "analyzer": "hungarian"  
639 - },  
640 - "id": {  
641 - "type": "text",  
642 - "analyzer": "indonesian"  
643 - },  
644 - "it": {  
645 - "type": "text",  
646 - "analyzer": "italian"  
647 - },  
648 - "no": {  
649 - "type": "text",  
650 - "analyzer": "norwegian"  
651 - },  
652 - "fa": {  
653 - "type": "text",  
654 - "analyzer": "persian"  
655 - },  
656 - "pt": {  
657 - "type": "text",  
658 - "analyzer": "portuguese"  
659 - },  
660 - "ro": {  
661 - "type": "text",  
662 - "analyzer": "romanian"  
663 - },  
664 - "ru": {  
665 - "type": "text",  
666 - "analyzer": "russian"  
667 - },  
668 - "es": {  
669 - "type": "text",  
670 - "analyzer": "spanish"  
671 - },  
672 - "sv": {  
673 - "type": "text",  
674 - "analyzer": "swedish"  
675 - },  
676 - "tr": {  
677 - "type": "text",  
678 - "analyzer": "turkish"  
679 - },  
680 - "th": {  
681 - "type": "text",  
682 - "analyzer": "thai"  
683 - }  
684 - }  
685 - },  
686 - "vendor": {  
687 - "type": "object",  
688 - "properties": {  
689 - "zh": {  
690 - "type": "text",  
691 - "analyzer": "index_ik",  
692 - "search_analyzer": "query_ik" 743 + "search_analyzer": "query_ik",
  744 + "fields": {
  745 + "keyword": {
  746 + "type": "keyword",
  747 + "normalizer": "lowercase"
  748 + }
  749 + }
693 }, 750 },
694 "en": { 751 "en": {
695 "type": "text", 752 "type": "text",
@@ -983,9 +1040,6 @@ @@ -983,9 +1040,6 @@
983 } 1040 }
984 } 1041 }
985 }, 1042 },
986 - "tags": {  
987 - "type": "keyword"  
988 - },  
989 "image_url": { 1043 "image_url": {
990 "type": "keyword", 1044 "type": "keyword",
991 "index": false 1045 "index": false
@@ -1012,144 +1066,309 @@ @@ -1012,144 +1066,309 @@
1012 } 1066 }
1013 } 1067 }
1014 }, 1068 },
1015 - "image_embedding_512": {  
1016 - "type": "nested",  
1017 - "properties": {  
1018 - "vector": {  
1019 - "type": "dense_vector",  
1020 - "dims": 512,  
1021 - "index": true,  
1022 - "similarity": "dot_product",  
1023 - "element_type": "bfloat16"  
1024 - },  
1025 - "url": {  
1026 - "type": "text"  
1027 - }  
1028 - }  
1029 - },  
1030 "category_path": { 1069 "category_path": {
1031 "type": "object", 1070 "type": "object",
1032 "properties": { 1071 "properties": {
1033 "zh": { 1072 "zh": {
1034 "type": "text", 1073 "type": "text",
1035 "analyzer": "index_ik", 1074 "analyzer": "index_ik",
1036 - "search_analyzer": "query_ik" 1075 + "search_analyzer": "query_ik",
  1076 + "fields": {
  1077 + "keyword": {
  1078 + "type": "keyword",
  1079 + "normalizer": "lowercase"
  1080 + }
  1081 + }
1037 }, 1082 },
1038 "en": { 1083 "en": {
1039 "type": "text", 1084 "type": "text",
1040 - "analyzer": "english" 1085 + "analyzer": "english",
  1086 + "fields": {
  1087 + "keyword": {
  1088 + "type": "keyword",
  1089 + "normalizer": "lowercase"
  1090 + }
  1091 + }
1041 }, 1092 },
1042 "ar": { 1093 "ar": {
1043 "type": "text", 1094 "type": "text",
1044 - "analyzer": "arabic" 1095 + "analyzer": "arabic",
  1096 + "fields": {
  1097 + "keyword": {
  1098 + "type": "keyword",
  1099 + "normalizer": "lowercase"
  1100 + }
  1101 + }
1045 }, 1102 },
1046 "hy": { 1103 "hy": {
1047 "type": "text", 1104 "type": "text",
1048 - "analyzer": "armenian" 1105 + "analyzer": "armenian",
  1106 + "fields": {
  1107 + "keyword": {
  1108 + "type": "keyword",
  1109 + "normalizer": "lowercase"
  1110 + }
  1111 + }
1049 }, 1112 },
1050 "eu": { 1113 "eu": {
1051 "type": "text", 1114 "type": "text",
1052 - "analyzer": "basque" 1115 + "analyzer": "basque",
  1116 + "fields": {
  1117 + "keyword": {
  1118 + "type": "keyword",
  1119 + "normalizer": "lowercase"
  1120 + }
  1121 + }
1053 }, 1122 },
1054 "pt_br": { 1123 "pt_br": {
1055 "type": "text", 1124 "type": "text",
1056 - "analyzer": "brazilian" 1125 + "analyzer": "brazilian",
  1126 + "fields": {
  1127 + "keyword": {
  1128 + "type": "keyword",
  1129 + "normalizer": "lowercase"
  1130 + }
  1131 + }
1057 }, 1132 },
1058 "bg": { 1133 "bg": {
1059 "type": "text", 1134 "type": "text",
1060 - "analyzer": "bulgarian" 1135 + "analyzer": "bulgarian",
  1136 + "fields": {
  1137 + "keyword": {
  1138 + "type": "keyword",
  1139 + "normalizer": "lowercase"
  1140 + }
  1141 + }
1061 }, 1142 },
1062 "ca": { 1143 "ca": {
1063 "type": "text", 1144 "type": "text",
1064 - "analyzer": "catalan" 1145 + "analyzer": "catalan",
  1146 + "fields": {
  1147 + "keyword": {
  1148 + "type": "keyword",
  1149 + "normalizer": "lowercase"
  1150 + }
  1151 + }
1065 }, 1152 },
1066 "cjk": { 1153 "cjk": {
1067 "type": "text", 1154 "type": "text",
1068 - "analyzer": "cjk" 1155 + "analyzer": "cjk",
  1156 + "fields": {
  1157 + "keyword": {
  1158 + "type": "keyword",
  1159 + "normalizer": "lowercase"
  1160 + }
  1161 + }
1069 }, 1162 },
1070 "cs": { 1163 "cs": {
1071 "type": "text", 1164 "type": "text",
1072 - "analyzer": "czech" 1165 + "analyzer": "czech",
  1166 + "fields": {
  1167 + "keyword": {
  1168 + "type": "keyword",
  1169 + "normalizer": "lowercase"
  1170 + }
  1171 + }
1073 }, 1172 },
1074 "da": { 1173 "da": {
1075 "type": "text", 1174 "type": "text",
1076 - "analyzer": "danish" 1175 + "analyzer": "danish",
  1176 + "fields": {
  1177 + "keyword": {
  1178 + "type": "keyword",
  1179 + "normalizer": "lowercase"
  1180 + }
  1181 + }
1077 }, 1182 },
1078 "nl": { 1183 "nl": {
1079 "type": "text", 1184 "type": "text",
1080 - "analyzer": "dutch" 1185 + "analyzer": "dutch",
  1186 + "fields": {
  1187 + "keyword": {
  1188 + "type": "keyword",
  1189 + "normalizer": "lowercase"
  1190 + }
  1191 + }
1081 }, 1192 },
1082 "fi": { 1193 "fi": {
1083 "type": "text", 1194 "type": "text",
1084 - "analyzer": "finnish" 1195 + "analyzer": "finnish",
  1196 + "fields": {
  1197 + "keyword": {
  1198 + "type": "keyword",
  1199 + "normalizer": "lowercase"
  1200 + }
  1201 + }
1085 }, 1202 },
1086 "fr": { 1203 "fr": {
1087 "type": "text", 1204 "type": "text",
1088 - "analyzer": "french" 1205 + "analyzer": "french",
  1206 + "fields": {
  1207 + "keyword": {
  1208 + "type": "keyword",
  1209 + "normalizer": "lowercase"
  1210 + }
  1211 + }
1089 }, 1212 },
1090 "gl": { 1213 "gl": {
1091 "type": "text", 1214 "type": "text",
1092 - "analyzer": "galician"  
1093 - },  
1094 - "de": { 1215 + "analyzer": "galician",
  1216 + "fields": {
  1217 + "keyword": {
  1218 + "type": "keyword",
  1219 + "normalizer": "lowercase"
  1220 + }
  1221 + }
  1222 + },
  1223 + "de": {
1095 "type": "text", 1224 "type": "text",
1096 - "analyzer": "german" 1225 + "analyzer": "german",
  1226 + "fields": {
  1227 + "keyword": {
  1228 + "type": "keyword",
  1229 + "normalizer": "lowercase"
  1230 + }
  1231 + }
1097 }, 1232 },
1098 "el": { 1233 "el": {
1099 "type": "text", 1234 "type": "text",
1100 - "analyzer": "greek" 1235 + "analyzer": "greek",
  1236 + "fields": {
  1237 + "keyword": {
  1238 + "type": "keyword",
  1239 + "normalizer": "lowercase"
  1240 + }
  1241 + }
1101 }, 1242 },
1102 "hi": { 1243 "hi": {
1103 "type": "text", 1244 "type": "text",
1104 - "analyzer": "hindi" 1245 + "analyzer": "hindi",
  1246 + "fields": {
  1247 + "keyword": {
  1248 + "type": "keyword",
  1249 + "normalizer": "lowercase"
  1250 + }
  1251 + }
1105 }, 1252 },
1106 "hu": { 1253 "hu": {
1107 "type": "text", 1254 "type": "text",
1108 - "analyzer": "hungarian" 1255 + "analyzer": "hungarian",
  1256 + "fields": {
  1257 + "keyword": {
  1258 + "type": "keyword",
  1259 + "normalizer": "lowercase"
  1260 + }
  1261 + }
1109 }, 1262 },
1110 "id": { 1263 "id": {
1111 "type": "text", 1264 "type": "text",
1112 - "analyzer": "indonesian" 1265 + "analyzer": "indonesian",
  1266 + "fields": {
  1267 + "keyword": {
  1268 + "type": "keyword",
  1269 + "normalizer": "lowercase"
  1270 + }
  1271 + }
1113 }, 1272 },
1114 "it": { 1273 "it": {
1115 "type": "text", 1274 "type": "text",
1116 - "analyzer": "italian" 1275 + "analyzer": "italian",
  1276 + "fields": {
  1277 + "keyword": {
  1278 + "type": "keyword",
  1279 + "normalizer": "lowercase"
  1280 + }
  1281 + }
1117 }, 1282 },
1118 "no": { 1283 "no": {
1119 "type": "text", 1284 "type": "text",
1120 - "analyzer": "norwegian" 1285 + "analyzer": "norwegian",
  1286 + "fields": {
  1287 + "keyword": {
  1288 + "type": "keyword",
  1289 + "normalizer": "lowercase"
  1290 + }
  1291 + }
1121 }, 1292 },
1122 "fa": { 1293 "fa": {
1123 "type": "text", 1294 "type": "text",
1124 - "analyzer": "persian" 1295 + "analyzer": "persian",
  1296 + "fields": {
  1297 + "keyword": {
  1298 + "type": "keyword",
  1299 + "normalizer": "lowercase"
  1300 + }
  1301 + }
1125 }, 1302 },
1126 "pt": { 1303 "pt": {
1127 "type": "text", 1304 "type": "text",
1128 - "analyzer": "portuguese" 1305 + "analyzer": "portuguese",
  1306 + "fields": {
  1307 + "keyword": {
  1308 + "type": "keyword",
  1309 + "normalizer": "lowercase"
  1310 + }
  1311 + }
1129 }, 1312 },
1130 "ro": { 1313 "ro": {
1131 "type": "text", 1314 "type": "text",
1132 - "analyzer": "romanian" 1315 + "analyzer": "romanian",
  1316 + "fields": {
  1317 + "keyword": {
  1318 + "type": "keyword",
  1319 + "normalizer": "lowercase"
  1320 + }
  1321 + }
1133 }, 1322 },
1134 "ru": { 1323 "ru": {
1135 "type": "text", 1324 "type": "text",
1136 - "analyzer": "russian" 1325 + "analyzer": "russian",
  1326 + "fields": {
  1327 + "keyword": {
  1328 + "type": "keyword",
  1329 + "normalizer": "lowercase"
  1330 + }
  1331 + }
1137 }, 1332 },
1138 "es": { 1333 "es": {
1139 "type": "text", 1334 "type": "text",
1140 - "analyzer": "spanish" 1335 + "analyzer": "spanish",
  1336 + "fields": {
  1337 + "keyword": {
  1338 + "type": "keyword",
  1339 + "normalizer": "lowercase"
  1340 + }
  1341 + }
1141 }, 1342 },
1142 "sv": { 1343 "sv": {
1143 "type": "text", 1344 "type": "text",
1144 - "analyzer": "swedish" 1345 + "analyzer": "swedish",
  1346 + "fields": {
  1347 + "keyword": {
  1348 + "type": "keyword",
  1349 + "normalizer": "lowercase"
  1350 + }
  1351 + }
1145 }, 1352 },
1146 "tr": { 1353 "tr": {
1147 "type": "text", 1354 "type": "text",
1148 - "analyzer": "turkish" 1355 + "analyzer": "turkish",
  1356 + "fields": {
  1357 + "keyword": {
  1358 + "type": "keyword",
  1359 + "normalizer": "lowercase"
  1360 + }
  1361 + }
1149 }, 1362 },
1150 "th": { 1363 "th": {
1151 "type": "text", 1364 "type": "text",
1152 - "analyzer": "thai" 1365 + "analyzer": "thai",
  1366 + "fields": {
  1367 + "keyword": {
  1368 + "type": "keyword",
  1369 + "normalizer": "lowercase"
  1370 + }
  1371 + }
1153 } 1372 }
1154 } 1373 }
1155 }, 1374 },
@@ -1159,123 +1378,609 @@
1159 "zh": { 1378 "zh": {
1160 "type": "text", 1379 "type": "text",
1161 "analyzer": "index_ik", 1380 "analyzer": "index_ik",
1162 - "search_analyzer": "query_ik" 1381 + "search_analyzer": "query_ik",
  1382 + "fields": {
  1383 + "keyword": {
  1384 + "type": "keyword",
  1385 + "normalizer": "lowercase"
  1386 + }
  1387 + }
1163 }, 1388 },
1164 "en": { 1389 "en": {
1165 "type": "text", 1390 "type": "text",
1166 - "analyzer": "english" 1391 + "analyzer": "english",
  1392 + "fields": {
  1393 + "keyword": {
  1394 + "type": "keyword",
  1395 + "normalizer": "lowercase"
  1396 + }
  1397 + }
1167 }, 1398 },
1168 "ar": { 1399 "ar": {
1169 "type": "text", 1400 "type": "text",
1170 - "analyzer": "arabic" 1401 + "analyzer": "arabic",
  1402 + "fields": {
  1403 + "keyword": {
  1404 + "type": "keyword",
  1405 + "normalizer": "lowercase"
  1406 + }
  1407 + }
1171 }, 1408 },
1172 "hy": { 1409 "hy": {
1173 "type": "text", 1410 "type": "text",
1174 - "analyzer": "armenian" 1411 + "analyzer": "armenian",
  1412 + "fields": {
  1413 + "keyword": {
  1414 + "type": "keyword",
  1415 + "normalizer": "lowercase"
  1416 + }
  1417 + }
1175 }, 1418 },
1176 "eu": { 1419 "eu": {
1177 "type": "text", 1420 "type": "text",
1178 - "analyzer": "basque" 1421 + "analyzer": "basque",
  1422 + "fields": {
  1423 + "keyword": {
  1424 + "type": "keyword",
  1425 + "normalizer": "lowercase"
  1426 + }
  1427 + }
1179 }, 1428 },
1180 "pt_br": { 1429 "pt_br": {
1181 "type": "text", 1430 "type": "text",
1182 - "analyzer": "brazilian" 1431 + "analyzer": "brazilian",
  1432 + "fields": {
  1433 + "keyword": {
  1434 + "type": "keyword",
  1435 + "normalizer": "lowercase"
  1436 + }
  1437 + }
1183 }, 1438 },
1184 "bg": { 1439 "bg": {
1185 "type": "text", 1440 "type": "text",
1186 - "analyzer": "bulgarian" 1441 + "analyzer": "bulgarian",
  1442 + "fields": {
  1443 + "keyword": {
  1444 + "type": "keyword",
  1445 + "normalizer": "lowercase"
  1446 + }
  1447 + }
1187 }, 1448 },
1188 "ca": { 1449 "ca": {
1189 "type": "text", 1450 "type": "text",
1190 - "analyzer": "catalan" 1451 + "analyzer": "catalan",
  1452 + "fields": {
  1453 + "keyword": {
  1454 + "type": "keyword",
  1455 + "normalizer": "lowercase"
  1456 + }
  1457 + }
1191 }, 1458 },
1192 "cjk": { 1459 "cjk": {
1193 "type": "text", 1460 "type": "text",
1194 - "analyzer": "cjk" 1461 + "analyzer": "cjk",
  1462 + "fields": {
  1463 + "keyword": {
  1464 + "type": "keyword",
  1465 + "normalizer": "lowercase"
  1466 + }
  1467 + }
1195 }, 1468 },
1196 "cs": { 1469 "cs": {
1197 "type": "text", 1470 "type": "text",
1198 - "analyzer": "czech" 1471 + "analyzer": "czech",
  1472 + "fields": {
  1473 + "keyword": {
  1474 + "type": "keyword",
  1475 + "normalizer": "lowercase"
  1476 + }
  1477 + }
1199 }, 1478 },
1200 "da": { 1479 "da": {
1201 "type": "text", 1480 "type": "text",
1202 - "analyzer": "danish" 1481 + "analyzer": "danish",
  1482 + "fields": {
  1483 + "keyword": {
  1484 + "type": "keyword",
  1485 + "normalizer": "lowercase"
  1486 + }
  1487 + }
1203 }, 1488 },
1204 "nl": { 1489 "nl": {
1205 "type": "text", 1490 "type": "text",
1206 - "analyzer": "dutch" 1491 + "analyzer": "dutch",
  1492 + "fields": {
  1493 + "keyword": {
  1494 + "type": "keyword",
  1495 + "normalizer": "lowercase"
  1496 + }
  1497 + }
1207 }, 1498 },
1208 "fi": { 1499 "fi": {
1209 "type": "text", 1500 "type": "text",
1210 - "analyzer": "finnish" 1501 + "analyzer": "finnish",
  1502 + "fields": {
  1503 + "keyword": {
  1504 + "type": "keyword",
  1505 + "normalizer": "lowercase"
  1506 + }
  1507 + }
1211 }, 1508 },
1212 "fr": { 1509 "fr": {
1213 "type": "text", 1510 "type": "text",
1214 - "analyzer": "french" 1511 + "analyzer": "french",
  1512 + "fields": {
  1513 + "keyword": {
  1514 + "type": "keyword",
  1515 + "normalizer": "lowercase"
  1516 + }
  1517 + }
1215 }, 1518 },
1216 "gl": { 1519 "gl": {
1217 "type": "text", 1520 "type": "text",
1218 - "analyzer": "galician" 1521 + "analyzer": "galician",
  1522 + "fields": {
  1523 + "keyword": {
  1524 + "type": "keyword",
  1525 + "normalizer": "lowercase"
  1526 + }
  1527 + }
1219 }, 1528 },
1220 "de": { 1529 "de": {
1221 "type": "text", 1530 "type": "text",
1222 - "analyzer": "german" 1531 + "analyzer": "german",
  1532 + "fields": {
  1533 + "keyword": {
  1534 + "type": "keyword",
  1535 + "normalizer": "lowercase"
  1536 + }
  1537 + }
  1538 + },
  1539 + "el": {
  1540 + "type": "text",
  1541 + "analyzer": "greek",
  1542 + "fields": {
  1543 + "keyword": {
  1544 + "type": "keyword",
  1545 + "normalizer": "lowercase"
  1546 + }
  1547 + }
  1548 + },
  1549 + "hi": {
  1550 + "type": "text",
  1551 + "analyzer": "hindi",
  1552 + "fields": {
  1553 + "keyword": {
  1554 + "type": "keyword",
  1555 + "normalizer": "lowercase"
  1556 + }
  1557 + }
  1558 + },
  1559 + "hu": {
  1560 + "type": "text",
  1561 + "analyzer": "hungarian",
  1562 + "fields": {
  1563 + "keyword": {
  1564 + "type": "keyword",
  1565 + "normalizer": "lowercase"
  1566 + }
  1567 + }
  1568 + },
  1569 + "id": {
  1570 + "type": "text",
  1571 + "analyzer": "indonesian",
  1572 + "fields": {
  1573 + "keyword": {
  1574 + "type": "keyword",
  1575 + "normalizer": "lowercase"
  1576 + }
  1577 + }
  1578 + },
  1579 + "it": {
  1580 + "type": "text",
  1581 + "analyzer": "italian",
  1582 + "fields": {
  1583 + "keyword": {
  1584 + "type": "keyword",
  1585 + "normalizer": "lowercase"
  1586 + }
  1587 + }
  1588 + },
  1589 + "no": {
  1590 + "type": "text",
  1591 + "analyzer": "norwegian",
  1592 + "fields": {
  1593 + "keyword": {
  1594 + "type": "keyword",
  1595 + "normalizer": "lowercase"
  1596 + }
  1597 + }
  1598 + },
  1599 + "fa": {
  1600 + "type": "text",
  1601 + "analyzer": "persian",
  1602 + "fields": {
  1603 + "keyword": {
  1604 + "type": "keyword",
  1605 + "normalizer": "lowercase"
  1606 + }
  1607 + }
  1608 + },
  1609 + "pt": {
  1610 + "type": "text",
  1611 + "analyzer": "portuguese",
  1612 + "fields": {
  1613 + "keyword": {
  1614 + "type": "keyword",
  1615 + "normalizer": "lowercase"
  1616 + }
  1617 + }
  1618 + },
  1619 + "ro": {
  1620 + "type": "text",
  1621 + "analyzer": "romanian",
  1622 + "fields": {
  1623 + "keyword": {
  1624 + "type": "keyword",
  1625 + "normalizer": "lowercase"
  1626 + }
  1627 + }
  1628 + },
  1629 + "ru": {
  1630 + "type": "text",
  1631 + "analyzer": "russian",
  1632 + "fields": {
  1633 + "keyword": {
  1634 + "type": "keyword",
  1635 + "normalizer": "lowercase"
  1636 + }
  1637 + }
  1638 + },
  1639 + "es": {
  1640 + "type": "text",
  1641 + "analyzer": "spanish",
  1642 + "fields": {
  1643 + "keyword": {
  1644 + "type": "keyword",
  1645 + "normalizer": "lowercase"
  1646 + }
  1647 + }
  1648 + },
  1649 + "sv": {
  1650 + "type": "text",
  1651 + "analyzer": "swedish",
  1652 + "fields": {
  1653 + "keyword": {
  1654 + "type": "keyword",
  1655 + "normalizer": "lowercase"
  1656 + }
  1657 + }
  1658 + },
  1659 + "tr": {
  1660 + "type": "text",
  1661 + "analyzer": "turkish",
  1662 + "fields": {
  1663 + "keyword": {
  1664 + "type": "keyword",
  1665 + "normalizer": "lowercase"
  1666 + }
  1667 + }
  1668 + },
  1669 + "th": {
  1670 + "type": "text",
  1671 + "analyzer": "thai",
  1672 + "fields": {
  1673 + "keyword": {
  1674 + "type": "keyword",
  1675 + "normalizer": "lowercase"
  1676 + }
  1677 + }
  1678 + }
  1679 + }
  1680 + },
  1681 + "tags": {
  1682 + "type": "object",
  1683 + "properties": {
  1684 + "zh": {
  1685 + "type": "text",
  1686 + "analyzer": "index_ik",
  1687 + "search_analyzer": "query_ik",
  1688 + "fields": {
  1689 + "keyword": {
  1690 + "type": "keyword",
  1691 + "normalizer": "lowercase"
  1692 + }
  1693 + }
  1694 + },
  1695 + "en": {
  1696 + "type": "text",
  1697 + "analyzer": "english",
  1698 + "fields": {
  1699 + "keyword": {
  1700 + "type": "keyword",
  1701 + "normalizer": "lowercase"
  1702 + }
  1703 + }
  1704 + },
  1705 + "ar": {
  1706 + "type": "text",
  1707 + "analyzer": "arabic",
  1708 + "fields": {
  1709 + "keyword": {
  1710 + "type": "keyword",
  1711 + "normalizer": "lowercase"
  1712 + }
  1713 + }
  1714 + },
  1715 + "hy": {
  1716 + "type": "text",
  1717 + "analyzer": "armenian",
  1718 + "fields": {
  1719 + "keyword": {
  1720 + "type": "keyword",
  1721 + "normalizer": "lowercase"
  1722 + }
  1723 + }
  1724 + },
  1725 + "eu": {
  1726 + "type": "text",
  1727 + "analyzer": "basque",
  1728 + "fields": {
  1729 + "keyword": {
  1730 + "type": "keyword",
  1731 + "normalizer": "lowercase"
  1732 + }
  1733 + }
  1734 + },
  1735 + "pt_br": {
  1736 + "type": "text",
  1737 + "analyzer": "brazilian",
  1738 + "fields": {
  1739 + "keyword": {
  1740 + "type": "keyword",
  1741 + "normalizer": "lowercase"
  1742 + }
  1743 + }
  1744 + },
  1745 + "bg": {
  1746 + "type": "text",
  1747 + "analyzer": "bulgarian",
  1748 + "fields": {
  1749 + "keyword": {
  1750 + "type": "keyword",
  1751 + "normalizer": "lowercase"
  1752 + }
  1753 + }
  1754 + },
  1755 + "ca": {
  1756 + "type": "text",
  1757 + "analyzer": "catalan",
  1758 + "fields": {
  1759 + "keyword": {
  1760 + "type": "keyword",
  1761 + "normalizer": "lowercase"
  1762 + }
  1763 + }
  1764 + },
  1765 + "cjk": {
  1766 + "type": "text",
  1767 + "analyzer": "cjk",
  1768 + "fields": {
  1769 + "keyword": {
  1770 + "type": "keyword",
  1771 + "normalizer": "lowercase"
  1772 + }
  1773 + }
  1774 + },
  1775 + "cs": {
  1776 + "type": "text",
  1777 + "analyzer": "czech",
  1778 + "fields": {
  1779 + "keyword": {
  1780 + "type": "keyword",
  1781 + "normalizer": "lowercase"
  1782 + }
  1783 + }
  1784 + },
  1785 + "da": {
  1786 + "type": "text",
  1787 + "analyzer": "danish",
  1788 + "fields": {
  1789 + "keyword": {
  1790 + "type": "keyword",
  1791 + "normalizer": "lowercase"
  1792 + }
  1793 + }
  1794 + },
  1795 + "nl": {
  1796 + "type": "text",
  1797 + "analyzer": "dutch",
  1798 + "fields": {
  1799 + "keyword": {
  1800 + "type": "keyword",
  1801 + "normalizer": "lowercase"
  1802 + }
  1803 + }
  1804 + },
  1805 + "fi": {
  1806 + "type": "text",
  1807 + "analyzer": "finnish",
  1808 + "fields": {
  1809 + "keyword": {
  1810 + "type": "keyword",
  1811 + "normalizer": "lowercase"
  1812 + }
  1813 + }
  1814 + },
  1815 + "fr": {
  1816 + "type": "text",
  1817 + "analyzer": "french",
  1818 + "fields": {
  1819 + "keyword": {
  1820 + "type": "keyword",
  1821 + "normalizer": "lowercase"
  1822 + }
  1823 + }
  1824 + },
  1825 + "gl": {
  1826 + "type": "text",
  1827 + "analyzer": "galician",
  1828 + "fields": {
  1829 + "keyword": {
  1830 + "type": "keyword",
  1831 + "normalizer": "lowercase"
  1832 + }
  1833 + }
  1834 + },
  1835 + "de": {
  1836 + "type": "text",
  1837 + "analyzer": "german",
  1838 + "fields": {
  1839 + "keyword": {
  1840 + "type": "keyword",
  1841 + "normalizer": "lowercase"
  1842 + }
  1843 + }
1223 }, 1844 },
1224 "el": { 1845 "el": {
1225 "type": "text", 1846 "type": "text",
1226 - "analyzer": "greek" 1847 + "analyzer": "greek",
  1848 + "fields": {
  1849 + "keyword": {
  1850 + "type": "keyword",
  1851 + "normalizer": "lowercase"
  1852 + }
  1853 + }
1227 }, 1854 },
1228 "hi": { 1855 "hi": {
1229 "type": "text", 1856 "type": "text",
1230 - "analyzer": "hindi" 1857 + "analyzer": "hindi",
  1858 + "fields": {
  1859 + "keyword": {
  1860 + "type": "keyword",
  1861 + "normalizer": "lowercase"
  1862 + }
  1863 + }
1231 }, 1864 },
1232 "hu": { 1865 "hu": {
1233 "type": "text", 1866 "type": "text",
1234 - "analyzer": "hungarian" 1867 + "analyzer": "hungarian",
  1868 + "fields": {
  1869 + "keyword": {
  1870 + "type": "keyword",
  1871 + "normalizer": "lowercase"
  1872 + }
  1873 + }
1235 }, 1874 },
1236 "id": { 1875 "id": {
1237 "type": "text", 1876 "type": "text",
1238 - "analyzer": "indonesian" 1877 + "analyzer": "indonesian",
  1878 + "fields": {
  1879 + "keyword": {
  1880 + "type": "keyword",
  1881 + "normalizer": "lowercase"
  1882 + }
  1883 + }
1239 }, 1884 },
1240 "it": { 1885 "it": {
1241 "type": "text", 1886 "type": "text",
1242 - "analyzer": "italian" 1887 + "analyzer": "italian",
  1888 + "fields": {
  1889 + "keyword": {
  1890 + "type": "keyword",
  1891 + "normalizer": "lowercase"
  1892 + }
  1893 + }
1243 }, 1894 },
1244 "no": { 1895 "no": {
1245 "type": "text", 1896 "type": "text",
1246 - "analyzer": "norwegian" 1897 + "analyzer": "norwegian",
  1898 + "fields": {
  1899 + "keyword": {
  1900 + "type": "keyword",
  1901 + "normalizer": "lowercase"
  1902 + }
  1903 + }
1247 }, 1904 },
1248 "fa": { 1905 "fa": {
1249 "type": "text", 1906 "type": "text",
1250 - "analyzer": "persian" 1907 + "analyzer": "persian",
  1908 + "fields": {
  1909 + "keyword": {
  1910 + "type": "keyword",
  1911 + "normalizer": "lowercase"
  1912 + }
  1913 + }
1251 }, 1914 },
1252 "pt": { 1915 "pt": {
1253 "type": "text", 1916 "type": "text",
1254 - "analyzer": "portuguese" 1917 + "analyzer": "portuguese",
  1918 + "fields": {
  1919 + "keyword": {
  1920 + "type": "keyword",
  1921 + "normalizer": "lowercase"
  1922 + }
  1923 + }
1255 }, 1924 },
1256 "ro": { 1925 "ro": {
1257 "type": "text", 1926 "type": "text",
1258 - "analyzer": "romanian" 1927 + "analyzer": "romanian",
  1928 + "fields": {
  1929 + "keyword": {
  1930 + "type": "keyword",
  1931 + "normalizer": "lowercase"
  1932 + }
  1933 + }
1259 }, 1934 },
1260 "ru": { 1935 "ru": {
1261 "type": "text", 1936 "type": "text",
1262 - "analyzer": "russian" 1937 + "analyzer": "russian",
  1938 + "fields": {
  1939 + "keyword": {
  1940 + "type": "keyword",
  1941 + "normalizer": "lowercase"
  1942 + }
  1943 + }
1263 }, 1944 },
1264 "es": { 1945 "es": {
1265 "type": "text", 1946 "type": "text",
1266 - "analyzer": "spanish" 1947 + "analyzer": "spanish",
  1948 + "fields": {
  1949 + "keyword": {
  1950 + "type": "keyword",
  1951 + "normalizer": "lowercase"
  1952 + }
  1953 + }
1267 }, 1954 },
1268 "sv": { 1955 "sv": {
1269 "type": "text", 1956 "type": "text",
1270 - "analyzer": "swedish" 1957 + "analyzer": "swedish",
  1958 + "fields": {
  1959 + "keyword": {
  1960 + "type": "keyword",
  1961 + "normalizer": "lowercase"
  1962 + }
  1963 + }
1271 }, 1964 },
1272 "tr": { 1965 "tr": {
1273 "type": "text", 1966 "type": "text",
1274 - "analyzer": "turkish" 1967 + "analyzer": "turkish",
  1968 + "fields": {
  1969 + "keyword": {
  1970 + "type": "keyword",
  1971 + "normalizer": "lowercase"
  1972 + }
  1973 + }
1275 }, 1974 },
1276 "th": { 1975 "th": {
1277 "type": "text", 1976 "type": "text",
1278 - "analyzer": "thai" 1977 + "analyzer": "thai",
  1978 + "fields": {
  1979 + "keyword": {
  1980 + "type": "keyword",
  1981 + "normalizer": "lowercase"
  1982 + }
  1983 + }
1279 } 1984 }
1280 } 1985 }
1281 }, 1986 },
@@ -1306,8 +2011,108 @@
1306 "name": { 2011 "name": {
1307 "type": "keyword" 2012 "type": "keyword"
1308 }, 2013 },
1309 - "value": { 2014 + "value_keyword": {
  2015 + "type": "keyword"
  2016 + },
  2017 + "value_text": {
  2018 + "type": "object",
  2019 + "properties": {
  2020 + "zh": {
  2021 + "type": "text",
  2022 + "analyzer": "index_ik",
  2023 + "search_analyzer": "query_ik",
  2024 + "fields": {
  2025 + "keyword": {
  2026 + "type": "keyword",
  2027 + "normalizer": "lowercase"
  2028 + }
  2029 + }
  2030 + },
  2031 + "en": {
  2032 + "type": "text",
  2033 + "analyzer": "english",
  2034 + "fields": {
  2035 + "keyword": {
  2036 + "type": "keyword",
  2037 + "normalizer": "lowercase"
  2038 + }
  2039 + }
  2040 + }
  2041 + }
  2042 + }
  2043 + }
  2044 + },
  2045 + "qanchors": {
  2046 + "type": "object",
  2047 + "properties": {
  2048 + "zh": {
  2049 + "type": "text",
  2050 + "analyzer": "index_ik",
  2051 + "search_analyzer": "query_ik"
  2052 + },
  2053 + "en": {
  2054 + "type": "text",
  2055 + "analyzer": "english"
  2056 + }
  2057 + }
  2058 + },
  2059 + "enriched_tags": {
  2060 + "type": "object",
  2061 + "properties": {
  2062 + "zh": {
  2063 + "type": "text",
  2064 + "analyzer": "index_ik",
  2065 + "search_analyzer": "query_ik",
  2066 + "fields": {
  2067 + "keyword": {
  2068 + "type": "keyword",
  2069 + "normalizer": "lowercase"
  2070 + }
  2071 + }
  2072 + },
  2073 + "en": {
  2074 + "type": "text",
  2075 + "analyzer": "english",
  2076 + "fields": {
  2077 + "keyword": {
  2078 + "type": "keyword",
  2079 + "normalizer": "lowercase"
  2080 + }
  2081 + }
  2082 + }
  2083 + }
  2084 + },
  2085 + "enriched_attributes": {
  2086 + "type": "nested",
  2087 + "properties": {
  2088 + "name": {
1310 "type": "keyword" 2089 "type": "keyword"
  2090 + },
  2091 + "value": {
  2092 + "type": "object",
  2093 + "properties": {
  2094 + "zh": {
  2095 + "type": "text",
  2096 + "analyzer": "index_ik",
  2097 + "search_analyzer": "query_ik",
  2098 + "fields": {
  2099 + "keyword": {
  2100 + "type": "keyword",
  2101 + "normalizer": "lowercase"
  2102 + }
  2103 + }
  2104 + },
  2105 + "en": {
  2106 + "type": "text",
  2107 + "analyzer": "english",
  2108 + "fields": {
  2109 + "keyword": {
  2110 + "type": "keyword",
  2111 + "normalizer": "lowercase"
  2112 + }
  2113 + }
  2114 + }
  2115 + }
1311 } 2116 }
1312 } 2117 }
1313 }, 2118 },
@@ -1321,13 +2126,82 @@
1321 "type": "keyword" 2126 "type": "keyword"
1322 }, 2127 },
1323 "option1_values": { 2128 "option1_values": {
1324 - "type": "keyword" 2129 + "type": "object",
  2130 + "properties": {
  2131 + "zh": {
  2132 + "type": "text",
  2133 + "analyzer": "index_ik",
  2134 + "search_analyzer": "query_ik",
  2135 + "fields": {
  2136 + "keyword": {
  2137 + "type": "keyword",
  2138 + "normalizer": "lowercase"
  2139 + }
  2140 + }
  2141 + },
  2142 + "en": {
  2143 + "type": "text",
  2144 + "analyzer": "english",
  2145 + "fields": {
  2146 + "keyword": {
  2147 + "type": "keyword",
  2148 + "normalizer": "lowercase"
  2149 + }
  2150 + }
  2151 + }
  2152 + }
1325 }, 2153 },
1326 "option2_values": { 2154 "option2_values": {
1327 - "type": "keyword" 2155 + "type": "object",
  2156 + "properties": {
  2157 + "zh": {
  2158 + "type": "text",
  2159 + "analyzer": "index_ik",
  2160 + "search_analyzer": "query_ik",
  2161 + "fields": {
  2162 + "keyword": {
  2163 + "type": "keyword",
  2164 + "normalizer": "lowercase"
  2165 + }
  2166 + }
  2167 + },
  2168 + "en": {
  2169 + "type": "text",
  2170 + "analyzer": "english",
  2171 + "fields": {
  2172 + "keyword": {
  2173 + "type": "keyword",
  2174 + "normalizer": "lowercase"
  2175 + }
  2176 + }
  2177 + }
  2178 + }
1328 }, 2179 },
1329 "option3_values": { 2180 "option3_values": {
1330 - "type": "keyword" 2181 + "type": "object",
  2182 + "properties": {
  2183 + "zh": {
  2184 + "type": "text",
  2185 + "analyzer": "index_ik",
  2186 + "search_analyzer": "query_ik",
  2187 + "fields": {
  2188 + "keyword": {
  2189 + "type": "keyword",
  2190 + "normalizer": "lowercase"
  2191 + }
  2192 + }
  2193 + },
  2194 + "en": {
  2195 + "type": "text",
  2196 + "analyzer": "english",
  2197 + "fields": {
  2198 + "keyword": {
  2199 + "type": "keyword",
  2200 + "normalizer": "lowercase"
  2201 + }
  2202 + }
  2203 + }
  2204 + }
1331 }, 2205 },
1332 "min_price": { 2206 "min_price": {
1333 "type": "float" 2207 "type": "float"
@@ -1391,20 +2265,6 @@
1391 "index": false 2265 "index": false
1392 } 2266 }
1393 } 2267 }
1394 - },  
1395 - "semantic_attributes": {  
1396 - "type": "nested",  
1397 - "properties": {  
1398 - "lang": {  
1399 - "type": "keyword"  
1400 - },  
1401 - "name": {  
1402 - "type": "keyword"  
1403 - },  
1404 - "value": {  
1405 - "type": "keyword"  
1406 - }  
1407 - }  
1408 } 2268 }
1409 } 2269 }
1410 } 2270 }
mappings/search_products.json.bak 0 → 100644
@@ -0,0 +1,629 @@
  1 +{
  2 + "settings": {
  3 + "number_of_shards": 1,
  4 + "number_of_replicas": 0,
  5 + "refresh_interval": "30s",
  6 + "analysis": {
  7 + "analyzer": {
  8 + "index_ik": {
  9 + "type": "custom",
  10 + "tokenizer": "ik_max_word",
  11 + "filter": [
  12 + "lowercase",
  13 + "asciifolding"
  14 + ]
  15 + },
  16 + "query_ik": {
  17 + "type": "custom",
  18 + "tokenizer": "ik_smart",
  19 + "filter": [
  20 + "lowercase",
  21 + "asciifolding"
  22 + ]
  23 + }
  24 + },
  25 + "normalizer": {
  26 + "lowercase": {
  27 + "type": "custom",
  28 + "filter": [
  29 + "lowercase"
  30 + ]
  31 + }
  32 + }
  33 + },
  34 + "similarity": {
  35 + "default": {
  36 + "type": "BM25",
  37 + "b": 0.0,
  38 + "k1": 0.0
  39 + }
  40 + }
  41 + },
  42 + "mappings": {
  43 + "properties": {
  44 + "tenant_id": {
  45 + "type": "keyword"
  46 + },
  47 + "spu_id": {
  48 + "type": "keyword"
  49 + },
  50 + "create_time": {
  51 + "type": "date"
  52 + },
  53 + "update_time": {
  54 + "type": "date"
  55 + },
  56 + "title": {
  57 + "type": "object",
  58 + "properties": {
  59 + "zh": {
  60 + "type": "text",
  61 + "analyzer": "index_ik",
  62 + "search_analyzer": "query_ik"
  63 + },
  64 + "en": {
  65 + "type": "text",
  66 + "analyzer": "english"
  67 + },
  68 + "ar": {
  69 + "type": "text",
  70 + "analyzer": "arabic"
  71 + },
  72 + "hy": {
  73 + "type": "text",
  74 + "analyzer": "armenian"
  75 + },
  76 + "eu": {
  77 + "type": "text",
  78 + "analyzer": "basque"
  79 + },
  80 + "pt_br": {
  81 + "type": "text",
  82 + "analyzer": "brazilian"
  83 + },
  84 + "bg": {
  85 + "type": "text",
  86 + "analyzer": "bulgarian"
  87 + },
  88 + "ca": {
  89 + "type": "text",
  90 + "analyzer": "catalan"
  91 + },
  92 + "cjk": {
  93 + "type": "text",
  94 + "analyzer": "cjk"
  95 + },
  96 + "cs": {
  97 + "type": "text",
  98 + "analyzer": "czech"
  99 + },
  100 + "da": {
  101 + "type": "text",
  102 + "analyzer": "danish"
  103 + },
  104 + "nl": {
  105 + "type": "text",
  106 + "analyzer": "dutch"
  107 + },
  108 + "fi": {
  109 + "type": "text",
  110 + "analyzer": "finnish"
  111 + },
  112 + "fr": {
  113 + "type": "text",
  114 + "analyzer": "french"
  115 + },
  116 + "gl": {
  117 + "type": "text",
  118 + "analyzer": "galician"
  119 + },
  120 + "de": {
  121 + "type": "text",
  122 + "analyzer": "german"
  123 + },
  124 + "el": {
  125 + "type": "text",
  126 + "analyzer": "greek"
  127 + },
  128 + "hi": {
  129 + "type": "text",
  130 + "analyzer": "hindi"
  131 + },
  132 + "hu": {
  133 + "type": "text",
  134 + "analyzer": "hungarian"
  135 + },
  136 + "id": {
  137 + "type": "text",
  138 + "analyzer": "indonesian"
  139 + },
  140 + "it": {
  141 + "type": "text",
  142 + "analyzer": "italian"
  143 + },
  144 + "no": {
  145 + "type": "text",
  146 + "analyzer": "norwegian"
  147 + },
  148 + "fa": {
  149 + "type": "text",
  150 + "analyzer": "persian"
  151 + },
  152 + "pt": {
  153 + "type": "text",
  154 + "analyzer": "portuguese"
  155 + },
  156 + "ro": {
  157 + "type": "text",
  158 + "analyzer": "romanian"
  159 + },
  160 + "ru": {
  161 + "type": "text",
  162 + "analyzer": "russian"
  163 + },
  164 + "es": {
  165 + "type": "text",
  166 + "analyzer": "spanish"
  167 + },
  168 + "sv": {
  169 + "type": "text",
  170 + "analyzer": "swedish"
  171 + },
  172 + "tr": {
  173 + "type": "text",
  174 + "analyzer": "turkish"
  175 + },
  176 + "th": {
  177 + "type": "text",
  178 + "analyzer": "thai"
  179 + }
  180 + }
  181 + },
  182 + "keywords": {
  183 + "type": "object",
  184 + "properties": {
  185 + "zh": {
  186 + "type": "text",
  187 + "analyzer": "index_ik",
  188 + "search_analyzer": "query_ik"
  189 + },
  190 + "en": {
  191 + "type": "text",
  192 + "analyzer": "english",
  193 + "fields": {
  194 + "keyword": {
  195 + "type": "keyword",
  196 + "normalizer": "lowercase"
  197 + }
  198 + }
  199 + },
  200 + "ar": {
  201 + "type": "text",
  202 + "analyzer": "arabic",
  203 + "fields": {
  204 + "keyword": {
  205 + "type": "keyword",
  206 + "normalizer": "lowercase"
  207 + }
  208 + }
  209 + },
  210 +...
  211 + }
  212 + },
  213 + "brief": {
  214 + "type": "object",
  215 + "properties": {
  216 + "zh": {
  217 + "type": "text",
  218 + "analyzer": "index_ik",
  219 + "search_analyzer": "query_ik"
  220 + },
  221 + "en": {
  222 + "type": "text",
  223 + "analyzer": "english"
  224 + },
  225 + "ar": {
  226 + "type": "text",
  227 + "analyzer": "arabic"
  228 + },
  229 + ...
  230 + }
  231 + },
  232 + "description": {
  233 + "type": "object",
  234 + "properties": {
  235 + "zh": {
  236 + "type": "text",
  237 + "analyzer": "index_ik",
  238 + "search_analyzer": "query_ik"
  239 + },
  240 + "en": {
  241 + "type": "text",
  242 + "analyzer": "english"
  243 + },
  244 + "ar": {
  245 + "type": "text",
  246 + "analyzer": "arabic"
  247 + },
  248 + ...
  249 + }
  250 + },
  251 + "vendor": {
  252 + "type": "object",
  253 + "properties": {
  254 + "zh": {
  255 + "type": "text",
  256 + "analyzer": "index_ik",
  257 + "search_analyzer": "query_ik"
  258 + },
  259 + "en": {
  260 + "type": "text",
  261 + "analyzer": "english",
  262 + "fields": {
  263 + "keyword": {
  264 + "type": "keyword",
  265 + "normalizer": "lowercase"
  266 + }
  267 + }
  268 + },
  269 + "ar": {
  270 + "type": "text",
  271 + "analyzer": "arabic",
  272 + "fields": {
  273 + "keyword": {
  274 + "type": "keyword",
  275 + "normalizer": "lowercase"
  276 + }
  277 + }
  278 + },
  279 + ...
  280 + }
  281 + },
  282 + "image_url": {
  283 + "type": "keyword",
  284 + "index": false
  285 + },
  286 + "title_embedding": {
  287 + "type": "dense_vector",
  288 + "dims": 1024,
  289 + "index": true,
  290 + "similarity": "dot_product",
  291 + "element_type": "bfloat16"
  292 + },
  293 + "image_embedding": {
  294 + "type": "nested",
  295 + "properties": {
  296 + "vector": {
  297 + "type": "dense_vector",
  298 + "dims": 768,
  299 + "index": true,
  300 + "similarity": "dot_product",
  301 + "element_type": "bfloat16"
  302 + },
  303 + "url": {
  304 + "type": "text"
  305 + }
  306 + }
  307 + },
  308 + "category_path": {
  309 + "type": "object",
  310 + "properties": {
  311 + "zh": {
  312 + "type": "text",
  313 + "analyzer": "index_ik",
  314 + "search_analyzer": "query_ik"
  315 + },
  316 + "en": {
  317 + "type": "text",
  318 + "analyzer": "english"
  319 + },
  320 + "ar": {
  321 + "type": "text",
  322 + "analyzer": "arabic"
  323 + },
  324 + ...
  325 + }
  326 + }
  327 + },
  328 + "category_name_text": {
  329 + "type": "object",
  330 + "properties": {
  331 + "zh": {
  332 + "type": "text",
  333 + "analyzer": "index_ik",
  334 + "search_analyzer": "query_ik"
  335 + },
  336 + "en": {
  337 + "type": "text",
  338 + "analyzer": "english"
  339 + },
  340 + "ar": {
  341 + "type": "text",
  342 + "analyzer": "arabic"
  343 + },
  344 + ...
  345 +
  346 + }
  347 + },
  348 + "qanchors": {
  349 + "type": "object",
  350 + "properties": {
  351 + "zh": {
  352 + "type": "text",
  353 + "analyzer": "index_ik",
  354 + "search_analyzer": "query_ik"
  355 + },
  356 + "en": {
  357 + "type": "text",
  358 + "analyzer": "english"
  359 + }
  360 + }
  361 + },
  362 + "tags": {
  363 + "type": "object",
  364 + "properties": {
  365 + "zh": {
  366 + "type": "text",
  367 + "analyzer": "index_ik",
  368 + "search_analyzer": "query_ik",
  369 + "fields": {
  370 + "keyword": {
  371 + "type": "keyword",
  372 + "normalizer": "lowercase"
  373 + }
  374 + }
  375 + },
  376 + "en": {
  377 + "type": "text",
  378 + "analyzer": "english",
  379 + "fields": {
  380 + "keyword": {
  381 + "type": "keyword",
  382 + "normalizer": "lowercase"
  383 + }
  384 + }
  385 + }
  386 + }
  387 + },
  388 + "category_id": {
  389 + "type": "keyword"
  390 + },
  391 + "category_name": {
  392 + "type": "keyword"
  393 + },
  394 + "category_level": {
  395 + "type": "integer"
  396 + },
  397 + "category1_name": {
  398 + "type": "keyword"
  399 + },
  400 + "category2_name": {
  401 + "type": "keyword"
  402 + },
  403 + "category3_name": {
  404 + "type": "keyword"
  405 + },
  406 + "specifications": {
  407 + "type": "nested",
  408 + "properties": {
  409 + "sku_id": {
  410 + "type": "keyword"
  411 + },
  412 + "name": {
  413 + "type": "keyword"
  414 + },
  415 + "value": {
  416 + "type": "object",
  417 + "properties": {
  418 + "zh": {
  419 + "type": "text",
  420 + "analyzer": "index_ik",
  421 + "search_analyzer": "query_ik",
  422 + "fields": {
  423 + "keyword": {
  424 + "type": "keyword",
  425 + "normalizer": "lowercase"
  426 + }
  427 + }
  428 + },
  429 + "en": {
  430 + "type": "text",
  431 + "analyzer": "english",
  432 + "fields": {
  433 + "keyword": {
  434 + "type": "keyword",
  435 + "normalizer": "lowercase"
  436 + }
  437 + }
  438 + }
  439 + }
  440 + }
  441 + }
  442 + },
  443 + "enriched_attributes": {
  444 + "type": "nested",
  445 + "properties": {
  446 + "name": {
  447 + "type": "keyword"
  448 + },
  449 + "value": {
  450 + "type": "object",
  451 + "properties": {
  452 + "zh": {
  453 + "type": "text",
  454 + "analyzer": "index_ik",
  455 + "search_analyzer": "query_ik",
  456 + "fields": {
  457 + "keyword": {
  458 + "type": "keyword",
  459 + "normalizer": "lowercase"
  460 + }
  461 + }
  462 + },
  463 + "en": {
  464 + "type": "text",
  465 + "analyzer": "english",
  466 + "fields": {
  467 + "keyword": {
  468 + "type": "keyword",
  469 + "normalizer": "lowercase"
  470 + }
  471 + }
  472 + }
  473 + }
  474 + }
  475 + }
  476 + },
  477 + "option1_name": {
  478 + "type": "keyword"
  479 + },
  480 + "option2_name": {
  481 + "type": "keyword"
  482 + },
  483 + "option3_name": {
  484 + "type": "keyword"
  485 + },
  486 + "option1_values": {
  487 + "type": "object",
  488 + "properties": {
  489 + "zh": {
  490 + "type": "text",
  491 + "analyzer": "index_ik",
  492 + "search_analyzer": "query_ik",
  493 + "fields": {
  494 + "keyword": {
  495 + "type": "keyword",
  496 + "normalizer": "lowercase"
  497 + }
  498 + }
  499 + },
  500 + "en": {
  501 + "type": "text",
  502 + "analyzer": "english",
  503 + "fields": {
  504 + "keyword": {
  505 + "type": "keyword",
  506 + "normalizer": "lowercase"
  507 + }
  508 + }
  509 + }
  510 + }
  511 + },
  512 + "option2_values": {
  513 + "type": "object",
  514 + "properties": {
  515 + "zh": {
  516 + "type": "text",
  517 + "analyzer": "index_ik",
  518 + "search_analyzer": "query_ik",
  519 + "fields": {
  520 + "keyword": {
  521 + "type": "keyword",
  522 + "normalizer": "lowercase"
  523 + }
  524 + }
  525 + },
  526 + "en": {
  527 + "type": "text",
  528 + "analyzer": "english",
  529 + "fields": {
  530 + "keyword": {
  531 + "type": "keyword",
  532 + "normalizer": "lowercase"
  533 + }
  534 + }
  535 + }
  536 + }
  537 + },
  538 + "option3_values": {
  539 + "type": "object",
  540 + "properties": {
  541 + "zh": {
  542 + "type": "text",
  543 + "analyzer": "index_ik",
  544 + "search_analyzer": "query_ik",
  545 + "fields": {
  546 + "keyword": {
  547 + "type": "keyword",
  548 + "normalizer": "lowercase"
  549 + }
  550 + }
  551 + },
  552 + "en": {
  553 + "type": "text",
  554 + "analyzer": "english",
  555 + "fields": {
  556 + "keyword": {
  557 + "type": "keyword",
  558 + "normalizer": "lowercase"
  559 + }
  560 + }
  561 + }
  562 + }
  563 + },
  564 + "min_price": {
  565 + "type": "float"
  566 + },
  567 + "max_price": {
  568 + "type": "float"
  569 + },
  570 + "compare_at_price": {
  571 + "type": "float"
  572 + },
  573 + "sku_prices": {
  574 + "type": "float"
  575 + },
  576 + "sku_weights": {
  577 + "type": "long"
  578 + },
  579 + "sku_weight_units": {
  580 + "type": "keyword"
  581 + },
  582 + "total_inventory": {
  583 + "type": "long"
  584 + },
  585 + "sales": {
  586 + "type": "long"
  587 + },
  588 + "skus": {
  589 + "type": "nested",
  590 + "properties": {
  591 + "sku_id": {
  592 + "type": "keyword"
  593 + },
  594 + "price": {
  595 + "type": "float"
  596 + },
  597 + "compare_at_price": {
  598 + "type": "float"
  599 + },
  600 + "sku_code": {
  601 + "type": "keyword"
  602 + },
  603 + "stock": {
  604 + "type": "long"
  605 + },
  606 + "weight": {
  607 + "type": "float"
  608 + },
  609 + "weight_unit": {
  610 + "type": "keyword"
  611 + },
  612 + "option1_value": {
  613 + "type": "keyword"
  614 + },
  615 + "option2_value": {
  616 + "type": "keyword"
  617 + },
  618 + "option3_value": {
  619 + "type": "keyword"
  620 + },
  621 + "image_src": {
  622 + "type": "keyword",
  623 + "index": false
  624 + }
  625 + }
  626 + }
  627 + }
  628 + }
  629 +}
\ No newline at end of file
scripts/es_debug_search.py
@@ -279,7 +279,9 @@ def _run_es(
279 body: Dict[str, Any], 279 body: Dict[str, Any],
280 size: int, 280 size: int,
281 ) -> List[Dict[str, Any]]: 281 ) -> List[Dict[str, Any]]:
282 - resp = es.search(index=index_name, body=body, size=size) 282 + # Avoid passing size= alongside body= (deprecated in elasticsearch-py).
  283 + payload = {**body, "size": size}
  284 + resp = es.search(index=index_name, body=payload)
283 if hasattr(resp, "body"): 285 if hasattr(resp, "body"):
284 payload = resp.body 286 payload = resp.body
285 else: 287 else:
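The hunk above folds pagination into the request body instead of passing `size=` next to `body=`. That merge can be sketched on its own; the helper name below is illustrative, not from the repo:

```python
from typing import Any, Dict

def build_search_payload(body: Dict[str, Any], size: int) -> Dict[str, Any]:
    # Newer elasticsearch-py clients deprecate mixing size= with body=,
    # so fold pagination into the body before calling es.search(body=payload).
    # A top-level "size" already present in body is overridden deliberately.
    return {**body, "size": size}

payload = build_search_payload({"query": {"match_all": {}}}, 10)
```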
search/rerank_client.py
@@ -239,22 +239,96 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
239 } 239 }
240 240
241 241
242 -def _multiply_fusion_factors(  
243 - rerank_score: float,  
244 - fine_score: Optional[float], 242 +def _format_debug_float(value: float) -> str:
  243 + return f"{float(value):.6g}"
  244 +
  245 +
  246 +def _build_hit_signal_bundle(
  247 + hit: Dict[str, Any],
  248 + fusion: CoarseRankFusionConfig | RerankFusionConfig,
  249 +) -> Dict[str, Any]:
  250 + es_score = _to_score(hit.get("_score"))
  251 + matched_queries = hit.get("matched_queries")
  252 + text_components = _collect_text_score_components(matched_queries, es_score)
  253 + knn_components = _collect_knn_score_components(matched_queries, fusion)
  254 + return {
  255 + "doc_id": hit.get("_id"),
  256 + "es_score": es_score,
  257 + "matched_queries": matched_queries,
  258 + "text_components": text_components,
  259 + "knn_components": knn_components,
  260 + "text_score": text_components["text_score"],
  261 + "knn_score": knn_components["knn_score"],
  262 + }
  263 +
  264 +
  265 +def _build_formula_summary(
  266 + term_rows: List[Dict[str, Any]],
  267 + style_boost: float,
  268 + final_score: float,
  269 +) -> str:
  270 + segments = [
  271 + (
  272 + f"{row['name']}=("
  273 + f"{_format_debug_float(row['raw_score'])}"
  274 + f"+{_format_debug_float(row['bias'])})"
  275 + f"^{_format_debug_float(row['exponent'])}"
  276 + f"={_format_debug_float(row['factor'])}"
  277 + )
  278 + for row in term_rows
  279 + ]
  280 + if style_boost != 1.0:
  281 + segments.append(f"style_boost={_format_debug_float(style_boost)}")
  282 + segments.append(f"final={_format_debug_float(final_score)}")
  283 + return " | ".join(segments)
  284 +
  285 +
  286 +def _compute_multiplicative_fusion(
  287 + *,
245 text_score: float, 288 text_score: float,
246 knn_score: float, 289 knn_score: float,
247 fusion: RerankFusionConfig, 290 fusion: RerankFusionConfig,
248 -) -> Tuple[float, float, float, float, float]:  
249 - """(rerank_factor, fine_factor, text_factor, knn_factor, fused_without_style_boost)."""  
250 - r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent  
251 - if fine_score is None:  
252 - f = 1.0  
253 - else:  
254 - f = (max(fine_score, 0.0) + fusion.fine_bias) ** fusion.fine_exponent  
255 - t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent  
256 - k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent  
257 - return r, f, t, k, r * f * t * k 291 + rerank_score: Optional[float] = None,
  292 + fine_score: Optional[float] = None,
  293 + style_boost: float = 1.0,
  294 +) -> Dict[str, Any]:
  295 + term_rows: List[Dict[str, Any]] = []
  296 +
  297 + def _add_term(name: str, raw_score: Optional[float], bias: float, exponent: float) -> None:
  298 + if raw_score is None:
  299 + return
  300 + factor = (max(float(raw_score), 0.0) + bias) ** exponent
  301 + term_rows.append(
  302 + {
  303 + "name": name,
  304 + "raw_score": float(raw_score),
  305 + "bias": float(bias),
  306 + "exponent": float(exponent),
  307 + "factor": factor,
  308 + }
  309 + )
  310 +
  311 + _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent)
  312 + _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent)
  313 + _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent)
  314 + _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent)
  315 +
  316 + fused = 1.0
  317 + factors: Dict[str, float] = {}
  318 + inputs: Dict[str, float] = {}
  319 + for row in term_rows:
  320 + fused *= row["factor"]
  321 + factors[row["name"]] = row["factor"]
  322 + inputs[row["name"]] = row["raw_score"]
  323 + fused *= style_boost
  324 + factors["style_boost"] = style_boost
  325 +
  326 + return {
  327 + "inputs": inputs,
  328 + "factors": factors,
  329 + "score": fused,
  330 + "summary": _build_formula_summary(term_rows, style_boost, fused),
  331 + }
258 332
259 333
260 def _multiply_coarse_fusion_factors( 334 def _multiply_coarse_fusion_factors(
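`_compute_multiplicative_fusion` above folds every available signal into one product: each present signal contributes `(max(score, 0) + bias) ** exponent`, missing signals are skipped, and the style boost multiplies in at the end. A minimal standalone sketch of that arithmetic (the biases and exponents below are illustrative, not the project's config defaults):

```python
from typing import Dict, Optional, Tuple

def fuse(scores: Dict[str, Optional[float]],
         params: Dict[str, Tuple[float, float]],
         style_boost: float = 1.0) -> Tuple[float, Dict[str, float]]:
    """Each present signal contributes (max(score, 0) + bias) ** exponent;
    a missing signal (None) is skipped, i.e. acts as a neutral factor of 1."""
    fused = 1.0
    factors: Dict[str, float] = {}
    for name, raw in scores.items():
        if raw is None:
            continue  # absent signal: contributes nothing to the product
        bias, exponent = params[name]
        factor = (max(float(raw), 0.0) + bias) ** exponent
        factors[name] = factor
        fused *= factor
    fused *= style_boost
    factors["style_boost"] = style_boost
    return fused, factors

# Illustrative (bias, exponent) pairs per signal; knn_score is absent here.
score, factors = fuse(
    {"fine_score": 2.0, "text_score": 1.0, "knn_score": None},
    {"fine_score": (1.0, 0.5), "text_score": (1.0, 1.0)},
    style_boost=1.2,
)
```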
@@ -283,12 +357,13 @@ def coarse_resort_hits(
283 f = fusion or CoarseRankFusionConfig() 357 f = fusion or CoarseRankFusionConfig()
284 coarse_debug: List[Dict[str, Any]] = [] if debug else [] 358 coarse_debug: List[Dict[str, Any]] = [] if debug else []
285 for hit in es_hits: 359 for hit in es_hits:
286 - es_score = _to_score(hit.get("_score"))  
287 - matched_queries = hit.get("matched_queries")  
288 - knn_components = _collect_knn_score_components(matched_queries, f)  
289 - text_components = _collect_text_score_components(matched_queries, es_score)  
290 - text_score = text_components["text_score"]  
291 - knn_score = knn_components["knn_score"] 360 + signal_bundle = _build_hit_signal_bundle(hit, f)
  361 + es_score = signal_bundle["es_score"]
  362 + matched_queries = signal_bundle["matched_queries"]
  363 + text_components = signal_bundle["text_components"]
  364 + knn_components = signal_bundle["knn_components"]
  365 + text_score = signal_bundle["text_score"]
  366 + knn_score = signal_bundle["knn_score"]
292 text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors( 367 text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors(
293 text_score=text_score, 368 text_score=text_score,
294 knn_score=knn_score, 369 knn_score=knn_score,
@@ -372,77 +447,81 @@ def fuse_scores_and_resort(
372 n = len(es_hits) 447 n = len(es_hits)
373 if n == 0 or len(rerank_scores) != n: 448 if n == 0 or len(rerank_scores) != n:
374 return [] 449 return []
375 - if fine_scores is not None and len(fine_scores) != n:  
376 - fine_scores = None  
377 -  
378 f = fusion or RerankFusionConfig() 450 f = fusion or RerankFusionConfig()
379 fused_debug: List[Dict[str, Any]] = [] if debug else [] 451 fused_debug: List[Dict[str, Any]] = [] if debug else []
380 452
381 for idx, hit in enumerate(es_hits): 453 for idx, hit in enumerate(es_hits):
382 - es_score = _to_score(hit.get("_score")) 454 + signal_bundle = _build_hit_signal_bundle(hit, f)
  455 + text_components = signal_bundle["text_components"]
  456 + knn_components = signal_bundle["knn_components"]
  457 + text_score = signal_bundle["text_score"]
  458 + knn_score = signal_bundle["knn_score"]
383 rerank_score = _to_score(rerank_scores[idx]) 459 rerank_score = _to_score(rerank_scores[idx])
384 - fine_score = _to_score(fine_scores[idx]) if fine_scores is not None else _to_score(hit.get("_fine_score"))  
385 - matched_queries = hit.get("matched_queries")  
386 - knn_components = _collect_knn_score_components(matched_queries, f)  
387 - knn_score = knn_components["knn_score"]  
388 - text_components = _collect_text_score_components(matched_queries, es_score)  
389 - text_score = text_components["text_score"]  
390 - rerank_factor, fine_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(  
391 - rerank_score, fine_score if fine_scores is not None or "_fine_score" in hit else None, text_score, knn_score, f 460 + fine_score_raw = (
  461 + _to_score(fine_scores[idx])
  462 + if fine_scores is not None and len(fine_scores) == n
  463 + else _to_score(hit.get("_fine_score"))
392 ) 464 )
  465 + fine_score = fine_score_raw if (fine_scores is not None and len(fine_scores) == n) or "_fine_score" in hit else None
393 sku_selected = _has_selected_sku(hit) 466 sku_selected = _has_selected_sku(hit)
394 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 467 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
395 - fused *= style_boost 468 + fusion_result = _compute_multiplicative_fusion(
  469 + rerank_score=rerank_score,
  470 + fine_score=fine_score,
  471 + text_score=text_score,
  472 + knn_score=knn_score,
  473 + fusion=f,
  474 + style_boost=style_boost,
  475 + )
  476 + fused = fusion_result["score"]
396 477
397 hit["_original_score"] = hit.get("_score") 478 hit["_original_score"] = hit.get("_score")
398 hit["_rerank_score"] = rerank_score 479 hit["_rerank_score"] = rerank_score
399 - hit["_fine_score"] = fine_score 480 + if fine_score is not None:
  481 + hit["_fine_score"] = fine_score
400 hit["_text_score"] = text_score 482 hit["_text_score"] = text_score
401 hit["_knn_score"] = knn_score 483 hit["_knn_score"] = knn_score
402 hit["_text_knn_score"] = knn_components["text_knn_score"] 484 hit["_text_knn_score"] = knn_components["text_knn_score"]
403 hit["_image_knn_score"] = knn_components["image_knn_score"] 485 hit["_image_knn_score"] = knn_components["image_knn_score"]
404 hit["_fused_score"] = fused 486 hit["_fused_score"] = fused
405 hit["_style_intent_selected_sku_boost"] = style_boost 487 hit["_style_intent_selected_sku_boost"] = style_boost
406 - if debug:  
407 - hit["_text_source_score"] = text_components["source_score"]  
408 - hit["_text_translation_score"] = text_components["translation_score"]  
409 - hit["_text_primary_score"] = text_components["primary_text_score"]  
410 - hit["_text_support_score"] = text_components["support_text_score"]  
411 - hit["_knn_primary_score"] = knn_components["primary_knn_score"]  
412 - hit["_knn_support_score"] = knn_components["support_knn_score"]  
413 488
414 if debug: 489 if debug:
415 debug_entry = { 490 debug_entry = {
416 "doc_id": hit.get("_id"), 491 "doc_id": hit.get("_id"),
417 - "es_score": es_score, 492 + "score": fused,
  493 + "es_score": signal_bundle["es_score"],
418 "rerank_score": rerank_score, 494 "rerank_score": rerank_score,
419 "fine_score": fine_score, 495 "fine_score": fine_score,
420 "text_score": text_score, 496 "text_score": text_score,
  497 + "knn_score": knn_score,
  498 + "fusion_inputs": fusion_result["inputs"],
  499 + "fusion_factors": fusion_result["factors"],
  500 + "fusion_summary": fusion_result["summary"],
421 "text_source_score": text_components["source_score"], 501 "text_source_score": text_components["source_score"],
422 "text_translation_score": text_components["translation_score"], 502 "text_translation_score": text_components["translation_score"],
423 "text_weighted_source_score": text_components["weighted_source_score"], 503 "text_weighted_source_score": text_components["weighted_source_score"],
424 "text_weighted_translation_score": text_components["weighted_translation_score"], 504 "text_weighted_translation_score": text_components["weighted_translation_score"],
425 "text_primary_score": text_components["primary_text_score"], 505 "text_primary_score": text_components["primary_text_score"],
426 "text_support_score": text_components["support_text_score"], 506 "text_support_score": text_components["support_text_score"],
427 - "text_score_fallback_to_es": (  
428 - text_score == es_score  
429 - and text_components["source_score"] <= 0.0  
430 - and text_components["translation_score"] <= 0.0  
431 - ),  
432 "text_knn_score": knn_components["text_knn_score"], 507 "text_knn_score": knn_components["text_knn_score"],
433 "image_knn_score": knn_components["image_knn_score"], 508 "image_knn_score": knn_components["image_knn_score"],
434 "weighted_text_knn_score": knn_components["weighted_text_knn_score"], 509 "weighted_text_knn_score": knn_components["weighted_text_knn_score"],
435 "weighted_image_knn_score": knn_components["weighted_image_knn_score"], 510 "weighted_image_knn_score": knn_components["weighted_image_knn_score"],
436 "knn_primary_score": knn_components["primary_knn_score"], 511 "knn_primary_score": knn_components["primary_knn_score"],
437 "knn_support_score": knn_components["support_knn_score"], 512 "knn_support_score": knn_components["support_knn_score"],
438 - "knn_score": knn_score,  
439 - "rerank_factor": rerank_factor,  
440 - "fine_factor": fine_factor,  
441 - "text_factor": text_factor,  
442 - "knn_factor": knn_factor, 513 + "text_score_fallback_to_es": (
  514 + text_score == signal_bundle["es_score"]
  515 + and text_components["source_score"] <= 0.0
  516 + and text_components["translation_score"] <= 0.0
  517 + ),
  518 + "rerank_factor": fusion_result["factors"].get("rerank_score"),
  519 + "fine_factor": fusion_result["factors"].get("fine_score"),
  520 + "text_factor": fusion_result["factors"].get("text_score"),
  521 + "knn_factor": fusion_result["factors"].get("knn_score"),
443 "style_intent_selected_sku": sku_selected, 522 "style_intent_selected_sku": sku_selected,
444 "style_intent_selected_sku_boost": style_boost, 523 "style_intent_selected_sku_boost": style_boost,
445 - "matched_queries": matched_queries, 524 + "matched_queries": signal_bundle["matched_queries"],
446 "fused_score": fused, 525 "fused_score": fused,
447 } 526 }
448 if rerank_debug_rows is not None and idx < len(rerank_debug_rows): 527 if rerank_debug_rows is not None and idx < len(rerank_debug_rows):
@@ -530,9 +609,11 @@ def run_lightweight_rerank(
530 rerank_doc_template: str = "{title}", 609 rerank_doc_template: str = "{title}",
531 top_n: Optional[int] = None, 610 top_n: Optional[int] = None,
532 debug: bool = False, 611 debug: bool = False,
  612 + fusion: Optional[RerankFusionConfig] = None,
  613 + style_intent_selected_sku_boost: float = 1.2,
533 service_profile: Optional[str] = "fine", 614 service_profile: Optional[str] = "fine",
534 ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]], List[Dict[str, Any]]]: 615 ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
535 - """Call lightweight reranker and attach scores to hits without final fusion.""" 616 + """Call lightweight reranker and rank by lightweight-model fusion."""
536 if not es_hits: 617 if not es_hits:
537 return [], {}, [] 618 return [], {}, []
538 619
@@ -554,18 +635,50 @@ def run_lightweight_rerank(
554 if scores is None or len(scores) != len(es_hits): 635 if scores is None or len(scores) != len(es_hits):
555 return None, None, [] 636 return None, None, []
556 637
  638 + f = fusion or RerankFusionConfig()
557 debug_rows: List[Dict[str, Any]] = [] if debug else [] 639 debug_rows: List[Dict[str, Any]] = [] if debug else []
558 for idx, hit in enumerate(es_hits): 640 for idx, hit in enumerate(es_hits):
  641 + signal_bundle = _build_hit_signal_bundle(hit, f)
  642 + text_score = signal_bundle["text_score"]
  643 + knn_score = signal_bundle["knn_score"]
559 fine_score = _to_score(scores[idx]) 644 fine_score = _to_score(scores[idx])
  645 + sku_selected = _has_selected_sku(hit)
  646 + style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
  647 + fusion_result = _compute_multiplicative_fusion(
  648 + fine_score=fine_score,
  649 + text_score=text_score,
  650 + knn_score=knn_score,
  651 + fusion=f,
  652 + style_boost=style_boost,
  653 + )
  654 +
560 hit["_fine_score"] = fine_score 655 hit["_fine_score"] = fine_score
  656 + hit["_fine_fused_score"] = fusion_result["score"]
  657 + hit["_text_score"] = text_score
  658 + hit["_knn_score"] = knn_score
  659 + hit["_text_knn_score"] = signal_bundle["knn_components"]["text_knn_score"]
  660 + hit["_image_knn_score"] = signal_bundle["knn_components"]["image_knn_score"]
  661 + hit["_style_intent_selected_sku_boost"] = style_boost
  662 +
561 if debug: 663 if debug:
562 row: Dict[str, Any] = { 664 row: Dict[str, Any] = {
563 "doc_id": hit.get("_id"), 665 "doc_id": hit.get("_id"),
  666 + "score": fusion_result["score"],
564 "fine_score": fine_score, 667 "fine_score": fine_score,
  668 + "text_score": text_score,
  669 + "knn_score": knn_score,
  670 + "fusion_inputs": fusion_result["inputs"],
  671 + "fusion_factors": fusion_result["factors"],
  672 + "fusion_summary": fusion_result["summary"],
  673 + "fine_factor": fusion_result["factors"].get("fine_score"),
  674 + "text_factor": fusion_result["factors"].get("text_score"),
  675 + "knn_factor": fusion_result["factors"].get("knn_score"),
  676 + "style_intent_selected_sku": sku_selected,
  677 + "style_intent_selected_sku_boost": style_boost,
565 } 678 }
566 if rerank_debug_rows is not None and idx < len(rerank_debug_rows): 679 if rerank_debug_rows is not None and idx < len(rerank_debug_rows):
567 row["rerank_input"] = rerank_debug_rows[idx] 680 row["rerank_input"] = rerank_debug_rows[idx]
568 debug_rows.append(row) 681 debug_rows.append(row)
569 682
570 - es_hits.sort(key=lambda h: h.get("_fine_score", 0.0), reverse=True) 683 + es_hits.sort(key=lambda h: h.get("_fine_fused_score", h.get("_fine_score", 0.0)), reverse=True)
571 return scores, meta, debug_rows 684 return scores, meta, debug_rows
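The new sort key above prefers `_fine_fused_score` and falls back to the raw `_fine_score` only for hits that never went through fusion. A minimal sketch of that fallback behavior:

```python
def fine_sort_key(hit: dict) -> float:
    # Prefer the fused score when the lightweight stage produced one;
    # fall back to the raw fine score (or 0.0) for hits without it.
    return hit.get("_fine_fused_score", hit.get("_fine_score", 0.0))

hits = [
    {"_fine_score": 0.9},                           # no fused score yet
    {"_fine_fused_score": 1.4, "_fine_score": 0.2}, # fused score wins
    {},                                             # neither: sorts last
]
hits.sort(key=fine_sort_key, reverse=True)
```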
search/searcher.py
@@ -720,6 +720,8 @@ class Searcher:
720 rerank_doc_template=fine_doc_template, 720 rerank_doc_template=fine_doc_template,
721 top_n=fine_output_window, 721 top_n=fine_output_window,
722 debug=debug, 722 debug=debug,
  723 + fusion=rc.fusion,
  724 + style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
723 service_profile=fine_cfg.service_profile, 725 service_profile=fine_cfg.service_profile,
724 ) 726 )
725 if fine_scores is not None: 727 if fine_scores is not None:
@@ -745,6 +747,7 @@ class Searcher:
745 "docs_out": len(hits), 747 "docs_out": len(hits),
746 "top_n": fine_output_window, 748 "top_n": fine_output_window,
747 "meta": fine_meta, 749 "meta": fine_meta,
  750 + "fusion": asdict(rc.fusion),
748 } 751 }
749 context.store_intermediate_result("fine_rank_scores", fine_debug_rows) 752 context.store_intermediate_result("fine_rank_scores", fine_debug_rows)
750 context.logger.info( 753 context.logger.info(
@@ -781,7 +784,6 @@ class Searcher:
781 top_n=(from_ + size), 784 top_n=(from_ + size),
782 debug=debug, 785 debug=debug,
783 fusion=rc.fusion, 786 fusion=rc.fusion,
784 - fine_scores=fine_scores[:len(final_input)] if fine_scores is not None else None,  
785 service_profile=rc.service_profile, 787 service_profile=rc.service_profile,
786 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost, 788 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
787 ) 789 )
@@ -1026,18 +1028,14 @@ class Searcher:
1026 # If rerank debug info is present, attach doc-level fusion score details 1028 # If rerank debug info is present, attach doc-level fusion score details
1027 if rerank_debug: 1029 if rerank_debug:
1028 debug_entry["doc_id"] = rerank_debug.get("doc_id") 1030 debug_entry["doc_id"] = rerank_debug.get("doc_id")
1029 - # Keep fields consistent with rerank_client so the frontend can consume them directly 1031 + debug_entry["score"] = rerank_debug.get("score")
1030 debug_entry["rerank_score"] = rerank_debug.get("rerank_score") 1032 debug_entry["rerank_score"] = rerank_debug.get("rerank_score")
1031 debug_entry["fine_score"] = rerank_debug.get("fine_score") 1033 debug_entry["fine_score"] = rerank_debug.get("fine_score")
1032 debug_entry["text_score"] = rerank_debug.get("text_score") 1034 debug_entry["text_score"] = rerank_debug.get("text_score")
1033 - debug_entry["text_source_score"] = rerank_debug.get("text_source_score")  
1034 - debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score")  
1035 - debug_entry["text_weighted_source_score"] = rerank_debug.get("text_weighted_source_score")  
1036 - debug_entry["text_weighted_translation_score"] = rerank_debug.get("text_weighted_translation_score")  
1037 - debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score")  
1038 - debug_entry["text_support_score"] = rerank_debug.get("text_support_score")  
1039 - debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es")  
1040 debug_entry["knn_score"] = rerank_debug.get("knn_score") 1035 debug_entry["knn_score"] = rerank_debug.get("knn_score")
  1036 + debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs")
  1037 + debug_entry["fusion_factors"] = rerank_debug.get("fusion_factors")
  1038 + debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary")
1041 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") 1039 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
1042 debug_entry["fine_factor"] = rerank_debug.get("fine_factor") 1040 debug_entry["fine_factor"] = rerank_debug.get("fine_factor")
1043 debug_entry["text_factor"] = rerank_debug.get("text_factor") 1041 debug_entry["text_factor"] = rerank_debug.get("text_factor")
@@ -1047,7 +1045,13 @@ class Searcher:
1047 debug_entry["matched_queries"] = rerank_debug.get("matched_queries") 1045 debug_entry["matched_queries"] = rerank_debug.get("matched_queries")
1048 elif fine_debug: 1046 elif fine_debug:
1049 debug_entry["doc_id"] = fine_debug.get("doc_id") 1047 debug_entry["doc_id"] = fine_debug.get("doc_id")
  1048 + debug_entry["score"] = fine_debug.get("score")
1050 debug_entry["fine_score"] = fine_debug.get("fine_score") 1049 debug_entry["fine_score"] = fine_debug.get("fine_score")
  1050 + debug_entry["text_score"] = fine_debug.get("text_score")
  1051 + debug_entry["knn_score"] = fine_debug.get("knn_score")
  1052 + debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs")
  1053 + debug_entry["fusion_factors"] = fine_debug.get("fusion_factors")
  1054 + debug_entry["fusion_summary"] = fine_debug.get("fusion_summary")
1051 debug_entry["rerank_input"] = fine_debug.get("rerank_input") 1055 debug_entry["rerank_input"] = fine_debug.get("rerank_input")
1052 1056
1053 initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None 1057 initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
@@ -1081,17 +1085,32 @@ class Searcher:
1081 "fine_rank": { 1085 "fine_rank": {
1082 "rank": fine_rank, 1086 "rank": fine_rank,
1083 "rank_change": _rank_change(coarse_rank, fine_rank), 1087 "rank_change": _rank_change(coarse_rank, fine_rank),
1084 - "score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), 1088 + "score": (
  1089 + fine_debug.get("score")
  1090 + if fine_debug and fine_debug.get("score") is not None
  1091 + else hit.get("_fine_fused_score", hit.get("_fine_score"))
  1092 + ),
  1093 + "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"),
  1094 + "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"),
  1095 + "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"),
  1096 + "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None,
  1097 + "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None,
  1098 + "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None,
1085 "rerank_input": fine_debug.get("rerank_input") if fine_debug else None, 1099 "rerank_input": fine_debug.get("rerank_input") if fine_debug else None,
  1100 + "signals": fine_debug,
1086 }, 1101 },
1087 "rerank": { 1102 "rerank": {
1088 "rank": rerank_rank, 1103 "rank": rerank_rank,
1089 "rank_change": _rank_change(fine_rank, rerank_rank), 1104 "rank_change": _rank_change(fine_rank, rerank_rank),
  1105 + "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"),
1090 "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), 1106 "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"),
1091 "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), 1107 "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"),
1092 "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), 1108 "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"),
1093 "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"), 1109 "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"),
1094 "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"), 1110 "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"),
  1111 + "fusion_summary": rerank_debug.get("fusion_summary") if rerank_debug else None,
  1112 + "fusion_inputs": rerank_debug.get("fusion_inputs") if rerank_debug else None,
  1113 + "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None,
1095 "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, 1114 "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None,
1096 "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, 1115 "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None,
1097 "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, 1116 "text_factor": rerank_debug.get("text_factor") if rerank_debug else None,
suggestion/builder.py
@@ -166,6 +166,29 @@ class SuggestionIndexBuilder:
166 out = [p.strip() for p in parts if p and p.strip()] 166 out = [p.strip() for p in parts if p and p.strip()]
167 return out if out else [s] 167 return out if out else [s]
168 168
  169 + def _iter_multilang_product_tags(
  170 + self,
  171 + raw: Any,
  172 + index_languages: List[str],
  173 + primary_language: str,
  174 + ) -> List[Tuple[str, str]]:
  175 + if isinstance(raw, dict):
  176 + pairs: List[Tuple[str, str]] = []
  177 + for lang in index_languages:
  178 + for tag in self._iter_product_tags(raw.get(lang)):
  179 + pairs.append((lang, tag))
  180 + return pairs
  181 +
  182 + pairs = []
  183 + for tag in self._iter_product_tags(raw):
  184 + tag_lang, _, _ = detect_text_language_for_suggestions(
  185 + tag,
  186 + index_languages=index_languages,
  187 + primary_language=primary_language,
  188 + )
  189 + pairs.append((tag_lang, tag))
  190 + return pairs
  191 +
169 @staticmethod 192 @staticmethod
170 def _looks_noise(text_value: str) -> bool: 193 def _looks_noise(text_value: str) -> bool:
171 if not text_value: 194 if not text_value:
@@ -487,12 +510,11 @@ class SuggestionIndexBuilder:
487 key_to_candidate[key] = c 510 key_to_candidate[key] = c
488 c.add_product("qanchor", spu_id=product_id) 511 c.add_product("qanchor", spu_id=product_id)
489 512
490 - for tag in self._iter_product_tags(src.get("tags")):  
491 - tag_lang, _, _ = detect_text_language_for_suggestions(  
492 - tag,  
493 - index_languages=index_languages,  
494 - primary_language=primary_language,  
495 - ) 513 + for tag_lang, tag in self._iter_multilang_product_tags(
  514 + src.get("tags"),
  515 + index_languages=index_languages,
  516 + primary_language=primary_language,
  517 + ):
496 text_norm = self._normalize_text(tag) 518 text_norm = self._normalize_text(tag)
497 if self._looks_noise(text_norm): 519 if self._looks_noise(text_norm):
498 continue 520 continue
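`_iter_multilang_product_tags` above accepts either a per-language dict (`{"zh": [...], "en": [...]}`) or a flat list, detecting each tag's language only in the flat case. A simplified sketch of that branching (`detect` stands in for `detect_text_language_for_suggestions`, and the string-splitting done by `_iter_product_tags` is omitted):

```python
from typing import Any, Callable, List, Tuple

def iter_multilang_tags(
    raw: Any,
    index_languages: List[str],
    detect: Callable[[str], str] = lambda tag: "en",
) -> List[Tuple[str, str]]:
    # Dict input: language keys are authoritative, no detection needed.
    if isinstance(raw, dict):
        return [(lang, tag)
                for lang in index_languages
                for tag in (raw.get(lang) or [])]
    # Flat input: fall back to per-tag language detection.
    return [(detect(tag), tag) for tag in (raw or [])]
```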
tests/ci/test_service_api_contracts.py
@@ -345,33 +345,25 @@ def test_indexer_build_docs_from_db_contract(indexer_client: TestClient):
345 def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch): 345 def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch):
346 import indexer.product_enrich as process_products 346 import indexer.product_enrich as process_products
347 347
348 - def _fake_analyze_products(  
349 - products: List[Dict[str, str]],  
350 - target_lang: str = "zh",  
351 - batch_size: int | None = None,  
352 - tenant_id: str | None = None,  
353 - ):  
354 - assert batch_size == 20 348 + def _fake_build_index_content_fields(items: List[Dict[str, str]], tenant_id: str | None = None):
  349 + assert tenant_id == "162"
355 return [ 350 return [
356 { 351 {
357 - "id": p["id"],  
358 - "lang": target_lang,  
359 - "title_input": p["title"],  
360 - "title": p["title"],  
361 - "category_path": "",  
362 - "tags": "tag1,tag2",  
363 - "target_audience": "",  
364 - "usage_scene": "",  
365 - "season": "",  
366 - "key_attributes": "",  
367 - "material": "",  
368 - "features": "",  
369 - "anchor_text": f"{target_lang}-anchor-{p['id']}", 352 + "id": p["spu_id"],
  353 + "qanchors": {
  354 + "zh": [f"zh-anchor-{p['spu_id']}"],
  355 + "en": [f"en-anchor-{p['spu_id']}"],
  356 + },
  357 + "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]},
  358 + "enriched_attributes": [
  359 + {"name": "enriched_tags", "value": {"zh": "tag1"}},
  360 + {"name": "enriched_tags", "value": {"en": "tag1"}},
  361 + ],
370 } 362 }
371 - for p in products 363 + for p in items
372 ] 364 ]
373 365
374 - monkeypatch.setattr(process_products, "analyze_products", _fake_analyze_products) 366 + monkeypatch.setattr(process_products, "build_index_content_fields", _fake_build_index_content_fields)
375 367
376 response = indexer_client.post( 368 response = indexer_client.post(
377 "/indexer/enrich-content", 369 "/indexer/enrich-content",
@@ -381,7 +373,6 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
381 {"spu_id": "1001", "title": "T-shirt"}, 373 {"spu_id": "1001", "title": "T-shirt"},
382 {"spu_id": "1002", "title": "Toy"}, 374 {"spu_id": "1002", "title": "Toy"},
383 ], 375 ],
384 - "languages": ["zh", "en"],  
385 }, 376 },
386 ) 377 )
387 assert response.status_code == 200 378 assert response.status_code == 200
@@ -390,9 +381,14 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
390 assert data["total"] == 2 381 assert data["total"] == 2
391 assert len(data["results"]) == 2 382 assert len(data["results"]) == 2
392 assert data["results"][0]["spu_id"] == "1001" 383 assert data["results"][0]["spu_id"] == "1001"
393 - assert data["results"][0]["qanchors"]["zh"] == "zh-anchor-1001"  
394 - assert data["results"][0]["qanchors"]["en"] == "en-anchor-1001"  
395 - assert "tag1" in data["results"][0]["tags"] 384 + assert data["results"][0]["qanchors"]["zh"] == ["zh-anchor-1001"]
  385 + assert data["results"][0]["qanchors"]["en"] == ["en-anchor-1001"]
  386 + assert data["results"][0]["enriched_tags"]["zh"] == ["tag1", "tag2"]
  387 + assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"]
  388 + assert data["results"][0]["enriched_attributes"][0] == {
  389 + "name": "enriched_tags",
  390 + "value": {"zh": "tag1"},
  391 + }
396 392
397 393
398 def test_indexer_documents_contract(indexer_client: TestClient): 394 def test_indexer_documents_contract(indexer_client: TestClient):
@@ -515,7 +511,6 @@ def test_indexer_enrich_content_validation_max_items(indexer_client: TestClient) @@ -515,7 +511,6 @@ def test_indexer_enrich_content_validation_max_items(indexer_client: TestClient)
515 json={ 511 json={
516 "tenant_id": "162", 512 "tenant_id": "162",
517 "items": [{"spu_id": str(i), "title": "x"} for i in range(51)], 513 "items": [{"spu_id": str(i), "title": "x"} for i in range(51)],
518 - "languages": ["zh"],  
519 }, 514 },
520 ) 515 )
521 assert response.status_code == 400 516 assert response.status_code == 400
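The max-items validation test above implies a request-size cap on `/indexer/enrich-content`: 51 items yields HTTP 400. A minimal sketch of such a check, assuming the limit is 50 (inferred from `range(51)` returning 400; the names `MAX_ITEMS` and `validate_enrich_request` are illustrative, not the service's actual code):

```python
# Hypothetical request-size guard; limit of 50 is inferred from the test,
# which sends 51 items and expects HTTP 400.
MAX_ITEMS = 50

def validate_enrich_request(payload: dict) -> tuple[int, dict]:
    """Return (status_code, body) for an enrich-content payload."""
    items = payload.get("items", [])
    if len(items) > MAX_ITEMS:
        return 400, {"error": f"too many items: {len(items)} > {MAX_ITEMS}"}
    return 200, {"total": len(items)}

status, body = validate_enrich_request(
    {"tenant_id": "162", "items": [{"spu_id": str(i), "title": "x"} for i in range(51)]}
)
print(status)  # → 400
```

Rejecting oversized batches at the boundary keeps the downstream enrichment call bounded, which matches the contract the test pins down.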
tests/test_llm_enrichment_batch_fill.py
@@ -7,33 +7,30 @@ import pandas as pd
 from indexer.document_transformer import SPUDocumentTransformer
 
 
-def test_fill_llm_attributes_batch_calls_analyze_in_batches(monkeypatch):
+def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
     seen_calls: List[Dict[str, Any]] = []
 
-    def _fake_analyze_products(products, target_lang="zh", batch_size=None, tenant_id=None):
-        # should always request batch_size=20 and pass full list; internal splitter handles >20
-        seen_calls.append(
-            {
-                "n": len(products),
-                "target_lang": target_lang,
-                "batch_size": batch_size,
-                "tenant_id": tenant_id,
-            }
-        )
+    def _fake_build_index_content_fields(items, tenant_id=None):
+        seen_calls.append({"n": len(items), "tenant_id": tenant_id})
         return [
             {
-                "id": p["id"],
-                "lang": target_lang,
-                "title_input": p["title"],
-                "tags": "t1,t2",
-                "anchor_text": f"{target_lang}-anchor-{p['id']}",
+                "id": item["id"],
+                "qanchors": {
+                    "zh": [f"zh-anchor-{item['id']}"],
+                    "en": [f"en-anchor-{item['id']}"],
+                },
+                "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
+                "enriched_attributes": [
+                    {"name": "tags", "value": {"zh": "t1"}},
+                    {"name": "tags", "value": {"en": "t1"}},
+                ],
             }
-            for p in products
+            for item in items
         ]
 
     import indexer.document_transformer as doc_tr
 
-    monkeypatch.setattr(doc_tr, "analyze_products", _fake_analyze_products)
+    monkeypatch.setattr(doc_tr, "build_index_content_fields", _fake_build_index_content_fields)
 
     transformer = SPUDocumentTransformer(
         category_id_to_name={},
@@ -54,11 +51,11 @@ def test_fill_llm_attributes_batch_calls_analyze_in_batches(monkeypatch):
 
     transformer.fill_llm_attributes_batch(docs, rows)
 
-    # called once per language, with full list; analyze_products handles splitting
-    assert seen_calls == [
-        {"n": 45, "target_lang": "zh", "batch_size": 20, "tenant_id": "162"},
-        {"n": 45, "target_lang": "en", "batch_size": 20, "tenant_id": "162"},
-    ]
+    assert seen_calls == [{"n": 45, "tenant_id": "162"}]
 
-    assert docs[0]["qanchors"]["zh"] == "zh-anchor-0"
-    assert docs[0]["qanchors"]["en"] == "en-anchor-0"
+    assert docs[0]["qanchors"]["zh"] == ["zh-anchor-0"]
+    assert docs[0]["qanchors"]["en"] == ["en-anchor-0"]
+    assert docs[0]["tags"]["zh"] == ["t1", "t2"]
+    assert docs[0]["tags"]["en"] == ["t1", "t2"]
+    assert {"name": "tags", "value": {"zh": "t1"}} in docs[0]["enriched_attributes"]
+    assert {"name": "tags", "value": {"en": "t1"}} in docs[0]["enriched_attributes"]
tests/test_rerank_client.py
 from math import isclose
 
 from config.schema import RerankFusionConfig
-from search.rerank_client import fuse_scores_and_resort
+from search.rerank_client import fuse_scores_and_resort, run_lightweight_rerank
 
 
 def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_primary():
@@ -204,3 +204,57 @@ def test_fuse_scores_and_resort_applies_knn_dismax_weights_and_tie_breaker():
     assert isclose(debug[0]["weighted_image_knn_score"], 0.5, rel_tol=1e-9)
     assert isclose(debug[0]["knn_primary_score"], 0.8, rel_tol=1e-9)
     assert isclose(debug[0]["knn_support_score"], 0.5, rel_tol=1e-9)
+
+
+def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch):
+    hits = [
+        {
+            "_id": "fine-raw-better",
+            "_score": 1.0,
+            "_source": {"title": {"en": "Alpha"}},
+            "matched_queries": {"base_query": 0.5, "knn_query": 0.0},
+        },
+        {
+            "_id": "fusion-better",
+            "_score": 1.0,
+            "_source": {"title": {"en": "Beta"}},
+            "matched_queries": {"base_query": 40.0, "knn_query": 0.0},
+        },
+    ]
+
+    monkeypatch.setattr(
+        "search.rerank_client.call_rerank_service",
+        lambda *args, **kwargs: ([0.9, 0.8], {"model": "fine-bge"}),
+    )
+
+    scores, meta, debug_rows = run_lightweight_rerank(
+        query="toy",
+        es_hits=hits,
+        language="en",
+        debug=True,
+    )
+
+    assert scores == [0.9, 0.8]
+    assert meta == {"model": "fine-bge"}
+    assert [hit["_id"] for hit in hits] == ["fusion-better", "fine-raw-better"]
+    assert hits[0]["_fine_fused_score"] > hits[1]["_fine_fused_score"]
+    assert debug_rows[0]["fusion_summary"]
+    assert "fine_score=" in debug_rows[0]["fusion_summary"]
+    assert "text_score=" in debug_rows[0]["fusion_summary"]
+
+
+def test_fuse_scores_and_resort_uses_hit_level_fine_score_when_not_passed_separately():
+    hits = [
+        {
+            "_id": "with-fine",
+            "_score": 1.0,
+            "_fine_score": 0.7,
+            "matched_queries": {"base_query": 2.0, "knn_query": 0.5},
+        }
+    ]
+
+    debug = fuse_scores_and_resort(hits, [0.8], debug=True)
+
+    assert isclose(debug[0]["fine_factor"], (0.7 + 0.00001), rel_tol=1e-9)
+    assert debug[0]["fusion_inputs"]["fine_score"] == 0.7
+    assert "fine_score=" in debug[0]["fusion_summary"]
tests/test_suggestions.py
@@ -403,10 +403,13 @@ def test_build_full_candidates_tags_and_qanchor_phrases(monkeypatch):
                 "spu_id": "900",
                 "title": {"en": "Tee", "zh": "T恤"},
                 "qanchors": {
-                    "en": "slim fit, sporty casual",
-                    "zh": "修身, 显瘦",
+                    "en": ["slim fit", "sporty casual"],
+                    "zh": ["修身", "显瘦"],
+                },
+                "tags": {
+                    "en": ["Classic", "ribbed neckline"],
+                    "zh": ["辣妹风"],
                 },
-                "tags": ["Classic", "辣妹风", "ribbed neckline"],
             },
         }
     ]
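The suggestions fixture above moves from comma-joined qanchor strings and a flat tag list to per-language lists. For documents already indexed in the legacy shape, a one-off conversion could look like this (a hypothetical migration helper, not code from the repo; `split_qanchors` is an illustrative name):

```python
def split_qanchors(legacy: dict[str, str]) -> dict[str, list[str]]:
    """Convert legacy comma-joined qanchor strings into per-language lists,
    matching the new fixture shape (e.g. "slim fit, sporty casual" ->
    ["slim fit", "sporty casual"])."""
    return {
        lang: [part.strip() for part in text.split(",") if part.strip()]
        for lang, text in legacy.items()
    }

print(split_qanchors({"en": "slim fit, sporty casual", "zh": "修身, 显瘦"}))
# → {'en': ['slim fit', 'sporty casual'], 'zh': ['修身', '显瘦']}
```

Per-language lists let the suggestion builder emit each anchor phrase as its own candidate instead of re-splitting strings at query time.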