Compare View

Commits (19)
  • Previously, both `b` and `k1` were set to `0.0`. The original intention
    was to avoid two common issues in e-commerce search relevance:
    
    1. Over-penalizing longer product titles
       In product search, a shorter title should not automatically rank
    higher just because BM25 favors shorter fields. For example, for a query
    like “遥控车”, a product whose title is simply “遥控车” is not
    necessarily a better candidate than a product with a slightly longer but
    more descriptive title. In practice, extremely short titles may even
    indicate lower-quality catalog data.
    
    2. Over-rewarding repeated occurrences of the same term
       For longer queries such as “遥控喷雾翻滚多功能车玩具车”, the default
    BM25 behavior may give too much weight to a term that appears multiple
    times (for example “遥控”), even when other important query terms such
    as “喷雾” or “翻滚” are missing. This can cause products with repeated
    partial matches to outrank products that actually cover more of the user
    intent.
    
    Setting both parameters to zero was an intentional way to suppress
    length normalization and term-frequency amplification. However, after
    introducing a `combined_fields` query, this configuration becomes too
    aggressive. Since `combined_fields` scores multiple fields as a unified
    relevance signal, completely disabling both effects may also remove
    useful ranking information, especially when we still want documents
    matching more query terms across fields to be distinguishable from
    weaker matches.
    
    This update therefore relaxes the previous setting and reintroduces a
    controlled amount of BM25 normalization/scoring behavior. The goal is to
    keep the original intent — avoiding short-title bias and excessive
    repeated-term gain — while allowing the combined query to better
    preserve meaningful relevance differences across candidates.
    
    Expected effect:
    - reduce the bias toward unnaturally short product titles
    - limit score inflation caused by repeated occurrences of the same term
    - improve ranking stability for `combined_fields` queries
    - better reward candidates that cover more of the overall query intent,
      instead of those that only repeat a subset of terms
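The relaxed setting can be registered as a custom similarity in the index settings. A minimal sketch, assuming a hypothetical similarity name `title_bm25_soft` and purely illustrative `k1`/`b` values (the commit does not state the final numbers):

```python
# Sketch: a softened BM25 similarity that damps, rather than disables,
# length normalization (b) and term-frequency saturation (k1).
# The concrete k1/b values here are illustrative assumptions.
def build_title_similarity_settings(k1: float = 0.3, b: float = 0.25) -> dict:
    """Return ES index settings registering a softened BM25 similarity."""
    return {
        "settings": {
            "index": {
                "similarity": {
                    "title_bm25_soft": {
                        "type": "BM25",
                        "k1": k1,  # small k1: repeated terms saturate quickly
                        "b": b,    # small b: mild penalty for long titles
                    }
                }
            }
        }
    }

settings = build_title_similarity_settings()
```

Fields scored by `combined_fields` would then reference this similarity in the mapping, restoring a controlled amount of both effects instead of zeroing them out.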
    tangwang
     
  • tangwang
     
  • tangwang
     
  • tangwang
     
  • tangwang
     
  • tangwang
     
Field generation

- Added taxonomy attribute enrichment, following the same field structure and processing logic as enriched_attributes; only the prompt and the parsed dimensions differ
- Introduced the AnalysisSchema abstraction so content enrichment (content) and taxonomy enrichment (taxonomy) share batching, caching, prompt building, Markdown parsing, and normalization
- Refactored the existing enrichment pipeline in product_enrich.py, extracting common logic into functions such as
  _process_batch_for_schema and _parse_markdown_to_attributes, eliminating code duplication
- Added the taxonomy prompt template (TAXONOMY_ANALYSIS_PROMPT) and Markdown header definitions (TAXONOMY_HEADERS) to product_enrich_prompts.py
- Fixed the Markdown parser's behavior on empty cells: the previous implementation skipped empty cells and misaligned columns; empty values are now preserved so sparse taxonomy attribute columns align correctly
- Updated build_index_content_fields in document_transformer.py to write
  enriched_taxonomy_attributes (zh/en) into the final index document
- Adjusted the related unit tests (test_product_enrich_partial_mode.py and
  others) to cover the new field paths; all pass (14 passed)

Technical details:
- AnalysisSchema carries metadata such as schema_name, prompt_template, headers, and field_name_prefix
- Cache keys distinguish content from taxonomy: `enrich:{schema_name}:{product_id}`, avoiding cache pollution
- Taxonomy parsing uses the same nested structure as enriched_attributes:
  `{"attribute_key": "value"}`, with multi-row table support
- Batch size and retry logic are unchanged from content enrichment
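The schema-scoped cache key above can be sketched as follows; the `AnalysisSchema` fields mirror the metadata listed, but the class body here is a simplified stand-in, not the production abstract class:

```python
from dataclasses import dataclass
from typing import List

@dataclass
class AnalysisSchema:
    # Simplified stand-in for the metadata listed above.
    schema_name: str
    prompt_template: str
    headers: List[str]
    field_name_prefix: str

def enrich_cache_key(schema: AnalysisSchema, product_id: str) -> str:
    # content and taxonomy keys can never collide: schema_name differs.
    return f"enrich:{schema.schema_name}:{product_id}"

content_schema = AnalysisSchema("content", "...", ["attribute", "value"], "enriched")
taxonomy_schema = AnalysisSchema("taxonomy", "...", ["attribute", "value"], "enriched_taxonomy")
```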
    tangwang
     
- The `/indexer/enrich-content` route now returns `enriched_taxonomy_attributes` together with `enriched_attributes`
- New optional request parameter `analysis_kinds` (default `["content",
  "taxonomy"]`) lets callers choose which analysis types to run, leaving room for future extension and cost control
- Refactored the caching strategy: the `content` and `taxonomy` analyses now have fully isolated caches, and the cache
  key includes the prompt template, headers, and output field definitions (the schema
  fingerprint), so changes to prompts or parsing rules invalidate entries automatically
- The cache key depends only on the fields that actually enter the LLM
  input (`title`, `brief`, `description`); `image_url`, `tenant_id`, and `spu_id`
  no longer pollute the cache key, improving hit rates
- Updated the API docs
  (`docs/搜索API对接指南-05-索引接口(Indexer).md`) to describe the new parameter and returned fields

Technical details:
- Route layer: the enrich-content endpoint in `api/routes/indexer.py` explicitly adds the
  `enriched_taxonomy_attributes` field returned by `product_enrich.enrich_products_batch`
  to the HTTP response body
- The `analysis_kinds` parameter is passed through to the underlying
  `enrich_products_batch`, allowing one analysis type to be skipped on demand (e.g. fewer
  LLM calls when only taxonomy is needed)
- Cache fingerprint computation lives in `_get_cache_key` in `product_enrich.py` and is generated
  independently per `AnalysisSchema`; the version is included implicitly via `schema.version` or a
  hash of the prompt content
- Test coverage: added `analysis_kinds` combination scenarios and cache-isolation tests
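The fingerprinted cache key described above can be sketched like this; the helper name, hash truncation lengths, and payload layout are illustrative assumptions, not the production `_get_cache_key`:

```python
import hashlib
import json

# Only these fields reach the LLM, so only they participate in the key;
# image_url / tenant_id / spu_id are deliberately excluded.
LLM_INPUT_FIELDS = ("title", "brief", "description")

def build_cache_key(schema_name: str, prompt_template: str, headers, item: dict) -> str:
    # Schema fingerprint covers the prompt and headers, so editing either
    # invalidates old cache entries automatically.
    schema_fp = hashlib.sha256(
        json.dumps({"prompt": prompt_template, "headers": list(headers)},
                   sort_keys=True).encode("utf-8")
    ).hexdigest()[:12]
    payload = {k: item.get(k, "") for k in LLM_INPUT_FIELDS}
    input_fp = hashlib.sha256(
        json.dumps(payload, sort_keys=True, ensure_ascii=False).encode("utf-8")
    ).hexdigest()[:16]
    return f"enrich:{schema_name}:{schema_fp}:{input_fp}"
```

Because `tenant_id` and `image_url` are outside the payload, two tenants enriching the same title share a cache entry, while a prompt change produces a different key.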
    tangwang
     
category_taxonomy_profile

- The old analysis_kinds conflated "enrichment type" (content/taxonomy) with
  "category-specific configuration", which made it hard to extend taxonomy analysis
  to other categories (3C, home, etc.)
- New enrichment_scopes parameter: supports generic (general enrichment, producing
  qanchors/enriched_tags/enriched_attributes) and
  category_taxonomy (category enrichment, producing enriched_taxonomy_attributes)
- New category_taxonomy_profile parameter: selects which profile category enrichment
  uses (currently apparel is built in); each profile has its own
  prompt, output column definitions, parsing rules, and cache version
- analysis_kinds is kept as a compatibility alias to avoid breaking existing callers
- Refactored the internal taxonomy analysis into a profile registry pattern: a new
  _get_taxonomy_schema(profile_name) function returns the matching
  AnalysisSchema per profile
- Cache keys are now isolated by "analysis type + profile + schema fingerprint +
  input field hash", so different categories and prompt versions invalidate automatically
- Updated the API docs and microservice interface docs with the new parameter semantics and usage examples

Technical details:
- Entry point: the enrich-content endpoint in api/routes/indexer.py parses the new
  parameters and passes them down
- Core logic: enrich_products_batch in indexer/product_enrich.py gains a
  profile parameter; _process_batch_for_schema fetches the schema dynamically based on
  scope and profile
- Compatibility layer: if a request also provides analysis_kinds, it is mapped to
  enrichment_scopes (content→generic, taxonomy→category_taxonomy), with category_taxonomy_profile
  defaulting to "apparel"
- Test coverage: added enrichment_scopes combinations, profile switching, and compatibility-mode tests
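The compatibility mapping (analysis_kinds → enrichment_scopes) can be sketched as a small resolver; `resolve_enrichment_scopes` here is a hypothetical free-function version of the rule, not the production method:

```python
# Sketch of the compatibility resolution: explicit enrichment_scopes wins,
# the deprecated analysis_kinds alias is mapped, and the default runs both.
_KIND_TO_SCOPE = {"content": "generic", "taxonomy": "category_taxonomy"}

def resolve_enrichment_scopes(enrichment_scopes=None, analysis_kinds=None):
    if enrichment_scopes:
        return list(enrichment_scopes)
    if analysis_kinds:
        return [_KIND_TO_SCOPE[kind] for kind in analysis_kinds]
    return ["generic", "category_taxonomy"]
```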
    tangwang
     
This iteration is a large-scale refactor of the retrieval system's content enrichment module, extending the previously hard-coded apparel-only category support to all categories defined in
taxonomy.md, while restructuring the code to lower the cost of adding new categories. The core design uses a profile
registry pattern, batches by category profile, and explicitly distinguishes bilingual (zh+en) from English-only (en) output strategies.

Changes:

1. Expanded category support
   - Newly supported categories: 3c, bags, pet_supplies, electronics, outdoor, home_appliances, home_living, wigs, beauty, accessories, toys, shoes, sports, others
   - All new categories return only en fields in the taxonomy output, avoiding multilingual field bloat
   - Apparel keeps bilingual output (zh + en), preserving existing business compatibility

2. Core refactoring
   - `indexer/product_enrich.py`
     - Added the `TAXONOMY_PROFILES` registry, defining each category's output
       languages, prompt mapping, and taxonomy field set in a data-driven way
     - Rewrote `_enrich_taxonomy_batch`: LLM calls are batched per profile group,
       avoiding a separate branch per category
     - Introduced `_infer_profile_from_category()`, which infers the profile from the
       SPU's category field (used by the internal indexing path; fixes mixed catalogs
       falling back to apparel by default)
   - `indexer/product_enrich_prompts.py`
     - Refactored the single apparel prompt into a `PROMPT_TEMPLATES` dict storing
       prompts per profile
     - All non-apparel categories share one simplified prompt template that asks for en fields only
   - `indexer/document_transformer.py`
     - Passes category information when building enrichment requests, for downstream profile routing
     - Adjusted `_build_enrich_batch` so batch requests support mixed categories and group them correctly
   - `indexer/indexer.py` (API layer)
     - The `/indexer/enrich-content` request model gains an optional
       `category_profile` field so callers can specify the category explicitly;
       when omitted, the server infers it
     - Updated parameter validation and error handling, including support for fallback categories such as `others`

3. Documentation updates
   - `docs/搜索API对接指南-05-索引接口(Indexer).md`: adds the category profile
     parameter description and notes that non-apparel taxonomy returns en fields only
   - `docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md`: updates the
     enrichment microservice call examples to reflect multi-category grouped batching
   - `taxonomy.md`: adds per-category field lists and states that en is the
     only output for all non-apparel categories

Technical details:

- **Registry design**:
  ```python
  TAXONOMY_PROFILES = {
      "apparel": {"lang": ["zh", "en"], "prompt_key": "apparel",
  "fields": [...]},
      "3c": {"lang": ["en"], "prompt_key": "default", "fields": [...]},
      # ...
  }
  ```
  Adding a category only requires a new registry entry plus a matching
  prompt_key in `PROMPT_TEMPLATES`; the control flow is untouched.

- **Batching by profile**:
  - Previous implementation: all products were mixed together under the single apparel
    prompt, so non-apparel products were filled incorrectly.
  - After the refactor: `_enrich_taxonomy_batch` first groups products by profile,
    builds an independent LLM request per group, and merges the responses back in the
    original order. Group granularity is configurable, avoiding excessive request overhead from tiny groups.

- **Automatic category inference**:
  - For internal indexing (when the enrichment endpoint is not called explicitly),
    `_infer_profile_from_category` parses the SPU's `category_l1/l2/l3`
    fields and maps them to the best-matching
    profile. Mapping is keyword-based (e.g. "手机" -> "3c", "狗粮" -> "pet_supplies"); unmatched categories
    fall back to `apparel` for a smooth transition.
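The keyword-based inference can be sketched as below; the keyword table is illustrative and far smaller than any real mapping, and the function name only mirrors `_infer_profile_from_category`:

```python
# Sketch of keyword-based profile inference over the concatenated
# category_l1/l2/l3 path. The keyword table is an illustrative assumption.
_CATEGORY_KEYWORDS = {
    "3c": ["手机", "phone", "laptop"],
    "pet_supplies": ["狗粮", "cat litter", "pet"],
}

def infer_profile_from_category(category_path: str, default: str = "apparel") -> str:
    text = category_path.lower()
    for profile, keywords in _CATEGORY_KEYWORDS.items():
        if any(kw.lower() in text for kw in keywords):
            return profile
    # Unmatched categories fall back to apparel for a smooth transition.
    return default
```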
    
- **Output field trimming**:
  - Because the `enriched_taxonomy_attributes.value` field in the Elasticsearch
    mapping stores a single value (not split by language), non-apparel LLM
    output is written directly to that field; apparel uses the dynamic template fields
    `value.zh` and `value.en`. `_apply_lang_output` handles both cases uniformly.

- **Code size and maintainability**:
  - Total line count grows slightly (~+180 lines) because of the many new category
    definitions, but conditional branches drop from 5 to 1 (the profile
    lookup). The average cost of a new category is 3 registry lines plus a 10-line
    prompt template, with no changes to the core enrichment loop.

Affected files:
- `indexer/product_enrich.py`
- `indexer/product_enrich_prompts.py`
- `indexer/document_transformer.py`
- `indexer/indexer.py`
- `docs/搜索API对接指南-05-索引接口(Indexer).md`
- `docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md`
- `taxonomy.md`
- `tests/test_product_enrich_partial_mode.py` (adapted for multi-profile test cases)
- `tests/test_llm_enrichment_batch_fill.py`
- `tests/test_process_products_batching.py`

Test validation:
- Ran unit and integration tests: `pytest
  tests/test_product_enrich_partial_mode.py
  tests/test_llm_enrichment_batch_fill.py
  tests/test_process_products_batching.py
  tests/ci/test_service_api_contracts.py`, all passing (52 passed)
- Manually verified the mixed-catalog scenario: submitting apparel and 3c products
  together, the enrichment response returns bilingual output for apparel and en-only
  for 3c, with taxonomy fields filled correctly.
- Compile check: `py_compile` reports no syntax errors in the modified modules.

Notes:
- This refactor does not change existing apparel behavior, and the API is backward
  compatible (requests without a profile are still treated as apparel).
- To add bilingual support for a category later, only the `lang` list in the registry
  and a prompt template need changing; no other logic is touched.
    tangwang
     
2. Removed the automatic taxonomy-profile inference logic; build_index_content_fields()
   now only accepts an explicitly passed category_taxonomy_profile
3. All taxonomy profiles now output both zh and en; the per-industry language-switching
   logic was removed
    tangwang
     
Background:
- The scripts/ directory mixed service startup, data conversion, performance load
  testing, ad-hoc scripts, and historical backup directories
- Leftovers from intermediate iterations made it hard to maintain and for newcomers to understand
- Service orchestration has stabilized on the `service_ctl up all` set: tei / cnclip /
  embedding / embedding-image / translator / reranker / backend /
  indexer / frontend / eval-web; the reranker-fine default slot is no longer kept

Changes:
1. The root scripts/ directory is narrowed to runtime, ops, environment, and
   data-processing scripts, with a new scripts/README.md
2. Performance/load-testing/tuning scripts moved wholesale to benchmarks/, with
   benchmarks/README.md updated accordingly
3. Manual trial-run scripts moved to tests/manual/, with tests/manual/README.md updated accordingly
4. Clearly obsolete content deleted:
   - scripts/indexer__old_2025_11/
   - scripts/start.sh
   - scripts/install_server_deps.sh
5. Paths and stale descriptions fixed in the following docs:
   - the root README.md
   - performance-report documents
   - reranker/translation module docs

Technical details:
- Why performance tests do not live under the regular tests/: these scripts depend on
  real services, GPUs, models, and environmental noise, so they are unsuitable as a
  stable regression gate; benchmarks/ fits their purpose better
- tests/manual/ holds only trial-run scripts that need manually started dependencies and human inspection of results
- All migrated Python scripts pass py_compile syntax checks
- All migrated shell scripts pass bash -n syntax checks

Verification:
- py_compile: pass
- bash -n: pass
    tangwang
     
- Data conversion moved to scripts/data_import/README.md
  - Diagnostics/inspection moved to scripts/inspect/README.md
  - Ops helpers moved to scripts/ops/README.md
  - The frontend helper service moved to scripts/frontend/frontend_server.py
  - Translation model download moved to scripts/translation/download_translation_models.py
  - The ad-hoc image-embedding backfill script consolidated into
    scripts/maintenance/embed_tenant_image_urls.py
  - The Redis monitoring script merged under redis/, now scripts/redis/monitor_eviction.py

  All real call sites were updated to the new locations:

  - scripts/start_frontend.sh
  - scripts/start_cnclip_service.sh
  - scripts/service_ctl.sh
  - scripts/setup_translator_venv.sh
  - scripts/README.md

  Paths referencing these scripts in the docs were fixed as well, mainly docs/QUICKSTART.md and
  translation/README.md.
    tangwang
     
  • tangwang
     
2. Added `service_enabled_by_config()`:
   if reranker | reranker-fine | translator is disabled in config, `run.sh all` does not start that service
    tangwang
     
  • tangwang
     
  • tangwang
     
 Background and problem
- The existing coarse/fine reranking relies on the `knn_query` and `image_knn_query` scores, but both come from ANN recall; not every document entering the rerank_window (160) was recalled by both the text and image vector paths, so some documents get a score of 0, destabilizing the fusion formula.
- Simply increasing the ANN k cannot guarantee that documents brought in by lexical recall also carry both vector scores; a second query, or pulling vectors back for local computation, adds overhead and implementation complexity.

 Solution
Use the ES rescore mechanism: within the first search's `window_size`, run an exact vector script_score for each document and attach the scores as named queries in `matched_queries`, to be used preferentially by the subsequent coarse/rerank stages.

**Design decisions**:
- **Fill in scores without changing order**: rescore uses `score_mode: total` with `rescore_query_weight: 0.0`, so the original `_score` is unchanged; the existing ranking logic is untouched and the risk is minimal.
- **Named exact scores**: `exact_text_knn_query` and `exact_image_knn_query`, easy for clients to recognize and fall back from.
- **Configurable**: the `exact_knn_rescore_enabled` switch and `exact_knn_rescore_window` control the window size, default 160.

 Implementation details

 1. Config extension (`config/config.yaml`, `config/loader.py`)
```yaml
exact_knn_rescore_enabled: true
exact_knn_rescore_window: 160
```
New config entries, injected into `RerankConfig`.

 2. Searcher builds the rescore query (`search/searcher.py`)
- In `_build_es_search_request`, when `enable_rerank=True` and the config switch is on, construct the rescore object:
  - `window_size` = `exact_knn_rescore_window`
  - `query` is a `bool` query embedding two `script_score` sub-queries that compute the dot-product similarity of the text and image vectors respectively:
    ```painless
    // exact_text_knn_query
    (dotProduct(params.query_vector, 'title_embedding') + 1.0) / 2.0
    // exact_image_knn_query
    (dotProduct(params.image_query_vector, 'image_embedding.vector') + 1.0) / 2.0
    ```
  - Each `script_score` sets `_name` to the corresponding named query.
- Note: the current script scores are **not yet multiplied by knn_text_boost / knn_image_boost**; aligning their scale with the original ANN scores is a follow-up item.

 3. RerankClient prefers exact scores (`search/rerank_client.py`)
- In `_extract_coarse_signals`, read the `exact_text_knn_query` and `exact_image_knn_query` scores from the document's `matched_queries`.
- If present and valid, use them as `text_knn_score` / `image_knn_score` and mark `text_knn_source='exact_text_knn_query'`.
- If absent, fall back to the original `knn_query` / `image_knn_query` (ANN scores).
- The original ANN scores are kept as `approx_text_knn_score` / `approx_image_knn_score` for debugging comparisons.
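The exact-first fallback rule can be sketched as a small helper; `pick_knn_score` is a hypothetical simplification of the `_extract_coarse_signals` logic, treating `matched_queries` as a name-to-score dict:

```python
# Sketch: prefer the exact rescore score, fall back to the ANN recall score.
# Returns the chosen score plus its source name for debug output.
def pick_knn_score(matched_queries: dict, exact_name: str, ann_name: str):
    exact = matched_queries.get(exact_name)
    if exact is not None:
        return exact, exact_name
    # Documents outside both recall paths still get a defined 0.0 score.
    return matched_queries.get(ann_name, 0.0), ann_name

score, source = pick_knn_score(
    {"exact_text_knn_query": 0.82, "knn_query": 0.74},
    "exact_text_knn_query", "knn_query",
)
```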
    
 4. Debug info enhancement
- `debug_info.per_result[*].ranking_funnel.coarse_rank.signals` now outputs the exact scores, fallback scores, and source markers, making online coverage and value distribution easy to observe.

 Verification
- Unit tests `tests/test_rerank_client.py` and `tests/test_search_rerank_window.py` pass, covering exact-score priority, config parsing, and the ES request body structure.
- Sampling real online queries (6 queries, top160) shows:
  - **Exact coverage reaches 100%** (both text and image scored), fixing the partial-miss problem of ANN.
  - However, exact scores differ in magnitude from the original ANN scores (median ANN/exact ratio around 4.1x), because the exact script does not multiply in the boost factors.
- Current ranking impact: coarse top10 overlap drops to as low as 1/10, with maximum rank drift above 100.

 Follow-up plan
1. Align the exact and ANN score scales: multiply `knn_text_boost` / `knn_image_boost` into the script_score, with an extra 1.4x for long queries.
2. Re-evaluate top10 overlap and drift; if they converge, the coarse fusion formula can be migrated wholesale into the ES rescore stage.
3. The current version keeps the safe "fill in scores without changing order" strategy and already resolves the core missing-score problem.

 Files involved
- `config/config.yaml`
- `config/loader.py`
- `search/searcher.py`
- `search/rerank_client.py`
- `tests/test_rerank_client.py`
- `tests/test_search_rerank_window.py`
    tangwang
     
 Changes

1. **New config entries** (`config/config.yaml`)
   - `exact_knn_rescore_enabled`: whether exact vector rescoring is enabled, default true
   - `exact_knn_rescore_window`: rescoring window size, default 160 (decoupled from rerank_window, independently configurable)

2. **ES query layer changes** (`search/searcher.py`)
   - In the first ES search, inject a rescore phase for documents within window_size according to the config
   - rescore_query contains two named script_score clauses:
     - `exact_text_knn_query`: exact dot product on the text vector
     - `exact_image_knn_query`: exact dot product on the image vector
   - Currently uses `score_mode=total` with `rescore_query_weight=0.0`: **scores are filled in without changing the order**, and the exact scores only appear in `matched_queries`

3. **Unified vector-score boost logic** (`search/es_query_builder.py`)
   - Added a `_get_knn_plan()` method that centralizes the boost rules for text/image KNN
   - For long queries (token count above the threshold), the text boost is multiplied by an extra 1.4x
   - Exact rescore and ANN recall **share the same boost rules**, keeping score scales consistent
   - The existing ANN query-building logic was migrated to this unified entry point

4. **Score priority in the fusion stage** (`search/rerank_client.py`)
   - `_build_hit_signal_bundle()` now handles vector-score reading in one place
   - Prefer `exact_text_knn_query` / `exact_image_knn_query` from `matched_queries`
   - Fall back to the original `knn_query` / `image_knn_query` (ANN scores) if absent
   - Covers the coarse_rank, fine_rank, and rerank stages, avoiding duplicated patches

5. **Test coverage**
   - `tests/test_es_query_builder.py`: verifies ANN and exact share the boost rules
   - `tests/test_search_rerank_window.py`: verifies the rescore window and named queries are injected correctly
   - `tests/test_rerank_client.py`: verifies exact-first with ANN fallback
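The rescore request described in item 2 can be sketched as a body builder; the inner `match_all` wrapper and clause layout are assumptions consistent with the description, not the literal `searcher.py` code:

```python
# Sketch of the score-only rescore body: score_mode "total" with
# rescore_query_weight 0.0 leaves _score unchanged while the named
# script_score clauses surface their values in matched_queries.
def build_exact_knn_rescore(window_size, text_vector, image_vector):
    def clause(name, field, param, vector):
        return {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": f"(dotProduct(params.{param}, '{field}') + 1.0) / 2.0",
                    "params": {param: vector},
                },
                "_name": name,  # surfaces the score as a named query
            }
        }

    return {
        "window_size": window_size,
        "query": {
            "rescore_query": {
                "bool": {
                    "should": [
                        clause("exact_text_knn_query", "title_embedding",
                               "query_vector", text_vector),
                        clause("exact_image_knn_query", "image_embedding.vector",
                               "image_query_vector", image_vector),
                    ]
                }
            },
            "score_mode": "total",
            "rescore_query_weight": 0.0,  # add scores without touching _score
        },
    }
```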
    
 Technical details

- **Exact vector scoring script** (Painless)
  ```painless
  // text: (dotProduct + 1.0) / 2.0
  (dotProduct(params.query_vector, 'title_embedding') + 1.0) / 2.0
  // image is analogous, with field 'image_embedding.vector'
  ```
  Multiplied by the unified boost (from `knn_text_boost` / `knn_image_boost` in the config plus the long-query amplification factor).
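The unified boost rule can be sketched as follows; the function only mirrors `_get_knn_plan`, and the defaults are taken from the sample configuration in this commit message:

```python
# Sketch of a single boost plan shared by ANN recall and exact rescore,
# so both score paths stay on the same scale. Defaults mirror the sample
# configuration; the real method lives in es_query_builder.py.
def knn_boost_plan(query_tokens,
                   knn_text_boost=4.0,
                   knn_image_boost=4.0,
                   long_query_token_threshold=8,
                   long_query_text_boost_factor=1.4):
    text_boost = knn_text_boost
    if len(query_tokens) > long_query_token_threshold:
        # Long queries lean harder on the text vector signal.
        text_boost *= long_query_text_boost_factor
    return {"text_boost": text_boost, "image_boost": knn_image_boost}
```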
    
- **Named-query retention mechanism**
  - The main query already enables `include_named_queries_score: true`
  - Scores from the named rescore scripts are merged into each hit's `matched_queries`
  - `_extract_named_score()` extracts them by name, with exactly the same access pattern as the original ANN scores

- **Performance impact** (top160, 6 real queries, average of 3 rounds after warm-up)
  - `elasticsearch_search_primary` latency: 124.71ms → 136.60ms (+11.89ms, +9.53%)
  - `total_search` fluctuates with other components and is not a primary reference
  - The overhead is acceptable; no timeouts or resource bottlenecks observed

 Configuration example

```yaml
search:
  exact_knn_rescore_enabled: true
  exact_knn_rescore_window: 160
  knn_text_boost: 4.0
  knn_image_boost: 4.0
  long_query_token_threshold: 8
  long_query_text_boost_factor: 1.4
```

 Known issues and follow-up plan

- Tuning experiments show that with exact rescore enabled, the main metric for some queries (strong type constraints plus many similar styles/colors) drops by about 0.031 versus the baseline (exact=false): 0.6009 → 0.5697
- Root cause: exact turns KNN from a sparse auxiliary signal into a dense ranking factor, changing the coarse-stage ranking semantics; merely adjusting the existing `knn_bias/exponent` cannot fully recover it
- Next iteration: **do not force exact in the coarse stage**; prefer exact only in fine/rerank, or have coarse use an "ANN first, exact only fills gaps" strategy, then re-evaluate

 Related files

- `config/config.yaml`
- `search/searcher.py`
- `search/es_query_builder.py`
- `search/rerank_client.py`
- `tests/test_es_query_builder.py`
- `tests/test_search_rerank_window.py`
- `tests/test_rerank_client.py`
- `scripts/evaluation/exact_rescore_coarse_tuning_round2.json` (tuning experiment record)
    tangwang
     
Showing 137 changed files
@@ -4,6 +4,7 @@
 ES_HOST=http://localhost:9200
 ES_USERNAME=saas
 ES_PASSWORD=4hOaLaf41y2VuI8y
+ES_AUTH="${ES_USERNAME}:${ES_PASSWORD}"

 # Redis Configuration (Optional) - AI 生产 10.200.16.14:6479
 REDIS_HOST=10.200.16.14
@@ -8,6 +8,7 @@
 ES_HOST=http://localhost:9200
 ES_USERNAME=saas
 ES_PASSWORD=
+ES_AUTH="${ES_USERNAME}:${ES_PASSWORD}"

 # Redis (生产默认 10.200.16.14:6479,密码见 docs/QUICKSTART.md §1.6)
 REDIS_HOST=10.200.16.14
@@ -77,9 +77,11 @@ source activate.sh
 # Generate test data (Tenant1 Mock + Tenant2 CSV)
 ./scripts/mock_data.sh

-# Ingest data to Elasticsearch
-./scripts/ingest.sh <tenant_id> [recreate]   # e.g., ./scripts/ingest.sh 1 true
-python main.py ingest data.csv --limit 1000 --batch-size 50
+# Create tenant index structure
+./scripts/create_tenant_index.sh <tenant_id>
+
+# Build / refresh suggestion index
+./scripts/build_suggestions.sh <tenant_id> --mode incremental
 ```

 ### Running Services
@@ -100,10 +102,10 @@ python main.py serve --host 0.0.0.0 --port 6002 --reload
 # Run all tests
 pytest tests/

-# Run specific test types
-pytest tests/unit/          # Unit tests
-pytest tests/integration/   # Integration tests
-pytest -m "api"             # API tests only
+# Run focused regression sets
+python -m pytest tests/ci -q
+pytest tests/test_rerank_client.py
+pytest tests/test_query_parser_mixed_language.py

 # Test search from command line
 python main.py search "query" --tenant-id 1 --size 10
@@ -114,12 +116,8 @@ python main.py search "query" --tenant-id 1 --size 10
 # Stop all services
 ./scripts/stop.sh

-# Test environment (for CI/development)
-./scripts/start_test_environment.sh
-./scripts/stop_test_environment.sh
-
-# Install server dependencies
-./scripts/install_server_deps.sh
+# Run CI contract tests
+./scripts/run_ci_tests.sh
 ```

 ## Architecture Overview
@@ -585,7 +583,7 @@ GET /admin/stats                # Index statistics
 ./scripts/start_frontend.sh     # Frontend UI (port 6003)

 # Data Operations
-./scripts/ingest.sh <tenant_id> [recreate]    # Index data
+./scripts/create_tenant_index.sh <tenant_id>  # Create tenant index
 ./scripts/mock_data.sh          # Generate test data

 # Testing
@@ -154,7 +154,8 @@ class SearchRequest(BaseModel):
     enable_rerank: Optional[bool] = Field(
         None,
         description=(
-            "是否开启重排(调用外部重排服务对 ES 结果进行二次排序)。"
+            "是否开启最终重排(调用外部 rerank 服务改写上一阶段顺序)。"
+            "关闭时仍保留 coarse/fine 流程,仅在 rerank 阶段保序透传。"
             "不传则使用服务端配置 rerank.enabled(默认开启)。"
         )
     )
api/routes/indexer.py
@@ -7,7 +7,7 @@
 import asyncio
 import re
 from fastapi import APIRouter, HTTPException
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Literal, Optional
 from pydantic import BaseModel, Field
 import logging
 from sqlalchemy import text
@@ -19,6 +19,11 @@ logger = logging.getLogger(__name__)

 router = APIRouter(prefix="/indexer", tags=["indexer"])

+SUPPORTED_CATEGORY_TAXONOMY_PROFILES = (
+    "apparel, 3c, bags, pet_supplies, electronics, outdoor, "
+    "home_appliances, home_living, wigs, beauty, accessories, toys, shoes, sports, others"
+)
+

 class ReindexRequest(BaseModel):
     """全量重建索引请求"""
@@ -88,11 +93,42 @@ class EnrichContentItem(BaseModel):

 class EnrichContentRequest(BaseModel):
     """
-    内容理解字段生成请求:根据商品标题批量生成 qanchors、enriched_attributes、tags
+    内容理解字段生成请求:根据商品标题批量生成通用增强字段与品类 taxonomy 字段
     供外部 indexer 在自行组织 doc 时调用,与翻译、向量化等微服务并列。
     """
     tenant_id: str = Field(..., description="租户 ID,用于请求路由与结果归属,不参与缓存键")
     items: List[EnrichContentItem] = Field(..., description="待分析的 SPU 列表(spu_id + title,可附带 brief/description/image_url)")
+    enrichment_scopes: Optional[List[Literal["generic", "category_taxonomy"]]] = Field(
+        default=None,
+        description=(
+            "要执行的增强范围。"
+            "`generic` 返回 qanchors/enriched_tags/enriched_attributes;"
+            "`category_taxonomy` 返回 enriched_taxonomy_attributes。"
+            "默认两者都执行。"
+        ),
+    )
+    category_taxonomy_profile: str = Field(
+        "apparel",
+        description=(
+            "品类 taxonomy profile。默认 `apparel`。"
+            f"当前支持:{SUPPORTED_CATEGORY_TAXONOMY_PROFILES}。"
+            "其中除 `apparel` 外,其余 profile 的 taxonomy 输出仅返回 `en`。"
+        ),
+    )
+    analysis_kinds: Optional[List[Literal["content", "taxonomy"]]] = Field(
+        default=None,
+        description="Deprecated alias of enrichment_scopes. `content` -> `generic`, `taxonomy` -> `category_taxonomy`.",
+    )
+
+    def resolved_enrichment_scopes(self) -> List[str]:
+        if self.enrichment_scopes:
+            return list(self.enrichment_scopes)
+        if self.analysis_kinds:
+            mapped = []
+            for item in self.analysis_kinds:
+                mapped.append("generic" if item == "content" else "category_taxonomy")
+            return mapped
+        return ["generic", "category_taxonomy"]


 @router.post("/reindex")
@@ -440,20 +476,31 @@ async def build_docs_from_db(request: BuildDocsFromDbRequest):
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")


-def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]]) -> List[Dict[str, Any]]:
+def _run_enrich_content(
+    tenant_id: str,
+    items: List[Dict[str, str]],
+    enrichment_scopes: Optional[List[str]] = None,
+    category_taxonomy_profile: str = "apparel",
+) -> List[Dict[str, Any]]:
     """
     同步执行内容理解,返回与 ES mapping 对齐的字段结构。
     语言策略由 product_enrich 内部统一决定,路由层不参与。
     """
     from indexer.product_enrich import build_index_content_fields

-    results = build_index_content_fields(items=items, tenant_id=tenant_id)
+    results = build_index_content_fields(
+        items=items,
+        tenant_id=tenant_id,
+        enrichment_scopes=enrichment_scopes,
+        category_taxonomy_profile=category_taxonomy_profile,
+    )
     return [
         {
             "spu_id": item["id"],
             "qanchors": item["qanchors"],
             "enriched_attributes": item["enriched_attributes"],
             "enriched_tags": item["enriched_tags"],
+            "enriched_taxonomy_attributes": item["enriched_taxonomy_attributes"],
             **({"error": item["error"]} if item.get("error") else {}),
         }
         for item in results
@@ -463,15 +510,15 @@ def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]]) -> List[Dic
 @router.post("/enrich-content")
 async def enrich_content(request: EnrichContentRequest):
     """
-    内容理解字段生成接口:根据商品标题批量生成 qanchors、enriched_attributes、tags
+    内容理解字段生成接口:根据商品标题批量生成通用增强字段与品类 taxonomy 字段

     使用场景:
     - 外部 indexer 采用「微服务组合」方式自己组织 doc 时,可调用本接口获取 LLM 生成的
       锚文本与语义属性,再与翻译、向量化结果合并写入 ES。
     - 与 /indexer/build-docs 解耦,避免 build-docs 因 LLM 耗时过长而阻塞;调用方可
-      先拿不含 qanchors/enriched_tags 的 doc,再异步或离线补齐本接口结果后更新 ES。
+      先拿不含 qanchors/enriched_tags/taxonomy attributes 的 doc,再异步或离线补齐本接口结果后更新 ES。

-    实现逻辑与 indexer.product_enrich.analyze_products 一致,支持多语言与 Redis 缓存。
+    实现逻辑与 indexer.product_enrich.build_index_content_fields 一致,支持多语言与 Redis 缓存。
     """
     try:
         if not request.items:
@@ -493,15 +540,20 @@ async def enrich_content(request: EnrichContentRequest):
             for it in request.items
         ]
         loop = asyncio.get_event_loop()
+        enrichment_scopes = request.resolved_enrichment_scopes()
         result = await loop.run_in_executor(
             None,
             lambda: _run_enrich_content(
                 tenant_id=request.tenant_id,
-                items=items_payload
+                items=items_payload,
+                enrichment_scopes=enrichment_scopes,
+                category_taxonomy_profile=request.category_taxonomy_profile,
             ),
         )
         return {
             "tenant_id": request.tenant_id,
+            "enrichment_scopes": enrichment_scopes,
+            "category_taxonomy_profile": request.category_taxonomy_profile,
             "results": result,
             "total": len(result),
         }
api/translator_app.py
@@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI):
271 """Initialize all enabled translation backends on process startup.""" 271 """Initialize all enabled translation backends on process startup."""
272 logger.info("Starting Translation Service API") 272 logger.info("Starting Translation Service API")
273 service = get_translation_service() 273 service = get_translation_service()
  274 + failed_models = list(getattr(service, "failed_models", []))
  275 + backend_errors = dict(getattr(service, "backend_errors", {}))
274 logger.info( 276 logger.info(
275 - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", 277 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s",
276 service.config["default_model"], 278 service.config["default_model"],
277 service.config["default_scene"], 279 service.config["default_scene"],
278 service.available_models, 280 service.available_models,
279 service.loaded_models, 281 service.loaded_models,
  282 + failed_models,
280 ) 283 )
281 logger.info( 284 logger.info(
282 - "Translation backends initialized on startup | models=%s", 285 + "Translation backends initialized on startup | loaded=%s failed=%s",
283 service.loaded_models, 286 service.loaded_models,
  287 + backend_errors,
284 ) 288 )
285 verbose_logger.info( 289 verbose_logger.info(
286 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", 290 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
@@ -316,11 +320,14 @@ async def health_check(): @@ -316,11 +320,14 @@ async def health_check():
316 """Health check endpoint.""" 320 """Health check endpoint."""
317 try: 321 try:
318 service = get_translation_service() 322 service = get_translation_service()
  323 + failed_models = list(getattr(service, "failed_models", []))
  324 + backend_errors = dict(getattr(service, "backend_errors", {}))
319 logger.info( 325 logger.info(
320 - "Health check | default_model=%s default_scene=%s loaded_models=%s", 326 + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s",
321 service.config["default_model"], 327 service.config["default_model"],
322 service.config["default_scene"], 328 service.config["default_scene"],
323 service.loaded_models, 329 service.loaded_models,
  330 + failed_models,
324 ) 331 )
325 return { 332 return {
326 "status": "healthy", 333 "status": "healthy",
@@ -330,6 +337,8 @@ async def health_check(): @@ -330,6 +337,8 @@ async def health_check():
330 "available_models": service.available_models, 337 "available_models": service.available_models,
331 "enabled_capabilities": get_enabled_translation_models(service.config), 338 "enabled_capabilities": get_enabled_translation_models(service.config),
332 "loaded_models": service.loaded_models, 339 "loaded_models": service.loaded_models,
  340 + "failed_models": failed_models,
  341 + "backend_errors": backend_errors,
333 } 342 }
334 except Exception as e: 343 except Exception as e:
335 logger.error(f"Health check failed: {e}") 344 logger.error(f"Health check failed: {e}")
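The `getattr(..., default)` reads keep `/health` compatible with service objects that predate the new `failed_models` / `backend_errors` attributes. A minimal sketch of that defensive pattern (the `Service` class here is a stand-in, not the real translation service):

```python
# Defensive reads for optional attributes (Service is a stand-in for
# the real translation service object).
class Service:
    loaded_models = ["nllb-200-distilled-600m"]
    # deliberately no failed_models / backend_errors attributes

svc = Service()
failed_models = list(getattr(svc, "failed_models", []))
backend_errors = dict(getattr(svc, "backend_errors", {}))
payload = {
    "status": "healthy",
    "loaded_models": svc.loaded_models,
    "failed_models": failed_models,    # [] when the attribute is absent
    "backend_errors": backend_errors,  # {} when the attribute is absent
}
```

`list(...)`/`dict(...)` also copy the containers, so the response payload cannot alias mutable service state.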
@@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request):
463 latency_ms = (time.perf_counter() - request_started) * 1000 472 latency_ms = (time.perf_counter() - request_started) * 1000
464 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) 473 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms)
465 raise HTTPException(status_code=400, detail=str(e)) from e 474 raise HTTPException(status_code=400, detail=str(e)) from e
  475 + except RuntimeError as e:
  476 + latency_ms = (time.perf_counter() - request_started) * 1000
  477 + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms)
  478 + raise HTTPException(status_code=503, detail=str(e)) from e
466 except Exception as e: 479 except Exception as e:
467 latency_ms = (time.perf_counter() - request_started) * 1000 480 latency_ms = (time.perf_counter() - request_started) * 1000
468 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) 481 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
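With the new `except RuntimeError` branch, clients can tell a retryable backend outage (503) apart from a bad request (400) and an unexpected failure (500). A minimal sketch of that except-clause ordering as a plain mapping (helper name hypothetical; the real endpoint raises `HTTPException` inside FastAPI):

```python
# Sketch of the endpoint's error classification (helper name hypothetical).
def classify_translation_error(exc: Exception) -> int:
    if isinstance(exc, ValueError):    # validation error -> 400 Bad Request
        return 400
    if isinstance(exc, RuntimeError):  # backend unavailable -> 503, retryable
        return 503
    return 500                         # anything else -> 500 Internal Server Error
```

Ordering matters: the `RuntimeError` check must sit before the catch-all, exactly as the new `except` clause is placed before `except Exception` in the diff.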
benchmarks/README.md 0 → 100644
@@ -0,0 +1,17 @@ @@ -0,0 +1,17 @@
  1 +# Benchmarks
  2 +
  3 +Benchmark and load-test scripts live under `benchmarks/`, no longer mixed in with the service startup/ops scripts in `scripts/`.
  4 +
  5 +Directory layout:
  6 +
  7 +- `benchmarks/perf_api_benchmark.py`: general-purpose HTTP API load-test entry point
  8 +- `benchmarks/reranker/`: reranker-specific benchmarks, smoke tests, and manual comparison scripts
  9 +- `benchmarks/translation/`: local translation-model benchmarks
  10 +
  11 +These scripts are not part of CI by default, because they typically:
  12 +
  13 +- depend on live services, GPUs, models, or specific datasets
  14 +- produce results that vary with machine specs and runtime load, making them unsuitable as stable regression gates
  15 +- target capacity planning, tuning, and issue reproduction rather than functional-correctness checks
  16 +
  17 +If a performance scenario needs automated regression coverage, add it under `tests/` with pinned inputs, environment, and pass/fail thresholds instead of reusing these benchmark scripts directly.

scripts/perf_api_benchmark.py renamed to benchmarks/perf_api_benchmark.py
@@ -11,13 +11,13 @@ Default scenarios (aligned with docs/搜索API对接指南 分册,如 -01 / -0 @@ -11,13 +11,13 @@ Default scenarios (aligned with docs/搜索API对接指南 分册,如 -01 / -0
11 - rerank POST /rerank 11 - rerank POST /rerank
12 12
13 Examples: 13 Examples:
14 - python scripts/perf_api_benchmark.py --scenario backend_search --duration 30 --concurrency 20 --tenant-id 162  
15 - python scripts/perf_api_benchmark.py --scenario backend_suggest --duration 30 --concurrency 50 --tenant-id 162  
16 - python scripts/perf_api_benchmark.py --scenario all --duration 60 --concurrency 80 --tenant-id 162  
17 - python scripts/perf_api_benchmark.py --scenario all --cases-file scripts/perf_cases.json.example --output perf_result.json 14 + python benchmarks/perf_api_benchmark.py --scenario backend_search --duration 30 --concurrency 20 --tenant-id 162
  15 + python benchmarks/perf_api_benchmark.py --scenario backend_suggest --duration 30 --concurrency 50 --tenant-id 162
  16 + python benchmarks/perf_api_benchmark.py --scenario all --duration 60 --concurrency 80 --tenant-id 162
  17 + python benchmarks/perf_api_benchmark.py --scenario all --cases-file benchmarks/perf_cases.json.example --output perf_result.json
18 # Embedding admission / priority (query param `priority`; same semantics as embedding service): 18 # Embedding admission / priority (query param `priority`; same semantics as embedding service):
19 - python scripts/perf_api_benchmark.py --scenario embed_text --embed-text-priority 1 --duration 30 --concurrency 20  
20 - python scripts/perf_api_benchmark.py --scenario embed_image --embed-image-priority 1 --duration 30 --concurrency 10 19 + python benchmarks/perf_api_benchmark.py --scenario embed_text --embed-text-priority 1 --duration 30 --concurrency 20
  20 + python benchmarks/perf_api_benchmark.py --scenario embed_image --embed-image-priority 1 --duration 30 --concurrency 10
21 """ 21 """
22 22
23 from __future__ import annotations 23 from __future__ import annotations
@@ -229,7 +229,7 @@ def apply_embed_priority_params( @@ -229,7 +229,7 @@ def apply_embed_priority_params(
229 ) -> None: 229 ) -> None:
230 """ 230 """
231 Merge default `priority` query param into embed templates when absent. 231 Merge default `priority` query param into embed templates when absent.
232 - `scripts/perf_cases.json` may set per-request `params.priority` to override. 232 + `benchmarks/perf_cases.json` may set per-request `params.priority` to override.
233 """ 233 """
234 mapping = { 234 mapping = {
235 "embed_text": max(0, int(embed_text_priority)), 235 "embed_text": max(0, int(embed_text_priority)),
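The docstring above describes merging a default `priority` query param only when a case file has not already set one. A minimal sketch of that merge rule (helper name hypothetical), assuming the clamp-to-non-negative behavior shown in the `mapping` dict:

```python
# Sketch of "merge default query param when absent" (helper name hypothetical).
def merge_default_priority(params: dict, default_priority: int) -> dict:
    # A per-request params.priority from the cases file wins; otherwise fall
    # back to the CLI default, clamped to >= 0 like the mapping above.
    params.setdefault("priority", max(0, int(default_priority)))
    return params
```

`dict.setdefault` only writes when the key is missing, which is exactly the "may set per-request `params.priority` to override" contract.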
scripts/perf_cases.json.example renamed to benchmarks/perf_cases.json.example
scripts/benchmark_reranker_1000docs.sh renamed to benchmarks/reranker/benchmark_reranker_1000docs.sh
@@ -8,7 +8,7 @@ @@ -8,7 +8,7 @@
8 # Outputs JSON reports under perf_reports/<date>/reranker_1000docs/ 8 # Outputs JSON reports under perf_reports/<date>/reranker_1000docs/
9 # 9 #
10 # Usage: 10 # Usage:
11 -# ./scripts/benchmark_reranker_1000docs.sh 11 +# ./benchmarks/reranker/benchmark_reranker_1000docs.sh
12 # Optional env: 12 # Optional env:
13 # BATCH_SIZES="24 32 48 64" 13 # BATCH_SIZES="24 32 48 64"
14 # C1_REQUESTS=4 14 # C1_REQUESTS=4
@@ -85,7 +85,7 @@ run_bench() { @@ -85,7 +85,7 @@ run_bench() {
85 local c="$2" 85 local c="$2"
86 local req="$3" 86 local req="$3"
87 local out="${OUT_DIR}/rerank_bs${bs}_c${c}_r${req}.json" 87 local out="${OUT_DIR}/rerank_bs${bs}_c${c}_r${req}.json"
88 - .venv/bin/python scripts/perf_api_benchmark.py \ 88 + .venv/bin/python benchmarks/perf_api_benchmark.py \
89 --scenario rerank \ 89 --scenario rerank \
90 --tenant-id "${TENANT_ID}" \ 90 --tenant-id "${TENANT_ID}" \
91 --reranker-base "${RERANK_BASE}" \ 91 --reranker-base "${RERANK_BASE}" \
scripts/benchmark_reranker_gguf_local.py renamed to benchmarks/reranker/benchmark_reranker_gguf_local.py
@@ -8,8 +8,8 @@ Runs the backend directly in a fresh process per config to measure: @@ -8,8 +8,8 @@ Runs the backend directly in a fresh process per config to measure:
8 - single-request rerank latency 8 - single-request rerank latency
9 9
10 Example: 10 Example:
11 - ./.venv-reranker-gguf/bin/python scripts/benchmark_reranker_gguf_local.py  
12 - ./.venv-reranker-gguf-06b/bin/python scripts/benchmark_reranker_gguf_local.py --backend-name qwen3_gguf_06b --docs 400 11 + ./.venv-reranker-gguf/bin/python benchmarks/reranker/benchmark_reranker_gguf_local.py
  12 + ./.venv-reranker-gguf-06b/bin/python benchmarks/reranker/benchmark_reranker_gguf_local.py --backend-name qwen3_gguf_06b --docs 400
13 """ 13 """
14 14
15 from __future__ import annotations 15 from __future__ import annotations
scripts/benchmark_reranker_random_titles.py renamed to benchmarks/reranker/benchmark_reranker_random_titles.py
@@ -10,10 +10,10 @@ Each invocation runs 3 warmup requests with n=400 first; those are not timed for @@ -10,10 +10,10 @@ Each invocation runs 3 warmup requests with n=400 first; those are not timed for
10 10
11 Example: 11 Example:
12 source activate.sh 12 source activate.sh
13 - python scripts/benchmark_reranker_random_titles.py 386  
14 - python scripts/benchmark_reranker_random_titles.py 40,80,100  
15 - python scripts/benchmark_reranker_random_titles.py 40,80,100 --repeat 3 --seed 42  
16 - RERANK_BASE=http://127.0.0.1:6007 python scripts/benchmark_reranker_random_titles.py 200 13 + python benchmarks/reranker/benchmark_reranker_random_titles.py 386
  14 + python benchmarks/reranker/benchmark_reranker_random_titles.py 40,80,100
  15 + python benchmarks/reranker/benchmark_reranker_random_titles.py 40,80,100 --repeat 3 --seed 42
  16 + RERANK_BASE=http://127.0.0.1:6007 python benchmarks/reranker/benchmark_reranker_random_titles.py 200
17 """ 17 """
18 18
19 from __future__ import annotations 19 from __future__ import annotations
tests/reranker_performance/curl1.sh renamed to benchmarks/reranker/manual/curl1.sh
tests/reranker_performance/curl1_simple.sh renamed to benchmarks/reranker/manual/curl1_simple.sh
tests/reranker_performance/curl2.sh renamed to benchmarks/reranker/manual/curl2.sh
tests/reranker_performance/rerank_performance_compare.sh renamed to benchmarks/reranker/manual/rerank_performance_compare.sh
scripts/patch_rerank_vllm_benchmark_config.py renamed to benchmarks/reranker/patch_rerank_vllm_benchmark_config.py
@@ -73,7 +73,7 @@ def main() -> int: @@ -73,7 +73,7 @@ def main() -> int:
73 p.add_argument( 73 p.add_argument(
74 "--config", 74 "--config",
75 type=Path, 75 type=Path,
76 - default=Path(__file__).resolve().parent.parent / "config" / "config.yaml", 76 + default=Path(__file__).resolve().parents[2] / "config" / "config.yaml",
77 ) 77 )
78 p.add_argument("--backend", choices=("qwen3_vllm", "qwen3_vllm_score"), required=True) 78 p.add_argument("--backend", choices=("qwen3_vllm", "qwen3_vllm_score"), required=True)
79 p.add_argument( 79 p.add_argument(
scripts/run_reranker_vllm_instruction_benchmark.sh renamed to benchmarks/reranker/run_reranker_vllm_instruction_benchmark.sh
@@ -55,13 +55,13 @@ run_one() { @@ -55,13 +55,13 @@ run_one() {
55 local jf="${OUT_DIR}/${backend}_${fmt}.json" 55 local jf="${OUT_DIR}/${backend}_${fmt}.json"
56 56
57 echo "========== ${tag} ==========" 57 echo "========== ${tag} =========="
58 - "$PYTHON" "${ROOT}/scripts/patch_rerank_vllm_benchmark_config.py" \ 58 + "$PYTHON" "${ROOT}/benchmarks/reranker/patch_rerank_vllm_benchmark_config.py" \
59 --backend "$backend" --instruction-format "$fmt" 59 --backend "$backend" --instruction-format "$fmt"
60 60
61 "${ROOT}/restart.sh" reranker 61 "${ROOT}/restart.sh" reranker
62 wait_health "$backend" "$fmt" 62 wait_health "$backend" "$fmt"
63 63
64 - if ! "$PYTHON" "${ROOT}/scripts/benchmark_reranker_random_titles.py" \ 64 + if ! "$PYTHON" "${ROOT}/benchmarks/reranker/benchmark_reranker_random_titles.py" \
65 100,200,400,600,800,1000 \ 65 100,200,400,600,800,1000 \
66 --repeat 5 \ 66 --repeat 5 \
67 --seed 42 \ 67 --seed 42 \
@@ -82,7 +82,7 @@ run_one qwen3_vllm_score compact @@ -82,7 +82,7 @@ run_one qwen3_vllm_score compact
82 run_one qwen3_vllm_score standard 82 run_one qwen3_vllm_score standard
83 83
84 # Restore repo-default-style rerank settings (score + compact). 84 # Restore repo-default-style rerank settings (score + compact).
85 -"$PYTHON" "${ROOT}/scripts/patch_rerank_vllm_benchmark_config.py" \ 85 +"$PYTHON" "${ROOT}/benchmarks/reranker/patch_rerank_vllm_benchmark_config.py" \
86 --backend qwen3_vllm_score --instruction-format compact 86 --backend qwen3_vllm_score --instruction-format compact
87 "${ROOT}/restart.sh" reranker 87 "${ROOT}/restart.sh" reranker
88 wait_health qwen3_vllm_score compact 88 wait_health qwen3_vllm_score compact
scripts/smoke_qwen3_vllm_score_backend.py renamed to benchmarks/reranker/smoke_qwen3_vllm_score_backend.py
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 Smoke test: load Qwen3VLLMScoreRerankerBackend (must run as a file, not stdin — vLLM spawn). 3 Smoke test: load Qwen3VLLMScoreRerankerBackend (must run as a file, not stdin — vLLM spawn).
4 4
5 Usage (from repo root, score venv): 5 Usage (from repo root, score venv):
6 - PYTHONPATH=. ./.venv-reranker-score/bin/python scripts/smoke_qwen3_vllm_score_backend.py 6 + PYTHONPATH=. ./.venv-reranker-score/bin/python benchmarks/reranker/smoke_qwen3_vllm_score_backend.py
7 7
8 Same as production: vLLM child processes need the venv's ``bin`` on PATH (for pip's ``ninja`` when 8 Same as production: vLLM child processes need the venv's ``bin`` on PATH (for pip's ``ninja`` when
9 vLLM auto-selects FLASHINFER on T4/Turing). ``start_reranker.sh`` exports that; this script prepends 9 vLLM auto-selects FLASHINFER on T4/Turing). ``start_reranker.sh`` exports that; this script prepends
@@ -20,8 +20,8 @@ import sys @@ -20,8 +20,8 @@ import sys
20 import sysconfig 20 import sysconfig
21 from pathlib import Path 21 from pathlib import Path
22 22
23 -# Repo root on sys.path when run as scripts/smoke_*.py  
24 -_ROOT = Path(__file__).resolve().parents[1] 23 +# Repo root on sys.path when run from benchmarks/reranker/.
  24 +_ROOT = Path(__file__).resolve().parents[2]
25 if str(_ROOT) not in sys.path: 25 if str(_ROOT) not in sys.path:
26 sys.path.insert(0, str(_ROOT)) 26 sys.path.insert(0, str(_ROOT))
27 27
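The recurring `parent.parent` to `parents[2]` change follows from the scripts moving one directory deeper (`benchmarks/<area>/…` instead of `scripts/…`); `Path.parents` is zero-indexed starting at the containing directory. Illustrative paths:

```python
from pathlib import PurePosixPath

# A script at <repo>/benchmarks/reranker/smoke.py (illustrative path):
f = PurePosixPath("/repo/benchmarks/reranker/smoke.py")
assert f.parents[0] == PurePosixPath("/repo/benchmarks/reranker")  # containing dir
assert f.parents[1] == PurePosixPath("/repo/benchmarks")
assert f.parents[2] == PurePosixPath("/repo")                      # repo root
# Under the old layout <repo>/scripts/smoke.py, parent.parent (== parents[1]) sufficed:
assert PurePosixPath("/repo/scripts/smoke.py").parents[1] == PurePosixPath("/repo")
```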
scripts/benchmark_nllb_t4_tuning.py renamed to benchmarks/translation/benchmark_nllb_t4_tuning.py
@@ -11,12 +11,12 @@ from datetime import datetime @@ -11,12 +11,12 @@ from datetime import datetime
11 from pathlib import Path 11 from pathlib import Path
12 from typing import Any, Dict, List, Tuple 12 from typing import Any, Dict, List, Tuple
13 13
14 -PROJECT_ROOT = Path(__file__).resolve().parent.parent 14 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
15 if str(PROJECT_ROOT) not in sys.path: 15 if str(PROJECT_ROOT) not in sys.path:
16 sys.path.insert(0, str(PROJECT_ROOT)) 16 sys.path.insert(0, str(PROJECT_ROOT))
17 17
18 from config.services_config import get_translation_config 18 from config.services_config import get_translation_config
19 -from scripts.benchmark_translation_local_models import ( 19 +from benchmarks.translation.benchmark_translation_local_models import (
20 benchmark_concurrency_case, 20 benchmark_concurrency_case,
21 benchmark_serial_case, 21 benchmark_serial_case,
22 build_environment_info, 22 build_environment_info,
scripts/benchmark_translation_local_models.py renamed to benchmarks/translation/benchmark_translation_local_models.py
@@ -22,7 +22,7 @@ from typing import Any, Dict, Iterable, List, Sequence @@ -22,7 +22,7 @@ from typing import Any, Dict, Iterable, List, Sequence
22 import torch 22 import torch
23 import transformers 23 import transformers
24 24
25 -PROJECT_ROOT = Path(__file__).resolve().parent.parent 25 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
26 if str(PROJECT_ROOT) not in sys.path: 26 if str(PROJECT_ROOT) not in sys.path:
27 sys.path.insert(0, str(PROJECT_ROOT)) 27 sys.path.insert(0, str(PROJECT_ROOT))
28 28
scripts/benchmark_translation_local_models_focus.py renamed to benchmarks/translation/benchmark_translation_local_models_focus.py
@@ -11,12 +11,12 @@ from datetime import datetime @@ -11,12 +11,12 @@ from datetime import datetime
11 from pathlib import Path 11 from pathlib import Path
12 from typing import Any, Dict, List 12 from typing import Any, Dict, List
13 13
14 -PROJECT_ROOT = Path(__file__).resolve().parent.parent 14 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
15 if str(PROJECT_ROOT) not in sys.path: 15 if str(PROJECT_ROOT) not in sys.path:
16 sys.path.insert(0, str(PROJECT_ROOT)) 16 sys.path.insert(0, str(PROJECT_ROOT))
17 17
18 from config.services_config import get_translation_config 18 from config.services_config import get_translation_config
19 -from scripts.benchmark_translation_local_models import ( 19 +from benchmarks.translation.benchmark_translation_local_models import (
20 SCENARIOS, 20 SCENARIOS,
21 benchmark_concurrency_case, 21 benchmark_concurrency_case,
22 benchmark_serial_case, 22 benchmark_serial_case,
scripts/benchmark_translation_longtext_single.py renamed to benchmarks/translation/benchmark_translation_longtext_single.py
@@ -13,7 +13,7 @@ from pathlib import Path @@ -13,7 +13,7 @@ from pathlib import Path
13 13
14 import torch 14 import torch
15 15
16 -PROJECT_ROOT = Path(__file__).resolve().parent.parent 16 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
17 17
18 import sys 18 import sys
19 19
config/config.yaml
1 -# Unified Configuration for Multi-Tenant Search Engine  
2 -# 统一配置文件,所有租户共用一套配置  
3 -# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为  
4 -#  
5 -# 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项  
6 -#(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。  
7 -  
8 -# Process / bind addresses (环境变量 APP_ENV、RUNTIME_ENV、ES_INDEX_NAMESPACE 可覆盖前两者的语义)  
9 runtime: 1 runtime:
10 environment: prod 2 environment: prod
11 index_namespace: '' 3 index_namespace: ''
@@ -21,8 +13,6 @@ runtime: @@ -21,8 +13,6 @@ runtime:
21 translator_port: 6006 13 translator_port: 6006
22 reranker_host: 0.0.0.0 14 reranker_host: 0.0.0.0
23 reranker_port: 6007 15 reranker_port: 6007
24 -  
25 -# 基础设施连接(敏感项优先读环境变量:ES_*、REDIS_*、DB_*、DASHSCOPE_API_KEY、DEEPL_AUTH_KEY)  
26 infrastructure: 16 infrastructure:
27 elasticsearch: 17 elasticsearch:
28 host: http://localhost:9200 18 host: http://localhost:9200
@@ -49,23 +39,12 @@ infrastructure: @@ -49,23 +39,12 @@ infrastructure:
49 secrets: 39 secrets:
50 dashscope_api_key: null 40 dashscope_api_key: null
51 deepl_auth_key: null 41 deepl_auth_key: null
52 -  
53 -# Elasticsearch Index  
54 es_index_name: search_products 42 es_index_name: search_products
55 -  
56 -# 检索域 / 索引列表(可为空列表;每项字段均需显式给出)  
57 indexes: [] 43 indexes: []
58 -  
59 -# Config assets  
60 assets: 44 assets:
61 query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict 45 query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict
62 -  
63 -# Product content understanding (LLM enrich-content) configuration  
64 product_enrich: 46 product_enrich:
65 max_workers: 40 47 max_workers: 40
66 -  
67 -# 离线 / Web 相关性评估(scripts/evaluation、eval-web)  
68 -# CLI 未显式传参时使用此处默认值;search_base_url 未配置时自动为 http://127.0.0.1:{runtime.api_port}  
69 search_evaluation: 48 search_evaluation:
70 artifact_root: artifacts/search_evaluation 49 artifact_root: artifacts/search_evaluation
71 queries_file: scripts/evaluation/queries/queries.txt 50 queries_file: scripts/evaluation/queries/queries.txt
@@ -74,10 +53,10 @@ search_evaluation: @@ -74,10 +53,10 @@ search_evaluation:
74 search_base_url: '' 53 search_base_url: ''
75 web_host: 0.0.0.0 54 web_host: 0.0.0.0
76 web_port: 6010 55 web_port: 6010
77 - judge_model: qwen3.5-plus 56 + judge_model: qwen3.6-plus
78 judge_enable_thinking: false 57 judge_enable_thinking: false
79 judge_dashscope_batch: false 58 judge_dashscope_batch: false
80 - intent_model: qwen3-max 59 + intent_model: qwen3.6-plus
81 intent_enable_thinking: true 60 intent_enable_thinking: true
82 judge_batch_completion_window: 24h 61 judge_batch_completion_window: 24h
83 judge_batch_poll_interval_sec: 10.0 62 judge_batch_poll_interval_sec: 10.0
@@ -98,20 +77,17 @@ search_evaluation: @@ -98,20 +77,17 @@ search_evaluation:
98 rebuild_irrelevant_stop_ratio: 0.799 77 rebuild_irrelevant_stop_ratio: 0.799
99 rebuild_irrel_low_combined_stop_ratio: 0.959 78 rebuild_irrel_low_combined_stop_ratio: 0.959
100 rebuild_irrelevant_stop_streak: 3 79 rebuild_irrelevant_stop_streak: 3
101 -  
102 -# ES Index Settings (基础设置)  
103 es_settings: 80 es_settings:
104 number_of_shards: 1 81 number_of_shards: 1
105 number_of_replicas: 0 82 number_of_replicas: 0
106 refresh_interval: 30s 83 refresh_interval: 30s
107 84
108 -# 字段权重配置(用于搜索时的字段boost)  
109 -# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。  
110 -# To tune a specific language separately, an explicit key can also be added (e.g. title.de: 3.2). 85 +# Configured by field base name; the search-language suffix .{lang} is appended at query time
111 field_boosts: 86 field_boosts:
112 title: 3.0 87 title: 3.0
113 - qanchors: 1.8  
114 - enriched_tags: 1.8 88 + # qanchors and enriched_tags also appear in enriched_attributes.value, so their effective weight is their own boost plus the enriched_attributes.value boost
  89 + qanchors: 1.0
  90 + enriched_tags: 1.0
115 enriched_attributes.value: 1.5 91 enriched_attributes.value: 1.5
116 category_name_text: 2.0 92 category_name_text: 2.0
117 category_path: 2.0 93 category_path: 2.0
@@ -124,38 +100,25 @@ field_boosts: @@ -124,38 +100,25 @@ field_boosts:
124 description: 1.0 100 description: 1.0
125 vendor: 1.0 101 vendor: 1.0
126 102
127 -# Query Configuration(查询配置)  
128 query_config: 103 query_config:
129 - # 支持的语言  
130 supported_languages: 104 supported_languages:
131 - zh 105 - zh
132 - en 106 - en
133 default_language: en 107 default_language: en
134 -  
135 - # 功能开关(翻译开关由tenant_config控制)  
136 enable_text_embedding: true 108 enable_text_embedding: true
137 enable_query_rewrite: true 109 enable_query_rewrite: true
138 110
139 - # 查询翻译模型(须与 services.translation.capabilities 中某项一致)  
140 - # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。  
141 - zh_to_en_model: nllb-200-distilled-600m # "opus-mt-zh-en"  
142 - en_to_zh_model: nllb-200-distilled-600m # "opus-mt-en-zh"  
143 - default_translation_model: nllb-200-distilled-600m  
144 - # zh_to_en_model: deepl  
145 - # en_to_zh_model: deepl  
146 - # default_translation_model: deepl  
147 - # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)  
148 - zh_to_en_model__source_not_in_index: nllb-200-distilled-600m  
149 - en_to_zh_model__source_not_in_index: nllb-200-distilled-600m  
150 - default_translation_model__source_not_in_index: nllb-200-distilled-600m  
151 - # zh_to_en_model__source_not_in_index: deepl  
152 - # en_to_zh_model__source_not_in_index: deepl  
153 - # default_translation_model__source_not_in_index: deepl 111 + zh_to_en_model: deepl # nllb-200-distilled-600m
  112 + en_to_zh_model: deepl
  113 + default_translation_model: deepl
  114 + # When the source language is not in index_languages, translation quality matters more, so these are configured separately
  115 + zh_to_en_model__source_not_in_index: deepl
  116 + en_to_zh_model__source_not_in_index: deepl
  117 + default_translation_model__source_not_in_index: deepl
154 118
155 - # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。  
156 - # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。  
157 - translation_embedding_wait_budget_ms_source_in_index: 300 # 80  
157 - translation_embedding_wait_budget_ms_source_in_index: 300 # 80 119 + # Query-parsing stage: translation and the query embedding run concurrently under one shared wait budget (ms)
  120 + translation_embedding_wait_budget_ms_source_in_index: 300
  121 + translation_embedding_wait_budget_ms_source_not_in_index: 400
159 style_intent: 122 style_intent:
160 enabled: true 123 enabled: true
161 selected_sku_boost: 1.2 124 selected_sku_boost: 1.2
@@ -182,17 +145,15 @@ query_config: @@ -182,17 +145,15 @@ query_config:
182 product_title_exclusion: 145 product_title_exclusion:
183 enabled: true 146 enabled: true
184 dictionary_path: config/dictionaries/product_title_exclusion.tsv 147 dictionary_path: config/dictionaries/product_title_exclusion.tsv
185 -  
186 - # 动态多语言检索字段配置  
187 - # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式;  
188 - # shared_fields 为无语言后缀字段。  
189 search_fields: 148 search_fields:
  149 + # Configured by field base name; the search-language suffix .{lang} is appended at query time
190 multilingual_fields: 150 multilingual_fields:
191 - title 151 - title
192 - keywords 152 - keywords
193 - qanchors 153 - qanchors
194 - enriched_tags 154 - enriched_tags
195 - enriched_attributes.value 155 - enriched_attributes.value
  156 + # - enriched_taxonomy_attributes.value
196 - option1_values 157 - option1_values
197 - option2_values 158 - option2_values
198 - option3_values 159 - option3_values
@@ -202,13 +163,14 @@ query_config: @@ -202,13 +163,14 @@ query_config:
202 # - description 163 # - description
203 # - vendor 164 # - vendor
204 # shared_fields: fields without a language suffix; e.g. tags, option1_values, option2_values, option3_values 165 # shared_fields: fields without a language suffix; e.g. tags, option1_values, option2_values, option3_values
  166 +
205 shared_fields: null 167 shared_fields: null
206 core_multilingual_fields: 168 core_multilingual_fields:
207 - title 169 - title
208 - qanchors 170 - qanchors
209 - category_name_text 171 - category_name_text
210 172
211 - # Unified text-recall strategy (main query + translated query) 173 + # Text recall (main query + translated query)
212 text_query_strategy: 174 text_query_strategy:
213 base_minimum_should_match: 60% 175 base_minimum_should_match: 60%
214 translation_minimum_should_match: 60% 176 translation_minimum_should_match: 60%
@@ -223,14 +185,10 @@ query_config: @@ -223,14 +185,10 @@ query_config:
223 title: 5.0 185 title: 5.0
224 qanchors: 4.0 186 qanchors: 4.0
225 phrase_match_boost: 3.0 187 phrase_match_boost: 3.0
226 -  
227 - # Embedding字段名称  
228 text_embedding_field: title_embedding 188 text_embedding_field: title_embedding
229 image_embedding_field: image_embedding.vector 189 image_embedding_field: image_embedding.vector
230 190
231 - # 返回字段配置(_source includes)  
232 - # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段  
233 - # The fields below match api/result_formatter.py (SpuResult population) and search/searcher.py (SKU sorting / main-image replacement) 191 + # null returns all fields; [] returns none
234 source_fields: 192 source_fields:
235 - spu_id 193 - spu_id
236 - handle 194 - handle
@@ -251,6 +209,8 @@ query_config: @@ -251,6 +209,8 @@ query_config:
251 # - qanchors 209 # - qanchors
252 # - enriched_tags 210 # - enriched_tags
253 # - enriched_attributes 211 # - enriched_attributes
  212 + # - enriched_taxonomy_attributes.value
  213 +
254 - min_price 214 - min_price
255 - compare_at_price 215 - compare_at_price
256 - image_url 216 - image_url
@@ -270,26 +230,21 @@ query_config: @@ -270,26 +230,21 @@ query_config:
270 # KNN: separate boost and recall settings (k / num_candidates) for text and multimodal (image) vectors 230 # KNN: separate boost and recall settings (k / num_candidates) for text and multimodal (image) vectors
271 knn_text_boost: 4 231 knn_text_boost: 4
272 knn_image_boost: 4 232 knn_image_boost: 4
273 -  
274 - # knn_text_num_candidates = k * 3.4  
275 knn_text_k: 160 233 knn_text_k: 160
276 - knn_text_num_candidates: 560 234 + knn_text_num_candidates: 560 # k * 3.4
277 knn_text_k_long: 400 235 knn_text_k_long: 400
278 knn_text_num_candidates_long: 1200 236 knn_text_num_candidates_long: 1200
279 knn_image_k: 400 237 knn_image_k: 400
280 knn_image_num_candidates: 1200 238 knn_image_num_candidates: 1200
281 239
282 -# Function Score配置(ES层打分规则)  
283 function_score: 240 function_score:
284 score_mode: sum 241 score_mode: sum
285 boost_mode: multiply 242 boost_mode: multiply
286 functions: [] 243 functions: []
287 -  
288 -# 粗排配置(仅融合 ES 文本/向量信号,不调用模型)  
289 coarse_rank: 244 coarse_rank:
290 enabled: true 245 enabled: true
291 - input_window: 700  
292 - output_window: 240 246 + input_window: 480
  247 + output_window: 160
293 fusion: 248 fusion:
294 es_bias: 10.0 249 es_bias: 10.0
295 es_exponent: 0.05 250 es_exponent: 0.05
@@ -301,30 +256,29 @@ coarse_rank: @@ -301,30 +256,29 @@ coarse_rank:
301 knn_text_weight: 1.0 256 knn_text_weight: 1.0
302 knn_image_weight: 2.0 257 knn_image_weight: 2.0
303 knn_tie_breaker: 0.3 258 knn_tie_breaker: 0.3
304 - knn_bias: 0.6  
305 - knn_exponent: 0.4  
306 -  
307 -# 精排配置(轻量 reranker) 259 + knn_bias: 0.0
  260 + knn_exponent: 5.6
  261 + knn_text_exponent: 0.0
  262 + knn_image_exponent: 0.0
308 fine_rank: 263 fine_rank:
309 - enabled: false 264 + enabled: false # when false, results pass through with order preserved
310 input_window: 160 265 input_window: 160
311 output_window: 80 266 output_window: 80
312 timeout_sec: 10.0 267 timeout_sec: 10.0
313 rerank_query_template: '{query}' 268 rerank_query_template: '{query}'
314 rerank_doc_template: '{title}' 269 rerank_doc_template: '{title}'
315 service_profile: fine 270 service_profile: fine
316 -  
317 -# 重排配置(provider/URL 在 services.rerank)  
318 rerank: 271 rerank:
319 - enabled: true 272 + enabled: false # when false, results pass through with order preserved
320 rerank_window: 160 273 rerank_window: 160
  274 + exact_knn_rescore_enabled: true
  275 + exact_knn_rescore_window: 160
321 timeout_sec: 15.0 276 timeout_sec: 15.0
322 weight_es: 0.4 277 weight_es: 0.4
323 weight_ai: 0.6 278 weight_ai: 0.6
324 rerank_query_template: '{query}' 279 rerank_query_template: '{query}'
325 rerank_doc_template: '{title}' 280 rerank_doc_template: '{title}'
326 service_profile: default 281 service_profile: default
327 -  
328 # Multiplicative fusion: fused = Π (max(score, 0) + bias) ** exponent (es / rerank / fine / text / knn) 282 # Multiplicative fusion: fused = Π (max(score, 0) + bias) ** exponent (es / rerank / fine / text / knn)
329 # where knn_score first goes through a dis_max layer: 283 # where knn_score first goes through a dis_max layer:
330 # max(knn_text_weight * text_knn, knn_image_weight * image_knn) 284 # max(knn_text_weight * text_knn, knn_image_weight * image_knn)
@@ -337,30 +291,28 @@ rerank: @@ -337,30 +291,28 @@ rerank:
337 fine_bias: 0.1 291 fine_bias: 0.1
338 fine_exponent: 1.0 292 fine_exponent: 1.0
339 text_bias: 0.1 293 text_bias: 0.1
340 - text_exponent: 0.25  
341 # weight of base_query_trans_* relative to base_query (see the text dismax fusion in search/rerank_client) 294 # weight of base_query_trans_* relative to base_query (see the text dismax fusion in search/rerank_client)
  295 + text_exponent: 0.25
342 text_translation_weight: 0.8 296 text_translation_weight: 0.8
343 knn_text_weight: 1.0 297 knn_text_weight: 1.0
344 knn_image_weight: 2.0 298 knn_image_weight: 2.0
345 knn_tie_breaker: 0.3 299 knn_tie_breaker: 0.3
346 - knn_bias: 0.6  
347 - knn_exponent: 0.4 300 + knn_bias: 0.0
  301 + knn_exponent: 5.6
348 302
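The multiplicative-fusion comment above (`fused = Π (max(score, 0) + bias) ** exponent`, with `knn_score` first reduced via dis_max over the weighted text/image KNN scores) can be sketched as follows; the function name and `cfg` layout are hypothetical, and only the es / text / knn terms are shown (the real config also carries rerank / fine):

```python
# Sketch of the multiplicative fusion (names hypothetical).
def fuse(scores: dict, cfg: dict) -> float:
    # dis_max per the config comment:
    # knn_score = max(knn_text_weight * text_knn, knn_image_weight * image_knn)
    knn = max(cfg["knn_text_weight"] * scores.get("text_knn", 0.0),
              cfg["knn_image_weight"] * scores.get("image_knn", 0.0))
    fused = 1.0
    for name, value in (("es", scores.get("es", 0.0)),
                        ("text", scores.get("text", 0.0)),
                        ("knn", knn)):
        # fused = Π (max(score, 0) + bias) ** exponent
        fused *= (max(value, 0.0) + cfg[f"{name}_bias"]) ** cfg[f"{name}_exponent"]
    return fused
```

Note that with `knn_bias: 0.0`, a missing or zero KNN score drives the whole product to zero, so that setting presumes every candidate carries a KNN signal.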
349 -# 可扩展服务/provider 注册表(单一配置源)  
350 services: 303 services:
351 translation: 304 translation:
352 service_url: http://127.0.0.1:6006 305 service_url: http://127.0.0.1:6006
353 - # default_model: nllb-200-distilled-600m  
354 default_model: nllb-200-distilled-600m 306 default_model: nllb-200-distilled-600m
355 default_scene: general 307 default_scene: general
356 timeout_sec: 10.0 308 timeout_sec: 10.0
357 cache: 309 cache:
358 ttl_seconds: 62208000 310 ttl_seconds: 62208000
359 sliding_expiration: true 311 sliding_expiration: true
360 - # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).  
361 - enable_model_quality_tier_cache: true 312 + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups)
362 # Higher tier = better quality. Multiple models may share one tier. 313 # Higher tier = better quality. Multiple models may share one tier.
363 # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers). 314 # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers).
  315 + enable_model_quality_tier_cache: true
364 model_quality_tiers: 316 model_quality_tiers:
365 deepl: 30 317 deepl: 30
366 qwen-mt: 30 318 qwen-mt: 30
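The tier comment above says a request may reuse Redis keys only from models whose quality tier is greater than or equal to its own. A minimal sketch of that rule (helper name hypothetical; the nllb tier value in the test is illustrative, not taken from this hunk):

```python
# Sketch of the cache tier-reuse rule (helper name hypothetical).
def reusable_models(request_model: str, tiers: dict) -> list:
    # Reuse cache keys only from models at the same or a higher quality tier;
    # lower-tier caches are never reused.
    own = tiers[request_model]
    return sorted(model for model, tier in tiers.items() if tier >= own)
```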
@@ -454,13 +406,12 @@ services: @@ -454,13 +406,12 @@ services:
454 num_beams: 1 406 num_beams: 1
455 use_cache: true 407 use_cache: true
456 embedding: 408 embedding:
457 - provider: http # http 409 + provider: http
458 providers: 410 providers:
459 http: 411 http:
460 text_base_url: http://127.0.0.1:6005 412 text_base_url: http://127.0.0.1:6005
461 image_base_url: http://127.0.0.1:6008 413 image_base_url: http://127.0.0.1:6008
462 - # 服务内文本后端(embedding 进程启动时读取)  
463 - backend: tei # tei | local_st 414 + backend: tei
464 backends: 415 backends:
465 tei: 416 tei:
466 base_url: http://127.0.0.1:8080 417 base_url: http://127.0.0.1:8080
@@ -500,13 +451,13 @@ services: @@ -500,13 +451,13 @@ services:
500 request: 451 request:
501 max_docs: 1000 452 max_docs: 1000
502 normalize: true 453 normalize: true
503 - default_instance: default  
504 # Named instances: the same reranker code reads a different port / backend / runtime dir per instance name. 454 # Named instances: the same reranker code reads a different port / backend / runtime dir per instance name.
  455 + default_instance: default
505 instances: 456 instances:
506 default: 457 default:
507 host: 0.0.0.0 458 host: 0.0.0.0
508 port: 6007 459 port: 6007
509 - backend: qwen3_vllm_score 460 + backend: bge
510 runtime_dir: ./.runtime/reranker/default 461 runtime_dir: ./.runtime/reranker/default
511 fine: 462 fine:
512 host: 0.0.0.0 463 host: 0.0.0.0
@@ -543,6 +494,7 @@ services: @@ -543,6 +494,7 @@ services:
543 enforce_eager: false 494 enforce_eager: false
544 infer_batch_size: 100 495 infer_batch_size: 100
545 sort_by_doc_length: true 496 sort_by_doc_length: true
  497 +
546 # standard=_format_instruction__standard (fixed yes/no system); compact=_format_instruction (instruction as system, with Instruct repeated in the user turn) 498 # standard=_format_instruction__standard (fixed yes/no system); compact=_format_instruction (instruction as system, with Instruct repeated in the user turn)
547 instruction_format: standard # compact standard 499 instruction_format: standard # compact standard
548 # instruction: "Given a query, score the product for relevance" 500 # instruction: "Given a query, score the product for relevance"
@@ -556,6 +508,7 @@ services: @@ -556,6 +508,7 @@ services:
556 # instruction: "Rank products by query with category & style match prioritized" 508 # instruction: "Rank products by query with category & style match prioritized"
557 # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" 509 # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
558 instruction: rank products by given query 510 instruction: rank products by given query
  511 +
559 # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score 512 # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
560 # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 513 # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
561 qwen3_vllm_score: 514 qwen3_vllm_score:
@@ -583,15 +536,10 @@ services: @@ -583,15 +536,10 @@ services:
583 qwen3_transformers: 536 qwen3_transformers:
584 model_name: Qwen/Qwen3-Reranker-0.6B 537 model_name: Qwen/Qwen3-Reranker-0.6B
585 instruction: rank products by given query 538 instruction: rank products by given query
586 - # instruction: "Score the product’s relevance to the given query"  
587 max_length: 8192 539 max_length: 8192
588 batch_size: 64 540 batch_size: 64
589 use_fp16: true 541 use_fp16: true
590 - # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2  
591 attn_implementation: sdpa 542 attn_implementation: sdpa
592 - # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.  
593 - # For 1 query + many short docs (for example 400 product titles), this usually reduces  
594 - # repeated prefix work and padding waste compared with pairwise batching.  
595 qwen3_transformers_packed: 543 qwen3_transformers_packed:
596 model_name: Qwen/Qwen3-Reranker-0.6B 544 model_name: Qwen/Qwen3-Reranker-0.6B
597 instruction: Rank products by query with category & style match prioritized 545 instruction: Rank products by query with category & style match prioritized
@@ -600,8 +548,6 @@ services: @@ -600,8 +548,6 @@ services:
600 max_docs_per_pack: 0 548 max_docs_per_pack: 0
601 use_fp16: true 549 use_fp16: true
602 sort_by_doc_length: true 550 sort_by_doc_length: true
603 - # Packed mode relies on a custom 4D attention mask. "eager" is the safest default.  
604 - # If your torch/transformers stack validates it, you can benchmark "sdpa".  
605 attn_implementation: eager 551 attn_implementation: eager
606 qwen3_gguf: 552 qwen3_gguf:
607 repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF 553 repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
@@ -609,7 +555,6 @@ services: @@ -609,7 +555,6 @@ services:
609 cache_dir: ./model_cache 555 cache_dir: ./model_cache
610 local_dir: ./models/reranker/qwen3-reranker-4b-gguf 556 local_dir: ./models/reranker/qwen3-reranker-4b-gguf
611 instruction: Rank products by query with category & style match prioritized 557 instruction: Rank products by query with category & style match prioritized
612 - # T4 16GB / 性能优先配置:全量层 offload,实测比保守配置明显更快  
613 n_ctx: 512 558 n_ctx: 512
614 n_batch: 512 559 n_batch: 512
615 n_ubatch: 512 560 n_ubatch: 512
@@ -632,8 +577,6 @@ services: @@ -632,8 +577,6 @@ services:
632 cache_dir: ./model_cache 577 cache_dir: ./model_cache
633 local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf 578 local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
634 instruction: Rank products by query with category & style match prioritized 579 instruction: Rank products by query with category & style match prioritized
635 - # 0.6B GGUF / online rerank baseline:  
636 - # 实测 400 titles 单请求约 265s,因此它更适合作为低显存功能后备,不适合在线低延迟主路由。  
637 n_ctx: 256 580 n_ctx: 256
638 n_batch: 256 581 n_batch: 256
639 n_ubatch: 256 582 n_ubatch: 256
@@ -653,20 +596,15 @@ services: @@ -653,20 +596,15 @@ services:
653 verbose: false 596 verbose: false
654 dashscope_rerank: 597 dashscope_rerank:
655 model_name: qwen3-rerank 598 model_name: qwen3-rerank
656 - # 按地域选择 endpoint:  
657 - # 中国: https://dashscope.aliyuncs.com/compatible-api/v1/reranks  
658 - # 新加坡: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks  
659 - # 美国: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks  
660 endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks 599 endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
661 api_key_env: RERANK_DASHSCOPE_API_KEY_CN 600 api_key_env: RERANK_DASHSCOPE_API_KEY_CN
662 timeout_sec: 10.0 601 timeout_sec: 10.0
663 - top_n_cap: 0 # 0 表示 top_n=当前请求文档数;>0 则限制 top_n 上限  
664 - batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) 602 + top_n_cap: 0 # 0 表示 top_n=当前请求文档数
  603 + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断)
665 instruct: Given a shopping query, rank product titles by relevance 604 instruct: Given a shopping query, rank product titles by relevance
666 max_retries: 2 605 max_retries: 2
667 retry_backoff_sec: 0.2 606 retry_backoff_sec: 0.2
668 607
669 -# SPU配置(已启用,使用嵌套skus)  
670 spu_config: 608 spu_config:
671 enabled: true 609 enabled: true
672 spu_field: spu_id 610 spu_field: spu_id
@@ -678,7 +616,6 @@ spu_config: @@ -678,7 +616,6 @@ spu_config:
678 - option2 616 - option2
679 - option3 617 - option3
680 618
681 -# 租户配置(Tenant Configuration)  
682 # 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) 619 # 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
683 # 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集 620 # 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集
684 tenant_config: 621 tenant_config:
@@ -587,6 +587,14 @@ class AppConfigLoader: @@ -587,6 +587,14 @@ class AppConfigLoader:
587 knn_tie_breaker=float(coarse_fusion_raw.get("knn_tie_breaker", 0.0)), 587 knn_tie_breaker=float(coarse_fusion_raw.get("knn_tie_breaker", 0.0)),
588 knn_bias=float(coarse_fusion_raw.get("knn_bias", 0.6)), 588 knn_bias=float(coarse_fusion_raw.get("knn_bias", 0.6)),
589 knn_exponent=float(coarse_fusion_raw.get("knn_exponent", 0.2)), 589 knn_exponent=float(coarse_fusion_raw.get("knn_exponent", 0.2)),
  590 + knn_text_bias=float(
  591 + coarse_fusion_raw.get("knn_text_bias", coarse_fusion_raw.get("knn_bias", 0.6))
  592 + ),
  593 + knn_text_exponent=float(coarse_fusion_raw.get("knn_text_exponent", 0.0)),
  594 + knn_image_bias=float(
  595 + coarse_fusion_raw.get("knn_image_bias", coarse_fusion_raw.get("knn_bias", 0.6))
  596 + ),
  597 + knn_image_exponent=float(coarse_fusion_raw.get("knn_image_exponent", 0.0)),
590 text_translation_weight=float( 598 text_translation_weight=float(
591 coarse_fusion_raw.get("text_translation_weight", 0.8) 599 coarse_fusion_raw.get("text_translation_weight", 0.8)
592 ), 600 ),
@@ -608,6 +616,12 @@ class AppConfigLoader: @@ -608,6 +616,12 @@ class AppConfigLoader:
608 rerank=RerankConfig( 616 rerank=RerankConfig(
609 enabled=bool(rerank_cfg.get("enabled", True)), 617 enabled=bool(rerank_cfg.get("enabled", True)),
610 rerank_window=int(rerank_cfg.get("rerank_window", 384)), 618 rerank_window=int(rerank_cfg.get("rerank_window", 384)),
  619 + exact_knn_rescore_enabled=bool(
  620 + rerank_cfg.get("exact_knn_rescore_enabled", False)
  621 + ),
  622 + exact_knn_rescore_window=int(
  623 + rerank_cfg.get("exact_knn_rescore_window", 0)
  624 + ),
611 timeout_sec=float(rerank_cfg.get("timeout_sec", 15.0)), 625 timeout_sec=float(rerank_cfg.get("timeout_sec", 15.0)),
612 weight_es=float(rerank_cfg.get("weight_es", 0.4)), 626 weight_es=float(rerank_cfg.get("weight_es", 0.4)),
613 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)), 627 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)),
@@ -630,6 +644,14 @@ class AppConfigLoader: @@ -630,6 +644,14 @@ class AppConfigLoader:
630 knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)), 644 knn_tie_breaker=float(fusion_raw.get("knn_tie_breaker", 0.0)),
631 knn_bias=float(fusion_raw.get("knn_bias", 0.6)), 645 knn_bias=float(fusion_raw.get("knn_bias", 0.6)),
632 knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)), 646 knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)),
  647 + knn_text_bias=float(
  648 + fusion_raw.get("knn_text_bias", fusion_raw.get("knn_bias", 0.6))
  649 + ),
  650 + knn_text_exponent=float(fusion_raw.get("knn_text_exponent", 0.0)),
  651 + knn_image_bias=float(
  652 + fusion_raw.get("knn_image_bias", fusion_raw.get("knn_bias", 0.6))
  653 + ),
  654 + knn_image_exponent=float(fusion_raw.get("knn_image_exponent", 0.0)),
633 fine_bias=float(fusion_raw.get("fine_bias", 0.00001)), 655 fine_bias=float(fusion_raw.get("fine_bias", 0.00001)),
634 fine_exponent=float(fusion_raw.get("fine_exponent", 1.0)), 656 fine_exponent=float(fusion_raw.get("fine_exponent", 1.0)),
635 text_translation_weight=float( 657 text_translation_weight=float(
@@ -655,6 +677,14 @@ class AppConfigLoader: @@ -655,6 +677,14 @@ class AppConfigLoader:
655 677
656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} 678 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {}
657 normalized_translation = build_translation_config(translation_raw) 679 normalized_translation = build_translation_config(translation_raw)
  680 + local_translation_backends = {"local_nllb", "local_marian"}
  681 + for capability_name, capability_cfg in normalized_translation["capabilities"].items():
  682 + backend_name = str(capability_cfg.get("backend") or "").strip().lower()
  683 + if backend_name not in local_translation_backends:
  684 + continue
  685 + for path_key in ("model_dir", "ct2_model_dir"):
  686 + if capability_cfg.get(path_key) not in (None, ""):
  687 + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve())
658 translation_config = TranslationServiceConfig( 688 translation_config = TranslationServiceConfig(
659 endpoint=str(normalized_translation["service_url"]).rstrip("/"), 689 endpoint=str(normalized_translation["service_url"]).rstrip("/"),
660 timeout_sec=float(normalized_translation["timeout_sec"]), 690 timeout_sec=float(normalized_translation["timeout_sec"]),
@@ -749,7 +779,7 @@ class AppConfigLoader: @@ -749,7 +779,7 @@ class AppConfigLoader:
749 port=port, 779 port=port,
750 backend=backend_name, 780 backend=backend_name,
751 runtime_dir=( 781 runtime_dir=(
752 - str(v) 782 + str(self._resolve_project_path_value(v).resolve())
753 if (v := instance_raw.get("runtime_dir")) not in (None, "") 783 if (v := instance_raw.get("runtime_dir")) not in (None, "")
754 else None 784 else None
755 ), 785 ),
@@ -787,6 +817,12 @@ class AppConfigLoader: @@ -787,6 +817,12 @@ class AppConfigLoader:
787 rerank=rerank_config, 817 rerank=rerank_config,
788 ) 818 )
789 819
  820 + def _resolve_project_path_value(self, value: Any) -> Path:
  821 + candidate = Path(str(value)).expanduser()
  822 + if candidate.is_absolute():
  823 + return candidate
  824 + return self.project_root / candidate
  825 +
790 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: 826 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig:
791 if not isinstance(raw, dict): 827 if not isinstance(raw, dict):
792 raise ConfigurationError("tenant_config must be a mapping") 828 raise ConfigurationError("tenant_config must be a mapping")
@@ -119,6 +119,18 @@ class RerankFusionConfig: @@ -119,6 +119,18 @@ class RerankFusionConfig:
119 knn_tie_breaker: float = 0.0 119 knn_tie_breaker: float = 0.0
120 knn_bias: float = 0.6 120 knn_bias: float = 0.6
121 knn_exponent: float = 0.2 121 knn_exponent: float = 0.2
  122 + #: Optional additive floor for the weighted text KNN term.
  123 + #: Falls back to knn_bias when omitted in config loading.
  124 + knn_text_bias: float = 0.6
  125 + #: Optional extra multiplicative term on weighted text KNN.
  126 + #: Uses knn_text_bias as the additive floor.
  127 + knn_text_exponent: float = 0.0
  128 + #: Optional additive floor for the weighted image KNN term.
  129 + #: Falls back to knn_bias when omitted in config loading.
  130 + knn_image_bias: float = 0.6
  131 + #: Optional extra multiplicative term on weighted image KNN.
  132 + #: Uses knn_image_bias as the additive floor.
  133 + knn_image_exponent: float = 0.0
122 fine_bias: float = 0.00001 134 fine_bias: float = 0.00001
123 fine_exponent: float = 1.0 135 fine_exponent: float = 1.0
124 #: 翻译子句 named query 分数相对原文 base_query 的权重(加权后再与原文做 dismax 融合) 136 #: 翻译子句 named query 分数相对原文 base_query 的权重(加权后再与原文做 dismax 融合)
@@ -143,6 +155,18 @@ class CoarseRankFusionConfig: @@ -143,6 +155,18 @@ class CoarseRankFusionConfig:
143 knn_tie_breaker: float = 0.0 155 knn_tie_breaker: float = 0.0
144 knn_bias: float = 0.6 156 knn_bias: float = 0.6
145 knn_exponent: float = 0.2 157 knn_exponent: float = 0.2
  158 + #: Optional additive floor for the weighted text KNN term.
  159 + #: Falls back to knn_bias when omitted in config loading.
  160 + knn_text_bias: float = 0.6
  161 + #: Optional extra multiplicative term on weighted text KNN.
  162 + #: Uses knn_text_bias as the additive floor.
  163 + knn_text_exponent: float = 0.0
  164 + #: Optional additive floor for the weighted image KNN term.
  165 + #: Falls back to knn_bias when omitted in config loading.
  166 + knn_image_bias: float = 0.6
  167 + #: Optional extra multiplicative term on weighted image KNN.
  168 + #: Uses knn_image_bias as the additive floor.
  169 + knn_image_exponent: float = 0.0
146 #: 翻译子句 named query 分数相对原文 base_query 的权重(加权后再与原文做 dismax 融合) 170 #: 翻译子句 named query 分数相对原文 base_query 的权重(加权后再与原文做 dismax 融合)
147 text_translation_weight: float = 0.8 171 text_translation_weight: float = 0.8
148 172
@@ -176,6 +200,9 @@ class RerankConfig: @@ -176,6 +200,9 @@ class RerankConfig:
176 200
177 enabled: bool = True 201 enabled: bool = True
178 rerank_window: int = 384 202 rerank_window: int = 384
  203 + exact_knn_rescore_enabled: bool = False
  204 + #: topN exact vector scoring window; <=0 means "follow rerank_window"
  205 + exact_knn_rescore_window: int = 0
179 timeout_sec: float = 15.0 206 timeout_sec: float = 15.0
180 weight_es: float = 0.4 207 weight_es: float = 0.4
181 weight_ai: float = 0.6 208 weight_ai: float = 0.6
docs/DEVELOPER_GUIDE.md
@@ -389,7 +389,7 @@ services: @@ -389,7 +389,7 @@ services:
389 - **位置**:`tests/`,可按 `unit/`、`integration/` 或按模块划分子目录;公共 fixture 在 `conftest.py`。 389 - **位置**:`tests/`,可按 `unit/`、`integration/` 或按模块划分子目录;公共 fixture 在 `conftest.py`。
390 - **标记**:使用 `@pytest.mark.unit`、`@pytest.mark.integration`、`@pytest.mark.api` 等区分用例类型,便于按需运行。 390 - **标记**:使用 `@pytest.mark.unit`、`@pytest.mark.integration`、`@pytest.mark.api` 等区分用例类型,便于按需运行。
391 - **依赖**:单元测试通过 mock(如 `mock_es_client`、`sample_search_config`)不依赖真实 ES/DB;集成测试需在说明中注明依赖服务。 391 - **依赖**:单元测试通过 mock(如 `mock_es_client`、`sample_search_config`)不依赖真实 ES/DB;集成测试需在说明中注明依赖服务。
392 -- **运行**:`python -m pytest tests/`;仅单元:`python -m pytest tests/unit/` 或 `-m unit` 392 +- **运行**:`python -m pytest tests/`;推荐最小回归:`python -m pytest tests/ci -q`;按模块聚焦可直接指定具体测试文件
393 - **原则**:新增逻辑应有对应测试;修改协议或配置契约时更新相关测试与 fixture。 393 - **原则**:新增逻辑应有对应测试;修改协议或配置契约时更新相关测试与 fixture。
394 394
395 ### 8.3 配置与环境 395 ### 8.3 配置与环境
docs/QUICKSTART.md
@@ -69,7 +69,7 @@ source activate.sh @@ -69,7 +69,7 @@ source activate.sh
69 ./run.sh all 69 ./run.sh all
70 # 仅为薄封装:等价于 ./scripts/service_ctl.sh up all 70 # 仅为薄封装:等价于 ./scripts/service_ctl.sh up all
71 # 说明: 71 # 说明:
72 -# - all = tei cnclip embedding embedding-image translator reranker reranker-fine backend indexer frontend eval-web 72 +# - all = tei cnclip embedding embedding-image translator reranker backend indexer frontend eval-web
73 # - up 会同时启动 monitor daemon(运行期连续失败自动重启) 73 # - up 会同时启动 monitor daemon(运行期连续失败自动重启)
74 # - reranker 为 GPU 强制模式(资源不足会直接启动失败) 74 # - reranker 为 GPU 强制模式(资源不足会直接启动失败)
75 # - TEI 默认使用 GPU;当 TEI_DEVICE=cuda 且 GPU 不可用时会直接失败(不会自动降级到 CPU) 75 # - TEI 默认使用 GPU;当 TEI_DEVICE=cuda 且 GPU 不可用时会直接失败(不会自动降级到 CPU)
@@ -166,7 +166,7 @@ curl -X POST http://localhost:6008/embed/image \ @@ -166,7 +166,7 @@ curl -X POST http://localhost:6008/embed/image \
166 166
167 ```bash 167 ```bash
168 ./scripts/setup_translator_venv.sh 168 ./scripts/setup_translator_venv.sh
169 -./.venv-translator/bin/python scripts/download_translation_models.py --all-local # 如需本地模型 169 +./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local # 如需本地模型
170 ./scripts/start_translator.sh 170 ./scripts/start_translator.sh
171 171
172 curl -X POST http://localhost:6006/translate \ 172 curl -X POST http://localhost:6006/translate \
docs/Usage-Guide.md
@@ -126,7 +126,7 @@ cd /data/saas-search @@ -126,7 +126,7 @@ cd /data/saas-search
126 126
127 这个脚本会自动: 127 这个脚本会自动:
128 1. 创建日志目录 128 1. 创建日志目录
129 -2. 按目标启动服务(`all`:`tei cnclip embedding embedding-image translator reranker reranker-fine backend indexer frontend eval-web`) 129 +2. 按目标启动服务(`all`:`tei cnclip embedding embedding-image translator reranker backend indexer frontend eval-web`)
130 3. 写入 PID 到 `logs/*.pid` 130 3. 写入 PID 到 `logs/*.pid`
131 4. 执行健康检查 131 4. 执行健康检查
132 5. 启动 monitor daemon(运行期连续失败自动重启) 132 5. 启动 monitor daemon(运行期连续失败自动重启)
@@ -202,7 +202,7 @@ python -m pytest -q tests/test_rerank_client.py tests/test_es_query_builder.py t @@ -202,7 +202,7 @@ python -m pytest -q tests/test_rerank_client.py tests/test_es_query_builder.py t
202 ./scripts/service_ctl.sh restart backend 202 ./scripts/service_ctl.sh restart backend
203 sleep 3 203 sleep 3
204 ./scripts/service_ctl.sh status backend 204 ./scripts/service_ctl.sh status backend
205 -./scripts/evaluation/start_eval.sh.sh batch 205 +./scripts/evaluation/start_eval.sh batch
206 ``` 206 ```
207 207
208 离线批量评估会把标注与报表写到 `artifacts/search_evaluation/`(SQLite、`batch_reports/` 下的 JSON/Markdown 等)。说明与命令见 [scripts/evaluation/README.md](../scripts/evaluation/README.md)。 208 离线批量评估会把标注与报表写到 `artifacts/search_evaluation/`(SQLite、`batch_reports/` 下的 JSON/Markdown 等)。说明与命令见 [scripts/evaluation/README.md](../scripts/evaluation/README.md)。
docs/caches-inventory.md 0 → 100644
@@ -0,0 +1,133 @@ @@ -0,0 +1,133 @@
  1 +# 本项目缓存一览
  2 +
  3 +本文档梳理仓库内**与业务相关的各类缓存**:说明用途、键与过期策略,并汇总运维脚本。按「分布式(Redis)→ 进程内 → 磁盘/模型 → 第三方」组织。
  4 +
  5 +---
  6 +
  7 +## 一、Redis 集中式缓存(生产主路径)
  8 +
  9 +所有下列缓存默认连接 **`infrastructure.redis`**(`config/config.yaml` 与 `REDIS_*` 环境变量),**数据库编号一般为 `db=0`**(脚本可通过参数覆盖)。`snapshot_db` 仅在配置中存在,供快照/运维场景选用,应用代码未按该字段切换业务缓存的 DB。
  10 +
  11 +### 1. 文本 / 图像向量缓存(Embedding)
  12 +
  13 +- **作用**:缓存 BGE/TEI 文本向量与 CN-CLIP 图像向量、CLIP 文本塔向量,避免重复推理。
  14 +- **实现**:`embeddings/redis_embedding_cache.py` 的 `RedisEmbeddingCache`;键构造见 `embeddings/cache_keys.py`。
  15 +- **Key 形态**(最终 Redis 键 = `前缀` + `可选 namespace` + `逻辑键`):
  16 + - **前缀**:`infrastructure.redis.embedding_cache_prefix`(默认 `embedding`,可用 `REDIS_EMBEDDING_CACHE_PREFIX` 覆盖)。
  17 + - **命名空间**:`embeddings/server.py` 与客户端中分为:
  18 + - 文本:`namespace=""` → `{prefix}:{embed:norm0|1:...}`
  19 + - 图像:`namespace="image"` → `{prefix}:image:{embed:模型名:txt:norm0|1:...}`
  20 + - CLIP 文本:`namespace="clip_text"` → `{prefix}:clip_text:{embed:模型名:img:norm0|1:...}`
  21 + - 逻辑键段含 `embed:`、`norm0/1`、模型名(多模态)、过长文本/URL 时用 `h:sha256:...` 摘要(见 `cache_keys.py` 注释)。
  22 +- **值格式**:BF16 压缩后的字节(`embeddings/bf16.py`),非 JSON。
  23 +- **TTL**:`infrastructure.redis.cache_expire_days`(默认 **720 天**,`REDIS_CACHE_EXPIRE_DAYS`)。写入用 `SETEX`;**命中时滑动续期**(`EXPIRE` 刷新为同一时长)。
  24 +- **Redis 客户端**:`decode_responses=False`(二进制)。
  25 +
  26 +**主要代码**:`embeddings/server.py`、`embeddings/text_encoder.py`、`embeddings/image_encoder.py`。
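The SETEX write plus refresh-on-hit behavior described above can be sketched as follows. This is a minimal illustration, not the repo's `RedisEmbeddingCache`; the `client` is only assumed to expose redis-py style `get` / `expire` / `setex`:

```python
from typing import Optional

CACHE_TTL_SECONDS = 720 * 24 * 3600  # cache_expire_days = 720, as in config

def cache_put(client, key: str, value: bytes, ttl: int = CACHE_TTL_SECONDS) -> None:
    # SETEX writes the value and its TTL in one call
    client.setex(key, ttl, value)

def cache_get(client, key: str, ttl: int = CACHE_TTL_SECONDS) -> Optional[bytes]:
    value = client.get(key)
    if value is not None:
        # sliding expiration: every hit resets the TTL to the full duration
        client.expire(key, ttl)
    return value
```

Frequently hit keys therefore effectively never expire, while cold keys age out after the configured TTL.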
  27 +
  28 +---
  29 +
  30 +### 2. 翻译结果缓存(Translation)
  31 +
  32 +- **作用**:按「翻译模型 + 目标语言 + 原文」缓存译文;支持**模型质量分层探测**(高 tier 模型写入的缓存可被同 tier 或更高 tier 的请求命中,见 `translation/settings.py` 中 `translation_cache_probe_models`)。
  33 +- **Key 形态**:`trans:{model}:{target_lang}:{text前4字符}{sha256全文}`(`translation/cache.py` 的 `build_key`)。
  34 +- **值格式**:UTF-8 译文字符串。
  35 +- **TTL**:`services.translation.cache.ttl_seconds`(默认 **62208000 秒 = 720 天**)。若 `sliding_expiration: true`,命中时刷新 TTL。
  36 +- **能力级开关**:各 `capabilities.*.use_cache` 为 `false` 时该后端不落 Redis。
  37 +- **Redis 客户端**:`decode_responses=True`。
  38 +
  39 +**主要代码**:`translation/cache.py`、`translation/service.py`;翻译 HTTP 服务:`api/translator_app.py`(`get_translation_service()` 使用 `lru_cache` 单例,见下文进程内缓存)。
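Based on the documented key shape, `build_key` can be approximated like this (a sketch only — `translation/cache.py` is authoritative; the 4-character prefix and sha256-over-full-text are taken from the format above):

```python
import hashlib

def build_translation_cache_key(model: str, target_lang: str, text: str) -> str:
    """Key shape: trans:{model}:{target_lang}:{first 4 chars of text}{sha256 of full text}."""
    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    return f"trans:{model}:{target_lang}:{text[:4]}{digest}"
```

The readable 4-character prefix makes keys easy to eyeball in `redis-cli`, while the full-text digest keeps them unique and bounded in length.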
  40 +
  41 +---
  42 +
  43 +### 3. 商品内容理解 / Anchors 与语义分析缓存(Indexer)
  44 +
  45 +- **作用**:缓存 LLM 对商品标题等拼出的 **prompt 输入** 所做的分析结果(anchors、语义属性等),避免重复调用大模型。键与 `analysis_kind`、`prompt` 契约版本、`target_lang` 及输入摘要相关。
  46 +- **Key 形态**:`{anchor_cache_prefix}:{analysis_kind}:{prompt_contract_hash[:12]}:{target_lang}:{prompt_input[:4]}{md5}`(`indexer/product_enrich.py` 中 `_make_analysis_cache_key`)。
  47 +- **前缀**:`infrastructure.redis.anchor_cache_prefix`(默认 `product_anchors`,`REDIS_ANCHOR_CACHE_PREFIX`)。
  48 +- **值格式**:JSON 字符串(规范化后的分析结果)。
  49 +- **TTL**:`anchor_cache_expire_days`(默认 **30 天**),以秒写入 `SETEX`(**非滑动**,与向量/翻译不同)。
  50 +- **读逻辑**:无 TTL 刷新;仅校验内容是否「有意义」再返回。
  51 +
  52 +**主要代码**:`indexer/product_enrich.py`;与 HTTP 侧对齐说明见 `api/routes/indexer.py` 注释。
  53 +
  54 +---
  55 +
  56 +## 二、进程内缓存(非共享、随进程重启失效)
  57 +
  58 +| 名称 | 用途 | 范围/生命周期 |
  59 +|------|------|----------------|
  60 +| **`get_app_config()`** | 解析并缓存全局 `AppConfig` | `config/loader.py`:`@lru_cache(maxsize=1)`;`reload_app_config()` 可 `cache_clear()` |
  61 +| **`TranslationService` 单例** | 翻译服务进程内复用后端与 Redis 客户端 | `api/translator_app.py`:`get_translation_service()` |
  62 +| **`_nllb_tokenizer_code_by_normalized_key`** | NLLB tokenizer 语言码映射 | `translation/languages.py`:`@lru_cache(maxsize=1)` |
  63 +| **`QueryTextAnalysisCache`** | 单次查询解析内复用分词、tokenizer 结果 | `query/tokenization.py`,随 `QueryParser` 一次 parse |
  64 +| **`_SelectionContext`(SKU 意图)** | 归一化文本、分词、匹配布尔等小字典 | `search/sku_intent_selector.py`,单次选择流程 |
  65 +| **`incremental_service` transformer 缓存** | 按 `tenant_id` 缓存文档转换器 | `indexer/incremental_service.py`,**无界**、多租户进程长期存活时需注意内存 |
  66 +| **NLLB batch 内 `token_count_cache`** | 同一 batch 内避免重复计 token | `translation/backends/local_ctranslate2.py` |
  67 +| **CLIP 分词器 `@lru_cache`**(第三方) | 简单 tokenizer 缓存 | `third-party/clip-as-service/.../simple_tokenizer.py` |
  68 +
  69 +**说明**:`utils/cache.py` 中的 **`DictCache`**(文件 JSON:默认 `.cache/dict_cache.json`)已导出,但仓库内**无直接 `DictCache(` 调用**,视为可复用工具/预留,非当前主路径。
  70 +
  71 +---
  72 +
  73 +## 三、磁盘与模型相关「缓存」(非 Redis)
  74 +
  75 +| 名称 | 用途 | 配置/位置 |
  76 +|------|------|-----------|
  77 +| **Hugging Face / 本地模型目录** | 重排器、翻译本地模型等权重下载与缓存 | `services.rerank.backends.*.cache_dir` 等,常见默认 **`./model_cache`**(`config/config.yaml`) |
  78 +| **vLLM `enable_prefix_caching`** | 重排服务内 **Prefix KV 缓存**(加速同前缀批推理) | `services.rerank.backends.qwen3_vllm*`、`reranker/backends/qwen3_vllm*.py` |
  79 +| **运行时目录** | 重排服务状态/引擎文件 | `services.rerank.instances.*.runtime_dir`(如 `./.runtime/reranker/...`) |
  80 +
  81 +翻译能力里的 **`use_cache: true`**(如 NLLB、Marian)在多数后端指 **推理时的 KV cache(Transformer)**,与 Redis 译文缓存是不同层次;Redis 译文缓存仍由 `TranslationCache` 控制。
  82 +
  83 +---
  84 +
  85 +## 四、Elasticsearch 内部缓存
  86 +
  87 +索引设置中的 `refresh_interval` 等影响近实时可见性,但**不属于应用层键值缓存**。若需调优 ES 查询缓存、节点堆等,见运维文档与集群配置,此处不展开。
  88 +
  89 +---
  90 +
  91 +## 五、运维与巡检脚本(Redis)
  92 +
  93 +| 脚本 | 作用 |
  94 +|------|------|
  95 +| `scripts/redis/redis_cache_health_check.py` | 按 **embedding / translation / anchors** 三类前缀巡检:key 数量估算、TTL 采样、`IDLETIME` 等 |
  96 +| `scripts/redis/redis_cache_prefix_stats.py` | 按前缀统计 key 数量与 **MEMORY USAGE**(可多 DB) |
  97 +| `scripts/redis/redis_memory_heavy_keys.py` | 扫描占用内存最大的 key,辅助排查「统计与总内存不一致」 |
  98 +| `scripts/redis/monitor_eviction.py` | 实时监控 **eviction** 相关事件,用于容量与驱逐策略排查 |
  99 +
  100 +使用前需加载项目配置(如 `source activate.sh`)以保证 `REDIS_CONFIG` 与生产一致。脚本注释中给出了 **`redis-cli` 手工统计**示例(按前缀 `wc -l`、`MEMORY STATS` 等)。
  101 +
  102 +---
  103 +
  104 +## 六、总表(Redis 与各层缓存)
  105 +
  106 +| 缓存名称 | 业务模块 | 存储 | Key 前缀 / 命名模式 | 过期时间 | 过期策略 | 值摘要 | 配置键 / 环境变量 |
  107 +|----------|----------|------|---------------------|----------|----------|--------|-------------------|
  108 +| 文本向量 | 检索 / 索引 / Embedding 服务 | Redis db≈0 | `{embedding_cache_prefix}:*`(逻辑键以 `embed:norm…` 开头) | `cache_expire_days`(默认 720 天) | 写入 TTL + 命中滑动续期 | BF16 字节向量 | `infrastructure.redis.*`;`REDIS_EMBEDDING_CACHE_PREFIX`、`REDIS_CACHE_EXPIRE_DAYS` |
  109 +| 图像向量(CLIP 图) | 图搜 / 多模态 | 同上 | `{prefix}:image:*` | 同上 | 同上 | BF16 字节 | 同上 |
  110 +| CLIP 文本塔向量 | 图搜文本侧 | 同上 | `{prefix}:clip_text:*` | 同上 | 同上 | BF16 字节 | 同上 |
  111 +| 翻译译文 | 查询翻译、翻译服务 | 同上 | `trans:{model}:{lang}:*` | `services.translation.cache.ttl_seconds`(默认 720 天) | 可配置滑动(`sliding_expiration`) | UTF-8 字符串 | `services.translation.cache.*`;各能力 `use_cache` |
  112 +| 商品分析 / Anchors | 索引富化、LLM 内容理解 | 同上 | `{anchor_cache_prefix}:{kind}:{hash}:{lang}:*` | `anchor_cache_expire_days`(默认 30 天) | 固定 TTL,不滑动 | JSON 字符串 | `anchor_cache_prefix`、`anchor_cache_expire_days`;`REDIS_ANCHOR_*` |
  113 +| 应用配置 | 全栈 | 进程内存 | N/A(单例) | 进程生命周期 | `reload_app_config` 清除 | `AppConfig` 对象 | `config/loader.py` |
  114 +| 翻译服务实例 | 翻译 API | 进程内存 | N/A | 进程生命周期 | 单例 | `TranslationService` | `api/translator_app.py` |
  115 +| 查询分词缓存 | 查询解析 | 单次请求内 | N/A | 单次 parse | — | 分词与中间结果 | `query/tokenization.py` |
  116 +| SKU 意图辅助字典 | 搜索排序辅助 | 单次请求内 | N/A | 单次选择 | — | 小 dict | `search/sku_intent_selector.py` |
  117 +| 增量索引 Transformer | 索引管道 | 进程内存 | `tenant_id` 字符串键 | 长期(无界) | 无自动淘汰 | Transformer 元组 | `indexer/incremental_service.py` |
  118 +| 重排 / 翻译模型权重 | 推理服务 | 本地磁盘 | 目录路径 | 无自动删除(人工清理) | — | 模型文件 | `cache_dir: ./model_cache` 等 |
  119 +| vLLM Prefix 缓存 | 重排(Qwen3 等) | GPU/引擎内 | 引擎内部 | 引擎管理 | — | KV Cache | `enable_prefix_caching` |
  120 +| 文件 Dict 缓存(可选) | 通用 | `.cache/dict_cache.json` | 分类 + 自定义 key | 持久直至删除 | — | JSON 可序列化值 | `utils/cache.py`(当前无调用方) |
  121 +
  122 +---
  123 +
  124 +## 七、维护建议(简要)
  125 +
  126 +1. **容量**:三类 Redis 缓存(embedding / trans / anchors)可共用同一实例;大租户或图搜多时 **embedding** 与 **trans** 往往占主要内存,可用 `redis_cache_prefix_stats.py` 分前缀观察。
  127 +2. **键迁移**:变更 `embedding_cache_prefix`、CLIP `model_name` 或 prompt 契约会自然**隔离新键空间**;旧键依赖 TTL 或人工批量删除。
  128 +3. **一致性**:向量缓存对异常向量会 **delete key**(`RedisEmbeddingCache.get`);anchors 依赖 `cache_version` 与契约 hash 防止错误复用。
  129 +4. **监控**:除脚本外,Embedding HTTP 服务健康检查会报告各 lane 的 **`cache_enabled`**(`embeddings/server.py`)。
  130 +
  131 +---
  132 +
  133 +*文档随代码扫描生成;若新增 Redis 用途,请同步更新本文件与 `scripts/redis/redis_cache_health_check.py` 中的 `_load_known_cache_types()`。*
docs/issues/issue-2026-04-08-eval框架主指标ERR的问题以及bm25调参-done-0408.md 0 → 100644
@@ -0,0 +1,120 @@ @@ -0,0 +1,120 @@
1 +1. The main evaluation metrics for the retrieval system are currently:
2 + "NDCG@20, NDCG@50, ERR@10, Strong_Precision@10, Strong_Precision@20, "
3 +Judging from `_err_at_k`, the computation logic seems fine.
4 +The problem now is that the ERR metric often seems to trend in the opposite direction from the other metrics. Please re-analyze whether it is suitable as one of the primary metrics and what its current issues are.
5 +
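For reference, the standard ERR@k definition (Chapelle et al., 2009) looks like the sketch below; the repo's `_err_at_k` may differ in its grade-to-probability mapping. It also illustrates why ERR can move against NDCG: a single highly relevant result at rank 1 nearly saturates the metric, so improvements further down the list barely register.

```python
def err_at_k(grades, k, g_max=None):
    """Expected Reciprocal Rank: sum over ranks r of (1/r) * P(user stops at r)."""
    grades = list(grades)[:k]
    if g_max is None:
        g_max = max(grades) if grades else 1
    err, p_continue = 0.0, 1.0
    for r, g in enumerate(grades, start=1):
        p_stop = (2 ** g - 1) / (2 ** g_max)  # map relevance grade to stop probability
        err += p_continue * p_stop / r
        p_continue *= 1.0 - p_stop
    return err
```

With a single grade-3 result at rank 1 (g_max=3), the score is already 0.875, so everything below rank 1 can contribute at most 0.125 — one plausible reason ERR@10 disagrees with NDCG@20/50 and Strong_Precision, which weight the whole window more evenly.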
6 +2. The current BM25 parameters are:
7 +"b": 0.1,
8 +"k1": 0.3
9 +The corresponding baseline is /data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T055948Z_00b6a8aa3d.md (Primary_Metric_Score: 0.604555
10 +
11 +)
12 +
13 +(much better than when b and k1 were both set to 0; the all-zero run: /data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260407T150946Z_00b6a8aa3d.md
14 + Primary_Metric_Score: 0.602598
15 +
16 +)
17 +
18 +The background for changing these two parameters from 0 to 0.1/0.3:
  19 +This change adjusts the BM25 parameters used by the combined query.
  20 +
  21 +Previously, both `b` and `k1` were set to `0.0`. The original intention was to avoid two common issues in e-commerce search relevance:
  22 +
  23 +1. Over-penalizing longer product titles
  24 + In product search, a shorter title should not automatically rank higher just because BM25 favors shorter fields. For example, for a query like “遥控车”, a product whose title is simply “遥控车” is not necessarily a better candidate than a product with a slightly longer but more descriptive title. In practice, extremely short titles may even indicate lower-quality catalog data.
  25 +
  26 +2. Over-rewarding repeated occurrences of the same term
  27 + For longer queries such as “遥控喷雾翻滚多功能车玩具车”, the default BM25 behavior may give too much weight to a term that appears multiple times (for example “遥控”), even when other important query terms such as “喷雾” or “翻滚” are missing. This can cause products with repeated partial matches to outrank products that actually cover more of the user intent.
  28 +
  29 +Setting both parameters to zero was an intentional way to suppress length normalization and term-frequency amplification. However, after introducing a `combined_fields` query, this configuration becomes too aggressive. Since `combined_fields` scores multiple fields as a unified relevance signal, completely disabling both effects may also remove useful ranking information, especially when we still want documents matching more query terms across fields to be distinguishable from weaker matches.
  30 +
  31 +This update therefore relaxes the previous setting and reintroduces a controlled amount of BM25 normalization/scoring behavior. The goal is to keep the original intent — avoiding short-title bias and excessive repeated-term gain — while allowing the combined query to better preserve meaningful relevance differences across candidates.
  32 +
  33 +Expected effect:
  34 +- reduce the bias toward unnaturally short product titles
  35 +- limit score inflation caused by repeated occurrences of the same term
  36 +- improve ranking stability for `combined_fields` queries
  37 +- better reward candidates that cover more of the overall query intent, instead of those that only repeat a subset of terms
  38 +
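For intuition, the per-term BM25 contribution in its standard Lucene-style form (an illustration of the textbook formula, not this repo's code) shows exactly what the two parameters control:

```python
def bm25_term_score(tf: float, doc_len: float, avg_doc_len: float,
                    idf: float, k1: float, b: float) -> float:
    """Single-term BM25 contribution with saturation and length normalization."""
    norm = 1.0 - b + b * (doc_len / avg_doc_len)      # b=0 -> document length is ignored
    return idf * tf * (k1 + 1.0) / (tf + k1 * norm)   # k1=0 -> collapses to plain idf
```

With `b=0, k1=0` every matching term contributes exactly its idf (a pure coverage signal), which is why the `combined_fields` query loses all tf and length information; `b=0.1, k1=0.3` reintroduces a small, controlled amount of both (the Elasticsearch defaults are `k1=1.2`, `b=0.75`).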
  39 +
40 +Since the experiment was effective, please continue with the experiments.
41 +
42 +Please run these four rounds of experiments, compare the results, and tune the BM25 parameters:
43 +{ "b": 0.10, "k1": 0.30 }
44 +{ "b": 0.20, "k1": 0.60 }
45 +{ "b": 0.50, "k1": 1.0 }
46 +{ "b": 0.10, "k1": 0.75 }
47 +
48 +How to change the index-level setting (BM25 `similarity.default`):
49 +
50 +`settings.similarity` in `mappings/search_products.json` only takes effect when **creating a new index**; for an **existing index**, close the index first, then `PUT _settings`, and finally reopen it.
51 +
52 +**Use case**: adjusting the default BM25 `b` and `k1` (for example, aligning with the repo mapping: `b: 0.1`, `k1: 0.3`).
53 +
  54 +```bash
55 +# Replace as needed: index name, credentials, ES address
  56 +INDEX="search_products_tenant_163"
  57 +AUTH='saas:4hOaLaf41y2VuI8y'
  58 +ES="http://localhost:9200"
  59 +
60 +# 1) Close the index (write requests will fail; mind the maintenance window)
  61 +curl -s -u "$AUTH" -X POST "$ES/${INDEX}/_close"
  62 +
63 +# 2) Update settings (example only; when identical to the mappings default you can copy it verbatim)
  64 +curl -s -u "$AUTH" -X PUT "$ES/${INDEX}/_settings" \
  65 + -H 'Content-Type: application/json' \
  66 + -d '{
  67 + "index": {
  68 + "similarity": {
  69 + "default": {
  70 + "type": "BM25",
  71 + "b": 0.1,
  72 + "k1": 0.3
  73 + }
  74 + }
  75 + }
  76 +}'
  77 +
78 +# 3) Reopen the index
  79 +curl -s -u "$AUTH" -X POST "$ES/${INDEX}/_open"
  80 +```
  81 +
82 +**Check that the change took effect**:
  83 +
  84 +```bash
  85 +curl -s -u "$AUTH" -X GET "$ES/${INDEX}/_settings?filter_path=**.similarity&pretty"
  86 +```
  87 +
88 +Expect to see `type`, `b`, `k1` under `similarity.default` in the response (the API may return the numbers as strings; that is normal).
  89 +
90 +**Multi-tenant batch**: list the indices first, then repeat the close → settings → open sequence above for each `search_products_tenant_*` index.
  91 +
  92 +```bash
  93 +curl -s -u "$AUTH" -X GET "$ES/_cat/indices/search_products_tenant_*?h=index&v"
  94 +```
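The per-index close → settings → open loop can also be scripted. A minimal Python sketch of the request sequence (auth and HTTP transport omitted; `b=0.5, k1=1.0` used as the default, per the winning combination — adjust as needed):

```python
import json

def bm25_settings_payload(b: float, k1: float) -> str:
    """JSON body for PUT {index}/_settings (the index must be closed first)."""
    return json.dumps(
        {"index": {"similarity": {"default": {"type": "BM25", "b": b, "k1": k1}}}}
    )

def bm25_update_steps(index: str, b: float = 0.5, k1: float = 1.0):
    """Request sequence for one index: close -> update settings -> open."""
    return [
        ("POST", f"/{index}/_close", None),
        ("PUT", f"/{index}/_settings", bm25_settings_payload(b, k1)),
        ("POST", f"/{index}/_open", None),
    ]
```

Iterate `bm25_update_steps(idx)` over every index returned by `_cat/indices/search_products_tenant_*` and issue the three requests in order with your HTTP client of choice.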
  95 +
  96 +
  97 +
  98 +Experiment results:
  99 +Four rounds of experiments are complete, and the current settings on `search_products_tenant_163` have been left at the best combination, `b=0.5, k1=1.0`; I have just verified that they are in effect.
  100 +
  101 +The comparison is as follows:
  102 +
  103 +- Current production baseline `b=0.10, k1=0.30`: `0.604555`
  104 + Report: [batch_20260408T055948Z_00b6a8aa3d.md](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T055948Z_00b6a8aa3d.md)
  105 +- Rerun of `b=0.10, k1=0.30`: `0.604298`
  106 + Report: [batch_20260408T064011Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064011Z_00b6a8aa3d.json)
  107 +- Experiment `b=0.20, k1=0.60`: `0.604274`
  108 + Report: [batch_20260408T064123Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064123Z_00b6a8aa3d.json)
  109 +- Experiment `b=0.50, k1=1.0`: `0.606041`
  110 + Report: [batch_20260408T064216Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064216Z_00b6a8aa3d.json)
  111 +- Experiment `b=0.10, k1=0.75`: `0.603438`
  112 + Report: [batch_20260408T064304Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064304Z_00b6a8aa3d.json)
  113 +- Old baseline `b=0.0, k1=0.0`: `0.602598`
  114 + Report: [batch_20260407T150946Z_00b6a8aa3d.md](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260407T150946Z_00b6a8aa3d.md)
  115 +
  116 +Conclusion: of these four combinations, `b=0.50, k1=1.0` performs best, improving on the current baseline of `0.604555` by `+0.001486` and on `0/0` by `+0.003443`. `0.20/0.60` is essentially on par with `0.10/0.30`, and `0.10/0.75` is actually worse, which suggests that in this round raising `k1` alone is inferior to relaxing `b` and `k1` together.
  117 +
  118 +One note: I only changed the index-level setting on the existing `search_products_tenant_163` index; the default mapping in the repository is unchanged. [search_products.json](/data/saas-search/mappings/search_products.json) is still `0.1/0.3`. If you want, the next step is to change the mapping default to `0.5/1.0` as well and batch-sync the other `search_products_tenant_*` indices.
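The conclusion above can be sanity-checked with a standalone recomputation of the classic BM25 term score (a sketch only; Lucene's actual implementation differs in details such as IDF smoothing, but the roles of `b` and `k1` are the same):

```python
import math

def bm25_term_score(tf, doc_len, avg_len, idf, k1, b):
    # Classic BM25 contribution of one query term:
    # idf * tf*(k1+1) / (tf + k1*(1 - b + b*doc_len/avg_len))
    norm = k1 * (1.0 - b + b * doc_len / avg_len)
    return idf * (tf * (k1 + 1.0)) / (tf + norm)

idf = math.log(1000 / 10)  # toy IDF for a term appearing in 10 of 1000 docs

# Old setting b=0, k1=0: term frequency and title length are ignored
# entirely, so every matching title gets the same idf-only contribution.
old_short = bm25_term_score(tf=1, doc_len=4,  avg_len=12, idf=idf, k1=0.0, b=0.0)
old_spam  = bm25_term_score(tf=5, doc_len=30, avg_len=12, idf=idf, k1=0.0, b=0.0)

# New setting b=0.5, k1=1.0: repeated terms help with diminishing returns,
# and longer-than-average titles are mildly (not harshly) penalized.
new_tf1 = bm25_term_score(tf=1, doc_len=12, avg_len=12, idf=idf, k1=1.0, b=0.5)
new_tf5 = bm25_term_score(tf=5, doc_len=12, avg_len=12, idf=idf, k1=1.0, b=0.5)
print(old_short, old_spam, new_tf1, new_tf5)
```

With `k1=0` the length-norm factor is multiplied away, which is why `b` had no effect under the old `0/0` setting; relaxing both together is what restores the two signals independently.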
  119 +
  120 +
docs/issues/issue-2026-04-12-test-env.md 0 → 100644
@@ -0,0 +1,43 @@
  1 +120.76.41.98, port 22, username and password:
  2 +tw twtw@123 (has sudo privileges)
  3 +The directory /home/tw/saas-search on this machine already has this project deployed.
  4 +Please get the project running:
  5 +1. Check out a test-environment branch for me. On that branch, disable the reranker and translation models, because this machine's GPU has limited memory (the embedding model can stay).
  6 +2. On that branch, bring up all the services.
  7 +3. Install an ES with Docker, following this project's ES9*.md docs. This machine already has a system-level elasticsearch, so to avoid mutual interference, install the ES9 this project depends on into Docker, and adapt the ES address in the test-environment config accordingly.
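A single-node ES-in-Docker setup for step 3 could be sketched as below (the image tag, container name, volume name, and security flag are assumptions to be adapted from the project's ES9*.md doc; 19200 matches the test-config port mentioned later and avoids the system ES on 9200):

```bash
docker run -d --name saas-es9 \
  -p 19200:9200 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  -v saas-es9-data:/usr/share/elasticsearch/data \
  docker.elastic.co/elasticsearch/elasticsearch:9.0.0
```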
  8 +
  9 +
  10 +1. The point is not to disable 6005; port 6005 already has the corresponding text service running, so just use it directly.
  11 +2. The service on 6005 was in fact started from an early historical version of this project, in another directory: /home/tw/SearchEngine. Look at its startup configuration:
  12 +nohup bash scripts/start_embedding_service.sh > log.start_embedding_service.0412 2>&1 &
  13 +That is how it was started.
  14 +Check which text-embedding scheme and which model it is configured with, and align with it (I mean the current test branch).
  15 +
  16 +
  17 +
  18 +
  19 +
  20 +
  21 +
  22 +I deployed a test environment on this machine:
  23 +120.76.41.98, port 22, username and password:
  24 +tw twtw@123 (has sudo privileges)
  25 +cd /home/tw/saas-search
  26 +$ git branch
  27 +  master
  28 +* test/small-gpu-es9
  29 +
  30 +I want the only differences to be:
  31 +1. Different ES config (the test environment connects to a Dockerized ES on that machine, port 19200) and different Redis config.
  32 +2. Reranker disabled; do not start the reranker service.
  33 +
  34 +Nothing else should differ.
  35 +
  36 +But startup has problems: translation is currently erroring out.
  37 +This shows the project's portability is poor. Please investigate the cause of the failure, fix it locally first (on this machine, the master branch in the current directory) to improve portability, then update the test machine, keeping the test branch different from master only in a small set of config-level changes; then get translation running on the test machine, and finally bring up the whole service stack.
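One portability pattern that keeps the test branch to config-only differences is resolving endpoints from the environment with local defaults (a sketch; the variable names `ES_HOST`/`ES_PORT` are hypothetical, not this project's actual config keys):

```python
import os

def es_url(env=os.environ):
    # Resolve the ES endpoint from the environment, falling back to the
    # local default, so test/prod differ only in .env, not in code.
    host = env.get("ES_HOST", "127.0.0.1")
    port = env.get("ES_PORT", "9200")
    return f"http://{host}:{port}"

print(es_url({}))                    # default: http://127.0.0.1:9200
print(es_url({"ES_PORT": "19200"}))  # test box: http://127.0.0.1:19200
```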
  38 +
  39 +
  40 +
  41 +
  42 +
  43 +
docs/issues/issue-2026-04-14-粗排流程放入ES-TODO-env 0 → 100644
@@ -0,0 +1,25 @@
  1 +Requirement:
  2 +Currently 160 results (rerank_window: 160) enter reranking. In reranking, text-vector and image-vector relevance both act as factors in the fusion formula (in both coarse ranking and the reranker):
  3 +knn_score
  4 +text_knn
  5 +image_knn
  6 +text_factor
  7 +knn_factor
  8 +However, text-vector recall and image-vector recall use KNN index retrieval, so not every result carries these two scores; either of them can be 0.
  9 +To solve this, one approach is: for the 160 results that actually enter reranking, find which ones are missing the text and image vector-recall scores, then either have ES compute them somehow, or pull the vectors back from ES and compute them ourselves, or set up the recall request so that the top N results are guaranteed to carry both scores. I am not sure what options exist, and none of these feel great; please think it through.
  10 +
  11 +One approach under consideration:
  12 +In the "first ES search", compute exact vector scores only for the top N, using rescore or retriever.rescorer (the official docs explicitly support multi-stage rescore and score_mode: multiply, and the examples even show function_score / script_score inside rescore).
  13 +That means you can:
  14 +keep the current lexical + text knn + image knn retrieval for the initial pass
  15 +rescore over window_size=160
  16 +use an exact script_score to fill in text/image vector scores for the top 160
  17 +and, while at it, move the current local coarse fusion back into ES
  18 +
  19 +export ES_AUTH="saas:4hOaLaf41y2VuI8y"
  20 +export ES="http://127.0.0.1:9200"
  21 +"index":"search_products_tenant_163"
  22 +
  23 +One detail has surfaced: vector functions such as dotProduct() work in the script_score scoring context but are not recognized in the script_fields field-fetching context. So if we want to pass the exact scores back to rerank via script_fields, we have to write the array loop ourselves rather than call the built-in vector functions.
  24 +
  25 +Can the base_query, base_query_trans_zh, knn_query, and image_knn_query scores needed by the rerank formula still be obtained? Please consider this and try hard to find ways to get these scores; if they really cannot be obtained, think of alternatives, such as simplifying the scoring formula.
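As a reference point for the "pull the vectors back and compute them ourselves" option, a minimal client-side backfill could look like the sketch below (field names such as `text_vector`/`text_knn` are hypothetical; the inner loop is the same arithmetic a Painless script_fields script would have to spell out, since dotProduct() is unavailable in that context):

```python
def dot(a, b):
    # Manual dot product, element by element.
    return sum(x * y for x, y in zip(a, b))

def backfill_knn_scores(hits, query_vec, vec_field="text_vector",
                        score_field="text_knn"):
    # For hits that came back without a KNN score (e.g. recalled only
    # lexically), compute it from the stored vector; leave others untouched.
    for h in hits:
        if h.get(score_field, 0.0) == 0.0 and vec_field in h:
            h[score_field] = dot(query_vec, h[vec_field])
    return hits

hits = [
    {"spu_id": "a", "text_knn": 0.92},                            # scored by KNN recall
    {"spu_id": "b", "text_knn": 0.0, "text_vector": [0.6, 0.8]},  # lexical-only hit
]
backfill_knn_scores(hits, query_vec=[0.6, 0.8])
print(hits[1]["text_knn"])
```

If the vectors are indexed with cosine similarity, both sides would need to be normalized before the dot product for the backfilled scores to be comparable with the ES-computed ones.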
docs/工作总结-微服务性能优化与架构.md
@@ -98,7 +98,7 @@ instruction: "Given a shopping query, rank product titles by relevance"
98 **能力**:支持根据商品标题批量生成 **qanchors**(锚文本)、**enriched_attributes**、**tags**,供索引与 suggest 使用。 98 **能力**:支持根据商品标题批量生成 **qanchors**(锚文本)、**enriched_attributes**、**tags**,供索引与 suggest 使用。
99 99
100 **具体内容**: 100 **具体内容**:
101 -- **接口**:`POST /indexer/enrich-content`(Indexer 服务端口 **6004**)。请求体为 `items` 数组,每项含 `spu_id`、`title`(必填)及可选多语言标题等;单次请求最多 **50 条**,建议批量调用。响应 `results` 与 `items` 一一对应,每项含 `spu_id`、`qanchors`(按语言键,如 `qanchors.zh`、`qanchors.en`,逗号分隔短语)、`enriched_attributes`、`tags`。 101 +- **接口**:`POST /indexer/enrich-content`(FacetAwareMatching 服务端口 **6001**)。请求体为 `items` 数组,每项含 `spu_id`、`title`(必填)及可选多语言标题等;单次请求最多 **50 条**,建议批量调用。响应 `results` 与 `items` 一一对应,每项含 `spu_id`、`qanchors`(按语言键,如 `qanchors.zh`、`qanchors.en`,逗号分隔短语)、`enriched_attributes`、`tags`。
102 - **索引侧**:微服务组合方式下,调用方先拿不含 qanchors/tags 的 doc,再调用本接口补齐后写入 ES 的 `qanchors.{lang}` 等字段;索引 transformer(`indexer/document_transformer.py`、`indexer/product_enrich.py`)内也可在构建 doc 时调用内容理解逻辑,写入 `qanchors.{lang}`。 102 - **索引侧**:微服务组合方式下,调用方先拿不含 qanchors/tags 的 doc,再调用本接口补齐后写入 ES 的 `qanchors.{lang}` 等字段;索引 transformer(`indexer/document_transformer.py`、`indexer/product_enrich.py`)内也可在构建 doc 时调用内容理解逻辑,写入 `qanchors.{lang}`。
103 - **Suggest 侧**:`suggestion/builder.py` 从 ES 商品索引读取 `_source: ["id", "spu_id", "title", "qanchors"]`,对 `qanchors.{lang}` 用 `_split_qanchors` 拆成词条,以 `source="qanchor"` 加入候选,排序时 `qanchor` 权重大于纯 title(`add_product("qanchor", ...)`);suggest 配置中 `sources: ["query_log", "qanchor"]` 表示候选来源包含 qanchor。 103 - **Suggest 侧**:`suggestion/builder.py` 从 ES 商品索引读取 `_source: ["id", "spu_id", "title", "qanchors"]`,对 `qanchors.{lang}` 用 `_split_qanchors` 拆成词条,以 `source="qanchor"` 加入候选,排序时 `qanchor` 权重大于纯 title(`add_product("qanchor", ...)`);suggest 配置中 `sources: ["query_log", "qanchor"]` 表示候选来源包含 qanchor。
104 - **实现与依赖**:内容理解内部使用大模型(需 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存(如 `product_anchors`);逻辑与 `indexer/product_enrich` 一致。 104 - **实现与依赖**:内容理解内部使用大模型(需 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存(如 `product_anchors`);逻辑与 `indexer/product_enrich` 一致。
@@ -129,12 +129,12 @@ instruction: "Given a shopping query, rank product titles by relevance"
129 - 可选:embedding(text) **6005**、embedding-image **6008**、translator **6006**、reranker **6007**、tei **8080**、cnclip **51000**。 129 - 可选:embedding(text) **6005**、embedding-image **6008**、translator **6006**、reranker **6007**、tei **8080**、cnclip **51000**。
130 - 端口可由环境变量覆盖:`API_PORT`、`INDEXER_PORT`、`FRONTEND_PORT`、`EVAL_WEB_PORT`、`EMBEDDING_TEXT_PORT`、`EMBEDDING_IMAGE_PORT`、`TRANSLATION_PORT`、`RERANKER_PORT`、`TEI_PORT`、`CNCLIP_PORT`。 130 - 端口可由环境变量覆盖:`API_PORT`、`INDEXER_PORT`、`FRONTEND_PORT`、`EVAL_WEB_PORT`、`EMBEDDING_TEXT_PORT`、`EMBEDDING_IMAGE_PORT`、`TRANSLATION_PORT`、`RERANKER_PORT`、`TEI_PORT`、`CNCLIP_PORT`。
131 - **命令**: 131 - **命令**:
132 - - `./scripts/service_ctl.sh start [service...]` 或 `up all` / `start all`(all 含 tei、cnclip、embedding、embedding-image、translator、reranker、reranker-fine、backend、indexer、frontend、eval-web,按依赖顺序);`stop`、`restart`、`down` 同参数;`status` 默认列出所有服务。 132 + - `./scripts/service_ctl.sh start [service...]` 或 `up all` / `start all`(all 含 tei、cnclip、embedding、embedding-image、translator、reranker、backend、indexer、frontend、eval-web,按依赖顺序);`stop`、`restart`、`down` 同参数;`status` 默认列出所有服务。
133 - 启动时:backend/indexer/frontend/embedding/translator/reranker 会写 pid 到 `logs/<service>.pid`,并执行 `wait_for_health`(GET `http://127.0.0.1:<port>/health`);reranker 健康重试 90 次,其余 30 次;TEI 校验 Docker 容器存在且 `/health` 成功;cnclip 无 HTTP 健康则仅校验进程/端口。 133 - 启动时:backend/indexer/frontend/embedding/translator/reranker 会写 pid 到 `logs/<service>.pid`,并执行 `wait_for_health`(GET `http://127.0.0.1:<port>/health`);reranker 健康重试 90 次,其余 30 次;TEI 校验 Docker 容器存在且 `/health` 成功;cnclip 无 HTTP 健康则仅校验进程/端口。
134 - **监控常驻**: 134 - **监控常驻**:
135 - `./scripts/service_ctl.sh monitor-start <targets>` 启动后台监控进程,将 targets 写入 `logs/service-monitor.targets`,pid 写入 `logs/service-monitor.pid`,日志追加到 `logs/service-monitor.log`。 135 - `./scripts/service_ctl.sh monitor-start <targets>` 启动后台监控进程,将 targets 写入 `logs/service-monitor.targets`,pid 写入 `logs/service-monitor.pid`,日志追加到 `logs/service-monitor.log`。
136 - - 轮询间隔 `MONITOR_INTERVAL_SEC` 默认 **10** 秒;连续 **3** 次(`MONITOR_FAIL_THRESHOLD`)健康失败则触发重启;重启冷却 `MONITOR_RESTART_COOLDOWN_SEC` 默认 **30** 秒;每小时最多重启 `MONITOR_MAX_RESTARTS_PER_HOUR` 默认 **6** 次;超限时调用 `scripts/wechat_alert.py` 告警(若存在)。  
137 -- **日志**:各服务按日滚动到 `logs/<service>-<date>.log`,通过 `scripts/daily_log_router.sh` 与 `LOG_RETENTION_DAYS`(默认 30)控制保留。 136 + - 轮询间隔 `MONITOR_INTERVAL_SEC` 默认 **10** 秒;连续 **3** 次(`MONITOR_FAIL_THRESHOLD`)健康失败则触发重启;重启冷却 `MONITOR_RESTART_COOLDOWN_SEC` 默认 **30** 秒;每小时最多重启 `MONITOR_MAX_RESTARTS_PER_HOUR` 默认 **6** 次;超限时调用 `scripts/ops/wechat_alert.py` 告警(若存在)。
  137 +- **日志**:各服务按日滚动到 `logs/<service>-<date>.log`,通过 `scripts/ops/daily_log_router.sh` 与 `LOG_RETENTION_DAYS`(默认 30)控制保留。
138 138
139 详见:`scripts/service_ctl.sh` 内注释及 `docs/Usage-Guide.md`。 139 详见:`scripts/service_ctl.sh` 内注释及 `docs/Usage-Guide.md`。
140 140
@@ -153,12 +153,12 @@ instruction: "Given a shopping query, rank product titles by relevance"
153 153
154 ## 三、性能测试报告摘要 154 ## 三、性能测试报告摘要
155 155
156 -以下数据来自 `docs/性能测试报告.md`,测试时间 **2026-03-12**,环境:**8 vCPU**(Intel Xeon Platinum 8255C @ 2.50GHz)、**约 15Gi 可用内存**;租户 **162** 文档数约 **53**(search/search/suggestions/rerank 与文档规模相关)。压测工具:`scripts/perf_api_benchmark.py`,场景×并发矩阵,每档 **20s**。 156 +以下数据来自 `docs/性能测试报告.md`,测试时间 **2026-03-12**,环境:**8 vCPU**(Intel Xeon Platinum 8255C @ 2.50GHz)、**约 15Gi 可用内存**;租户 **162** 文档数约 **53**(search/search/suggestions/rerank 与文档规模相关)。压测工具:`benchmarks/perf_api_benchmark.py`,场景×并发矩阵,每档 **20s**。
157 157
158 **复现命令(四场景×四并发)**: 158 **复现命令(四场景×四并发)**:
159 ```bash 159 ```bash
160 cd /data/saas-search 160 cd /data/saas-search
161 -.venv/bin/python scripts/perf_api_benchmark.py \ 161 +.venv/bin/python benchmarks/perf_api_benchmark.py \
162 --scenario backend_search,backend_suggest,embed_text,rerank \ 162 --scenario backend_search,backend_suggest,embed_text,rerank \
163 --concurrency-list 1,5,10,20 \ 163 --concurrency-list 1,5,10,20 \
164 --duration 20 \ 164 --duration 20 \
@@ -188,7 +188,7 @@ cd /data/saas-search
188 188
189 口径:query 固定 `wireless mouse`,每次请求 **386 docs**,句长 15–40 词随机(从 1000 词池采样);配置 `rerank_window=384`。复现命令: 189 口径:query 固定 `wireless mouse`,每次请求 **386 docs**,句长 15–40 词随机(从 1000 词池采样);配置 `rerank_window=384`。复现命令:
190 ```bash 190 ```bash
191 -.venv/bin/python scripts/perf_api_benchmark.py \ 191 +.venv/bin/python benchmarks/perf_api_benchmark.py \
192 --scenario rerank --duration 20 --concurrency-list 1,5,10,20 --timeout 60 \ 192 --scenario rerank --duration 20 --concurrency-list 1,5,10,20 --timeout 60 \
193 --rerank-dynamic-docs --rerank-doc-count 386 --rerank-vocab-size 1000 \ 193 --rerank-dynamic-docs --rerank-doc-count 386 --rerank-vocab-size 1000 \
194 --rerank-sentence-min-words 15 --rerank-sentence-max-words 40 \ 194 --rerank-sentence-min-words 15 --rerank-sentence-max-words 40 \
@@ -217,7 +217,7 @@ cd /data/saas-search
217 | 10 | 181 | 100% | 8.78 | 1129.23| 1295.88| 1330.96| 217 | 10 | 181 | 100% | 8.78 | 1129.23| 1295.88| 1330.96|
218 | 20 | 161 | 100% | 7.63 | 2594.00| 4706.44| 4783.05| 218 | 20 | 161 | 100% | 7.63 | 2594.00| 4706.44| 4783.05|
219 219
220 -**结论**:吞吐约 **8 rps** 平台化,延迟随并发上升明显,符合“检索 + 向量 + 重排”重链路特征。多租户补测(文档数 500–10000,见报告 §12)表明:文档数越大,RPS 下降、延迟升高;tenant 0(10000 doc)在并发 20 出现部分 ReadTimeout(成功率 59.02%),需注意 timeout 与容量规划;补测命令示例:`for t in 0 1 2 3 4; do .venv/bin/python scripts/perf_api_benchmark.py --scenario backend_search --concurrency-list 1,5,10,20 --duration 20 --tenant-id $t --output perf_reports/2026-03-12/search_tenant_matrix/tenant_${t}.json; done`。 220 +**结论**:吞吐约 **8 rps** 平台化,延迟随并发上升明显,符合“检索 + 向量 + 重排”重链路特征。多租户补测(文档数 500–10000,见报告 §12)表明:文档数越大,RPS 下降、延迟升高;tenant 0(10000 doc)在并发 20 出现部分 ReadTimeout(成功率 59.02%),需注意 timeout 与容量规划;补测命令示例:`for t in 0 1 2 3 4; do .venv/bin/python benchmarks/perf_api_benchmark.py --scenario backend_search --concurrency-list 1,5,10,20 --duration 20 --tenant-id $t --output perf_reports/2026-03-12/search_tenant_matrix/tenant_${t}.json; done`。
221 221
222 --- 222 ---
223 223
@@ -247,5 +247,5 @@ cd /data/saas-search
247 247
248 **关键文件与复现**: 248 **关键文件与复现**:
249 - 配置:`config/config.yaml`(services、rerank、query_config)、`.env`(端口与 API Key)。 249 - 配置:`config/config.yaml`(services、rerank、query_config)、`.env`(端口与 API Key)。
250 -- 脚本:`scripts/service_ctl.sh`(启停与监控)、`scripts/perf_api_benchmark.py`(压测)、`scripts/build_suggestions.sh`(suggest 构建)。 250 +- 脚本:`scripts/service_ctl.sh`(启停与监控)、`benchmarks/perf_api_benchmark.py`(压测)、`scripts/build_suggestions.sh`(suggest 构建)。
251 - 完整步骤与多租户/rerank 对比见:`docs/性能测试报告.md`。 251 - 完整步骤与多租户/rerank 对比见:`docs/性能测试报告.md`。
docs/常用查询 - ES.md
1 -  
2 -  
3 ## Elasticsearch 排查流程 1 ## Elasticsearch 排查流程
4 2
  3 +使用前加载环境变量:
  4 +```bash
  5 +set -a; source .env; set +a
  6 +# 或直接 export
  7 +export ES_AUTH="saas:4hOaLaf41y2VuI8y"
  8 +export ES="http://127.0.0.1:9200"
  9 +```
  10 +
5 ### 1. 集群健康状态 11 ### 1. 集群健康状态
6 12
7 ```bash 13 ```bash
8 # 集群整体健康(green / yellow / red) 14 # 集群整体健康(green / yellow / red)
9 -curl -s -u 'saas:4hOaLaf41y2VuI8y' 'http://127.0.0.1:9200/_cluster/health?pretty' 15 +curl -s -u "$ES_AUTH" 'http://127.0.0.1:9200/_cluster/health?pretty'
10 ``` 16 ```
11 17
12 ### 2. 索引概览 18 ### 2. 索引概览
13 19
14 ```bash 20 ```bash
15 # 查看所有租户索引状态与体积 21 # 查看所有租户索引状态与体积
16 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/_cat/indices/search_products_tenant_*?v' 22 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/_cat/indices/search_products_tenant_*?v'
17 23
18 # 或查看全部索引 24 # 或查看全部索引
19 -curl -s -u 'saas:4hOaLaf41y2VuI8y' 'http://127.0.0.1:9200/_cat/indices?v' 25 +curl -s -u "$ES_AUTH" 'http://127.0.0.1:9200/_cat/indices?v'
20 ``` 26 ```
21 27
22 ### 3. 分片分布 28 ### 3. 分片分布
23 29
24 ```bash 30 ```bash
25 # 查看分片在各节点的分布情况 31 # 查看分片在各节点的分布情况
26 -curl -s -u 'saas:4hOaLaf41y2VuI8y' 'http://127.0.0.1:9200/_cat/shards?v' 32 +curl -s -u "$ES_AUTH" 'http://127.0.0.1:9200/_cat/shards?v'
27 ``` 33 ```
28 34
29 ### 4. 分配诊断(如有异常) 35 ### 4. 分配诊断(如有异常)
30 36
31 ```bash 37 ```bash
32 # 当 health 非 green 或 shards 状态异常时,定位具体原因 38 # 当 health 非 green 或 shards 状态异常时,定位具体原因
33 -curl -s -u 'saas:4hOaLaf41y2VuI8y' -X POST 'http://127.0.0.1:9200/_cluster/allocation/explain?pretty' \ 39 +curl -s -u "$ES_AUTH" -X POST 'http://127.0.0.1:9200/_cluster/allocation/explain?pretty' \
34 -H 'Content-Type: application/json' \ 40 -H 'Content-Type: application/json' \
35 -d '{"index":"search_products_tenant_163","shard":0,"primary":true}' 41 -d '{"index":"search_products_tenant_163","shard":0,"primary":true}'
36 ``` 42 ```
@@ -60,6 +66,54 @@ cat /etc/elasticsearch/elasticsearch.yml
60 journalctl -u elasticsearch -f 66 journalctl -u elasticsearch -f
61 ``` 67 ```
62 68
  69 +### 7. 修改索引级设置(如 BM25 `similarity.default`)
  70 +
  71 +`mappings/search_products.json` 里的 `settings.similarity` 只在**创建新索引**时生效;**已有索引**需先关闭索引,再 `PUT _settings`,最后重新打开。
  72 +
  73 +**适用场景**:调整默认 BM25 的 `b`、`k1`(例如与仓库映射对齐:`b: 0.1`、`k1: 0.3`)。
  74 +
  75 +```bash
  76 +# 按需替换:索引名、账号密码、ES 地址
  77 +INDEX="search_products_tenant_163"
  78 +AUTH="$ES_AUTH"
  79 +ES="http://localhost:9200"
  80 +
  81 +# 1) 关闭索引(写入类请求会失败,注意维护窗口)
  82 +curl -s -u "$AUTH" -X POST "$ES/${INDEX}/_close"
  83 +
  84 +# 2) 更新设置(仅示例:与 mappings 中 default 一致时可照抄)
  85 +curl -s -u "$AUTH" -X PUT "$ES/${INDEX}/_settings" \
  86 + -H 'Content-Type: application/json' \
  87 + -d '{
  88 + "index": {
  89 + "similarity": {
  90 + "default": {
  91 + "type": "BM25",
  92 + "b": 0.1,
  93 + "k1": 0.3
  94 + }
  95 + }
  96 + }
  97 +}'
  98 +
  99 +# 3) 重新打开索引
  100 +curl -s -u "$AUTH" -X POST "$ES/${INDEX}/_open"
  101 +```
  102 +
  103 +**检查是否生效**:
  104 +
  105 +```bash
  106 +curl -s -u "$AUTH" -X GET "$ES/${INDEX}/_settings?filter_path=**.similarity&pretty"
  107 +```
  108 +
  109 +期望在响应中看到 `similarity.default` 的 `type`、`b`、`k1`(API 可能将数值以字符串形式返回,属正常)。
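参考:过滤后的响应通常形如下例(仅为示意;ES 常把数值以字符串返回):

```json
{
  "search_products_tenant_163": {
    "settings": {
      "index": {
        "similarity": {
          "default": {
            "type": "BM25",
            "b": "0.1",
            "k1": "0.3"
          }
        }
      }
    }
  }
}
```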
  110 +
  111 +**多租户批量**:先列出索引,再对每个 `search_products_tenant_*` 重复上述 close → settings → open。
  112 +
  113 +```bash
  114 +curl -s -u "$AUTH" -X GET "$ES/_cat/indices/search_products_tenant_*?h=index&v"
  115 +```
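基于上面列出的索引,可用如下循环批量执行(仅为示意:请在维护窗口执行,并逐个核对每次响应再继续):

```bash
for idx in $(curl -s -u "$AUTH" "$ES/_cat/indices/search_products_tenant_*?h=index"); do
  curl -s -u "$AUTH" -X POST "$ES/${idx}/_close"
  curl -s -u "$AUTH" -X PUT "$ES/${idx}/_settings" \
    -H 'Content-Type: application/json' \
    -d '{"index":{"similarity":{"default":{"type":"BM25","b":0.1,"k1":0.3}}}}'
  curl -s -u "$AUTH" -X POST "$ES/${idx}/_open"
done
```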
  116 +
63 --- 117 ---
64 118
65 ### 快速排查路径 119 ### 快速排查路径
@@ -93,7 +147,7 @@ systemctl / df / 日志 → 系统层验证
93 147
94 #### 查询指定 spu_id 的商品(返回 title) 148 #### 查询指定 spu_id 的商品(返回 title)
95 ```bash 149 ```bash
96 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ 150 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
97 "size": 11, 151 "size": 11,
98 "_source": ["title"], 152 "_source": ["title"],
99 "query": { 153 "query": {
@@ -108,7 +162,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
108 162
109 #### 查询所有商品(返回 title) 163 #### 查询所有商品(返回 title)
110 ```bash 164 ```bash
111 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ 165 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
112 "size": 100, 166 "size": 100,
113 "_source": ["title"], 167 "_source": ["title"],
114 "query": { 168 "query": {
@@ -119,7 +173,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
119 173
120 #### 查询指定 spu_id 的商品(返回 title、keywords、tags) 174 #### 查询指定 spu_id 的商品(返回 title、keywords、tags)
121 ```bash 175 ```bash
122 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ 176 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
123 "size": 5, 177 "size": 5,
124 "_source": ["title", "keywords", "tags"], 178 "_source": ["title", "keywords", "tags"],
125 "query": { 179 "query": {
@@ -134,7 +188,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
134 188
135 #### 组合查询:匹配标题 + 过滤标签 189 #### 组合查询:匹配标题 + 过滤标签
136 ```bash 190 ```bash
137 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ 191 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
138 "size": 1, 192 "size": 1,
139 "_source": ["title", "keywords", "tags"], 193 "_source": ["title", "keywords", "tags"],
140 "query": { 194 "query": {
@@ -158,7 +212,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
158 212
159 #### 组合查询:匹配标题 + 过滤租户(冗余示例) 213 #### 组合查询:匹配标题 + 过滤租户(冗余示例)
160 ```bash 214 ```bash
161 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ 215 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
162 "size": 1, 216 "size": 1,
163 "_source": ["title"], 217 "_source": ["title"],
164 "query": { 218 "query": {
@@ -186,7 +240,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
186 240
187 #### 测试 index_ik 分析器 241 #### 测试 index_ik 分析器
188 ```bash 242 ```bash
189 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ 243 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
190 "analyzer": "index_ik", 244 "analyzer": "index_ik",
191 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" 245 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
192 }' 246 }'
@@ -194,7 +248,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
194 248
195 #### 测试 query_ik 分析器 249 #### 测试 query_ik 分析器
196 ```bash 250 ```bash
197 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ 251 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
198 "analyzer": "query_ik", 252 "analyzer": "query_ik",
199 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" 253 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
200 }' 254 }'
@@ -206,7 +260,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
206 260
207 #### 多字段匹配 + 聚合(category1、color、size、material) 261 #### 多字段匹配 + 聚合(category1、color、size、material)
208 ```bash 262 ```bash
209 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ 263 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
210 "size": 1, 264 "size": 1,
211 "from": 0, 265 "from": 0,
212 "query": { 266 "query": {
@@ -316,7 +370,7 @@ GET /search_products_tenant_2/_search
316 370
317 #### 按 spu_id 查询(通用索引) 371 #### 按 spu_id 查询(通用索引)
318 ```bash 372 ```bash
319 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ 373 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
320 "size": 5, 374 "size": 5,
321 "query": { 375 "query": {
322 "bool": { 376 "bool": {
@@ -333,7 +387,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
333 ### 5. 统计租户总文档数 387 ### 5. 统计租户总文档数
334 388
335 ```bash 389 ```bash
336 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{ 390 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{
337 "query": { 391 "query": {
338 "match_all": {} 392 "match_all": {}
339 } 393 }
@@ -348,7 +402,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
348 402
349 #### 1.1 查询特定租户的商品,显示分面相关字段 403 #### 1.1 查询特定租户的商品,显示分面相关字段
350 ```bash 404 ```bash
351 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 405 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
352 "query": { 406 "query": {
353 "term": { "tenant_id": "162" } 407 "term": { "tenant_id": "162" }
354 }, 408 },
@@ -363,7 +417,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
363 417
364 #### 1.2 验证 category1_name 字段是否有数据 418 #### 1.2 验证 category1_name 字段是否有数据
365 ```bash 419 ```bash
366 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 420 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
367 "query": { 421 "query": {
368 "bool": { 422 "bool": {
369 "filter": [ 423 "filter": [
@@ -378,7 +432,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
378 432
379 #### 1.3 验证 specifications 字段是否有数据 433 #### 1.3 验证 specifications 字段是否有数据
380 ```bash 434 ```bash
381 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 435 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
382 "query": { 436 "query": {
383 "bool": { 437 "bool": {
384 "filter": [ 438 "filter": [
@@ -397,7 +451,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
397 451
398 #### 2.1 category1_name 分面聚合 452 #### 2.1 category1_name 分面聚合
399 ```bash 453 ```bash
400 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 454 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
401 "query": { "match_all": {} }, 455 "query": { "match_all": {} },
402 "size": 0, 456 "size": 0,
403 "aggs": { 457 "aggs": {
@@ -410,7 +464,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
410 464
411 #### 2.2 specifications.color 分面聚合 465 #### 2.2 specifications.color 分面聚合
412 ```bash 466 ```bash
413 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 467 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
414 "query": { "match_all": {} }, 468 "query": { "match_all": {} },
415 "size": 0, 469 "size": 0,
416 "aggs": { 470 "aggs": {
@@ -431,7 +485,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
431 485
432 #### 2.3 specifications.size 分面聚合 486 #### 2.3 specifications.size 分面聚合
433 ```bash 487 ```bash
434 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 488 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
435 "query": { "match_all": {} }, 489 "query": { "match_all": {} },
436 "size": 0, 490 "size": 0,
437 "aggs": { 491 "aggs": {
@@ -452,7 +506,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
452 506
453 #### 2.4 specifications.material 分面聚合 507 #### 2.4 specifications.material 分面聚合
454 ```bash 508 ```bash
455 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 509 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
456 "query": { "match_all": {} }, 510 "query": { "match_all": {} },
457 "size": 0, 511 "size": 0,
458 "aggs": { 512 "aggs": {
@@ -473,7 +527,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
473 527
474 #### 2.5 综合分面聚合(category + color + size + material) 528 #### 2.5 综合分面聚合(category + color + size + material)
475 ```bash 529 ```bash
476 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 530 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
477 "query": { "match_all": {} }, 531 "query": { "match_all": {} },
478 "size": 0, 532 "size": 0,
479 "aggs": { 533 "aggs": {
@@ -515,7 +569,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
515 569
516 #### 3.1 查看 specifications 的 name 字段有哪些值 570 #### 3.1 查看 specifications 的 name 字段有哪些值
517 ```bash 571 ```bash
518 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ 572 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
519 "query": { "term": { "tenant_id": "162" } }, 573 "query": { "term": { "tenant_id": "162" } },
520 "size": 0, 574 "size": 0,
521 "aggs": { 575 "aggs": {
@@ -531,7 +585,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
531 585
532 #### 3.2 查看某个商品的完整 specifications 数据 586 #### 3.2 查看某个商品的完整 specifications 数据
533 ```bash 587 ```bash
534 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ 588 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
535 "query": { 589 "query": {
536 "bool": { 590 "bool": {
537 "filter": [ 591 "filter": [
@@ -552,7 +606,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
552 **keyword 精确匹配**(示例词:中文 `法式风格`,英文 `long skirt`) 606 **keyword 精确匹配**(示例词:中文 `法式风格`,英文 `long skirt`)
553 607
554 ```bash 608 ```bash
555 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 609 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
556 "size": 1, 610 "size": 1,
557 "_source": ["spu_id", "title", "enriched_attributes"], 611 "_source": ["spu_id", "title", "enriched_attributes"],
558 "query": { 612 "query": {
@@ -575,7 +629,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
575 **text 全文匹配**(经 `index_ik` / `english` 分词;可与上式对照) 629 **text 全文匹配**(经 `index_ik` / `english` 分词;可与上式对照)
576 630
577 ```bash 631 ```bash
578 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 632 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
579 "size": 1, 633 "size": 1,
580 "_source": ["spu_id", "title", "enriched_attributes"], 634 "_source": ["spu_id", "title", "enriched_attributes"],
581 "query": { 635 "query": {
@@ -602,7 +656,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
602 **keyword 精确匹配** 656 **keyword 精确匹配**
603 657
604 ```bash 658 ```bash
605 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 659 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
606 "size": 1, 660 "size": 1,
607 "_source": ["spu_id", "title", "option1_values"], 661 "_source": ["spu_id", "title", "option1_values"],
608 "query": { 662 "query": {
@@ -620,7 +674,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -620,7 +674,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
620 **text 全文匹配** 674 **text 全文匹配**
621 675
622 ```bash 676 ```bash
623 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 677 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
624 "size": 1, 678 "size": 1,
625 "_source": ["spu_id", "title", "option1_values"], 679 "_source": ["spu_id", "title", "option1_values"],
626 "query": { 680 "query": {
@@ -640,7 +694,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -640,7 +694,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
640 **keyword 精确匹配** 694 **keyword 精确匹配**
641 695
642 ```bash 696 ```bash
643 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 697 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
644 "size": 1, 698 "size": 1,
645 "_source": ["spu_id", "title", "enriched_tags"], 699 "_source": ["spu_id", "title", "enriched_tags"],
646 "query": { 700 "query": {
@@ -658,7 +712,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -658,7 +712,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
658 **text 全文匹配** 712 **text 全文匹配**
659 713
660 ```bash 714 ```bash
661 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 715 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
662 "size": 1, 716 "size": 1,
663 "_source": ["spu_id", "title", "enriched_tags"], 717 "_source": ["spu_id", "title", "enriched_tags"],
664 "query": { 718 "query": {
@@ -678,7 +732,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -678,7 +732,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
678 > `specifications` 为 **nested**,`value_keyword` 为整词匹配;`value_text.*` 可同时 `term` 子字段或 `match` 主 text。 732 > `specifications` 为 **nested**,`value_keyword` 为整词匹配;`value_text.*` 可同时 `term` 子字段或 `match` 主 text。
679 733
680 ```bash 734 ```bash
681 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 735 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
682 "size": 1, 736 "size": 1,
683 "_source": ["spu_id", "title", "specifications"], 737 "_source": ["spu_id", "title", "specifications"],
684 "query": { 738 "query": {
@@ -710,7 +764,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -710,7 +764,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
710 764
711 #### 4.1 统计有 category1_name 的文档数量 765 #### 4.1 统计有 category1_name 的文档数量
712 ```bash 766 ```bash
713 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_count?pretty' -H 'Content-Type: application/json' -d '{ 767 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_count?pretty' -H 'Content-Type: application/json' -d '{
714 "query": { 768 "query": {
715 "bool": { 769 "bool": {
716 "filter": [ 770 "filter": [
@@ -723,7 +777,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -723,7 +777,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
723 777
724 #### 4.2 统计有 specifications 的文档数量 778 #### 4.2 统计有 specifications 的文档数量
725 ```bash 779 ```bash
726 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_count?pretty' -H 'Content-Type: application/json' -d '{ 780 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_count?pretty' -H 'Content-Type: application/json' -d '{
727 "query": { 781 "query": {
728 "bool": { 782 "bool": {
729 "filter": [ 783 "filter": [
@@ -740,7 +794,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -740,7 +794,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
740 794
741 #### 5.1 查找没有 category1_name 但有 category 的文档(MySQL 有数据但 ES 没有) 795 #### 5.1 查找没有 category1_name 但有 category 的文档(MySQL 有数据但 ES 没有)
742 ```bash 796 ```bash
743 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 797 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
744 "query": { 798 "query": {
745 "bool": { 799 "bool": {
746 "filter": [ 800 "filter": [
@@ -758,7 +812,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te @@ -758,7 +812,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_te
758 812
759 #### 5.2 查找有 option 但没有 specifications 的文档(数据转换问题) 813 #### 5.2 查找有 option 但没有 specifications 的文档(数据转换问题)
760 ```bash 814 ```bash
761 -curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{ 815 +curl -u "$ES_AUTH" -X GET 'http://localhost:9200/search_products_tenant_163/_search?pretty' -H 'Content-Type: application/json' -d '{
762 "query": { 816 "query": {
763 "bool": { 817 "bool": {
764 "filter": [ 818 "filter": [
@@ -814,7 +868,7 @@ GET search_products_tenant_163/_mapping @@ -814,7 +868,7 @@ GET search_products_tenant_163/_mapping
814 GET search_products_tenant_163/_field_caps?fields=* 868 GET search_products_tenant_163/_field_caps?fields=*
815 869
816 ```bash 870 ```bash
817 -curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ 871 +curl -u "$ES_AUTH" -X POST \
818 'http://localhost:9200/search_products_tenant_163/_count' \ 872 'http://localhost:9200/search_products_tenant_163/_count' \
819 -H 'Content-Type: application/json' \ 873 -H 'Content-Type: application/json' \
820 -d '{ 874 -d '{
@@ -827,7 +881,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X POST \ @@ -827,7 +881,7 @@ curl -u &#39;saas:4hOaLaf41y2VuI8y&#39; -X POST \
827 } 881 }
828 }' 882 }'
829 883
830 -curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ 884 +curl -u "$ES_AUTH" -X POST \
831 'http://localhost:9200/search_products_tenant_163/_count' \ 885 'http://localhost:9200/search_products_tenant_163/_count' \
832 -H 'Content-Type: application/json' \ 886 -H 'Content-Type: application/json' \
833 -d '{ 887 -d '{
docs/性能测试报告.md
@@ -18,13 +18,13 @@
 
 执行方式:
 - 每组压测持续 `20s`
-- 使用统一脚本 `scripts/perf_api_benchmark.py`
+- 使用统一脚本 `benchmarks/perf_api_benchmark.py`
 - 通过 `--scenario` 多值 + `--concurrency-list` 一次性跑完 `场景 x 并发`
 
 ## 3. 压测工具优化说明(复用现有脚本)
 
 为了解决原脚本“一次只能跑一个场景+一个并发”的可用性问题,本次直接扩展现有脚本:
-- `scripts/perf_api_benchmark.py`
+- `benchmarks/perf_api_benchmark.py`
 
 能力:
 - 一条命令执行 `场景列表 x 并发列表` 全矩阵
@@ -33,7 +33,7 @@
 示例:
 
 ```bash
-.venv/bin/python scripts/perf_api_benchmark.py \
+.venv/bin/python benchmarks/perf_api_benchmark.py \
 --scenario backend_search,backend_suggest,embed_text,rerank \
 --concurrency-list 1,5,10,20 \
 --duration 20 \
@@ -106,7 +106,7 @@ curl -sS http://127.0.0.1:6007/health
 
 ```bash
 cd /data/saas-search
-.venv/bin/python scripts/perf_api_benchmark.py \
+.venv/bin/python benchmarks/perf_api_benchmark.py \
 --scenario backend_search,backend_suggest,embed_text,rerank \
 --concurrency-list 1,5,10,20 \
 --duration 20 \
@@ -164,7 +164,7 @@ cd /data/saas-search
 复现命令:
 
 ```bash
-.venv/bin/python scripts/perf_api_benchmark.py \
+.venv/bin/python benchmarks/perf_api_benchmark.py \
 --scenario rerank \
 --duration 20 \
 --concurrency-list 1,5,10,20 \
@@ -237,7 +237,7 @@ cd /data/saas-search
 - 使用项目虚拟环境执行:
 
 ```bash
-.venv/bin/python scripts/perf_api_benchmark.py -h
+.venv/bin/python benchmarks/perf_api_benchmark.py -h
 ```
 
 ### 10.3 某场景成功率下降
@@ -249,7 +249,7 @@ cd /data/saas-search
 
 ## 11. 关联文件
 
-- 压测脚本:`scripts/perf_api_benchmark.py`
+- 压测脚本:`benchmarks/perf_api_benchmark.py`
 - 本次结果:`perf_reports/2026-03-12/perf_matrix_report.json`
 - Search 多租户补测:`perf_reports/2026-03-12/search_tenant_matrix/`
 - Reranker 386 docs 口径补测:`perf_reports/2026-03-12/rerank_realistic/rerank_386docs.json`
@@ -280,7 +280,7 @@ cd /data/saas-search
 cd /data/saas-search
 mkdir -p perf_reports/2026-03-12/search_tenant_matrix
 for t in 0 1 2 3 4; do
-  .venv/bin/python scripts/perf_api_benchmark.py \
+  .venv/bin/python benchmarks/perf_api_benchmark.py \
 --scenario backend_search \
 --concurrency-list 1,5,10,20 \
 --duration 20 \
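The benchmark script documented above runs the full `场景 x 并发` matrix from `--scenario` and `--concurrency-list`. As a hedged sketch of that expansion (the function name `expand_matrix` is illustrative, not the script's actual internals):

```python
from itertools import product


def expand_matrix(scenarios: str, concurrency_list: str) -> list[tuple[str, int]]:
    """Expand comma-separated --scenario and --concurrency-list values
    into the full (scenario, concurrency) run matrix."""
    names = [s.strip() for s in scenarios.split(",") if s.strip()]
    levels = [int(c) for c in concurrency_list.split(",") if c.strip()]
    return list(product(names, levels))


runs = expand_matrix("backend_search,backend_suggest", "1,5,10,20")
print(len(runs))  # 2 scenarios x 4 concurrency levels = 8 runs
```

Each `(scenario, concurrency)` pair then corresponds to one timed run of the configured `--duration`.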
docs/搜索API对接指南-00-总览与快速开始.md
@@ -90,7 +90,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \
 | 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) |
 | 构建ES文档(正式对接) | POST | `/indexer/build-docs` | 基于上游提供的 MySQL 行数据构建 ES doc,不写入 ES,供 Java 等调用后自行写入 |
 | 构建ES文档(测试用) | POST | `/indexer/build-docs-from-db` | 仅在测试/调试时使用,根据 `tenant_id + spu_ids` 内部查库并构建 ES doc |
-| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags,供微服务组合方式使用 |
+| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags,供微服务组合方式使用(独立服务端口 6001) |
 | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 |
 | 健康检查 | GET | `/admin/health` | 服务健康检查 |
 | 获取配置 | GET | `/admin/config` | 获取租户配置 |
@@ -104,7 +104,6 @@ curl -X POST "http://43.166.252.75:6002/search/" \
 | 向量服务(图片) | 6008 | `POST /embed/image` | 图片向量化 |
 | 翻译服务 | 6006 | `POST /translate` | 文本翻译(支持 qwen-mt / llm / deepl / 本地模型) |
 | 重排服务 | 6007 | `POST /rerank` | 检索结果重排 |
-| 内容理解(Indexer 内) | 6004 | `POST /indexer/enrich-content` | 根据商品标题生成 qanchors、tags 等,供 indexer 微服务组合方式使用 |
+| 内容理解(独立服务) | 6001 | `POST /indexer/enrich-content` | 根据商品标题生成 qanchors、tags 等,供 indexer 微服务组合方式使用 |
 
 ---
-
docs/搜索API对接指南-05-索引接口(Indexer).md
@@ -13,7 +13,7 @@
 | 查询文档 | POST | `/indexer/documents` | 按 SPU ID 列表查询 ES 文档,不写入 ES |
 | 构建 ES 文档(正式) | POST | `/indexer/build-docs` | 由上游提供 MySQL 行数据,返回 ES-ready 文档,不写 ES |
 | 构建 ES 文档(测试) | POST | `/indexer/build-docs-from-db` | 由本服务查库并构建文档,仅测试/调试用 |
-| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags(供微服务组合方式使用) |
+| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags(供微服务组合方式使用;独立服务端口 6001) |
 | 索引健康检查 | GET | `/indexer/health` | 检查索引服务与数据库连接状态 |
 
 #### 5.0 支撑外部 indexer 的三种方式
@@ -23,7 +23,7 @@
 | 方式 | 说明 | 适用场景 |
 |------|------|----------|
 | **1)doc 填充接口** | 调用 `POST /indexer/build-docs` 或 `POST /indexer/build-docs-from-db`,由本服务基于 MySQL 行数据构建完整 ES 文档(含多语言、向量、规格等),**不写入 ES**,由调用方自行写入。 | 希望一站式拿到 ES-ready doc,由己方控制写 ES 的时机与索引名。 |
-| **2)微服务组合** | 单独调用**翻译**、**向量化**、**内容理解字段生成**等接口,由 indexer 程序自己组装 doc 并写入 ES。翻译与向量化为独立微服务(见第 7 节);内容理解为 Indexer 服务内接口 `POST /indexer/enrich-content`。 | 需要灵活编排、或希望将 LLM/向量等耗时步骤与主链路解耦(如异步补齐 qanchors/tags)。 |
+| **2)微服务组合** | 单独调用**翻译**、**向量化**、**内容理解字段生成**等接口,由 indexer 程序自己组装 doc 并写入 ES。翻译与向量化为独立微服务(见第 7 节);内容理解为 FacetAwareMatching 独立服务接口 `POST /indexer/enrich-content`(端口 6001)。 | 需要灵活编排、或希望将 LLM/向量等耗时步骤与主链路解耦(如异步补齐 qanchors/tags)。 |
 | **3)本服务直接写 ES** | 调用全量索引 `POST /indexer/reindex`、增量索引 `POST /indexer/index`(指定 SPU ID 列表),由本服务从 MySQL 拉数并直接写入 ES。 | 自建运维、联调或不需要由 Java 写 ES 的场景。 |
 
 - **方式 1** 与 **方式 2** 下,ES 的写入方均为外部 indexer(或 Java),职责清晰。
@@ -498,7 +498,7 @@ curl -X GET "http://localhost:6004/indexer/health"
 
 #### 请求示例(完整 curl)
 
-> 完整请求体参考 `scripts/test_build_docs_api.py` 中的 `build_sample_request()`。
+> 完整请求体参考 `tests/manual/test_build_docs_api.py` 中的 `build_sample_request()`。
 
 ```bash
 # 单条 SPU 示例(含 spu、skus、options)
@@ -648,13 +648,38 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 ### 5.8 内容理解字段生成接口
 
 - **端点**: `POST /indexer/enrich-content`
-- **描述**: 根据商品内容信息批量生成 **qanchors**(锚文本)、**enriched_attributes**(语义属性)、**enriched_tags**(细分标签),供外部 indexer 在「微服务组合」方式下自行拼装 doc 时使用。请求以 `items[]` 传入商品内容字段(必填/可选见下表)。接口只暴露商品内容输入,语言选择、分析维度与最终字段结构统一由 `indexer.product_enrich` 内部决定;当前返回结果与 `search_products` mapping 保持一致。单次请求在线程池中执行,避免阻塞其他接口。
+- **服务**: FacetAwareMatching 独立服务(默认端口 **6001**;由 `/data/FacetAwareMatching/scripts/service_ctl.sh` 管理)
+- **描述**: 根据商品内容信息批量生成 **qanchors**(锚文本)、**enriched_attributes**(通用语义属性)、**enriched_tags**(细分标签)、**enriched_taxonomy_attributes**(taxonomy 结构化属性),供外部 indexer 在「微服务组合」方式下自行拼装 doc 时使用。请求以 `items[]` 传入商品内容字段(必填/可选见下表)。接口只暴露商品内容输入,语言选择、分析维度与最终字段结构统一由 FacetAwareMatching 的 `product_enrich` 内部决定;当前返回结果与 `search_products` mapping 保持一致。单次请求在线程池中执行,避免阻塞其他接口。
+
+当前支持的 `category_taxonomy_profile`:
+- `apparel`
+- `3c`
+- `bags`
+- `pet_supplies`
+- `electronics`
+- `outdoor`
+- `home_appliances`
+- `home_living`
+- `wigs`
+- `beauty`
+- `accessories`
+- `toys`
+- `shoes`
+- `sports`
+- `others`
+
+说明:
+- 所有 profile 的 `enriched_taxonomy_attributes.value` 都统一返回 `zh` + `en`。
+- 外部调用 `/indexer/enrich-content` 时,以请求中的 `category_taxonomy_profile` 为准。
+- 若 indexer 内部仍接入内容理解能力,taxonomy profile 请在调用侧显式传入(建议仍以租户行业配置为准)。
 
 #### 请求参数
 
 ```json
 {
   "tenant_id": "170",
+  "enrichment_scopes": ["generic", "category_taxonomy"],
+  "category_taxonomy_profile": "apparel",
   "items": [
     {
       "spu_id": "223167",
@@ -675,6 +700,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 | 参数 | 类型 | 必填 | 默认值 | 说明 |
 |------|------|------|--------|------|
 | `tenant_id` | string | Y | - | 租户 ID。目前仅用于记录日志,不产生实际作用 |
+| `enrichment_scopes` | array[string] | N | `["generic", "category_taxonomy"]` | 选择要执行的增强范围。`generic` 生成 `qanchors`/`enriched_tags`/`enriched_attributes`,`category_taxonomy` 生成 `enriched_taxonomy_attributes` |
+| `category_taxonomy_profile` | string | N | `apparel` | 品类 taxonomy profile。支持:`apparel`、`3c`、`bags`、`pet_supplies`、`electronics`、`outdoor`、`home_appliances`、`home_living`、`wigs`、`beauty`、`accessories`、`toys`、`shoes`、`sports`、`others` |
 | `items` | array | Y | - | 待分析列表;**单次最多 50 条** |
 
 `items[]` 字段说明:
@@ -683,21 +710,24 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 |------|------|------|------|
 | `spu_id` | string | Y | SPU ID,用于回填结果;目前仅用于记录日志,不产生实际作用 |
 | `title` | string | Y | 商品标题 |
-| `image_url` | string | N | 商品主图 URL;当前会参与内容缓存键,后续可用于图像/多模态内容理解 |
-| `brief` | string | N | 商品简介/短描述;当前会参与内容缓存键 |
-| `description` | string | N | 商品详情/长描述;当前会参与内容缓存键 |
+| `image_url` | string | N | 商品主图 URL;当前仅透传,暂未参与 prompt 与缓存键,后续可用于图像/多模态内容理解 |
+| `brief` | string | N | 商品简介/短描述;当前会参与 prompt 与缓存键 |
+| `description` | string | N | 商品详情/长描述;当前会参与 prompt 与缓存键 |
 
 缓存说明:
 
-- 内容缓存键仅由 `target_lang + items[]` 中会影响内容理解结果的输入文本构成,目前包括:`title`、`brief`、`description`、`image_url` 的规范化内容 hash。
+- 内容缓存按 **增强范围 + taxonomy profile** 拆分;`generic` 与 `category_taxonomy:apparel` 等使用不同缓存命名空间,互不污染、可独立演进。
+- 缓存键由 `analysis_kind + target_lang + prompt/schema 版本指纹 + prompt 输入文本 hash` 构成;对 category taxonomy 来说,profile 会进入 schema 标识与版本指纹。
+- 当前真正参与 prompt 输入的字段是:`title`、`brief`、`description`;这些字段任一变化,都会落到新的缓存 key。
+- `prompt/schema 版本指纹` 会综合 system prompt、shared instruction、localized table headers、result fields、user instruction template 等信息生成;因此只要提示词或输出契约变化,旧缓存会自然失效。
 - `tenant_id`、`spu_id` 只用于请求归属与结果回填,不参与缓存键。
-- 因此,输入内容不变时可跨请求直接命中缓存;任一输入字段变化时,会自然落到新的缓存 key。
+- 因此,输入内容与 prompt 契约都不变时可跨请求直接命中缓存;任一侧变化,都会自然落到新的缓存 key。
 
 语言说明:
 
 - 接口不接受语言控制参数。
 - 返回哪些语言、返回哪些语义维度,统一由 `indexer.product_enrich` 内部逻辑决定。
-- 当前为了与 `search_products` mapping 对齐,返回结果只包含核心索引语言 `zh`、`en`。
+- 当前为了与 `search_products` mapping 对齐,通用增强字段与 taxonomy 字段都统一只返回核心索引语言 `zh`、`en`。
 
 批量请求建议:
 - **全量**:强烈建议尽可能 **20 个 SPU/doc** 攒成一个批次后再请求一次。
@@ -709,6 +739,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 ```json
 {
   "tenant_id": "170",
+  "enrichment_scopes": ["generic", "category_taxonomy"],
+  "category_taxonomy_profile": "apparel",
   "total": 2,
   "results": [
     {
@@ -725,6 +757,11 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
       { "name": "enriched_tags", "value": { "zh": "纯棉" } },
       { "name": "usage_scene", "value": { "zh": "日常" } },
       { "name": "enriched_tags", "value": { "en": "cotton" } }
+      ],
+      "enriched_taxonomy_attributes": [
+        { "name": "Product Type", "value": { "zh": ["T恤"], "en": ["t-shirt"] } },
+        { "name": "Target Gender", "value": { "zh": ["男"], "en": ["men"] } },
+        { "name": "Season", "value": { "zh": ["夏季"], "en": ["summer"] } }
       ]
     },
     {
@@ -735,7 +772,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
       "enriched_tags": {
         "en": ["dolls", "toys"]
       },
-      "enriched_attributes": []
+      "enriched_attributes": [],
+      "enriched_taxonomy_attributes": []
     }
   ]
 }
@@ -743,10 +781,13 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 
 | 字段 | 类型 | 说明 |
 |------|------|------|
-| `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`enriched_attributes`、`enriched_tags` |
+| `enrichment_scopes` | array | 实际执行的增强范围列表 |
+| `category_taxonomy_profile` | string | 实际使用的品类 taxonomy profile |
+| `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes` |
 | `results[].qanchors` | object | 与 ES `qanchors` 字段同结构,按语言键返回短语数组 |
 | `results[].enriched_tags` | object | 与 ES `enriched_tags` 字段同结构,按语言键返回标签数组 |
 | `results[].enriched_attributes` | array | 与 ES `enriched_attributes` nested 字段同结构,每项为 `{ "name", "value": { "zh"?: "...", "en"?: "..." } }` |
+| `results[].enriched_taxonomy_attributes` | array | 与 ES `enriched_taxonomy_attributes` nested 字段同结构。每项通常为 `{ "name", "value": { "zh"?: [...], "en"?: [...] } }` |
 | `results[].error` | string | 若该条处理失败(如 LLM 异常),会在此字段返回错误信息 |
 
 **错误响应**:
@@ -756,10 +797,12 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
 #### 请求示例
 
 ```bash
-curl -X POST "http://localhost:6004/indexer/enrich-content" \
+curl -X POST "http://localhost:6001/indexer/enrich-content" \
 -H "Content-Type: application/json" \
 -d '{
-  "tenant_id": "170",
+  "tenant_id": "163",
+  "enrichment_scopes": ["generic", "category_taxonomy"],
+  "category_taxonomy_profile": "apparel",
   "items": [
     {
       "spu_id": "223167",
@@ -773,4 +816,3 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \
 ```
 
 ---
-
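The updated `/indexer/enrich-content` section caps `items[]` at 50 per request and recommends batching roughly 20 SPUs per call for full runs. A minimal client-side batching sketch (the helper name `batch_items` and the constants are illustrative, not part of the service contract):

```python
from typing import Any

MAX_ITEMS_PER_REQUEST = 50  # documented hard cap for items[] per request
RECOMMENDED_BATCH = 20      # suggested batch size for full reindex runs


def batch_items(
    items: list[dict[str, Any]],
    batch_size: int = RECOMMENDED_BATCH,
) -> list[list[dict[str, Any]]]:
    """Split enrich-content items into request-sized batches."""
    if not 0 < batch_size <= MAX_ITEMS_PER_REQUEST:
        raise ValueError(f"batch_size must be in 1..{MAX_ITEMS_PER_REQUEST}")
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]


items = [{"spu_id": str(i), "title": f"item {i}"} for i in range(45)]
print([len(b) for b in batch_items(items)])  # [20, 20, 5]
```

Each batch would then be posted as one request body together with `tenant_id`, `enrichment_scopes`, and `category_taxonomy_profile`.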
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
@@ -444,7 +444,7 @@ curl "http://localhost:6006/health"
 
 - **Base URL**: Indexer 服务地址,如 `http://localhost:6004`
 - **路径**: `POST /indexer/enrich-content`
-- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`tags`,用于拼装 ES 文档。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。
+- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `enrichment_scopes` 选择执行 `generic` / `category_taxonomy`,并通过 `category_taxonomy_profile` 选择对应大类的 taxonomy prompt/profile;默认执行 `generic + category_taxonomy(apparel)`。当前支持的 taxonomy profile 包括 `apparel`、`3c`、`bags`、`pet_supplies`、`electronics`、`outdoor`、`home_appliances`、`home_living`、`wigs`、`beauty`、`accessories`、`toys`、`shoes`、`sports`、`others`。所有 profile 的 taxonomy 输出都统一返回 `zh` + `en`,`category_taxonomy_profile` 只决定字段集合。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。
 
 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。
 
docs/搜索API对接指南-10-接口级压测脚本.md
@@ -4,7 +4,7 @@
 
 ## 10. 接口级压测脚本
 
-仓库提供统一压测脚本:`scripts/perf_api_benchmark.py`,用于对以下接口做并发压测:
+仓库提供统一压测脚本:`benchmarks/perf_api_benchmark.py`,用于对以下接口做并发压测:
 
 - 后端搜索:`POST /search/`
 - 搜索建议:`GET /search/suggestions`
@@ -18,21 +18,21 @@
 
 ```bash
 # suggest 压测(tenant 162)
-python scripts/perf_api_benchmark.py \
+python benchmarks/perf_api_benchmark.py \
 --scenario backend_suggest \
 --tenant-id 162 \
 --duration 30 \
 --concurrency 50
 
 # search 压测
-python scripts/perf_api_benchmark.py \
+python benchmarks/perf_api_benchmark.py \
 --scenario backend_search \
 --tenant-id 162 \
 --duration 30 \
 --concurrency 20
 
 # 全链路压测(search + suggest + embedding + translate + rerank)
-python scripts/perf_api_benchmark.py \
+python benchmarks/perf_api_benchmark.py \
 --scenario all \
 --tenant-id 162 \
 --duration 60 \
@@ -45,17 +45,16 @@ python scripts/perf_api_benchmark.py \
 可通过 `--cases-file` 覆盖默认请求模板。示例文件:
 
 ```bash
-scripts/perf_cases.json.example
+benchmarks/perf_cases.json.example
 ```
 
 执行示例:
 
 ```bash
-python scripts/perf_api_benchmark.py \
+python benchmarks/perf_api_benchmark.py \
 --scenario all \
 --tenant-id 162 \
-  --cases-file scripts/perf_cases.json.example \
+  --cases-file benchmarks/perf_cases.json.example \
 --duration 60 \
 --concurrency 40
 ```
-
docs/相关性检索优化说明.md
@@ -330,7 +330,7 @@ python -m pytest -q tests/test_rerank_client.py tests/test_es_query_builder.py t
 ./scripts/service_ctl.sh restart backend
 sleep 3
 ./scripts/service_ctl.sh status backend
-./scripts/evaluation/start_eval.sh.sh batch
+./scripts/evaluation/start_eval.sh batch
 ```
 
 评估产物在 `artifacts/search_evaluation/`(如 `search_eval.sqlite3`、`batch_reports/` 下的 JSON/Markdown)。流程与参数说明见 [scripts/evaluation/README.md](../scripts/evaluation/README.md)。
@@ -895,4 +895,3 @@ rerank_score:0.4784
 rerank_score:0.5849
 "zh": "新款女士修身仿旧牛仔短裤 – 休闲性感磨边水洗牛仔短裤,时尚舒",
 "en": "New Women's Slim-fit Vintage Washed Denim Shorts – Casual Sexy Frayed Hem, Fashionable & Comfortable"
-
docs/缓存与Redis使用说明.md
@@ -196,18 +196,25 @@ services:
- Config items:
  - `ANCHOR_CACHE_PREFIX = REDIS_CONFIG.get("anchor_cache_prefix", "product_anchors")`
  - `ANCHOR_CACHE_EXPIRE_DAYS = int(REDIS_CONFIG.get("anchor_cache_expire_days", 30))`
-- Key builder: `_make_anchor_cache_key(title, target_lang, tenant_id)`
+- Key builder: `_make_analysis_cache_key(product, target_lang, analysis_kind)`
- Template:

```text
-{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}
+{ANCHOR_CACHE_PREFIX}:{analysis_kind}:{prompt_contract_hash}:{target_lang}:{prompt_input_prefix}{md5(prompt_input)}
```

- Field descriptions:
  - `ANCHOR_CACHE_PREFIX`: defaults to `"product_anchors"`; can be configured indirectly into `REDIS_CONFIG` via `REDIS_ANCHOR_CACHE_PREFIX` in `.env` (if present);
-  - `tenant_or_global`: `tenant_id` with surrounding whitespace stripped, or `"global"` when empty;
+  - `analysis_kind`: the analysis family, currently at least `content` and `taxonomy`, whose caches are isolated from each other;
+  - `prompt_contract_hash`: a short hash derived from the system prompt, shared instruction, localized headers, result fields, user instruction template, schema cache version, etc.; whenever the prompt or the output contract changes, the cache is invalidated automatically;
  - `target_lang`: the output language for content understanding, e.g. `zh`;
-  - `md5(title)`: MD5 of the raw product title (UTF-8).
+  - `prompt_input_prefix + md5(prompt_input)`: a prefix plus the MD5 of the text actually fed into the prompt; the current prompt input is the normalized concatenation of `title`, `brief`, and `description`.
+
+Design principles:
+
+- Only inputs that **actually affect the LLM output** participate in the key;
+- "Result ownership" information such as `tenant_id` or `spu_id` must not pollute the cache;
+- When the prompt or schema changes, the cache switches to new keys naturally, with no manual Redis cleanup required.
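The self-invalidation property can be sketched as follows. This is a minimal illustration, not the repository's actual helper; `make_cache_key` and the contract fields are placeholder names mirroring the template above:

```python
import hashlib
import json

def make_cache_key(prefix, analysis_kind, contract, target_lang, prompt_input):
    # Hash the full prompt/output contract; any change to it yields a new key family.
    contract_hash = hashlib.md5(
        json.dumps(contract, ensure_ascii=False, sort_keys=True).encode("utf-8")
    ).hexdigest()[:12]
    input_hash = hashlib.md5(prompt_input.encode("utf-8")).hexdigest()
    return f"{prefix}:{analysis_kind}:{contract_hash}:{target_lang}:{prompt_input[:4]}{input_hash}"

contract_v1 = {"shared_instruction": "Analyze the product.", "result_fields": ["tags"]}
contract_v2 = {"shared_instruction": "Analyze the product carefully.", "result_fields": ["tags"]}

k1 = make_cache_key("product_anchors", "content", contract_v1, "zh", "title brief")
k2 = make_cache_key("product_anchors", "content", contract_v2, "zh", "title brief")
# Same product text but a changed prompt contract -> different key, so stale entries are never read.
assert k1 != k2
```

Because the contract hash sits inside the key rather than the value, old entries simply age out via TTL instead of being served for a new prompt version.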

### 4.2 Value and types

@@ -229,6 +236,7 @@ services:
```

- The raw value is restored to a `Dict[str, Any]` via `json.loads(raw)`.
+- The value structures of `content` and `taxonomy` differ with their respective schemas, but both go through the shared normalize logic before being written to the cache.

### 4.3 Expiry policy

embeddings/README.md
@@ -98,10 +98,10 @@

### Performance and load testing (reusing repo scripts)

-- API-level benchmarking (same methodology as `perf_reports/2026-03-12/matrix_report/`): `scripts/perf_api_benchmark.py`
+- API-level benchmarking (same methodology as `perf_reports/2026-03-12/matrix_report/`): `benchmarks/perf_api_benchmark.py`
-  - Example: `python scripts/perf_api_benchmark.py --scenario embed_text --duration 30 --concurrency 20`
+  - Example: `python benchmarks/perf_api_benchmark.py --scenario embed_text --duration 30 --concurrency 20`
  - Text/image embedding requests may carry `priority` (same semantics as online admission): `--embed-text-priority 1`, `--embed-image-priority 1`
-  - Custom request templates: `--cases-file scripts/perf_cases.json.example`
+  - Custom request templates: `--cases-file benchmarks/perf_cases.json.example`
  - Historical matrix results and notes: `perf_reports/2026-03-12/matrix_report/summary.md`.

### Starting the service
frontend/static/js/app.js
@@ -316,7 +316,10 @@ async function performSearch(page = 1) {
    document.getElementById('productGrid').innerHTML = '';

    try {
-       const response = await fetch(`${API_BASE_URL}/search/`, {
+       const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin);
+       searchUrl.searchParams.set('tenant_id', tenantId);
+
+       const response = await fetch(searchUrl.toString(), {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
@@ -8,7 +8,7 @@

### 1.1 System roles

-- **Java indexer (/home/tw/saas-server)**
+- **Java indexer**
  - Responsible for "**when, and for which SPUs, indexing happens**" (scheduling & triggering).
  - Responsible for **syncing base data such as products/shops/categories** (writes to MySQL).
  - Responsible for **full/incremental index scheduling in a multi-tenant environment**, no longer caring about per-doc field details.
indexer/Untitled 0 → 100644
@@ -0,0 +1 @@
+taxonomy
\ No newline at end of file
indexer/document_transformer.py
@@ -242,6 +242,7 @@ class SPUDocumentTransformer:
    - qanchors.{lang}
    - enriched_tags.{lang}
    - enriched_attributes[].value.{lang}
+   - enriched_taxonomy_attributes[].value.{lang}

    Design goals:
    - Batch LLM calls as much as possible;
@@ -273,7 +274,12 @@ class SPUDocumentTransformer:

        tenant_id = str(docs[0].get("tenant_id") or "").strip() or None
        try:
-           results = build_index_content_fields(items=items, tenant_id=tenant_id)
+           # TODO: read this tenant's real industry from the database and use it instead of the current default apparel profile.
+           results = build_index_content_fields(
+               items=items,
+               tenant_id=tenant_id,
+               category_taxonomy_profile="apparel",
+           )
        except Exception as e:
            logger.warning("LLM batch attribute fill failed: %s", e)
            return
@@ -296,6 +302,8 @@ class SPUDocumentTransformer:
                    doc["enriched_tags"] = enrichment["enriched_tags"]
                if enrichment.get("enriched_attributes"):
                    doc["enriched_attributes"] = enrichment["enriched_attributes"]
+               if enrichment.get("enriched_taxonomy_attributes"):
+                   doc["enriched_taxonomy_attributes"] = enrichment["enriched_taxonomy_attributes"]
            except Exception as e:
                logger.warning("Failed to apply enrichment to doc (spu_id=%s): %s", doc.get("spu_id"), e)

@@ -666,6 +674,7 @@ class SPUDocumentTransformer:

        tenant_id = doc.get("tenant_id")
        try:
+           # TODO: read this tenant's real industry from the database and use it instead of the current default apparel profile.
            results = build_index_content_fields(
                items=[
                    {
@@ -677,6 +686,7 @@ class SPUDocumentTransformer:
                    }
                ],
                tenant_id=str(tenant_id),
+               category_taxonomy_profile="apparel",
            )
        except Exception as e:
            logger.warning("LLM attribute fill failed for SPU %s: %s", spu_id, e)
indexer/product_enrich.py
@@ -14,10 +14,11 @@ import time
import hashlib
import uuid
import threading
+from dataclasses import dataclass, field
from collections import OrderedDict
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
-from typing import List, Dict, Tuple, Any, Optional
+from typing import List, Dict, Tuple, Any, Optional, FrozenSet

import redis
import requests
@@ -30,6 +31,7 @@ from indexer.product_enrich_prompts import (
    USER_INSTRUCTION_TEMPLATE,
    LANGUAGE_MARKDOWN_TABLE_HEADERS,
    SHARED_ANALYSIS_INSTRUCTION,
+   CATEGORY_TAXONOMY_PROFILES,
)

# Config
@@ -144,10 +146,26 @@
)


-# Multi-value field separators: ASCII comma, full-width comma, the Chinese enumeration comma, plus the historical ; | / and whitespace
+# Multi-value field separators
_MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+")
+# Placeholders treated as "no content" in markdown table cells
+_MARKDOWN_EMPTY_CELL_LITERALS: Tuple[str, ...] = ("-", "–", "—", "none", "null", "n/a", "无")
+_MARKDOWN_EMPTY_CELL_TOKENS_CF: FrozenSet[str] = frozenset(
+    lit.casefold() for lit in _MARKDOWN_EMPTY_CELL_LITERALS
+)
+
+def _normalize_markdown_table_cell(raw: Optional[str]) -> str:
+    """Strip the cell; treat placeholder literals as empty strings."""
+    s = str(raw or "").strip()
+    if not s:
+        return ""
+    if s.casefold() in _MARKDOWN_EMPTY_CELL_TOKENS_CF:
+        return ""
+    return s
_CORE_INDEX_LANGUAGES = ("zh", "en")
-_ANALYSIS_ATTRIBUTE_FIELD_MAP = (
+_DEFAULT_ENRICHMENT_SCOPES = ("generic", "category_taxonomy")
+_DEFAULT_CATEGORY_TAXONOMY_PROFILE = "apparel"
+_CONTENT_ANALYSIS_ATTRIBUTE_FIELD_MAP = (
    ("tags", "enriched_tags"),
    ("target_audience", "target_audience"),
    ("usage_scene", "usage_scene"),
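The cell normalization added in this hunk can be exercised standalone. The sketch below reproduces the helper outside the module (names simplified) so its behavior on placeholders is visible:

```python
from typing import FrozenSet, Optional, Tuple

# Placeholders treated as "no content" in markdown table cells (mirrors the diff above).
_EMPTY_CELL_LITERALS: Tuple[str, ...] = ("-", "–", "—", "none", "null", "n/a", "无")
_EMPTY_CELL_TOKENS_CF: FrozenSet[str] = frozenset(s.casefold() for s in _EMPTY_CELL_LITERALS)

def normalize_markdown_table_cell(raw: Optional[str]) -> str:
    """Strip the cell; map placeholder literals to the empty string."""
    s = str(raw or "").strip()
    if not s or s.casefold() in _EMPTY_CELL_TOKENS_CF:
        return ""
    return s

assert normalize_markdown_table_cell("  N/A ") == ""
assert normalize_markdown_table_cell("—") == ""
assert normalize_markdown_table_cell(None) == ""
assert normalize_markdown_table_cell(" cotton ") == "cotton"
```

Using `casefold()` rather than `lower()` makes the placeholder match robust for non-ASCII case mappings.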
@@ -156,7 +174,7 @@ _ANALYSIS_ATTRIBUTE_FIELD_MAP = (
    ("material", "material"),
    ("features", "features"),
)
-_ANALYSIS_RESULT_FIELDS = (
+_CONTENT_ANALYSIS_RESULT_FIELDS = (
    "title",
    "category_path",
    "tags",
@@ -168,7 +186,7 @@ _ANALYSIS_RESULT_FIELDS = (
    "features",
    "anchor_text",
)
-_ANALYSIS_MEANINGFUL_FIELDS = (
+_CONTENT_ANALYSIS_MEANINGFUL_FIELDS = (
    "tags",
    "target_audience",
    "usage_scene",
@@ -178,9 +196,111 @@ _ANALYSIS_MEANINGFUL_FIELDS = (
    "material",
    "features",
    "anchor_text",
)
-_ANALYSIS_FIELD_ALIASES = {
+_CONTENT_ANALYSIS_FIELD_ALIASES = {
    "tags": ("tags", "enriched_tags"),
}
+_CONTENT_ANALYSIS_QUALITY_FIELDS = ("title", "category_path", "anchor_text")
+
+
+@dataclass(frozen=True)
+class AnalysisSchema:
+    name: str
+    shared_instruction: str
+    markdown_table_headers: Dict[str, List[str]]
+    result_fields: Tuple[str, ...]
+    meaningful_fields: Tuple[str, ...]
+    cache_version: str = "v1"
+    field_aliases: Dict[str, Tuple[str, ...]] = field(default_factory=dict)
+    quality_fields: Tuple[str, ...] = ()
+
+    def get_headers(self, target_lang: str) -> Optional[List[str]]:
+        return self.markdown_table_headers.get(target_lang)
+
+
+_ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = {
+    "content": AnalysisSchema(
+        name="content",
+        shared_instruction=SHARED_ANALYSIS_INSTRUCTION,
+        markdown_table_headers=LANGUAGE_MARKDOWN_TABLE_HEADERS,
+        result_fields=_CONTENT_ANALYSIS_RESULT_FIELDS,
+        meaningful_fields=_CONTENT_ANALYSIS_MEANINGFUL_FIELDS,
+        cache_version="v2",
+        field_aliases=_CONTENT_ANALYSIS_FIELD_ALIASES,
+        quality_fields=_CONTENT_ANALYSIS_QUALITY_FIELDS,
+    ),
+}
+
+
+def _build_taxonomy_profile_schema(profile: str, config: Dict[str, Any]) -> AnalysisSchema:
+    # Note: the loop variable is named `f` to avoid shadowing `dataclasses.field`.
+    return AnalysisSchema(
+        name=f"taxonomy:{profile}",
+        shared_instruction=config["shared_instruction"],
+        markdown_table_headers=config["markdown_table_headers"],
+        result_fields=tuple(f["key"] for f in config["fields"]),
+        meaningful_fields=tuple(f["key"] for f in config["fields"]),
+        cache_version="v1",
+    )
+
+
+_CATEGORY_TAXONOMY_PROFILE_SCHEMAS: Dict[str, AnalysisSchema] = {
+    profile: _build_taxonomy_profile_schema(profile, config)
+    for profile, config in CATEGORY_TAXONOMY_PROFILES.items()
+}
+
+_CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS: Dict[str, Tuple[Tuple[str, str], ...]] = {
+    profile: tuple((f["key"], f["label"]) for f in config["fields"])
+    for profile, config in CATEGORY_TAXONOMY_PROFILES.items()
+}
+
+
+def get_supported_category_taxonomy_profiles() -> Tuple[str, ...]:
+    return tuple(_CATEGORY_TAXONOMY_PROFILE_SCHEMAS.keys())
+
+
+def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str] = None) -> str:
+    profile = str(category_taxonomy_profile or _DEFAULT_CATEGORY_TAXONOMY_PROFILE).strip()
+    if profile not in _CATEGORY_TAXONOMY_PROFILE_SCHEMAS:
+        supported = ", ".join(get_supported_category_taxonomy_profiles())
+        raise ValueError(
+            f"Unsupported category_taxonomy_profile: {profile}. Supported profiles: {supported}"
+        )
+    return profile
+
+
+def _get_analysis_schema(
+    analysis_kind: str,
+    *,
+    category_taxonomy_profile: Optional[str] = None,
+) -> AnalysisSchema:
+    if analysis_kind == "content":
+        return _ANALYSIS_SCHEMAS["content"]
+    if analysis_kind == "taxonomy":
+        profile = _normalize_category_taxonomy_profile(category_taxonomy_profile)
+        return _CATEGORY_TAXONOMY_PROFILE_SCHEMAS[profile]
+    raise ValueError(f"Unsupported analysis_kind: {analysis_kind}")
+
+
+def _get_taxonomy_attribute_field_map(
+    category_taxonomy_profile: Optional[str] = None,
+) -> Tuple[Tuple[str, str], ...]:
+    profile = _normalize_category_taxonomy_profile(category_taxonomy_profile)
+    return _CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS[profile]
+
+
+def _normalize_enrichment_scopes(
+    enrichment_scopes: Optional[List[str]] = None,
+) -> Tuple[str, ...]:
+    requested = _DEFAULT_ENRICHMENT_SCOPES if not enrichment_scopes else tuple(enrichment_scopes)
+    normalized: List[str] = []
+    seen = set()
+    for enrichment_scope in requested:
+        scope = str(enrichment_scope).strip()
+        if scope not in {"generic", "category_taxonomy"}:
+            raise ValueError(f"Unsupported enrichment_scope: {scope}")
+        if scope in seen:
+            continue
+        seen.add(scope)
+        normalized.append(scope)
+    return tuple(normalized)


def split_multi_value_field(text: Optional[str]) -> List[str]:
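The scope-handling contract in this hunk (default to both scopes, reject unknown values, dedupe while preserving order) behaves like this minimal standalone sketch:

```python
from typing import List, Optional, Tuple

_DEFAULT_SCOPES = ("generic", "category_taxonomy")

def normalize_enrichment_scopes(scopes: Optional[List[str]] = None) -> Tuple[str, ...]:
    # Fall back to the defaults, reject unknown scopes, dedupe while keeping first-seen order.
    requested = _DEFAULT_SCOPES if not scopes else tuple(scopes)
    out: List[str] = []
    seen = set()
    for raw in requested:
        scope = str(raw).strip()
        if scope not in {"generic", "category_taxonomy"}:
            raise ValueError(f"Unsupported enrichment_scope: {scope}")
        if scope not in seen:
            seen.add(scope)
            out.append(scope)
    return tuple(out)

assert normalize_enrichment_scopes() == ("generic", "category_taxonomy")
assert normalize_enrichment_scopes(["category_taxonomy", "category_taxonomy"]) == ("category_taxonomy",)
```

Note that an empty list also falls back to the defaults (`not scopes` is true for `[]`), which matches the diff's semantics of "no explicit scopes means run everything".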
@@ -235,12 +355,12 @@ def _get_product_id(product: Dict[str, Any]) -> str:
    return str(product.get("id") or product.get("spu_id") or "").strip()


-def _get_analysis_field_aliases(field_name: str) -> Tuple[str, ...]:
-    return _ANALYSIS_FIELD_ALIASES.get(field_name, (field_name,))
+def _get_analysis_field_aliases(field_name: str, schema: AnalysisSchema) -> Tuple[str, ...]:
+    return schema.field_aliases.get(field_name, (field_name,))


-def _get_analysis_field_value(row: Dict[str, Any], field_name: str) -> Any:
-    for alias in _get_analysis_field_aliases(field_name):
+def _get_analysis_field_value(row: Dict[str, Any], field_name: str, schema: AnalysisSchema) -> Any:
+    for alias in _get_analysis_field_aliases(field_name, schema):
        if alias in row:
            return row.get(alias)
    return None
@@ -261,6 +381,7 @@ def _has_meaningful_value(value: Any) -> bool:
def _make_empty_analysis_result(
    product: Dict[str, Any],
    target_lang: str,
+   schema: AnalysisSchema,
    error: Optional[str] = None,
) -> Dict[str, Any]:
    result = {
@@ -268,7 +389,7 @@ def _make_empty_analysis_result(
        "lang": target_lang,
        "title_input": str(product.get("title") or "").strip(),
    }
-   for field in _ANALYSIS_RESULT_FIELDS:
+   for field in schema.result_fields:
        result[field] = ""
    if error:
        result["error"] = error
@@ -279,42 +400,59 @@ def _normalize_analysis_result(
    result: Dict[str, Any],
    product: Dict[str, Any],
    target_lang: str,
+   schema: AnalysisSchema,
) -> Dict[str, Any]:
-   normalized = _make_empty_analysis_result(product, target_lang)
+   normalized = _make_empty_analysis_result(product, target_lang, schema)
    if not isinstance(result, dict):
        return normalized

    normalized["lang"] = str(result.get("lang") or target_lang).strip() or target_lang
-   normalized["title"] = str(result.get("title") or "").strip()
-   normalized["category_path"] = str(result.get("category_path") or "").strip()
    normalized["title_input"] = str(
        product.get("title") or result.get("title_input") or ""
    ).strip()

-   for field in _ANALYSIS_RESULT_FIELDS:
-       if field in {"title", "category_path"}:
-           continue
-       normalized[field] = str(_get_analysis_field_value(result, field) or "").strip()
+   for field in schema.result_fields:
+       normalized[field] = str(_get_analysis_field_value(result, field, schema) or "").strip()

    if result.get("error"):
        normalized["error"] = str(result.get("error"))
    return normalized


-def _has_meaningful_analysis_content(result: Dict[str, Any]) -> bool:
-    return any(_has_meaningful_value(result.get(field)) for field in _ANALYSIS_MEANINGFUL_FIELDS)
+def _has_meaningful_analysis_content(result: Dict[str, Any], schema: AnalysisSchema) -> bool:
+    return any(_has_meaningful_value(result.get(field)) for field in schema.meaningful_fields)
+
+
+def _append_analysis_attributes(
+    target: List[Dict[str, Any]],
+    row: Dict[str, Any],
+    lang: str,
+    schema: AnalysisSchema,
+    field_map: Tuple[Tuple[str, str], ...],
+) -> None:
+    for source_name, output_name in field_map:
+        raw = _get_analysis_field_value(row, source_name, schema)
+        if not raw:
+            continue
+        _append_named_lang_phrase_map(
+            target,
+            name=output_name,
+            lang=lang,
+            raw_value=raw,
+        )


def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang: str) -> None:
    if not row or row.get("error"):
        return

-   anchor_text = str(_get_analysis_field_value(row, "anchor_text") or "").strip()
+   content_schema = _get_analysis_schema("content")
+   anchor_text = str(_get_analysis_field_value(row, "anchor_text", content_schema) or "").strip()
    if anchor_text:
        _append_lang_phrase_map(result["qanchors"], lang=lang, raw_value=anchor_text)

-   for source_name, output_name in _ANALYSIS_ATTRIBUTE_FIELD_MAP:
-       raw = _get_analysis_field_value(row, source_name)
+   for source_name, output_name in _CONTENT_ANALYSIS_ATTRIBUTE_FIELD_MAP:
+       raw = _get_analysis_field_value(row, source_name, content_schema)
        if not raw:
            continue
        _append_named_lang_phrase_map(
@@ -327,6 +465,28 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang:
        _append_lang_phrase_map(result["enriched_tags"], lang=lang, raw_value=raw)


+def _apply_index_taxonomy_row(
+    result: Dict[str, Any],
+    row: Dict[str, Any],
+    lang: str,
+    *,
+    category_taxonomy_profile: Optional[str] = None,
+) -> None:
+    if not row or row.get("error"):
+        return
+
+    _append_analysis_attributes(
+        result["enriched_taxonomy_attributes"],
+        row=row,
+        lang=lang,
+        schema=_get_analysis_schema(
+            "taxonomy",
+            category_taxonomy_profile=category_taxonomy_profile,
+        ),
+        field_map=_get_taxonomy_attribute_field_map(category_taxonomy_profile),
+    )
+
+
def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]:
    item_id = _get_product_id(item)
    return {
@@ -341,6 +501,8 @@ def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]:
def build_index_content_fields(
    items: List[Dict[str, Any]],
    tenant_id: Optional[str] = None,
+   enrichment_scopes: Optional[List[str]] = None,
+   category_taxonomy_profile: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    High-level entry point: generate content-understanding fields aligned with the ES mapping.
@@ -349,18 +511,23 @@ def build_index_content_fields(
    - `id` or `spu_id`
    - `title`
    - Optional `brief` / `description` / `image_url`
+   - Optional `enrichment_scopes`; by default both `generic` and `category_taxonomy` are executed
+   - Optional `category_taxonomy_profile`, defaulting to `apparel`

    Returned item structure:
    - `id`
    - `qanchors`
    - `enriched_tags`
    - `enriched_attributes`
+   - `enriched_taxonomy_attributes`
    - Optional `error`

    Where:
    - `qanchors.{lang}` is an array of phrases
    - `enriched_tags.{lang}` is an array of tags
    """
+   requested_enrichment_scopes = _normalize_enrichment_scopes(enrichment_scopes)
+   normalized_taxonomy_profile = _normalize_category_taxonomy_profile(category_taxonomy_profile)
    normalized_items = [_normalize_index_content_item(item) for item in items]
    if not normalized_items:
        return []
@@ -371,32 +538,72 @@ def build_index_content_fields(
                "qanchors": {},
                "enriched_tags": {},
                "enriched_attributes": [],
+               "enriched_taxonomy_attributes": [],
            }
            for item in normalized_items
        }

    for lang in _CORE_INDEX_LANGUAGES:
-       try:
-           rows = analyze_products(
-               products=normalized_items,
-               target_lang=lang,
-               batch_size=BATCH_SIZE,
-               tenant_id=tenant_id,
-           )
-       except Exception as e:
-           logger.warning("build_index_content_fields failed for lang=%s: %s", lang, e)
-           for item in normalized_items:
-               results_by_id[item["id"]].setdefault("error", str(e))
-           continue
-
-       for row in rows or []:
-           item_id = str(row.get("id") or "").strip()
-           if not item_id or item_id not in results_by_id:
-               continue
-           if row.get("error"):
-               results_by_id[item_id].setdefault("error", row["error"])
-               continue
-           _apply_index_content_row(results_by_id[item_id], row=row, lang=lang)
+       if "generic" in requested_enrichment_scopes:
+           try:
+               rows = analyze_products(
+                   products=normalized_items,
+                   target_lang=lang,
+                   batch_size=BATCH_SIZE,
+                   tenant_id=tenant_id,
+                   analysis_kind="content",
+                   category_taxonomy_profile=normalized_taxonomy_profile,
+               )
+           except Exception as e:
+               logger.warning("build_index_content_fields content enrichment failed for lang=%s: %s", lang, e)
+               for item in normalized_items:
+                   results_by_id[item["id"]].setdefault("error", str(e))
+               continue
+
+           for row in rows or []:
+               item_id = str(row.get("id") or "").strip()
+               if not item_id or item_id not in results_by_id:
+                   continue
+               if row.get("error"):
+                   results_by_id[item_id].setdefault("error", row["error"])
+                   continue
+               _apply_index_content_row(results_by_id[item_id], row=row, lang=lang)
+
+   if "category_taxonomy" in requested_enrichment_scopes:
+       for lang in _CORE_INDEX_LANGUAGES:
+           try:
+               taxonomy_rows = analyze_products(
+                   products=normalized_items,
+                   target_lang=lang,
+                   batch_size=BATCH_SIZE,
+                   tenant_id=tenant_id,
+                   analysis_kind="taxonomy",
+                   category_taxonomy_profile=normalized_taxonomy_profile,
+               )
+           except Exception as e:
+               logger.warning(
+                   "build_index_content_fields taxonomy enrichment failed for profile=%s lang=%s: %s",
+                   normalized_taxonomy_profile,
+                   lang,
+                   e,
+               )
+               for item in normalized_items:
+                   results_by_id[item["id"]].setdefault("error", str(e))
+               continue
+
+           for row in taxonomy_rows or []:
+               item_id = str(row.get("id") or "").strip()
+               if not item_id or item_id not in results_by_id:
+                   continue
+               if row.get("error"):
+                   results_by_id[item_id].setdefault("error", row["error"])
+                   continue
+               _apply_index_taxonomy_row(
+                   results_by_id[item_id],
+                   row=row,
+                   lang=lang,
+                   category_taxonomy_profile=normalized_taxonomy_profile,
+               )

    return [results_by_id[item["id"]] for item in normalized_items]

@@ -463,52 +670,129 @@ def _build_prompt_input_text(product: Dict[str, Any]) -&gt; str: @@ -463,52 +670,129 @@ def _build_prompt_input_text(product: Dict[str, Any]) -&gt; str:
463 return _truncate_by_words(candidate, PROMPT_INPUT_MAX_WORDS) 670 return _truncate_by_words(candidate, PROMPT_INPUT_MAX_WORDS)
464 671
465 672
466 -def _make_anchor_cache_key( 673 +def _make_analysis_cache_key(
467 product: Dict[str, Any], 674 product: Dict[str, Any],
468 target_lang: str, 675 target_lang: str,
  676 + analysis_kind: str,
  677 + category_taxonomy_profile: Optional[str] = None,
469 ) -> str: 678 ) -> str:
470 - """构造缓存 key,仅由 prompt 实际输入文本内容 + 目标语言决定。""" 679 + """构造缓存 key,仅由分析类型、prompt 实际输入文本内容与目标语言决定。"""
  680 + schema = _get_analysis_schema(
  681 + analysis_kind,
  682 + category_taxonomy_profile=category_taxonomy_profile,
  683 + )
471 prompt_input = _build_prompt_input_text(product) 684 prompt_input = _build_prompt_input_text(product)
472 h = hashlib.md5(prompt_input.encode("utf-8")).hexdigest() 685 h = hashlib.md5(prompt_input.encode("utf-8")).hexdigest()
473 - return f"{ANCHOR_CACHE_PREFIX}:{target_lang}:{prompt_input[:4]}{h}" 686 + prompt_contract = {
  687 + "schema_name": schema.name,
  688 + "cache_version": schema.cache_version,
  689 + "system_message": SYSTEM_MESSAGE,
  690 + "user_instruction_template": USER_INSTRUCTION_TEMPLATE,
  691 + "shared_instruction": schema.shared_instruction,
  692 + "assistant_headers": schema.get_headers(target_lang),
  693 + "result_fields": schema.result_fields,
  694 + "meaningful_fields": schema.meaningful_fields,
  695 + "field_aliases": schema.field_aliases,
  696 + }
  697 + prompt_contract_hash = hashlib.md5(
  698 + json.dumps(prompt_contract, ensure_ascii=False, sort_keys=True).encode("utf-8")
  699 + ).hexdigest()[:12]
  700 + return (
  701 + f"{ANCHOR_CACHE_PREFIX}:{analysis_kind}:{prompt_contract_hash}:"
  702 + f"{target_lang}:{prompt_input[:4]}{h}"
  703 + )
474 704
475 705
476 -def _get_cached_anchor_result( 706 +def _make_anchor_cache_key(
477 product: Dict[str, Any], 707 product: Dict[str, Any],
478 target_lang: str, 708 target_lang: str,
  709 +) -> str:
  710 + return _make_analysis_cache_key(product, target_lang, analysis_kind="content")
  711 +
  712 +
  713 +def _get_cached_analysis_result(
  714 + product: Dict[str, Any],
  715 + target_lang: str,
  716 + analysis_kind: str,
  717 + category_taxonomy_profile: Optional[str] = None,
479 ) -> Optional[Dict[str, Any]]: 718 ) -> Optional[Dict[str, Any]]:
480 if not _anchor_redis: 719 if not _anchor_redis:
481 return None 720 return None
  721 + schema = _get_analysis_schema(
  722 + analysis_kind,
  723 + category_taxonomy_profile=category_taxonomy_profile,
  724 + )
482 try: 725 try:
483 - key = _make_anchor_cache_key(product, target_lang) 726 + key = _make_analysis_cache_key(
  727 + product,
  728 + target_lang,
  729 + analysis_kind,
  730 + category_taxonomy_profile=category_taxonomy_profile,
  731 + )
484 raw = _anchor_redis.get(key) 732 raw = _anchor_redis.get(key)
485 if not raw: 733 if not raw:
486 return None 734 return None
487 - result = _normalize_analysis_result(json.loads(raw), product=product, target_lang=target_lang)  
488 - if not _has_meaningful_analysis_content(result): 735 + result = _normalize_analysis_result(
  736 + json.loads(raw),
  737 + product=product,
  738 + target_lang=target_lang,
  739 + schema=schema,
  740 + )
  741 + if not _has_meaningful_analysis_content(result, schema):
489 return None 742 return None
490 return result 743 return result
491 except Exception as e: 744 except Exception as e:
492 - logger.warning(f"Failed to get anchor cache: {e}") 745 + logger.warning("Failed to get %s analysis cache: %s", analysis_kind, e)
493 return None 746 return None
494 747
495 748
496 -def _set_cached_anchor_result( 749 +def _get_cached_anchor_result(
  750 + product: Dict[str, Any],
  751 + target_lang: str,
  752 +) -> Optional[Dict[str, Any]]:
  753 + return _get_cached_analysis_result(product, target_lang, analysis_kind="content")
  754 +
  755 +
  756 +def _set_cached_analysis_result(
497 product: Dict[str, Any], 757 product: Dict[str, Any],
498 target_lang: str, 758 target_lang: str,
499 result: Dict[str, Any], 759 result: Dict[str, Any],
  760 + analysis_kind: str,
  761 + category_taxonomy_profile: Optional[str] = None,
500 ) -> None: 762 ) -> None:
501 if not _anchor_redis: 763 if not _anchor_redis:
502 return 764 return
  765 + schema = _get_analysis_schema(
  766 + analysis_kind,
  767 + category_taxonomy_profile=category_taxonomy_profile,
  768 + )
503 try: 769 try:
504 - normalized = _normalize_analysis_result(result, product=product, target_lang=target_lang)  
505 - if not _has_meaningful_analysis_content(normalized): 770 + normalized = _normalize_analysis_result(
  771 + result,
  772 + product=product,
  773 + target_lang=target_lang,
  774 + schema=schema,
  775 + )
  776 + if not _has_meaningful_analysis_content(normalized, schema):
506 return 777 return
507 - key = _make_anchor_cache_key(product, target_lang) 778 + key = _make_analysis_cache_key(
  779 + product,
  780 + target_lang,
  781 + analysis_kind,
  782 + category_taxonomy_profile=category_taxonomy_profile,
  783 + )
508 ttl = ANCHOR_CACHE_EXPIRE_DAYS * 24 * 3600 784 ttl = ANCHOR_CACHE_EXPIRE_DAYS * 24 * 3600
509 _anchor_redis.setex(key, ttl, json.dumps(normalized, ensure_ascii=False)) 785 _anchor_redis.setex(key, ttl, json.dumps(normalized, ensure_ascii=False))
510 except Exception as e: 786 except Exception as e:
511 - logger.warning(f"Failed to set anchor cache: {e}") 787 + logger.warning("Failed to set %s analysis cache: %s", analysis_kind, e)
  788 +
  789 +
  790 +def _set_cached_anchor_result(
  791 + product: Dict[str, Any],
  792 + target_lang: str,
  793 + result: Dict[str, Any],
  794 +) -> None:
  795 + _set_cached_analysis_result(product, target_lang, result, analysis_kind="content")
512 796
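The hunk above routes the legacy anchor setter through `_set_cached_analysis_result`, which now passes `analysis_kind` (and an optional `category_taxonomy_profile`) down to the key builder. The internals of `_make_analysis_cache_key` are not shown in this diff, so the following is an illustrative sketch of why both values must participate in the key: without them, a "content" result and a taxonomy result for the same product and language would collide on one Redis key. The function name and hashing scheme here are assumptions, not the codebase's implementation.

```python
import hashlib
import json


def make_analysis_cache_key(product, target_lang, analysis_kind,
                            category_taxonomy_profile=None):
    # Illustrative only: the real _make_analysis_cache_key is not shown in
    # this diff. The property it demonstrates is that analysis_kind and the
    # taxonomy profile must be part of the key, or different analysis kinds
    # for the same product/lang would overwrite each other under setex.
    payload = json.dumps(
        {
            "title": product.get("title", ""),
            "lang": target_lang,
            "kind": analysis_kind,
            "profile": category_taxonomy_profile,
        },
        ensure_ascii=False,
        sort_keys=True,
    )
    digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()[:16]
    return f"analysis:{analysis_kind}:{target_lang}:{digest}"


product = {"title": "遥控车"}
content_key = make_analysis_cache_key(product, "zh", "content")
taxonomy_key = make_analysis_cache_key(product, "zh", "taxonomy", "apparel")
assert content_key != taxonomy_key  # kinds never share a cache entry
```

Because the key is deterministic for identical inputs, repeated calls for the same `(product, lang, kind, profile)` tuple still hit the same cached entry.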
513 797
514 def _build_assistant_prefix(headers: List[str]) -> str: 798 def _build_assistant_prefix(headers: List[str]) -> str:
@@ -517,8 +801,8 @@ def _build_assistant_prefix(headers: List[str]) -> str: @@ -517,8 +801,8 @@ def _build_assistant_prefix(headers: List[str]) -> str:
517 return f"{header_line}\n{separator_line}\n" 801 return f"{header_line}\n{separator_line}\n"
518 802
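Only the final `return f"{header_line}\n{separator_line}\n"` of `_build_assistant_prefix` is visible in this hunk, so the row formatting below is a hypothetical reconstruction, not the file's actual body. It shows the Partial Mode idea the function serves: seed the assistant turn with the table header and separator rows so the model can only continue with data rows.

```python
def build_assistant_prefix(headers):
    # Hypothetical reconstruction: the diff shows only the return statement,
    # so the exact cell formatting is assumed. The prefix pins the markdown
    # table header + separator, forcing the completion to start at row 1.
    header_line = "| " + " | ".join(headers) + " |"
    separator_line = "|" + "|".join(["---"] * len(headers)) + "|"
    return f"{header_line}\n{separator_line}\n"


prefix = build_assistant_prefix(["No.", "Product Type", "Color"])
print(prefix)
```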
519 803
520 -def _build_shared_context(products: List[Dict[str, str]]) -> str:  
521 - shared_context = SHARED_ANALYSIS_INSTRUCTION 804 +def _build_shared_context(products: List[Dict[str, str]], schema: AnalysisSchema) -> str:
  805 + shared_context = schema.shared_instruction
522 for idx, product in enumerate(products, 1): 806 for idx, product in enumerate(products, 1):
523 prompt_input = _build_prompt_input_text(product) 807 prompt_input = _build_prompt_input_text(product)
524 shared_context += f"{idx}. {prompt_input}\n" 808 shared_context += f"{idx}. {prompt_input}\n"
@@ -550,16 +834,23 @@ def reset_logged_shared_context_keys() -> None: @@ -550,16 +834,23 @@ def reset_logged_shared_context_keys() -> None:
550 def create_prompt( 834 def create_prompt(
551 products: List[Dict[str, str]], 835 products: List[Dict[str, str]],
552 target_lang: str = "zh", 836 target_lang: str = "zh",
553 -) -> Tuple[str, str, str]: 837 + analysis_kind: str = "content",
  838 + category_taxonomy_profile: Optional[str] = None,
  839 +) -> Tuple[Optional[str], Optional[str], Optional[str]]:
554 """根据目标语言创建共享上下文、本地化输出要求和 Partial Mode assistant 前缀。""" 840 """根据目标语言创建共享上下文、本地化输出要求和 Partial Mode assistant 前缀。"""
555 - markdown_table_headers = LANGUAGE_MARKDOWN_TABLE_HEADERS.get(target_lang) 841 + schema = _get_analysis_schema(
  842 + analysis_kind,
  843 + category_taxonomy_profile=category_taxonomy_profile,
  844 + )
  845 + markdown_table_headers = schema.get_headers(target_lang)
556 if not markdown_table_headers: 846 if not markdown_table_headers:
557 logger.warning( 847 logger.warning(
558 - "Unsupported target_lang for markdown table headers: %s", 848 + "Unsupported target_lang for markdown table headers: kind=%s lang=%s",
  849 + analysis_kind,
559 target_lang, 850 target_lang,
560 ) 851 )
561 return None, None, None 852 return None, None, None
562 - shared_context = _build_shared_context(products) 853 + shared_context = _build_shared_context(products, schema)
563 language_label = SOURCE_LANG_CODE_MAP.get(target_lang, target_lang) 854 language_label = SOURCE_LANG_CODE_MAP.get(target_lang, target_lang)
564 user_prompt = USER_INSTRUCTION_TEMPLATE.format(language=language_label).strip() 855 user_prompt = USER_INSTRUCTION_TEMPLATE.format(language=language_label).strip()
565 assistant_prefix = _build_assistant_prefix(markdown_table_headers) 856 assistant_prefix = _build_assistant_prefix(markdown_table_headers)
@@ -592,6 +883,7 @@ def call_llm( @@ -592,6 +883,7 @@ def call_llm(
592 user_prompt: str, 883 user_prompt: str,
593 assistant_prefix: str, 884 assistant_prefix: str,
594 target_lang: str = "zh", 885 target_lang: str = "zh",
  886 + analysis_kind: str = "content",
595 ) -> Tuple[str, str]: 887 ) -> Tuple[str, str]:
596 """调用大模型 API(带重试机制),使用 Partial Mode 强制 markdown 表格前缀。""" 888 """调用大模型 API(带重试机制),使用 Partial Mode 强制 markdown 表格前缀。"""
597 headers = { 889 headers = {
@@ -631,8 +923,9 @@ def call_llm( @@ -631,8 +923,9 @@ def call_llm(
631 if _mark_shared_context_logged_once(shared_context_key): 923 if _mark_shared_context_logged_once(shared_context_key):
632 logger.info(f"\n{'=' * 80}") 924 logger.info(f"\n{'=' * 80}")
633 logger.info( 925 logger.info(
634 - "LLM Shared Context [model=%s, shared_key=%s, chars=%s] (logged once per process key)", 926 + "LLM Shared Context [model=%s, kind=%s, shared_key=%s, chars=%s] (logged once per process key)",
635 MODEL_NAME, 927 MODEL_NAME,
  928 + analysis_kind,
636 shared_context_key, 929 shared_context_key,
637 len(shared_context), 930 len(shared_context),
638 ) 931 )
@@ -641,8 +934,9 @@ def call_llm( @@ -641,8 +934,9 @@ def call_llm(
641 934
642 verbose_logger.info(f"\n{'=' * 80}") 935 verbose_logger.info(f"\n{'=' * 80}")
643 verbose_logger.info( 936 verbose_logger.info(
644 - "LLM Request [model=%s, lang=%s, shared_key=%s, tail_key=%s]:", 937 + "LLM Request [model=%s, kind=%s, lang=%s, shared_key=%s, tail_key=%s]:",
645 MODEL_NAME, 938 MODEL_NAME,
  939 + analysis_kind,
646 target_lang, 940 target_lang,
647 shared_context_key, 941 shared_context_key,
648 localized_tail_key, 942 localized_tail_key,
@@ -654,7 +948,8 @@ def call_llm( @@ -654,7 +948,8 @@ def call_llm(
654 verbose_logger.info(f"\nAssistant Prefix:\n{assistant_prefix}") 948 verbose_logger.info(f"\nAssistant Prefix:\n{assistant_prefix}")
655 949
656 logger.info( 950 logger.info(
657 - "\nLLM Request Variant [lang=%s, shared_key=%s, tail_key=%s, prompt_chars=%s, prefix_chars=%s]", 951 + "\nLLM Request Variant [kind=%s, lang=%s, shared_key=%s, tail_key=%s, prompt_chars=%s, prefix_chars=%s]",
  952 + analysis_kind,
658 target_lang, 953 target_lang,
659 shared_context_key, 954 shared_context_key,
660 localized_tail_key, 955 localized_tail_key,
@@ -685,8 +980,9 @@ def call_llm( @@ -685,8 +980,9 @@ def call_llm(
685 usage = result.get("usage") or {} 980 usage = result.get("usage") or {}
686 981
687 verbose_logger.info( 982 verbose_logger.info(
688 - "\nLLM Response [model=%s, lang=%s, shared_key=%s, tail_key=%s]:", 983 + "\nLLM Response [model=%s, kind=%s, lang=%s, shared_key=%s, tail_key=%s]:",
689 MODEL_NAME, 984 MODEL_NAME,
  985 + analysis_kind,
690 target_lang, 986 target_lang,
691 shared_context_key, 987 shared_context_key,
692 localized_tail_key, 988 localized_tail_key,
@@ -697,7 +993,8 @@ def call_llm( @@ -697,7 +993,8 @@ def call_llm(
697 full_markdown = _merge_partial_response(assistant_prefix, generated_content) 993 full_markdown = _merge_partial_response(assistant_prefix, generated_content)
698 994
699 logger.info( 995 logger.info(
700 - "\nLLM Response Summary [lang=%s, shared_key=%s, tail_key=%s, generated_chars=%s, completion_tokens=%s, prompt_tokens=%s, total_tokens=%s]", 996 + "\nLLM Response Summary [kind=%s, lang=%s, shared_key=%s, tail_key=%s, generated_chars=%s, completion_tokens=%s, prompt_tokens=%s, total_tokens=%s]",
  997 + analysis_kind,
701 target_lang, 998 target_lang,
702 shared_context_key, 999 shared_context_key,
703 localized_tail_key, 1000 localized_tail_key,
@@ -742,8 +1039,16 @@ def call_llm( @@ -742,8 +1039,16 @@ def call_llm(
742 session.close() 1039 session.close()
743 1040
744 1041
745 -def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]: 1042 +def parse_markdown_table(
  1043 + markdown_content: str,
  1044 + analysis_kind: str = "content",
  1045 + category_taxonomy_profile: Optional[str] = None,
  1046 +) -> List[Dict[str, str]]:
746 """解析markdown表格内容""" 1047 """解析markdown表格内容"""
  1048 + schema = _get_analysis_schema(
  1049 + analysis_kind,
  1050 + category_taxonomy_profile=category_taxonomy_profile,
  1051 + )
747 lines = markdown_content.strip().split("\n") 1052 lines = markdown_content.strip().split("\n")
748 data = [] 1053 data = []
749 data_started = False 1054 data_started = False
@@ -768,22 +1073,16 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]: @@ -768,22 +1073,16 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]:
768 1073
769 # 解析数据行 1074 # 解析数据行
770 parts = [p.strip() for p in line.split("|")] 1075 parts = [p.strip() for p in line.split("|")]
771 - parts = [p for p in parts if p] # 移除空字符串 1076 + if parts and parts[0] == "":
  1077 + parts = parts[1:]
  1078 + if parts and parts[-1] == "":
  1079 + parts = parts[:-1]
772 1080
773 if len(parts) >= 2: 1081 if len(parts) >= 2:
774 - row = {  
775 - "seq_no": parts[0],  
776 - "title": parts[1], # 商品标题(按目标语言)  
777 - "category_path": parts[2] if len(parts) > 2 else "", # 品类路径  
778 - "tags": parts[3] if len(parts) > 3 else "", # 细分标签  
779 - "target_audience": parts[4] if len(parts) > 4 else "", # 适用人群  
780 - "usage_scene": parts[5] if len(parts) > 5 else "", # 使用场景  
781 - "season": parts[6] if len(parts) > 6 else "", # 适用季节  
782 - "key_attributes": parts[7] if len(parts) > 7 else "", # 关键属性  
783 - "material": parts[8] if len(parts) > 8 else "", # 材质说明  
784 - "features": parts[9] if len(parts) > 9 else "", # 功能特点  
785 - "anchor_text": parts[10] if len(parts) > 10 else "", # 锚文本  
786 - } 1082 + row = {"seq_no": parts[0]}
  1083 + for field_index, field_name in enumerate(schema.result_fields, start=1):
  1084 + cell = parts[field_index] if len(parts) > field_index else ""
  1085 + row[field_name] = _normalize_markdown_table_cell(cell)
787 data.append(row) 1086 data.append(row)
788 1087
789 return data 1088 return data
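The change from `parts = [p for p in parts if p]` to trimming only the boundary cells matters once a row can legitimately contain blank columns: filtering out every empty string deletes blank interior cells and shifts all later columns left, corrupting the field mapping. A minimal sketch of the two behaviors (the helper name `split_markdown_row` is illustrative, not from the codebase):

```python
def split_markdown_row(line):
    # New behavior: strip only the empties produced by the leading and
    # trailing "|" so blank interior cells keep their column position.
    parts = [p.strip() for p in line.split("|")]
    if parts and parts[0] == "":
        parts = parts[1:]
    if parts and parts[-1] == "":
        parts = parts[:-1]
    return parts


row = "| 1 | Red Dress |  | summer |"
print(split_markdown_row(row))
# Old behavior: dropping every empty cell silently removes the blank
# third column, so "summer" would be read as the third field.
print([p.strip() for p in row.split("|") if p.strip()])
```

With the boundary-only trim, the row parses to `["1", "Red Dress", "", "summer"]`, keeping `summer` aligned with its fourth column.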
@@ -794,31 +1093,49 @@ def _log_parsed_result_quality( @@ -794,31 +1093,49 @@ def _log_parsed_result_quality(
794 parsed_results: List[Dict[str, str]], 1093 parsed_results: List[Dict[str, str]],
795 target_lang: str, 1094 target_lang: str,
796 batch_num: int, 1095 batch_num: int,
  1096 + analysis_kind: str,
  1097 + category_taxonomy_profile: Optional[str] = None,
797 ) -> None: 1098 ) -> None:
  1099 + schema = _get_analysis_schema(
  1100 + analysis_kind,
  1101 + category_taxonomy_profile=category_taxonomy_profile,
  1102 + )
798 expected = len(batch_data) 1103 expected = len(batch_data)
799 actual = len(parsed_results) 1104 actual = len(parsed_results)
800 if actual != expected: 1105 if actual != expected:
801 logger.warning( 1106 logger.warning(
802 - "Parsed row count mismatch for batch=%s lang=%s: expected=%s actual=%s", 1107 + "Parsed row count mismatch for kind=%s batch=%s lang=%s: expected=%s actual=%s",
  1108 + analysis_kind,
803 batch_num, 1109 batch_num,
804 target_lang, 1110 target_lang,
805 expected, 1111 expected,
806 actual, 1112 actual,
807 ) 1113 )
808 1114
809 - missing_anchor = sum(1 for item in parsed_results if not str(item.get("anchor_text") or "").strip())  
810 - missing_category = sum(1 for item in parsed_results if not str(item.get("category_path") or "").strip())  
811 - missing_title = sum(1 for item in parsed_results if not str(item.get("title") or "").strip()) 1115 + if not schema.quality_fields:
  1116 + logger.info(
  1117 + "Parsed Quality Summary [kind=%s, batch=%s, lang=%s]: rows=%s/%s",
  1118 + analysis_kind,
  1119 + batch_num,
  1120 + target_lang,
  1121 + actual,
  1122 + expected,
  1123 + )
  1124 + return
812 1125
  1126 + missing_summary = ", ".join(
  1127 + f"missing_{field}="
  1128 + f"{sum(1 for item in parsed_results if not str(item.get(field) or '').strip())}"
  1129 + for field in schema.quality_fields
  1130 + )
813 logger.info( 1131 logger.info(
814 - "Parsed Quality Summary [batch=%s, lang=%s]: rows=%s/%s, missing_title=%s, missing_category=%s, missing_anchor=%s", 1132 + "Parsed Quality Summary [kind=%s, batch=%s, lang=%s]: rows=%s/%s, %s",
  1133 + analysis_kind,
815 batch_num, 1134 batch_num,
816 target_lang, 1135 target_lang,
817 actual, 1136 actual,
818 expected, 1137 expected,
819 - missing_title,  
820 - missing_category,  
821 - missing_anchor, 1138 + missing_summary,
822 ) 1139 )
823 1140
824 1141
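The rewritten quality log replaces three hardcoded counters (`missing_title`, `missing_category`, `missing_anchor`) with a comprehension driven by `schema.quality_fields`, so each analysis kind reports only its own fields. A self-contained sketch of that summary expression with sample data (the field names are examples, not tied to any one schema):

```python
parsed_results = [
    {"title": "Red Dress", "category_path": "", "anchor_text": "red dress"},
    {"title": "", "category_path": "Apparel > Dresses", "anchor_text": ""},
]
quality_fields = ("title", "category_path", "anchor_text")

# For each quality field, count rows where the value is missing or blank,
# then join the counts into one log-friendly string.
missing_summary = ", ".join(
    f"missing_{field}="
    f"{sum(1 for item in parsed_results if not str(item.get(field) or '').strip())}"
    for field in quality_fields
)
print(missing_summary)
# missing_title=1, missing_category_path=1, missing_anchor_text=1
```

Because the summary is built from the schema tuple, adding or removing a quality field changes the log line with no edits to the logging code.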
@@ -826,29 +1143,44 @@ def process_batch( @@ -826,29 +1143,44 @@ def process_batch(
826 batch_data: List[Dict[str, str]], 1143 batch_data: List[Dict[str, str]],
827 batch_num: int, 1144 batch_num: int,
828 target_lang: str = "zh", 1145 target_lang: str = "zh",
  1146 + analysis_kind: str = "content",
  1147 + category_taxonomy_profile: Optional[str] = None,
829 ) -> List[Dict[str, Any]]: 1148 ) -> List[Dict[str, Any]]:
830 """处理一个批次的数据""" 1149 """处理一个批次的数据"""
  1150 + schema = _get_analysis_schema(
  1151 + analysis_kind,
  1152 + category_taxonomy_profile=category_taxonomy_profile,
  1153 + )
831 logger.info(f"\n{'#' * 80}") 1154 logger.info(f"\n{'#' * 80}")
832 - logger.info(f"Processing Batch {batch_num} ({len(batch_data)} items)") 1155 + logger.info(
  1156 + "Processing Batch %s (%s items, kind=%s)",
  1157 + batch_num,
  1158 + len(batch_data),
  1159 + analysis_kind,
  1160 + )
833 1161
834 # 创建提示词 1162 # 创建提示词
835 shared_context, user_prompt, assistant_prefix = create_prompt( 1163 shared_context, user_prompt, assistant_prefix = create_prompt(
836 batch_data, 1164 batch_data,
837 target_lang=target_lang, 1165 target_lang=target_lang,
  1166 + analysis_kind=analysis_kind,
  1167 + category_taxonomy_profile=category_taxonomy_profile,
838 ) 1168 )
839 1169
840 # 如果提示词创建失败(例如不支持的 target_lang),本次批次整体失败,不再继续调用 LLM 1170 # 如果提示词创建失败(例如不支持的 target_lang),本次批次整体失败,不再继续调用 LLM
841 if shared_context is None or user_prompt is None or assistant_prefix is None: 1171 if shared_context is None or user_prompt is None or assistant_prefix is None:
842 logger.error( 1172 logger.error(
843 - "Failed to create prompt for batch %s, target_lang=%s; " 1173 + "Failed to create prompt for batch %s, kind=%s, target_lang=%s; "
844 "marking entire batch as failed without calling LLM", 1174 "marking entire batch as failed without calling LLM",
845 batch_num, 1175 batch_num,
  1176 + analysis_kind,
846 target_lang, 1177 target_lang,
847 ) 1178 )
848 return [ 1179 return [
849 _make_empty_analysis_result( 1180 _make_empty_analysis_result(
850 item, 1181 item,
851 target_lang, 1182 target_lang,
  1183 + schema,
852 error=f"prompt_creation_failed: unsupported target_lang={target_lang}", 1184 error=f"prompt_creation_failed: unsupported target_lang={target_lang}",
853 ) 1185 )
854 for item in batch_data 1186 for item in batch_data
@@ -861,11 +1193,23 @@ def process_batch( @@ -861,11 +1193,23 @@ def process_batch(
861 user_prompt, 1193 user_prompt,
862 assistant_prefix, 1194 assistant_prefix,
863 target_lang=target_lang, 1195 target_lang=target_lang,
  1196 + analysis_kind=analysis_kind,
864 ) 1197 )
865 1198
866 # 解析结果 1199 # 解析结果
867 - parsed_results = parse_markdown_table(raw_response)  
868 - _log_parsed_result_quality(batch_data, parsed_results, target_lang, batch_num) 1200 + parsed_results = parse_markdown_table(
  1201 + raw_response,
  1202 + analysis_kind=analysis_kind,
  1203 + category_taxonomy_profile=category_taxonomy_profile,
  1204 + )
  1205 + _log_parsed_result_quality(
  1206 + batch_data,
  1207 + parsed_results,
  1208 + target_lang,
  1209 + batch_num,
  1210 + analysis_kind,
  1211 + category_taxonomy_profile,
  1212 + )
869 1213
870 logger.info(f"\nParsed Results ({len(parsed_results)} items):") 1214 logger.info(f"\nParsed Results ({len(parsed_results)} items):")
871 logger.info(json.dumps(parsed_results, ensure_ascii=False, indent=2)) 1215 logger.info(json.dumps(parsed_results, ensure_ascii=False, indent=2))
@@ -879,10 +1223,12 @@ def process_batch( @@ -879,10 +1223,12 @@ def process_batch(
879 parsed_item, 1223 parsed_item,
880 product=source_product, 1224 product=source_product,
881 target_lang=target_lang, 1225 target_lang=target_lang,
  1226 + schema=schema,
882 ) 1227 )
883 results_with_ids.append(result) 1228 results_with_ids.append(result)
884 logger.info( 1229 logger.info(
885 - "Mapped: seq=%s -> original_id=%s", 1230 + "Mapped: kind=%s seq=%s -> original_id=%s",
  1231 + analysis_kind,
886 parsed_item.get("seq_no"), 1232 parsed_item.get("seq_no"),
887 source_product.get("id"), 1233 source_product.get("id"),
888 ) 1234 )
@@ -890,6 +1236,7 @@ def process_batch( @@ -890,6 +1236,7 @@ def process_batch(
890 # 保存批次 JSON 日志到独立文件 1236 # 保存批次 JSON 日志到独立文件
891 batch_log = { 1237 batch_log = {
892 "batch_num": batch_num, 1238 "batch_num": batch_num,
  1239 + "analysis_kind": analysis_kind,
893 "timestamp": datetime.now().isoformat(), 1240 "timestamp": datetime.now().isoformat(),
894 "input_products": batch_data, 1241 "input_products": batch_data,
895 "raw_response": raw_response, 1242 "raw_response": raw_response,
@@ -900,7 +1247,10 @@ def process_batch( @@ -900,7 +1247,10 @@ def process_batch(
900 1247
901 # 并发写 batch json 日志时,保证文件名唯一避免覆盖 1248 # 并发写 batch json 日志时,保证文件名唯一避免覆盖
902 batch_call_id = uuid.uuid4().hex[:12] 1249 batch_call_id = uuid.uuid4().hex[:12]
903 - batch_log_file = LOG_DIR / f"batch_{batch_num:04d}_{timestamp}_{batch_call_id}.json" 1250 + batch_log_file = (
  1251 + LOG_DIR
  1252 + / f"batch_{analysis_kind}_{batch_num:04d}_{timestamp}_{batch_call_id}.json"
  1253 + )
904 with open(batch_log_file, "w", encoding="utf-8") as f: 1254 with open(batch_log_file, "w", encoding="utf-8") as f:
905 json.dump(batch_log, f, ensure_ascii=False, indent=2) 1255 json.dump(batch_log, f, ensure_ascii=False, indent=2)
906 1256
@@ -912,7 +1262,7 @@ def process_batch( @@ -912,7 +1262,7 @@ def process_batch(
912 logger.error(f"Error processing batch {batch_num}: {str(e)}", exc_info=True) 1262 logger.error(f"Error processing batch {batch_num}: {str(e)}", exc_info=True)
913 # 返回空结果,保持ID映射 1263 # 返回空结果,保持ID映射
914 return [ 1264 return [
915 - _make_empty_analysis_result(item, target_lang, error=str(e)) 1265 + _make_empty_analysis_result(item, target_lang, schema, error=str(e))
916 for item in batch_data 1266 for item in batch_data
917 ] 1267 ]
918 1268
@@ -922,6 +1272,8 @@ def analyze_products( @@ -922,6 +1272,8 @@ def analyze_products(
922 target_lang: str = "zh", 1272 target_lang: str = "zh",
923 batch_size: Optional[int] = None, 1273 batch_size: Optional[int] = None,
924 tenant_id: Optional[str] = None, 1274 tenant_id: Optional[str] = None,
  1275 + analysis_kind: str = "content",
  1276 + category_taxonomy_profile: Optional[str] = None,
925 ) -> List[Dict[str, Any]]: 1277 ) -> List[Dict[str, Any]]:
926 """ 1278 """
927 库调用入口:根据输入+语言,返回锚文本及各维度信息。 1279 库调用入口:根据输入+语言,返回锚文本及各维度信息。
@@ -937,6 +1289,10 @@ def analyze_products( @@ -937,6 +1289,10 @@ def analyze_products(
937 if not products: 1289 if not products:
938 return [] 1290 return []
939 1291
  1292 + _get_analysis_schema(
  1293 + analysis_kind,
  1294 + category_taxonomy_profile=category_taxonomy_profile,
  1295 + )
940 results_by_index: List[Optional[Dict[str, Any]]] = [None] * len(products) 1296 results_by_index: List[Optional[Dict[str, Any]]] = [None] * len(products)
941 uncached_items: List[Tuple[int, Dict[str, str]]] = [] 1297 uncached_items: List[Tuple[int, Dict[str, str]]] = []
942 1298
@@ -946,11 +1302,16 @@ def analyze_products( @@ -946,11 +1302,16 @@ def analyze_products(
946 uncached_items.append((idx, product)) 1302 uncached_items.append((idx, product))
947 continue 1303 continue
948 1304
949 - cached = _get_cached_anchor_result(product, target_lang) 1305 + cached = _get_cached_analysis_result(
  1306 + product,
  1307 + target_lang,
  1308 + analysis_kind,
  1309 + category_taxonomy_profile=category_taxonomy_profile,
  1310 + )
950 if cached: 1311 if cached:
951 logger.info( 1312 logger.info(
952 f"[analyze_products] Cache hit for title='{title[:50]}...', " 1313 f"[analyze_products] Cache hit for title='{title[:50]}...', "
953 - f"lang={target_lang}" 1314 + f"kind={analysis_kind}, lang={target_lang}"
954 ) 1315 )
955 results_by_index[idx] = cached 1316 results_by_index[idx] = cached
956 continue 1317 continue
@@ -979,9 +1340,15 @@ def analyze_products( @@ -979,9 +1340,15 @@ def analyze_products(
979 for batch_num, batch_slice, batch in batch_jobs: 1340 for batch_num, batch_slice, batch in batch_jobs:
980 logger.info( 1341 logger.info(
981 f"[analyze_products] Processing batch {batch_num}/{total_batches}, " 1342 f"[analyze_products] Processing batch {batch_num}/{total_batches}, "
982 - f"size={len(batch)}, target_lang={target_lang}" 1343 + f"size={len(batch)}, kind={analysis_kind}, target_lang={target_lang}"
  1344 + )
  1345 + batch_results = process_batch(
  1346 + batch,
  1347 + batch_num=batch_num,
  1348 + target_lang=target_lang,
  1349 + analysis_kind=analysis_kind,
  1350 + category_taxonomy_profile=category_taxonomy_profile,
983 ) 1351 )
984 - batch_results = process_batch(batch, batch_num=batch_num, target_lang=target_lang)  
985 1352
986 for (original_idx, product), item in zip(batch_slice, batch_results): 1353 for (original_idx, product), item in zip(batch_slice, batch_results):
987 results_by_index[original_idx] = item 1354 results_by_index[original_idx] = item
@@ -992,7 +1359,13 @@ def analyze_products( @@ -992,7 +1359,13 @@ def analyze_products(
992 # 不缓存错误结果,避免放大临时故障 1359 # 不缓存错误结果,避免放大临时故障
993 continue 1360 continue
994 try: 1361 try:
995 - _set_cached_anchor_result(product, target_lang, item) 1362 + _set_cached_analysis_result(
  1363 + product,
  1364 + target_lang,
  1365 + item,
  1366 + analysis_kind,
  1367 + category_taxonomy_profile=category_taxonomy_profile,
  1368 + )
996 except Exception: 1369 except Exception:
997 # 已在内部记录 warning 1370 # 已在内部记录 warning
998 pass 1371 pass
@@ -1000,10 +1373,11 @@ def analyze_products( @@ -1000,10 +1373,11 @@ def analyze_products(
1000 max_workers = min(CONTENT_UNDERSTANDING_MAX_WORKERS, len(batch_jobs)) 1373 max_workers = min(CONTENT_UNDERSTANDING_MAX_WORKERS, len(batch_jobs))
1001 logger.info( 1374 logger.info(
1002 "[analyze_products] Using ThreadPoolExecutor for uncached batches: " 1375 "[analyze_products] Using ThreadPoolExecutor for uncached batches: "
1003 - "max_workers=%s, total_batches=%s, bs=%s, target_lang=%s", 1376 + "max_workers=%s, total_batches=%s, bs=%s, kind=%s, target_lang=%s",
1004 max_workers, 1377 max_workers,
1005 total_batches, 1378 total_batches,
1006 bs, 1379 bs,
  1380 + analysis_kind,
1007 target_lang, 1381 target_lang,
1008 ) 1382 )
1009 1383
@@ -1013,7 +1387,12 @@ def analyze_products( @@ -1013,7 +1387,12 @@ def analyze_products(
1013 future_by_batch_num: Dict[int, Any] = {} 1387 future_by_batch_num: Dict[int, Any] = {}
1014 for batch_num, _batch_slice, batch in batch_jobs: 1388 for batch_num, _batch_slice, batch in batch_jobs:
1015 future_by_batch_num[batch_num] = executor.submit( 1389 future_by_batch_num[batch_num] = executor.submit(
1016 - process_batch, batch, batch_num=batch_num, target_lang=target_lang 1390 + process_batch,
  1391 + batch,
  1392 + batch_num=batch_num,
  1393 + target_lang=target_lang,
  1394 + analysis_kind=analysis_kind,
  1395 + category_taxonomy_profile=category_taxonomy_profile,
1017 ) 1396 )
1018 1397
1019 # 按 batch_num 回填,确保输出稳定(results_by_index 是按原始 input index 映射的) 1398 # 按 batch_num 回填,确保输出稳定(results_by_index 是按原始 input index 映射的)
@@ -1028,7 +1407,13 @@ def analyze_products( @@ -1028,7 +1407,13 @@ def analyze_products(
1028 # 不缓存错误结果,避免放大临时故障 1407 # 不缓存错误结果,避免放大临时故障
1029 continue 1408 continue
1030 try: 1409 try:
1031 - _set_cached_anchor_result(product, target_lang, item) 1410 + _set_cached_analysis_result(
  1411 + product,
  1412 + target_lang,
  1413 + item,
  1414 + analysis_kind,
  1415 + category_taxonomy_profile=category_taxonomy_profile,
  1416 + )
1032 except Exception: 1417 except Exception:
1033 # 已在内部记录 warning 1418 # 已在内部记录 warning
1034 pass 1419 pass
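The concurrent path above submits one future per batch keyed by `batch_num` and then backfills results in batch order, so output stays deterministic no matter which future completes first. A condensed sketch of that pattern, with a stand-in `process_batch` (the real one takes more arguments, as the diff shows):

```python
from concurrent.futures import ThreadPoolExecutor


def process_batch(batch, batch_num):
    # Stand-in for the real process_batch: one result string per input item.
    return [f"batch{batch_num}:{item}" for item in batch]


batch_jobs = [(1, ["a", "b"]), (2, ["c"]), (3, ["d", "e"])]
results = []
with ThreadPoolExecutor(max_workers=2) as executor:
    future_by_batch_num = {
        batch_num: executor.submit(process_batch, batch, batch_num)
        for batch_num, batch in batch_jobs
    }
    # Collect in batch_num order so ordering never depends on
    # which worker thread finishes first.
    for batch_num in sorted(future_by_batch_num):
        results.extend(future_by_batch_num[batch_num].result())

print(results)
# ['batch1:a', 'batch1:b', 'batch2:c', 'batch3:d', 'batch3:e']
```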
indexer/product_enrich_prompts.py
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 2
3 -from typing import Any, Dict 3 +from typing import Any, Dict, Tuple
4 4
5 SYSTEM_MESSAGE = ( 5 SYSTEM_MESSAGE = (
6 "You are an e-commerce product annotator. " 6 "You are an e-commerce product annotator. "
@@ -33,6 +33,337 @@ Input product list: @@ -33,6 +33,337 @@ Input product list:
33 USER_INSTRUCTION_TEMPLATE = """Please strictly return a Markdown table following the given columns in the specified language. For any column containing multiple values, separate them with commas. Do not add any other explanation. 33 USER_INSTRUCTION_TEMPLATE = """Please strictly return a Markdown table following the given columns in the specified language. For any column containing multiple values, separate them with commas. Do not add any other explanation.
34 Language: {language}""" 34 Language: {language}"""
35 35
  36 +def _taxonomy_field(
  37 + key: str,
  38 + label: str,
  39 + description: str,
  40 + zh_label: str | None = None,
  41 +) -> Dict[str, str]:
  42 + return {
  43 + "key": key,
  44 + "label": label,
  45 + "description": description,
  46 + "zh_label": zh_label or label,
  47 + }
  48 +
  49 +
  50 +def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[str, str], ...]) -> str:
  51 + lines = [
  52 + f"Analyze each input product text and fill the columns below using a {profile_label} attribute taxonomy.",
  53 + "",
  54 + "Output columns:",
  55 + ]
  56 + for idx, field in enumerate(fields, start=1):
  57 + lines.append(f"{idx}. {field['label']}: {field['description']}")
  58 + lines.extend(
  59 + [
  60 + "",
  61 + "Rules:",
  62 + "- Keep the same row order and row count as input.",
  63 + "- Leave blank if not applicable, unmentioned, or unsupported.",
  64 + "- Use concise, standardized ecommerce wording.",
  65 + "- If multiple values, separate with commas.",
  66 + "",
  67 + "Input product list:",
  68 + ]
  69 + )
  70 + return "\n".join(lines)
  71 +
  72 +
  73 +def _make_taxonomy_profile(
  74 + profile_label: str,
  75 + fields: Tuple[Dict[str, str], ...],
  76 +) -> Dict[str, Any]:
  77 + headers = {
  78 + "en": ["No.", *[field["label"] for field in fields]],
  79 + "zh": ["序号", *[field["zh_label"] for field in fields]],
  80 + }
  81 + return {
  82 + "profile_label": profile_label,
  83 + "fields": fields,
  84 + "shared_instruction": _build_taxonomy_shared_instruction(profile_label, fields),
  85 + "markdown_table_headers": headers,
  86 + }
  87 +
  88 +
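As the helpers above show, the per-language table headers are derived entirely from each field's `label` and `zh_label`, so appending a field to a taxonomy tuple extends both header rows automatically. A condensed, runnable illustration of that header construction (field set trimmed to two entries for brevity):

```python
from typing import Dict, Tuple


def taxonomy_field(key, label, description, zh_label=None) -> Dict[str, str]:
    # Mirrors _taxonomy_field: zh_label falls back to the English label.
    return {
        "key": key,
        "label": label,
        "description": description,
        "zh_label": zh_label or label,
    }


fields: Tuple[Dict[str, str], ...] = (
    taxonomy_field("product_type", "Product Type", "concise category label", "品类"),
    taxonomy_field("color", "Color", "specific color name", "主颜色"),
)

# Same header derivation as _make_taxonomy_profile: a sequence-number
# column followed by one column per taxonomy field.
headers = {
    "en": ["No.", *[f["label"] for f in fields]],
    "zh": ["序号", *[f["zh_label"] for f in fields]],
}
print(headers["en"])  # ['No.', 'Product Type', 'Color']
print(headers["zh"])  # ['序号', '品类', '主颜色']
```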
  89 +APPAREL_TAXONOMY_FIELDS = (
  90 + _taxonomy_field("product_type", "Product Type", "concise ecommerce apparel category label, not a full marketing title", "品类"),
  91 + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"),
  92 + _taxonomy_field("age_group", "Age Group", "only if clearly implied, e.g. adults, kids, teens, toddlers, babies", "年龄段"),
  93 + _taxonomy_field("season", "Season", "season(s) or all-season suitability only if supported", "适用季节"),
  94 + _taxonomy_field("fit", "Fit", "body closeness, e.g. slim, regular, relaxed, oversized, fitted", "版型"),
  95 + _taxonomy_field("silhouette", "Silhouette", "overall garment shape, e.g. straight, A-line, boxy, tapered, bodycon, wide-leg", "廓形"),
  96 + _taxonomy_field("neckline", "Neckline", "neckline type when applicable, e.g. crew neck, V-neck, hooded, collared, square neck", "领型"),
  97 + _taxonomy_field("sleeve_length_type", "Sleeve Length Type", "sleeve length only, e.g. sleeveless, short sleeve, long sleeve, three-quarter sleeve", "袖长类型"),
  98 + _taxonomy_field("sleeve_style", "Sleeve Style", "sleeve design only, e.g. puff sleeve, raglan sleeve, batwing sleeve, bell sleeve", "袖型"),
  99 + _taxonomy_field("strap_type", "Strap Type", "strap design when applicable, e.g. spaghetti strap, wide strap, halter strap, adjustable strap", "肩带设计"),
  100 + _taxonomy_field("rise_waistline", "Rise / Waistline", "waist placement when applicable, e.g. high rise, mid rise, low rise, empire waist", "腰型"),
  101 + _taxonomy_field("leg_shape", "Leg Shape", "for bottoms only, e.g. straight leg, wide leg, flare leg, tapered leg, skinny leg", "裤型"),
  102 + _taxonomy_field("skirt_shape", "Skirt Shape", "for skirts only, e.g. A-line, pleated, pencil, mermaid", "裙型"),
  103 + _taxonomy_field("length_type", "Length Type", "design length only, not size, e.g. cropped, regular, longline, mini, midi, maxi, ankle length, full length", "长度类型"),
  104 + _taxonomy_field("closure_type", "Closure Type", "fastening method when applicable, e.g. zipper, button, drawstring, elastic waist, hook-and-loop", "闭合方式"),
  105 + _taxonomy_field("design_details", "Design Details", "construction or visual details, e.g. ruched, ruffled, pleated, cut-out, layered, distressed, split hem", "设计细节"),
  106 + _taxonomy_field("fabric", "Fabric", "fabric type only, e.g. denim, knit, chiffon, jersey, fleece, cotton twill", "面料"),
  107 + _taxonomy_field("material_composition", "Material Composition", "fiber content or blend only if stated, e.g. cotton, polyester, spandex, linen blend, 95% cotton 5% elastane", "成分"),
  108 + _taxonomy_field("fabric_properties", "Fabric Properties", "inherent fabric traits, e.g. stretch, breathable, lightweight, soft-touch, water-resistant", "面料特性"),
  109 + _taxonomy_field("clothing_features", "Clothing Features", "product features, e.g. lined, reversible, hooded, packable, padded, pocketed", "服装特征"),
  110 + _taxonomy_field("functional_benefits", "Functional Benefits", "wearer benefits, e.g. moisture-wicking, thermal insulation, UV protection, easy care, supportive compression", "功能"),
  111 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  112 + _taxonomy_field("color_family", "Color Family", "normalized broad retail color group, e.g. black, white, blue, green, red, pink, beige, brown, gray", "色系"),
  113 + _taxonomy_field("print_pattern", "Print / Pattern", "surface pattern when applicable, e.g. solid, striped, plaid, floral, graphic, animal print", "印花 / 图案"),
  114 + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use occasion only if supported, e.g. office, casual wear, streetwear, lounge, workout, outdoor", "适用场景"),
  115 + _taxonomy_field("style_aesthetic", "Style Aesthetic", "overall style only if supported, e.g. minimalist, streetwear, athleisure, smart casual, romantic, playful", "风格"),
  116 +)
  117 +
  118 +THREE_C_TAXONOMY_FIELDS = (
  119 + _taxonomy_field("product_type", "Product Type", "concise 3C accessory or peripheral category label", "品类"),
  120 + _taxonomy_field("compatible_device", "Compatible Device / Model", "supported device family, series, model, or form factor when clearly stated", "适配设备 / 型号"),
  121 + _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, wireless, Bluetooth, Wi-Fi, NFC, or 2.4G", "连接方式"),
  122 + _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant connector or port, e.g. USB-C, Lightning, HDMI, AUX, RJ45", "接口 / 端口类型"),
  123 + _taxonomy_field("power_charging", "Power Source / Charging", "charging or power mode, e.g. battery powered, fast charging, rechargeable, plug-in", "供电 / 充电方式"),
  124 + _taxonomy_field("key_features", "Key Features", "primary hardware features such as noise cancelling, foldable, magnetic, backlit, waterproof", "关键特征"),
  125 + _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported", "材质 / 表面处理"),
  126 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  127 + _taxonomy_field("pack_size", "Pack Size", "unit count or bundle size when stated", "包装规格"),
  128 + _taxonomy_field("use_case", "Use Case", "intended usage such as travel, office, gaming, car, charging, streaming", "使用场景"),
  129 +)
  130 +
  131 +BAGS_TAXONOMY_FIELDS = (
  132 + _taxonomy_field("product_type", "Product Type", "concise bag category such as backpack, tote bag, crossbody bag, luggage, or wallet", "品类"),
  133 + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"),
  134 + _taxonomy_field("carry_style", "Carry Style", "how the bag is worn or carried, e.g. handheld, shoulder, crossbody, backpack", "携带方式"),
  135 + _taxonomy_field("size_capacity", "Size / Capacity", "size tier or capacity when supported, e.g. mini, large capacity, 20L", "尺寸 / 容量"),
  136 + _taxonomy_field("material", "Material", "main bag material such as leather, nylon, canvas, PU, straw", "材质"),
  137 + _taxonomy_field("closure_type", "Closure Type", "bag closure such as zipper, flap, buckle, drawstring, magnetic snap", "闭合方式"),
  138 + _taxonomy_field("structure_compartments", "Structure / Compartments", "organizational structure such as multi-pocket, laptop sleeve, card slots, expandable", "结构 / 分层"),
  139 + _taxonomy_field("strap_handle_type", "Strap / Handle Type", "strap or handle design such as chain strap, top handle, adjustable strap", "肩带 / 提手类型"),
  140 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  141 + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as commute, travel, evening, school, casual", "适用场景"),
  142 +)
  143 +
  144 +PET_SUPPLIES_TAXONOMY_FIELDS = (
  145 + _taxonomy_field("product_type", "Product Type", "concise pet supplies category label", "品类"),
  146 + _taxonomy_field("pet_type", "Pet Type", "target pet such as dog, cat, bird, fish, hamster", "宠物类型"),
  147 + _taxonomy_field("breed_size", "Breed Size", "pet size or breed size when stated, e.g. small breed, large dogs", "体型 / 品种大小"),
  148 + _taxonomy_field("life_stage", "Life Stage", "pet age stage when supported, e.g. puppy, kitten, adult, senior", "成长阶段"),
  149 + _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredient composition when supported", "材质 / 成分"),
  150 + _taxonomy_field("flavor_scent", "Flavor / Scent", "flavor or scent when applicable", "口味 / 气味"),
  151 + _taxonomy_field("key_features", "Key Features", "primary attributes such as interactive, leak-proof, orthopedic, washable, elevated", "关键特征"),
  152 + _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as dental care, calming, digestion support, joint support", "功能"),
  153 + _taxonomy_field("size_capacity", "Size / Capacity", "size, count, or net content when stated", "尺寸 / 容量"),
  154 + _taxonomy_field("use_scenario", "Use Scenario", "usage such as feeding, training, grooming, travel, indoor play", "使用场景"),
  155 +)
  156 +
  157 +ELECTRONICS_TAXONOMY_FIELDS = (
  158 + _taxonomy_field("product_type", "Product Type", "concise electronics device or component category label", "品类"),
  159 + _taxonomy_field("device_category", "Device Category / Compatibility", "supported platform, component class, or compatible device family when stated", "设备类别 / 兼容性"),
  160 + _taxonomy_field("power_voltage", "Power / Voltage", "power, voltage, wattage, or battery spec when supported", "功率 / 电压"),
  161 + _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, Bluetooth, Wi-Fi, RF, or smart app control", "连接方式"),
  162 + _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant port or interface such as USB-C, AC plug type, HDMI, SATA", "接口 / 端口类型"),
  163 + _taxonomy_field("capacity_storage", "Capacity / Storage", "capacity or storage spec such as 256GB, 2TB, 5000mAh", "容量 / 存储"),
  164 + _taxonomy_field("key_features", "Key Features", "main product features such as touch control, HD display, noise reduction, smart control", "关键特征"),
  165 + _taxonomy_field("material_finish", "Material / Finish", "main housing material or finish when supported", "材质 / 表面处理"),
  166 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  167 + _taxonomy_field("use_case", "Use Case", "intended use such as home entertainment, office, charging, security, repair", "使用场景"),
  168 +)
  169 +
  170 +OUTDOOR_TAXONOMY_FIELDS = (
  171 + _taxonomy_field("product_type", "Product Type", "concise outdoor gear category label", "品类"),
  172 + _taxonomy_field("activity_type", "Activity Type", "primary outdoor activity such as camping, hiking, fishing, climbing, travel", "活动类型"),
  173 + _taxonomy_field("season_weather", "Season / Weather", "season or weather suitability when supported", "适用季节 / 天气"),
  174 + _taxonomy_field("material", "Material", "main material such as aluminum, ripstop nylon, stainless steel, EVA", "材质"),
  175 + _taxonomy_field("capacity_size", "Capacity / Size", "size, length, or capacity when stated", "容量 / 尺寸"),
  176 + _taxonomy_field("protection_resistance", "Protection / Resistance", "resistance or protection such as waterproof, UV resistant, windproof", "防护 / 耐受性"),
  177 + _taxonomy_field("key_features", "Key Features", "primary gear attributes such as foldable, lightweight, insulated, non-slip", "关键特征"),
  178 + _taxonomy_field("portability_packability", "Portability / Packability", "carry or storage trait such as collapsible, compact, ultralight, packable", "便携 / 收纳性"),
  179 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  180 + _taxonomy_field("use_scenario", "Use Scenario", "likely use setting such as campsite, trail, survival kit, beach, picnic", "使用场景"),
  181 +)
  182 +
  183 +HOME_APPLIANCES_TAXONOMY_FIELDS = (
  184 + _taxonomy_field("product_type", "Product Type", "concise home appliance category label", "品类"),
  185 + _taxonomy_field("appliance_category", "Appliance Category", "functional class such as kitchen appliance, cleaning appliance, personal care appliance", "家电类别"),
  186 + _taxonomy_field("power_voltage", "Power / Voltage", "wattage, voltage, plug type, or power supply when supported", "功率 / 电压"),
  187 + _taxonomy_field("capacity_coverage", "Capacity / Coverage", "capacity or coverage metric such as 1.5L, 20L, 40sqm", "容量 / 覆盖范围"),
  188 + _taxonomy_field("control_method", "Control Method", "operation method such as touch, knob, remote, app control", "控制方式"),
  189 + _taxonomy_field("installation_type", "Installation Type", "setup style such as countertop, handheld, portable, wall-mounted, built-in", "安装方式"),
  190 + _taxonomy_field("key_features", "Key Features", "main product features such as timer, steam, HEPA filter, self-cleaning", "关键特征"),
  191 + _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported", "材质 / 表面处理"),
  192 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  193 + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as cooking, cleaning, grooming, cooling, air treatment", "使用场景"),
  194 +)
  195 +
  196 +HOME_LIVING_TAXONOMY_FIELDS = (
  197 + _taxonomy_field("product_type", "Product Type", "concise home and living category label", "品类"),
  198 + _taxonomy_field("room_placement", "Room / Placement", "intended room or placement such as bedroom, kitchen, bathroom, desktop", "适用空间 / 摆放位置"),
  199 + _taxonomy_field("material", "Material", "main material such as wood, ceramic, cotton, glass, metal", "材质"),
  200 + _taxonomy_field("style", "Style", "home style such as modern, farmhouse, minimalist, boho, Nordic", "风格"),
  201 + _taxonomy_field("size_dimensions", "Size / Dimensions", "size or dimensions when stated", "尺寸 / 规格"),
  202 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  203 + _taxonomy_field("pattern_finish", "Pattern / Finish", "surface pattern or finish such as solid, marble, matte, ribbed", "图案 / 表面处理"),
  204 + _taxonomy_field("key_features", "Key Features", "main product features such as stackable, washable, blackout, space-saving", "关键特征"),
  205 + _taxonomy_field("assembly_installation", "Assembly / Installation", "assembly or installation trait when supported", "组装 / 安装"),
  206 + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as storage, dining, decor, sleep, organization", "使用场景"),
  207 +)
  208 +
  209 +WIGS_TAXONOMY_FIELDS = (
  210 + _taxonomy_field("product_type", "Product Type", "concise wig or hairpiece category label", "品类"),
  211 + _taxonomy_field("hair_material", "Hair Material", "hair material such as human hair, synthetic fiber, heat-resistant fiber", "发丝材质"),
  212 + _taxonomy_field("hair_texture", "Hair Texture", "texture or curl pattern such as straight, body wave, curly, kinky", "发质纹理"),
  213 + _taxonomy_field("hair_length", "Hair Length", "hair length when stated", "发长"),
  214 + _taxonomy_field("hair_color", "Hair Color", "specific hair color or blend when available", "发色"),
  215 + _taxonomy_field("cap_construction", "Cap Construction", "cap type such as full lace, lace front, glueless, U part", "帽网结构"),
  216 + _taxonomy_field("lace_area_part_type", "Lace Area / Part Type", "lace size or part style such as 13x4 lace, middle part, T part", "蕾丝面积 / 分缝类型"),
  217 + _taxonomy_field("density_volume", "Density / Volume", "hair density or fullness when supported", "密度 / 发量"),
  218 + _taxonomy_field("style_bang_type", "Style / Bang Type", "style cue such as bob, pixie, layered, with bangs", "款式 / 刘海类型"),
  219 + _taxonomy_field("occasion_end_use", "Occasion / End Use", "intended use such as daily wear, cosplay, protective style, party", "适用场景"),
  220 +)
  221 +
  222 +BEAUTY_TAXONOMY_FIELDS = (
  223 + _taxonomy_field("product_type", "Product Type", "concise beauty or cosmetics category label", "品类"),
  224 + _taxonomy_field("target_area", "Target Area", "target area such as face, lips, eyes, nails, hair, body", "适用部位"),
  225 + _taxonomy_field("skin_hair_type", "Skin Type / Hair Type", "suitable skin or hair type when supported", "肤质 / 发质"),
  226 + _taxonomy_field("finish_effect", "Finish / Effect", "cosmetic finish or effect such as matte, dewy, volumizing, brightening", "妆效 / 效果"),
  227 + _taxonomy_field("key_ingredients", "Key Ingredients", "notable ingredients when stated", "关键成分"),
  228 + _taxonomy_field("shade_color", "Shade / Color", "specific shade or color when available", "色号 / 颜色"),
  229 + _taxonomy_field("scent", "Scent", "fragrance or scent only when supported", "香味"),
  230 + _taxonomy_field("formulation", "Formulation", "product form such as cream, serum, powder, gel, stick", "剂型 / 形态"),
  231 + _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as hydration, anti-aging, long-wear, repair, sun protection", "功能"),
  232 + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as daily routine, salon, travel, evening makeup", "使用场景"),
  233 +)
  234 +
  235 +ACCESSORIES_TAXONOMY_FIELDS = (
  236 + _taxonomy_field("product_type", "Product Type", "concise accessory category label such as necklace, watch, belt, hat, or sunglasses", "品类"),
  237 + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"),
  238 + _taxonomy_field("material", "Material", "main material such as alloy, leather, stainless steel, acetate, fabric", "材质"),
  239 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  240 + _taxonomy_field("pattern_finish", "Pattern / Finish", "surface treatment or style finish such as polished, textured, braided, rhinestone", "图案 / 表面处理"),
  241 + _taxonomy_field("closure_fastening", "Closure / Fastening", "fastening method when applicable", "闭合 / 固定方式"),
  242 + _taxonomy_field("size_fit", "Size / Fit", "size or fit information such as adjustable, one size, 42mm", "尺寸 / 适配"),
  243 + _taxonomy_field("style", "Style", "style cue such as minimalist, vintage, statement, sporty", "风格"),
  244 + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as daily wear, formal, party, travel, sun protection", "适用场景"),
  245 + _taxonomy_field("set_pack_size", "Set / Pack Size", "set count or pack size when stated", "套装 / 规格"),
  246 +)
  247 +
  248 +TOYS_TAXONOMY_FIELDS = (
  249 + _taxonomy_field("product_type", "Product Type", "concise toy category label", "品类"),
  250 + _taxonomy_field("age_group", "Age Group", "intended age group when clearly implied", "年龄段"),
  251 + _taxonomy_field("character_theme", "Character / Theme", "licensed character, theme, or play theme when supported", "角色 / 主题"),
  252 + _taxonomy_field("material", "Material", "main toy material such as plush, plastic, wood, silicone", "材质"),
  253 + _taxonomy_field("power_source", "Power Source", "battery, rechargeable, wind-up, or non-powered when supported", "供电方式"),
  254 + _taxonomy_field("interactive_features", "Interactive Features", "interactive functions such as sound, lights, remote control, motion", "互动功能"),
  255 + _taxonomy_field("educational_play_value", "Educational / Play Value", "play value such as STEM, pretend play, sensory, puzzle solving", "教育 / 可玩性"),
  256 + _taxonomy_field("piece_count_size", "Piece Count / Size", "piece count or size when stated", "件数 / 尺寸"),
  257 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  258 + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as indoor play, bath time, party favor, outdoor play", "使用场景"),
  259 +)
  260 +
  261 +SHOES_TAXONOMY_FIELDS = (
  262 + _taxonomy_field("product_type", "Product Type", "concise footwear category label", "品类"),
  263 + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"),
  264 + _taxonomy_field("age_group", "Age Group", "only if clearly implied", "年龄段"),
  265 + _taxonomy_field("closure_type", "Closure Type", "fastening method such as lace-up, slip-on, buckle, hook-and-loop", "闭合方式"),
  266 + _taxonomy_field("toe_shape", "Toe Shape", "toe shape when applicable, e.g. round toe, pointed toe, open toe", "鞋头形状"),
  267 + _taxonomy_field("heel_sole_type", "Heel Height / Sole Type", "heel or sole profile such as flat, block heel, wedge, platform, thick sole", "跟高 / 鞋底类型"),
  268 + _taxonomy_field("upper_material", "Upper Material", "main upper material such as leather, knit, canvas, mesh", "鞋面材质"),
  269 + _taxonomy_field("lining_insole_material", "Lining / Insole Material", "lining or insole material when supported", "里料 / 鞋垫材质"),
  270 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  271 + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as running, casual, office, hiking, formal", "适用场景"),
  272 +)
  273 +
  274 +SPORTS_TAXONOMY_FIELDS = (
  275 + _taxonomy_field("product_type", "Product Type", "concise sports product category label", "品类"),
  276 + _taxonomy_field("sport_activity", "Sport / Activity", "primary sport or activity such as fitness, yoga, basketball, cycling, swimming", "运动 / 活动"),
  277 + _taxonomy_field("skill_level", "Skill Level", "target user level when supported, e.g. beginner, training, professional", "适用水平"),
  278 + _taxonomy_field("material", "Material", "main material such as EVA, carbon fiber, neoprene, latex", "材质"),
  279 + _taxonomy_field("size_capacity", "Size / Capacity", "size, weight, resistance level, or capacity when stated", "尺寸 / 容量"),
  280 + _taxonomy_field("protection_support", "Protection / Support", "support or protection function such as ankle support, shock absorption, impact protection", "防护 / 支撑"),
  281 + _taxonomy_field("key_features", "Key Features", "main features such as anti-slip, adjustable, foldable, quick-dry", "关键特征"),
  282 + _taxonomy_field("power_source", "Power Source", "battery, electric, or non-powered when applicable", "供电方式"),
  283 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  284 + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as gym, home workout, field training, competition", "使用场景"),
  285 +)
  286 +
  287 +OTHERS_TAXONOMY_FIELDS = (
  288 + _taxonomy_field("product_type", "Product Type", "concise product category label, not a full marketing title", "品类"),
  289 + _taxonomy_field("product_category", "Product Category", "broader retail grouping when the specific product type is narrow", "商品类别"),
  290 + _taxonomy_field("target_user", "Target User", "intended user, audience, or recipient when clearly implied", "适用人群"),
  291 + _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredients when supported", "材质 / 成分"),
  292 + _taxonomy_field("key_features", "Key Features", "primary product attributes or standout features", "关键特征"),
  293 + _taxonomy_field("functional_benefits", "Functional Benefits", "practical benefits or performance advantages when supported", "功能"),
  294 + _taxonomy_field("size_capacity", "Size / Capacity", "size, count, weight, or capacity when stated", "尺寸 / 容量"),
  295 + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"),
  296 + _taxonomy_field("style_theme", "Style / Theme", "overall style, design theme, or visual direction when supported", "风格 / 主题"),
  297 + _taxonomy_field("use_scenario", "Use Scenario", "likely use occasion or application setting when supported", "使用场景"),
  298 +)
  299 +
  300 +CATEGORY_TAXONOMY_PROFILES: Dict[str, Dict[str, Any]] = {
  301 + "apparel": _make_taxonomy_profile(
  302 + "apparel",
  303 + APPAREL_TAXONOMY_FIELDS,
  304 + ),
  305 + "3c": _make_taxonomy_profile(
  306 + "3C",
  307 + THREE_C_TAXONOMY_FIELDS,
  308 + ),
  309 + "bags": _make_taxonomy_profile(
  310 + "bags",
  311 + BAGS_TAXONOMY_FIELDS,
  312 + ),
  313 + "pet_supplies": _make_taxonomy_profile(
  314 + "pet supplies",
  315 + PET_SUPPLIES_TAXONOMY_FIELDS,
  316 + ),
  317 + "electronics": _make_taxonomy_profile(
  318 + "electronics",
  319 + ELECTRONICS_TAXONOMY_FIELDS,
  320 + ),
  321 + "outdoor": _make_taxonomy_profile(
  322 + "outdoor products",
  323 + OUTDOOR_TAXONOMY_FIELDS,
  324 + ),
  325 + "home_appliances": _make_taxonomy_profile(
  326 + "home appliances",
  327 + HOME_APPLIANCES_TAXONOMY_FIELDS,
  328 + ),
  329 + "home_living": _make_taxonomy_profile(
  330 + "home and living",
  331 + HOME_LIVING_TAXONOMY_FIELDS,
  332 + ),
  333 + "wigs": _make_taxonomy_profile(
  334 + "wigs",
  335 + WIGS_TAXONOMY_FIELDS,
  336 + ),
  337 + "beauty": _make_taxonomy_profile(
  338 + "beauty and cosmetics",
  339 + BEAUTY_TAXONOMY_FIELDS,
  340 + ),
  341 + "accessories": _make_taxonomy_profile(
  342 + "accessories",
  343 + ACCESSORIES_TAXONOMY_FIELDS,
  344 + ),
  345 + "toys": _make_taxonomy_profile(
  346 + "toys",
  347 + TOYS_TAXONOMY_FIELDS,
  348 + ),
  349 + "shoes": _make_taxonomy_profile(
  350 + "shoes",
  351 + SHOES_TAXONOMY_FIELDS,
  352 + ),
  353 + "sports": _make_taxonomy_profile(
  354 + "sports products",
  355 + SPORTS_TAXONOMY_FIELDS,
  356 + ),
  357 + "others": _make_taxonomy_profile(
  358 + "general merchandise",
  359 + OTHERS_TAXONOMY_FIELDS,
  360 + ),
  361 +}
  362 +
  363 +TAXONOMY_SHARED_ANALYSIS_INSTRUCTION = CATEGORY_TAXONOMY_PROFILES["apparel"]["shared_instruction"]
  364 +TAXONOMY_MARKDOWN_TABLE_HEADERS_EN = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"]["en"]
  365 +TAXONOMY_LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"]
  366 +
36 LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { 367 LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
37 "en": [ 368 "en": [
38 "No.", 369 "No.",
indexer/product_enrich模块说明.md 0 → 100644
@@ -0,0 +1,173 @@ @@ -0,0 +1,173 @@
  1 +# Content Enrichment Module Overview
  2 +
  3 +This document describes the responsibilities, entry points, and output structure of the product content enrichment module, along with the design constraints of the current taxonomy profiles.
  4 +
  5 +## 1. Module goals
  6 +
  7 +The content enrichment module calls an LLM on product text to generate the following index fields:
  8 +
  9 +- `qanchors`
  10 +- `enriched_tags`
  11 +- `enriched_attributes`
  12 +- `enriched_taxonomy_attributes`
  13 +
  14 +Design principles the module follows:
  15 +
  16 +- Single responsibility: content understanding and structured output only; no CSV reading or writing
  17 +- Output aligned with the ES mapping: the returned structure can be written directly into `search_products`
  18 +- Configuration-driven extension: taxonomy profiles grow through data configuration, not scattered conditional branches
  19 +- Lean code: built for the normal usage path only, avoiding patch logic stacked up to tolerate unreasonable call patterns
  20 +
  21 +## 2. Key files
  22 +
  23 +- [product_enrich.py](/data/saas-search/indexer/product_enrich.py)
  24 + Runtime main logic: batching, caching, prompt assembly, LLM calls, markdown parsing, and output shaping
  25 +- [product_enrich_prompts.py](/data/saas-search/indexer/product_enrich_prompts.py)
  26 + Prompt templates and taxonomy profile configuration
  27 +- [document_transformer.py](/data/saas-search/indexer/document_transformer.py)
  28 + Calls the content enrichment module from the internal index-building pipeline and writes the results back into the ES doc
  29 +- [taxonomy.md](/data/saas-search/indexer/taxonomy.md)
  30 + Taxonomy design notes and field inventory
  31 +
  32 +## 3. Public entry points
  33 +
  34 +### 3.1 Python entry point
  35 +
  36 +Core entry point:
  37 +
  38 +```python
  39 +build_index_content_fields(
  40 +    items,
  41 +    tenant_id=None,
  42 +    enrichment_scopes=None,
  43 +    category_taxonomy_profile=None,
  44 +)
  45 +```
  46 +
  47 +Minimal input requirements:
  48 +
  49 +- `id` or `spu_id`
  50 +- `title`
  51 +
  52 +Optional inputs:
  53 +
  54 +- `brief`
  55 +- `description`
  56 +- `image_url`
  57 +
  58 +Key parameters:
  59 +
  60 +- `enrichment_scopes`
  61 + Accepts `generic` and `category_taxonomy`
  62 +- `category_taxonomy_profile`
  63 + Taxonomy profile; defaults to `apparel`
  64 +
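A hypothetical pre-flight check mirroring the minimal input contract above (`id` or `spu_id`, plus a non-empty `title`); the function name is illustrative and is not part of the module:

```python
def validate_enrich_items(items):
    """Split items into (valid, rejected) by the minimal input contract:
    each item needs an `id` or `spu_id` plus a non-empty `title`."""
    valid, rejected = [], []
    for item in items:
        has_id = bool(item.get("id") or item.get("spu_id"))
        has_title = bool(str(item.get("title") or "").strip())
        (valid if has_id and has_title else rejected).append(item)
    return valid, rejected
```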
  65 +### 3.2 HTTP entry point
  66 +
  67 +API route:
  68 +
  69 +- `POST /indexer/enrich-content`
  70 +
  71 +Related documentation:
  72 +
  73 +- [搜索API对接指南-05-索引接口(Indexer)](/data/saas-search/docs/搜索API对接指南-05-索引接口(Indexer).md)
  74 +- [搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation)](/data/saas-search/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md)
  75 +
  76 +## 4. Output structure
  77 +
  78 +The returned result is aligned with the ES mapping:
  79 +
  80 +```json
  81 +{
  82 +  "id": "223167",
  83 +  "qanchors": {
  84 +    "zh": ["短袖T恤", "纯棉"],
  85 +    "en": ["t-shirt", "cotton"]
  86 +  },
  87 +  "enriched_tags": {
  88 +    "zh": ["短袖", "纯棉"],
  89 +    "en": ["short sleeve", "cotton"]
  90 +  },
  91 +  "enriched_attributes": [
  92 +    {
  93 +      "name": "enriched_tags",
  94 +      "value": {
  95 +        "zh": ["短袖", "纯棉"],
  96 +        "en": ["short sleeve", "cotton"]
  97 +      }
  98 +    }
  99 +  ],
  100 +  "enriched_taxonomy_attributes": [
  101 +    {
  102 +      "name": "Product Type",
  103 +      "value": {
  104 +        "zh": ["T恤"],
  105 +        "en": ["t-shirt"]
  106 +      }
  107 +    }
  108 +  ]
  109 +}
  110 +```
  111 +
  112 +Notes:
  113 +
  114 +- The `generic` part always outputs the core index languages `zh` and `en`
  115 +- The `taxonomy` part likewise always outputs `zh` and `en`
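A minimal shape check, sketched against the example document above (illustrative only, not the module's actual validation):

```python
def check_enrichment_doc(doc):
    # Sketch: verify each taxonomy attribute carries a non-empty name
    # plus zh/en value lists, matching the ES-aligned structure above.
    for attr in doc.get("enriched_taxonomy_attributes", []):
        assert isinstance(attr.get("name"), str) and attr["name"]
        value = attr.get("value") or {}
        assert isinstance(value.get("zh"), list)
        assert isinstance(value.get("en"), list)
    return True
```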
  116 +
  117 +## 5. Taxonomy profile
  118 +
  119 +Currently supported profiles:
  120 +
  121 +- `apparel`
  122 +- `3c`
  123 +- `bags`
  124 +- `pet_supplies`
  125 +- `electronics`
  126 +- `outdoor`
  127 +- `home_appliances`
  128 +- `home_living`
  129 +- `wigs`
  130 +- `beauty`
  131 +- `accessories`
  132 +- `toys`
  133 +- `shoes`
  134 +- `sports`
  135 +- `others`
  136 +
  137 +Shared constraints:
  138 +
  139 +- Every profile returns `zh` + `en`
  140 +- A profile only determines the taxonomy field set; it no longer determines output languages
  141 +- Every profile configures both Chinese and English field names, keeping the prompt/header structure consistent
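A hypothetical resolver over the slug set above; `resolve_profile` is an illustrative helper, not part of the module, and its fallback mirrors the pipeline's current fixed default of `apparel`:

```python
SUPPORTED_PROFILES = frozenset({
    "apparel", "3c", "bags", "pet_supplies", "electronics",
    "outdoor", "home_appliances", "home_living", "wigs", "beauty",
    "accessories", "toys", "shoes", "sports", "others",
})

def resolve_profile(slug):
    # Normalize and validate a profile slug; fall back to the current
    # default "apparel" when the slug is missing or unknown.
    normalized = (slug or "").strip().lower()
    return normalized if normalized in SUPPORTED_PROFILES else "apparel"
```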
  142 +
  143 +## 6. Current constraint in the internal indexing pipeline
  144 +
  145 +In the internal ES document-building pipeline, `document_transformer` currently passes a fixed taxonomy profile when invoking content enrichment:
  146 +
  147 +```python
  148 +category_taxonomy_profile="apparel"
  149 +```
  150 +
  151 +This is a temporary strategy chosen for being explicit, controllable, and cleaner in code.
  152 +
  153 +A TODO is kept in the code:
  154 +
  155 +- Later, read the tenant's actual industry from the database
  156 +- Then replace the fixed `apparel` with that industry
  157 +
  158 +For now there is no implicit "guess the profile from product category text" logic, which would add redundant code and unnecessary uncertainty.
  159 +
  160 +## 7. Caching and batching
  161 +
  162 +The cache key is derived from all of the following:
  163 +
  164 +- `analysis_kind`
  165 +- `target_lang`
  166 +- the prompt/schema version fingerprint
  167 +- the actual prompt input text
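One way such a composite cache key could be computed (illustrative sketch; the module's real key derivation may differ):

```python
import hashlib

def make_cache_key(analysis_kind, target_lang, version_fingerprint, prompt_text):
    # Join all key components with an unprintable separator so that no
    # pair of inputs can collide via concatenation, then hash the result.
    raw = "\x1f".join([analysis_kind, target_lang, version_fingerprint, prompt_text])
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()
```

Because the version fingerprint participates in the key, bumping the prompt or schema version automatically invalidates all older cache entries.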
  168 +
  169 +Batching rules:
  170 +
  171 +- At most 20 items per LLM call
  172 +- Callers may pass larger batches; the module splits them internally
  173 +- Uncached batches can run concurrently
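The batch-splitting rule above can be sketched as (illustrative helper, not the module's actual implementation):

```python
def split_batches(items, max_batch_size=20):
    # Cut an arbitrarily large caller batch into chunks of at most
    # `max_batch_size` items; each chunk maps to one LLM call.
    return [items[i:i + max_batch_size]
            for i in range(0, len(items), max_batch_size)]
```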
indexer/taxonomy.md 0 → 100644
@@ -0,0 +1,196 @@ @@ -0,0 +1,196 @@
  1 +
  2 +# Cross-Border E-commerce Core Categories 大类
  3 +
  4 +## 1. 3C
  5 +Phone accessories, computer peripherals, smart wearables, audio & video, smart home, gaming gear. 手机配件、电脑周边、智能穿戴、影音娱乐、智能家居、游戏设备。
  6 +
  7 +## 2. Bags 包
  8 +Handbags, backpacks, wallets, luggage, crossbody bags, tote bags. 手提包、双肩包、钱包、行李箱、斜挎包、托特包。
  9 +
  10 +## 3. Pet Supplies 宠物用品
  11 +Pet food, pet toys, pet care products, pet grooming, pet clothing, smart pet devices. 宠物食品、宠物玩具、宠物护理用品、宠物美容、宠物服装、智能宠物设备。
  12 +
  13 +## 4. Electronics 电子产品
  14 +Consumer electronics, home appliances, digital devices, cables & chargers, batteries, electronic components. 消费电子产品、家用电器、数码设备、线材充电器、电池、电子元器件。
  15 +
  16 +## 5. Clothing 服装
  17 +Women's wear, men's wear, kid's wear, underwear, outerwear, activewear. 女装、男装、童装、内衣、外套、运动服装。
  18 +
  19 +## 6. Outdoor 户外用品
  20 +Camping gear, hiking equipment, fishing supplies, outdoor clothing, travel accessories, survival tools. 露营装备、徒步用品、渔具、户外服装、旅行配件、求生工具。
  21 +
  22 +## 7. Home Appliances 家电/电器
  23 +Kitchen appliances, cleaning appliances, personal care appliances, heating & cooling, smart home devices. 厨房电器、清洁电器、个护电器、冷暖设备、智能家居设备。
  24 +
  25 +## 8. Home & Living 家居
  26 +Furniture, home textiles, lighting, kitchenware, storage, home decor. 家具、家纺、灯具、厨具、收纳、家居装饰。
  27 +
  28 +## 9. Wigs 假发
  29 +
  30 +## 10. Beauty & Cosmetics 美容美妆
  31 +Skincare, makeup, nail care, beauty tools, hair care, fragrances. 护肤品、彩妆、美甲、美容工具、护发、香水。
  32 +
  33 +## 11. Accessories 配饰
  34 +Jewelry, watches, belts, scarves, hats, sunglasses, hair accessories. 珠宝、手表、腰带、围巾、帽子、太阳镜、发饰。
  35 +
  36 +## 12. Toys 玩具
  37 +Educational toys, plush toys, action figures, puzzles, outdoor toys, DIY toys. 益智玩具、毛绒玩具、可动人偶、拼图、户外玩具、DIY玩具。
  38 +
  39 +## 13. Shoes 鞋子
  40 +Sneakers, boots, sandals, heels, flats, sports shoes. 运动鞋、靴子、凉鞋、高跟鞋、平底鞋、球鞋。
  41 +
  42 +## 14. Sports 运动产品
  43 +Fitness equipment, sports gear, team sports, racquet sports, water sports, cycling. 健身器材、运动装备、团队运动、球拍运动、水上运动、骑行。
  44 +
  45 +## 15. Others 其他
  46 +
  47 +# Taxonomy for each top-level category
  48 +## 1. Clothing & Apparel 服装
  49 +
  50 +### A. Product Classification
  51 +
  52 +| 一级层级 | 中文列名 | English Column Name |
  53 +| ------------------------- | ---- | ------------------- |
  54 +| A. Product Classification | 品类 | Product Type |
  55 +| A. Product Classification | 目标性别 | Target Gender |
  56 +| A. Product Classification | 年龄段 | Age Group |
  57 +| A. Product Classification | 适用季节 | Season |
  58 +
  59 +### B. Garment Design
  60 +
  61 +| 一级层级 | 中文列名 | English Column Name |
  62 +| ----------------- | ---- | ------------------- |
  63 +| B. Garment Design | 版型 | Fit |
  64 +| B. Garment Design | 廓形 | Silhouette |
  65 +| B. Garment Design | 领型 | Neckline |
  66 +| B. Garment Design | 袖型 | Sleeve Style |
  67 +| B. Garment Design | 肩带设计 | Strap Type |
  68 +| B. Garment Design | 腰型 | Rise / Waistline |
  69 +| B. Garment Design | 裤型 | Leg Shape |
  70 +| B. Garment Design | 裙型 | Skirt Shape |
  71 +| B. Garment Design | 长度 | Length Type |
  72 +| B. Garment Design | 闭合方式 | Closure Type |
  73 +| B. Garment Design | 设计细节 | Design Details |
  74 +
  75 +### C. Material & Performance
  76 +
  77 +| 一级层级 | 中文列名 | English Column Name |
  78 +| ------------------------- | ----------- | -------------------- |
  79 +| C. Material & Performance | 面料 | Fabric |
  80 +| C. Material & Performance | 成分 | Material Composition |
  81 +| C. Material & Performance | 面料特性 | Fabric Properties |
  82 +| C. Material & Performance | 服装特征 / 功能细节 | Clothing Features |
  83 +| C. Material & Performance | 功能 | Functional Benefits |
  84 +
  85 +### D. Merchandising Attributes
  86 +
  87 +| 一级层级 | 中文列名 | English Column Name |
  88 +| --------------------------- | ------- | ------------------- |
  89 +| D. Merchandising Attributes | 主颜色 | Color |
  90 +| D. Merchandising Attributes | 色系 | Color Family |
  91 +| D. Merchandising Attributes | 印花 / 图案 | Print / Pattern |
  92 +| D. Merchandising Attributes | 适用场景 | Occasion / End Use |
  93 +| D. Merchandising Attributes | 风格 | Style Aesthetic |
  94 +
  95 +
  96 +
  97 +The following `enriched_taxonomy_attributes` field list is derived from this taxonomy:
  99 +
  100 +```text
  101 +Product Type
  102 +Target Gender
  103 +Age Group
  104 +Season
  105 +Fit
  106 +Silhouette
  107 +Neckline
  108 +Sleeve Length Type
  109 +Sleeve Style
  110 +Strap Type
  111 +Rise / Waistline
  112 +Leg Shape
  113 +Skirt Shape
  114 +Length Type
  115 +Closure Type
  116 +Design Details
  117 +Fabric
  118 +Material Composition
  119 +Fabric Properties
  120 +Clothing Features
  121 +Functional Benefits
  122 +Color
  123 +Color Family
  124 +Print / Pattern
  125 +Occasion / End Use
  126 +Style Aesthetic
  127 +```
  128 +
  129 +Prompt:
  130 +
  131 +```python
  132 +SHARED_ANALYSIS_INSTRUCTION = """
  133 +Analyze each input product text and fill the columns below using an apparel attribute taxonomy.
  134 +
  135 +Output columns:
  136 +1. Product Type: concise ecommerce apparel category label, not a full marketing title
  137 +2. Target Gender: intended gender only if clearly implied
  138 +3. Age Group: only if clearly implied, e.g. adults, kids, teens, toddlers, babies
  139 +4. Season: season(s) or all-season suitability only if supported
  140 +5. Fit: body closeness, e.g. slim, regular, relaxed, oversized, fitted
  141 +6. Silhouette: overall garment shape, e.g. straight, A-line, boxy, tapered, bodycon, wide-leg
  142 +7. Neckline: neckline type when applicable, e.g. crew neck, V-neck, hooded, collared, square neck
  143 +8. Sleeve Length Type: sleeve length only, e.g. sleeveless, short sleeve, long sleeve, three-quarter sleeve
  144 +9. Sleeve Style: sleeve design only, e.g. puff sleeve, raglan sleeve, batwing sleeve, bell sleeve
  145 +10. Strap Type: strap design when applicable, e.g. spaghetti strap, wide strap, halter strap, adjustable strap
  146 +11. Rise / Waistline: waist placement when applicable, e.g. high rise, mid rise, low rise, empire waist
  147 +12. Leg Shape: for bottoms only, e.g. straight leg, wide leg, flare leg, tapered leg, skinny leg
  148 +13. Skirt Shape: for skirts only, e.g. A-line, pleated, pencil, mermaid
  149 +14. Length Type: design length only, not size, e.g. cropped, regular, longline, mini, midi, maxi, ankle length, full length
  150 +15. Closure Type: fastening method when applicable, e.g. zipper, button, drawstring, elastic waist, hook-and-loop
  151 +16. Design Details: construction or visual details, e.g. ruched, ruffled, pleated, cut-out, layered, distressed, split hem
  152 +17. Fabric: fabric type only, e.g. denim, knit, chiffon, jersey, fleece, cotton twill
  153 +18. Material Composition: fiber content or blend only if stated, e.g. cotton, polyester, spandex, linen blend, 95% cotton 5% elastane
  154 +19. Fabric Properties: inherent fabric traits, e.g. stretch, breathable, lightweight, soft-touch, water-resistant
  155 +20. Clothing Features: product features, e.g. lined, reversible, hooded, packable, padded, pocketed
  156 +21. Functional Benefits: wearer benefits, e.g. moisture-wicking, thermal insulation, UV protection, easy care, supportive compression
  157 +22. Color: specific color name when available
  158 +23. Color Family: normalized broad retail color group, e.g. black, white, blue, green, red, pink, beige, brown, gray
  159 +24. Print / Pattern: surface pattern when applicable, e.g. solid, striped, plaid, floral, graphic, animal print
  160 +25. Occasion / End Use: likely use occasion only if supported, e.g. office, casual wear, streetwear, lounge, workout, outdoor
  161 +26. Style Aesthetic: overall style only if supported, e.g. minimalist, streetwear, athleisure, smart casual, romantic, playful
  162 +
  163 +Rules:
  164 +- Keep the same row order and row count as input.
  165 +- Infer only from the provided product text.
  166 +- Leave blank if not applicable or not reasonably supported.
  167 +- Use concise, standardized English ecommerce wording.
  168 +- Do not combine different attribute dimensions in one field.
  169 +- If multiple values are needed, use the delimiter required by the localization setting.
  170 +
  171 +Input product list:
  172 +"""
  173 +```
  174 +
  175 +## 2. Other taxonomy profiles
  176 +
  177 +Notes:
  178 +- All profiles uniformly return `zh` + `en`.
  179 +- The profile slugs in code match the rows below.
  180 +
  181 +| Profile | Core columns (`en`) |
  182 +| --- | --- |
  183 +| `3c` | Product Type, Compatible Device / Model, Connectivity, Interface / Port Type, Power Source / Charging, Key Features, Material / Finish, Color, Pack Size, Use Case |
  184 +| `bags` | Product Type, Target Gender, Carry Style, Size / Capacity, Material, Closure Type, Structure / Compartments, Strap / Handle Type, Color, Occasion / End Use |
  185 +| `pet_supplies` | Product Type, Pet Type, Breed Size, Life Stage, Material / Ingredients, Flavor / Scent, Key Features, Functional Benefits, Size / Capacity, Use Scenario |
  186 +| `electronics` | Product Type, Device Category / Compatibility, Power / Voltage, Connectivity, Interface / Port Type, Capacity / Storage, Key Features, Material / Finish, Color, Use Case |
  187 +| `outdoor` | Product Type, Activity Type, Season / Weather, Material, Capacity / Size, Protection / Resistance, Key Features, Portability / Packability, Color, Use Scenario |
  188 +| `home_appliances` | Product Type, Appliance Category, Power / Voltage, Capacity / Coverage, Control Method, Installation Type, Key Features, Material / Finish, Color, Use Scenario |
  189 +| `home_living` | Product Type, Room / Placement, Material, Style, Size / Dimensions, Color, Pattern / Finish, Key Features, Assembly / Installation, Use Scenario |
  190 +| `wigs` | Product Type, Hair Material, Hair Texture, Hair Length, Hair Color, Cap Construction, Lace Area / Part Type, Density / Volume, Style / Bang Type, Occasion / End Use |
  191 +| `beauty` | Product Type, Target Area, Skin Type / Hair Type, Finish / Effect, Key Ingredients, Shade / Color, Scent, Formulation, Functional Benefits, Use Scenario |
  192 +| `accessories` | Product Type, Target Gender, Material, Color, Pattern / Finish, Closure / Fastening, Size / Fit, Style, Occasion / End Use, Set / Pack Size |
  193 +| `toys` | Product Type, Age Group, Character / Theme, Material, Power Source, Interactive Features, Educational / Play Value, Piece Count / Size, Color, Use Scenario |
  194 +| `shoes` | Product Type, Target Gender, Age Group, Closure Type, Toe Shape, Heel Height / Sole Type, Upper Material, Lining / Insole Material, Color, Occasion / End Use |
  195 +| `sports` | Product Type, Sport / Activity, Skill Level, Material, Size / Capacity, Protection / Support, Key Features, Power Source, Color, Use Scenario |
  196 +| `others` | Product Type, Product Category, Target User, Material / Ingredients, Key Features, Functional Benefits, Size / Capacity, Color, Style / Theme, Use Scenario |
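The profile table above is keyed by the slug used in code. As a quick illustration (hypothetical helper names, not code from this repo), a slug-to-columns map can validate that an enrichment row only uses columns declared for its profile:

```python
# Hypothetical sketch: mirror of one row of the profile table, keyed by slug.
PROFILE_COLUMNS = {
    "toys": [
        "Product Type", "Age Group", "Character / Theme", "Material",
        "Power Source", "Interactive Features", "Educational / Play Value",
        "Piece Count / Size", "Color", "Use Scenario",
    ],
    # ...remaining slugs mirror the rows above
}

def undeclared_columns(profile_slug: str, row: dict) -> list:
    """Return attribute names in `row` that the profile does not declare."""
    allowed = set(PROFILE_COLUMNS[profile_slug])
    return sorted(set(row) - allowed)

row = {"Product Type": "RC Car", "Age Group": "3+", "Battery Life": "2h"}
print(undeclared_columns("toys", row))  # ['Battery Life']
```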
mappings/README.md
@@ -68,6 +68,7 @@ @@ -68,6 +68,7 @@
68 - `option2_values` 68 - `option2_values`
69 - `option3_values` 69 - `option3_values`
70 - `enriched_attributes.value` 70 - `enriched_attributes.value`
  71 +- `enriched_taxonomy_attributes.value`
71 - `specifications.value_text` 72 - `specifications.value_text`
72 73
73 Taking `category_path` and `option*_values` as examples, the core-language ingestion result should include at least: 74 Taking `category_path` and `option*_values` as examples, the core-language ingestion result should include at least:
mappings/generate_search_products_mapping.py
@@ -214,6 +214,11 @@ FIELD_SPECS = [ @@ -214,6 +214,11 @@ FIELD_SPECS = [
214 scalar_field("name", "keyword"), 214 scalar_field("name", "keyword"),
215 text_field("value", "core_language_text_with_keyword"), 215 text_field("value", "core_language_text_with_keyword"),
216 ), 216 ),
  217 + nested_field(
  218 + "enriched_taxonomy_attributes",
  219 + scalar_field("name", "keyword"),
  220 + text_field("value", "core_language_text_with_keyword"),
  221 + ),
217 scalar_field("option1_name", "keyword"), 222 scalar_field("option1_name", "keyword"),
218 scalar_field("option2_name", "keyword"), 223 scalar_field("option2_name", "keyword"),
219 scalar_field("option3_name", "keyword"), 224 scalar_field("option3_name", "keyword"),
mappings/search_products.json
@@ -2116,6 +2116,40 @@ @@ -2116,6 +2116,40 @@
2116 } 2116 }
2117 } 2117 }
2118 }, 2118 },
  2119 + "enriched_taxonomy_attributes": {
  2120 + "type": "nested",
  2121 + "properties": {
  2122 + "name": {
  2123 + "type": "keyword"
  2124 + },
  2125 + "value": {
  2126 + "type": "object",
  2127 + "properties": {
  2128 + "zh": {
  2129 + "type": "text",
  2130 + "analyzer": "index_ik",
  2131 + "search_analyzer": "query_ik",
  2132 + "fields": {
  2133 + "keyword": {
  2134 + "type": "keyword",
  2135 + "normalizer": "lowercase"
  2136 + }
  2137 + }
  2138 + },
  2139 + "en": {
  2140 + "type": "text",
  2141 + "analyzer": "english",
  2142 + "fields": {
  2143 + "keyword": {
  2144 + "type": "keyword",
  2145 + "normalizer": "lowercase"
  2146 + }
  2147 + }
  2148 + }
  2149 + }
  2150 + }
  2151 + }
  2152 + },
2119 "option1_name": { 2153 "option1_name": {
2120 "type": "keyword" 2154 "type": "keyword"
2121 }, 2155 },
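Because `enriched_taxonomy_attributes` is mapped as `nested`, its `name`/`value` pairs must be matched inside a `nested` query rather than via flat field paths. A minimal sketch of such a query body (the helper is illustrative, not code from this repo; the field names follow the mapping above):

```python
def build_taxonomy_attr_query(name: str, text: str, lang: str = "zh") -> dict:
    """Sketch of a nested query body for the mapping above (illustrative helper)."""
    field = f"enriched_taxonomy_attributes.value.{lang}"
    return {
        "query": {
            "nested": {
                "path": "enriched_taxonomy_attributes",
                "query": {
                    "bool": {
                        # exact match on the attribute name keyword
                        "filter": [{"term": {"enriched_taxonomy_attributes.name": name}}],
                        # analyzed match on the localized value
                        "must": [{"match": {field: text}}],
                    }
                },
            }
        }
    }

body = build_taxonomy_attr_query("Color", "红色")
print(body["query"]["nested"]["path"])  # enriched_taxonomy_attributes
```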
perf_reports/20260311/reranker_1000docs/report.md
@@ -34,5 +34,5 @@ Workload profile: @@ -34,5 +34,5 @@ Workload profile:
34 ## Reproduce 34 ## Reproduce
35 35
36 ```bash 36 ```bash
37 -./scripts/benchmark_reranker_1000docs.sh 37 +./benchmarks/reranker/benchmark_reranker_1000docs.sh
38 ``` 38 ```
perf_reports/20260317/translation_local_models/README.md
1 # Local Translation Model Benchmark Report 1 # Local Translation Model Benchmark Report
2 2
3 -Test script: [`scripts/benchmark_translation_local_models.py`](/data/saas-search/scripts/benchmark_translation_local_models.py) 3 +Test script: [`benchmarks/translation/benchmark_translation_local_models.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models.py)
4 4
5 Test time: `2026-03-17` 5 Test time: `2026-03-17`
6 6
@@ -67,7 +67,7 @@ To model online search query translation, we reran NLLB with `batch_size=1`. In @@ -67,7 +67,7 @@ To model online search query translation, we reran NLLB with `batch_size=1`. In
67 Command used: 67 Command used:
68 68
69 ```bash 69 ```bash
70 -./.venv-translator/bin/python scripts/benchmark_translation_local_models.py \ 70 +./.venv-translator/bin/python benchmarks/translation/benchmark_translation_local_models.py \
71 --single \ 71 --single \
72 --model nllb-200-distilled-600m \ 72 --model nllb-200-distilled-600m \
73 --source-lang zh \ 73 --source-lang zh \
perf_reports/20260318/nllb_t4_product_names_ct2/README.md
1 # NLLB T4 Product-Name Tuning Summary 1 # NLLB T4 Product-Name Tuning Summary
2 2
3 Test script: 3 Test script:
4 -- [`scripts/benchmark_nllb_t4_tuning.py`](/data/saas-search/scripts/benchmark_nllb_t4_tuning.py) 4 +- [`benchmarks/translation/benchmark_nllb_t4_tuning.py`](/data/saas-search/benchmarks/translation/benchmark_nllb_t4_tuning.py)
5 5
6 This round's reports: 6 This round's reports:
7 - Markdown:[`nllb_t4_tuning_003608.md`](/data/saas-search/perf_reports/20260318/nllb_t4_product_names_ct2/nllb_t4_tuning_003608.md) 7 - Markdown:[`nllb_t4_tuning_003608.md`](/data/saas-search/perf_reports/20260318/nllb_t4_product_names_ct2/nllb_t4_tuning_003608.md)
perf_reports/20260318/translation_local_models/README.md
1 # Local Translation Model Benchmark Report 1 # Local Translation Model Benchmark Report
2 2
3 Test script: 3 Test script:
4 -- [`scripts/benchmark_translation_local_models.py`](/data/saas-search/scripts/benchmark_translation_local_models.py) 4 +- [`benchmarks/translation/benchmark_translation_local_models.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models.py)
5 5
6 Full results: 6 Full results:
7 - Markdown:[`translation_local_models_extended_221846.md`](/data/saas-search/perf_reports/20260318/translation_local_models/translation_local_models_extended_221846.md) 7 - Markdown:[`translation_local_models_extended_221846.md`](/data/saas-search/perf_reports/20260318/translation_local_models/translation_local_models_extended_221846.md)
@@ -39,7 +39,7 @@ @@ -39,7 +39,7 @@
39 39
40 ```bash 40 ```bash
41 cd /data/saas-search 41 cd /data/saas-search
42 -./.venv-translator/bin/python scripts/benchmark_translation_local_models.py \ 42 +./.venv-translator/bin/python benchmarks/translation/benchmark_translation_local_models.py \
43 --suite extended \ 43 --suite extended \
44 --disable-cache \ 44 --disable-cache \
45 --serial-items-per-case 256 \ 45 --serial-items-per-case 256 \
perf_reports/20260318/translation_local_models_ct2/README.md
1 # Local Translation Model Benchmark Report (CTranslate2) 1 # Local Translation Model Benchmark Report (CTranslate2)
2 2
3 Test script: 3 Test script:
4 -- [`scripts/benchmark_translation_local_models.py`](/data/saas-search/scripts/benchmark_translation_local_models.py) 4 +- [`benchmarks/translation/benchmark_translation_local_models.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models.py)
5 5
6 This round's CT2 results: 6 This round's CT2 results:
7 - Markdown:[`translation_local_models_ct2_extended_233253.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2/translation_local_models_ct2_extended_233253.md) 7 - Markdown:[`translation_local_models_ct2_extended_233253.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2/translation_local_models_ct2_extended_233253.md)
@@ -46,7 +46,7 @@ from datetime import datetime @@ -46,7 +46,7 @@ from datetime import datetime
46 from pathlib import Path 46 from pathlib import Path
47 from types import SimpleNamespace 47 from types import SimpleNamespace
48 48
49 -from scripts.benchmark_translation_local_models import ( 49 +from benchmarks.translation.benchmark_translation_local_models import (
50 SCENARIOS, 50 SCENARIOS,
51 benchmark_extended_scenario, 51 benchmark_extended_scenario,
52 build_environment_info, 52 build_environment_info,
perf_reports/20260318/translation_local_models_ct2_focus/README.md
1 # Local Translation Model Focused T4 Tuning 1 # Local Translation Model Focused T4 Tuning
2 2
3 Test script: 3 Test script:
4 -- [`scripts/benchmark_translation_local_models_focus.py`](/data/saas-search/scripts/benchmark_translation_local_models_focus.py) 4 +- [`benchmarks/translation/benchmark_translation_local_models_focus.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models_focus.py)
5 5
6 This round's focused results: 6 This round's focused results:
7 - Markdown:[`translation_local_models_focus_235018.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2_focus/translation_local_models_focus_235018.md) 7 - Markdown:[`translation_local_models_focus_235018.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2_focus/translation_local_models_focus_235018.md)
perf_reports/README.md
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 4
5 | Script | Purpose | 5 | Script | Purpose |
6 |------|------| 6 |------|------|
7 -| `scripts/perf_api_benchmark.py` | HTTP load tests for the search backend, embedding, translation, reranking, etc.; supports `--embed-text-priority` / `--embed-image-priority` and `scripts/perf_cases.json.example` | 7 +| `benchmarks/perf_api_benchmark.py` | HTTP load tests for the search backend, embedding, translation, reranking, etc.; supports `--embed-text-priority` / `--embed-image-priority` and `benchmarks/perf_cases.json.example` |
8 8
9 Historical matrix example (concurrency sweep): 9 Historical matrix example (concurrency sweep):
10 10
@@ -25,10 +25,10 @@ @@ -25,10 +25,10 @@
25 25
26 ```bash 26 ```bash
27 source activate.sh 27 source activate.sh
28 -python scripts/perf_api_benchmark.py --scenario embed_text --duration 8 --concurrency 10 --timeout 30 --output perf_reports/2026-03-20_embed_text_p0.json  
29 -python scripts/perf_api_benchmark.py --scenario embed_text --duration 8 --concurrency 10 --embed-text-priority 1 --output perf_reports/2026-03-20_embed_text_p1.json  
30 -python scripts/perf_api_benchmark.py --scenario embed_image --duration 8 --concurrency 5 --timeout 60 --output perf_reports/2026-03-20_embed_image_p0.json  
31 -python scripts/perf_api_benchmark.py --scenario embed_image --duration 8 --concurrency 5 --embed-image-priority 1 --output perf_reports/2026-03-20_embed_image_p1.json 28 +python benchmarks/perf_api_benchmark.py --scenario embed_text --duration 8 --concurrency 10 --timeout 30 --output perf_reports/2026-03-20_embed_text_p0.json
  29 +python benchmarks/perf_api_benchmark.py --scenario embed_text --duration 8 --concurrency 10 --embed-text-priority 1 --output perf_reports/2026-03-20_embed_text_p1.json
  30 +python benchmarks/perf_api_benchmark.py --scenario embed_image --duration 8 --concurrency 5 --timeout 60 --output perf_reports/2026-03-20_embed_image_p0.json
  31 +python benchmarks/perf_api_benchmark.py --scenario embed_image --duration 8 --concurrency 5 --embed-image-priority 1 --output perf_reports/2026-03-20_embed_image_p1.json
32 ``` 32 ```
33 33
34 Note: this run is an **8-second smoke**; its duration/concurrency are not directly comparable to the `2026-03-12` matrix. It only verifies that the service still returns 200 and payload validation passes under the `priority` parameters. 34 Note: this run is an **8-second smoke**; its duration/concurrency are not directly comparable to the `2026-03-12` matrix. It only verifies that the service still returns 200 and payload validation passes under the `priority` parameters.
perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md
@@ -25,7 +25,7 @@ Shared across both backends for this run: @@ -25,7 +25,7 @@ Shared across both backends for this run:
25 25
26 ## Methodology 26 ## Methodology
27 27
28 -- Script: `python scripts/benchmark_reranker_random_titles.py 100,200,400,600,800,1000 --repeat 5` with **`--seed 99`** (see note below), **`--quiet-runs`**, **`--timeout 360`**. 28 +- Script: `python benchmarks/reranker/benchmark_reranker_random_titles.py 100,200,400,600,800,1000 --repeat 5` with **`--seed 99`** (see note below), **`--quiet-runs`**, **`--timeout 360`**.
29 - Titles: default file `/home/ubuntu/rerank_test/titles.1.8w` (one title per line). 29 - Titles: default file `/home/ubuntu/rerank_test/titles.1.8w` (one title per line).
30 - Query: default `健身女生T恤短袖`. 30 - Query: default `健身女生T恤短袖`.
31 - Each scenario: **3 warm-up** requests at `n=400` (not timed), then **5 timed** runs per `n`. 31 - Each scenario: **3 warm-up** requests at `n=400` (not timed), then **5 timed** runs per `n`.
@@ -56,9 +56,9 @@ JSON aggregates (means, stdev, raw `values_ms`): same directory, `qwen3_vllm_{co @@ -56,9 +56,9 @@ JSON aggregates (means, stdev, raw `values_ms`): same directory, `qwen3_vllm_{co
56 ## Tooling added / changed 56 ## Tooling added / changed
57 57
58 - `reranker/server.py`: `/health` includes `instruction_format` when the active backend sets `_instruction_format`. 58 - `reranker/server.py`: `/health` includes `instruction_format` when the active backend sets `_instruction_format`.
59 -- `scripts/benchmark_reranker_random_titles.py`: `--tag`, `--json-summary-out`, `--quiet-runs`.  
60 -- `scripts/patch_rerank_vllm_benchmark_config.py`: surgical YAML patch (preserves newlines).  
61 -- `scripts/run_reranker_vllm_instruction_benchmark.sh`: full matrix driver (continues if a benchmark exits non-zero; uses `--timeout 360`). 59 +- `benchmarks/reranker/benchmark_reranker_random_titles.py`: `--tag`, `--json-summary-out`, `--quiet-runs`.
  60 +- `benchmarks/reranker/patch_rerank_vllm_benchmark_config.py`: surgical YAML patch (preserves newlines).
  61 +- `benchmarks/reranker/run_reranker_vllm_instruction_benchmark.sh`: full matrix driver (continues if a benchmark exits non-zero; uses `--timeout 360`).
62 62
63 --- 63 ---
64 64
@@ -73,7 +73,7 @@ JSON aggregates (means, stdev, raw `values_ms`): same directory, `qwen3_vllm_{co @@ -73,7 +73,7 @@ JSON aggregates (means, stdev, raw `values_ms`): same directory, `qwen3_vllm_{co
73 | Attention | Backend forced / steered attention on T4 (e.g. `TRITON_ATTN` path) | **No** `attention_config` in `LLM(...)`; vLLM **auto** — on this T4 run, logs show **`FLASHINFER`** | 73 | Attention | Backend forced / steered attention on T4 (e.g. `TRITON_ATTN` path) | **No** `attention_config` in `LLM(...)`; vLLM **auto** — on this T4 run, logs show **`FLASHINFER`** |
74 | Config surface | `vllm_attention_backend` / `RERANK_VLLM_ATTENTION_BACKEND`, etc. | **Removed** (fewer YAML/env-var branches; converged logic) | 74 | Config surface | `vllm_attention_backend` / `RERANK_VLLM_ATTENTION_BACKEND`, etc. | **Removed** (fewer YAML/env-var branches; converged logic) |
75 | Code default `instruction_format` | `qwen3_vllm_score` defaults to `standard` | Aligned with `qwen3_vllm` as **`compact`** (YAML can still set `standard`) | 75 | Code default `instruction_format` | `qwen3_vllm_score` defaults to `standard` | Aligned with `qwen3_vllm` as **`compact`** (YAML can still set `standard`) |
76 -| Smoke / startup | — | `scripts/smoke_qwen3_vllm_score_backend.py`; `scripts/start_reranker.sh` puts the **venv `bin` first on `PATH`** (FLASHINFER JIT depends on `ninja` inside the venv) | 76 +| Smoke / startup | — | `benchmarks/reranker/smoke_qwen3_vllm_score_backend.py`; `scripts/start_reranker.sh` puts the **venv `bin` first on `PATH`** (FLASHINFER JIT depends on `ninja` inside the venv) |
77 77
78 Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n=400** docs on `LLM.score()` steady state (~**28%**), after removing the forced attention path and letting vLLM pick **FLASHINFER**. 78 Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n=400** docs on `LLM.score()` steady state (~**28%**), after removing the forced attention path and letting vLLM pick **FLASHINFER**.
79 79
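The quoted speedup checks out arithmetically:

```python
# Arithmetic check of the reported n=400 steady-state improvement.
before_ms, after_ms = 927.5, 673.1
reduction = (before_ms - after_ms) / before_ms
print(f"{reduction:.1%}")  # 27.4%, consistent with the quoted ~28%
```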
requirements_translator_service.txt
@@ -13,7 +13,8 @@ httpx>=0.24.0 @@ -13,7 +13,8 @@
13 tqdm>=4.65.0 13 tqdm>=4.65.0
14 14
15 torch>=2.0.0 15 torch>=2.0.0
16 -transformers>=4.30.0 16 +# Keep translator conversions on the last verified NLLB-compatible release line.
  17 +transformers>=4.51.0,<4.52.0
17 ctranslate2>=4.7.0 18 ctranslate2>=4.7.0
18 sentencepiece>=0.2.0 19 sentencepiece>=0.2.0
19 sacremoses>=0.1.1 20 sacremoses>=0.1.1
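A minimal sketch of what the new pin accepts, checking a version string against `>=4.51.0,<4.52.0` with a plain tuple comparison (production code would normally use `packaging.specifiers.SpecifierSet` instead):

```python
# Sketch: which transformers versions satisfy the pinned range ">=4.51.0,<4.52.0".
def in_pinned_range(version: str) -> bool:
    parts = tuple(int(p) for p in version.split(".")[:3])
    return (4, 51, 0) <= parts < (4, 52, 0)

print([v for v in ("4.30.0", "4.51.3", "4.52.0") if in_pinned_range(v)])  # ['4.51.3']
```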
reranker/DEPLOYMENT_AND_TUNING.md
@@ -109,7 +109,7 @@ curl -sS http://127.0.0.1:6007/health @@ -109,7 +109,7 @@ curl -sS http://127.0.0.1:6007/health
109 ### 5.1 Using the one-command benchmark script 109 ### 5.1 Using the one-command benchmark script
110 110
111 ```bash 111 ```bash
112 -./scripts/benchmark_reranker_1000docs.sh 112 +./benchmarks/reranker/benchmark_reranker_1000docs.sh
113 ``` 113 ```
114 114
115 Output directory: 115 Output directory:
reranker/GGUF_0_6B_INSTALL_AND_TUNING.md
@@ -144,7 +144,7 @@ qwen3_gguf_06b: @@ -144,7 +144,7 @@ qwen3_gguf_06b:
144 144
145 ```bash 145 ```bash
146 PYTHONPATH=/data/saas-search ./.venv-reranker-gguf/bin/python \ 146 PYTHONPATH=/data/saas-search ./.venv-reranker-gguf/bin/python \
147 - scripts/benchmark_reranker_gguf_local.py --backend-name qwen3_gguf_06b --docs 400 147 + benchmarks/reranker/benchmark_reranker_gguf_local.py --backend-name qwen3_gguf_06b --docs 400
148 ``` 148 ```
149 149
150 Start as a service: 150 Start as a service:
reranker/GGUF_INSTALL_AND_TUNING.md
@@ -117,7 +117,7 @@ HF_HUB_DISABLE_XET=1 @@ -117,7 +117,7 @@ HF_HUB_DISABLE_XET=1
117 117
118 ```bash 118 ```bash
119 PYTHONPATH=/data/saas-search ./.venv-reranker-gguf/bin/python \ 119 PYTHONPATH=/data/saas-search ./.venv-reranker-gguf/bin/python \
120 - scripts/benchmark_reranker_gguf_local.py --docs 64 --repeat 1 120 + benchmarks/reranker/benchmark_reranker_gguf_local.py --docs 64 --repeat 1
121 ``` 121 ```
122 122
123 It instantiates the GGUF backend directly and prints: 123 It instantiates the GGUF backend directly and prints:
@@ -134,7 +134,7 @@ PYTHONPATH=/data/saas-search ./.venv-reranker-gguf/bin/python \ @@ -134,7 +134,7 @@ PYTHONPATH=/data/saas-search ./.venv-reranker-gguf/bin/python \
134 134
135 - Query: `白色oversized T-shirt` 135 - Query: `白色oversized T-shirt`
136 - Docs: `64` product titles 136 - Docs: `64` product titles
137 -- Local script: `scripts/benchmark_reranker_gguf_local.py` 137 +- Local script: `benchmarks/reranker/benchmark_reranker_gguf_local.py`
138 - 1 run per group; the focus is on relative trends 138 - 1 run per group; the focus is on relative trends
139 139
140 Results: 140 Results:
@@ -195,7 +195,7 @@ n_gpu_layers=999 @@ -195,7 +195,7 @@ n_gpu_layers=999
195 195
196 ```bash 196 ```bash
197 RERANK_BASE=http://127.0.0.1:6007 \ 197 RERANK_BASE=http://127.0.0.1:6007 \
198 - ./.venv/bin/python scripts/benchmark_reranker_random_titles.py 64 --repeat 1 --query '白色oversized T-shirt' 198 + ./.venv/bin/python benchmarks/reranker/benchmark_reranker_random_titles.py 64 --repeat 1 --query '白色oversized T-shirt'
199 ``` 199 ```
200 200
201 Output: 201 Output:
@@ -206,7 +206,7 @@ RERANK_BASE=http://127.0.0.1:6007 \ @@ -206,7 +206,7 @@ RERANK_BASE=http://127.0.0.1:6007 \
206 206
207 ```bash 207 ```bash
208 RERANK_BASE=http://127.0.0.1:6007 \ 208 RERANK_BASE=http://127.0.0.1:6007 \
209 - ./.venv/bin/python scripts/benchmark_reranker_random_titles.py 153 --repeat 1 --query '白色oversized T-shirt' 209 + ./.venv/bin/python benchmarks/reranker/benchmark_reranker_random_titles.py 153 --repeat 1 --query '白色oversized T-shirt'
210 ``` 210 ```
211 211
212 Output: 212 Output:
@@ -276,5 +276,5 @@ offload_kqv: true @@ -276,5 +276,5 @@ offload_kqv: true
276 - `config/config.yaml` 276 - `config/config.yaml`
277 - `scripts/setup_reranker_venv.sh` 277 - `scripts/setup_reranker_venv.sh`
278 - `scripts/start_reranker.sh` 278 - `scripts/start_reranker.sh`
279 -- `scripts/benchmark_reranker_gguf_local.py` 279 +- `benchmarks/reranker/benchmark_reranker_gguf_local.py`
280 - `reranker/GGUF_INSTALL_AND_TUNING.md` 280 - `reranker/GGUF_INSTALL_AND_TUNING.md`
reranker/README.md
@@ -46,9 +46,9 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Jin @@ -46,9 +46,9 @@ Reranker 服务提供统一的 `/rerank` API,支持可插拔后端(BGE、Jin
46 - `backends/dashscope_rerank.py`: DashScope cloud rerank backend 46 - `backends/dashscope_rerank.py`: DashScope cloud rerank backend
47 - `scripts/setup_reranker_venv.sh`: creates an isolated venv per backend 47 - `scripts/setup_reranker_venv.sh`: creates an isolated venv per backend
48 - `scripts/start_reranker.sh`: starts the reranker service 48 - `scripts/start_reranker.sh`: starts the reranker service
49 -- `scripts/smoke_qwen3_vllm_score_backend.py`: local smoke for `qwen3_vllm_score`
50 -- `scripts/benchmark_reranker_random_titles.py`: random-title benchmark script
51 -- `scripts/run_reranker_vllm_instruction_benchmark.sh`: historical matrix script 49 +- `benchmarks/reranker/smoke_qwen3_vllm_score_backend.py`: local smoke for `qwen3_vllm_score`
  50 +- `benchmarks/reranker/benchmark_reranker_random_titles.py`: random-title benchmark script
  51 +- `benchmarks/reranker/run_reranker_vllm_instruction_benchmark.sh`: historical matrix script
52 52
53 ## Environment baseline 53 ## Environment baseline
54 54
@@ -118,7 +118,7 @@ nvidia-smi @@ -118,7 +118,7 @@ nvidia-smi
118 ### 4. Smoke 118 ### 4. Smoke
119 119
120 ```bash 120 ```bash
121 -PYTHONPATH=. ./.venv-reranker-score/bin/python scripts/smoke_qwen3_vllm_score_backend.py --gpu-memory-utilization 0.2 121 +PYTHONPATH=. ./.venv-reranker-score/bin/python benchmarks/reranker/smoke_qwen3_vllm_score_backend.py --gpu-memory-utilization 0.2
122 ``` 122 ```
123 123
124 ## `jina_reranker_v3` 124 ## `jina_reranker_v3`
scripts/README.md 0 → 100644
@@ -0,0 +1,59 @@ @@ -0,0 +1,59 @@
  1 +# Scripts
  2 +
  3 +`scripts/` now keeps only the run, ops, environment-setup, and data-processing scripts that are still relevant under the current architecture, split into stable subdirectories by responsibility instead of continuing to pile everything flat in the root.
  4 +
  5 +## Current layout
  6 +
  7 +- Service orchestration
  8 + - `service_ctl.sh`
  9 + - `start_backend.sh`
  10 + - `start_indexer.sh`
  11 + - `start_frontend.sh`
  12 + - `start_eval_web.sh`
  13 + - `start_embedding_service.sh`
  14 + - `start_embedding_text_service.sh`
  15 + - `start_embedding_image_service.sh`
  16 + - `start_reranker.sh`
  17 + - `start_translator.sh`
  18 + - `start_tei_service.sh`
  19 + - `start_cnclip_service.sh`
  20 + - `stop.sh`
  21 + - `stop_tei_service.sh`
  22 + - `stop_cnclip_service.sh`
  23 + - `frontend/`
  24 + - `ops/`
  25 +
  26 +- Environment setup
  27 + - `create_venv.sh`
  28 + - `init_env.sh`
  29 + - `setup_embedding_venv.sh`
  30 + - `setup_reranker_venv.sh`
  31 + - `setup_translator_venv.sh`
  32 + - `setup_cnclip_venv.sh`
  33 +
  34 +- Data and indexing
  35 + - `create_tenant_index.sh`
  36 + - `build_suggestions.sh`
  37 + - `mock_data.sh`
  38 + - `data_import/`
  39 + - `inspect/`
  40 + - `maintenance/`
  41 +
  42 +- Evaluation and specialized tools
  43 + - `evaluation/`
  44 + - `redis/`
  45 + - `debug/`
  46 + - `translation/`
  47 +
  48 +## Migrated
  49 +
  50 +- Benchmark and smoke scripts: moved to `benchmarks/`
  51 +- Manual API trial scripts: moved to `tests/manual/`
  52 +
  53 +## Cleaned up
  54 +
  55 +- Historical backup directory: `indexer__old_2025_11/`
  56 +- Obsolete wrapper script: `start.sh`
  57 +- Conda-era leftover: `install_server_deps.sh`
  58 +
  59 +When adding scripts in the future, put them in a clearly scoped subdirectory first; do not drop benchmarks, manual scripts, or historical backups back into the root of `scripts/`.
scripts/data_import/README.md 0 → 100644
@@ -0,0 +1,13 @@ @@ -0,0 +1,13 @@
  1 +# Data Import Scripts
  2 +
  3 +These scripts convert external product data or CSV/XLSX samples into the Shoplazza import format.
  4 +
  5 +- `amazon_xlsx_to_shoplazza_xlsx.py`
  6 +- `competitor_xlsx_to_shoplazza_xlsx.py`
  7 +- `csv_to_excel.py`
  8 +- `csv_to_excel_multi_variant.py`
  9 +- `shoplazza_excel_template.py`
  10 +- `shoplazza_import_template.py`
  11 +- `tenant3_csv_to_shoplazza_xlsx.sh`
  12 +
  13 +These are offline data-conversion tools, not operational entry points for online services.
scripts/amazon_xlsx_to_shoplazza_xlsx.py renamed to scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py
@@ -35,9 +35,10 @@ from pathlib import Path @@ -35,9 +35,10 @@ from pathlib import Path
35 35
36 from openpyxl import load_workbook 36 from openpyxl import load_workbook
37 37
38 -# Allow running as `python scripts/xxx.py` without installing as a package  
39 -sys.path.insert(0, str(Path(__file__).resolve().parent))  
40 -from shoplazza_excel_template import create_excel_from_template_fast 38 +REPO_ROOT = Path(__file__).resolve().parents[2]
  39 +sys.path.insert(0, str(REPO_ROOT))
  40 +
  41 +from scripts.data_import.shoplazza_excel_template import create_excel_from_template_fast
41 42
42 43
43 PREFERRED_OPTION_KEYS = [ 44 PREFERRED_OPTION_KEYS = [
@@ -612,4 +613,3 @@ def main(): @@ -612,4 +613,3 @@ def main():
612 if __name__ == "__main__": 613 if __name__ == "__main__":
613 main() 614 main()
614 615
615 -  
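The relocated data-import scripts all switch to the same bootstrap pattern: instead of putting their own directory on `sys.path`, they put the repo root there and use absolute `scripts.data_import.` imports. The idea, generalized as an illustrative helper (not repo code):

```python
from pathlib import Path

def repo_root_for(script_path: str, levels_up: int = 2) -> str:
    """Walk `levels_up` directories above the script,
    e.g. <root>/scripts/data_import/x.py -> <root>."""
    return str(Path(script_path).resolve().parents[levels_up])

root = repo_root_for("/data/saas-search/scripts/data_import/csv_to_excel.py")
print(root)  # /data/saas-search
# Inserting `root` at sys.path[0] makes `from scripts.data_import...` imports
# resolve regardless of the current working directory.
```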
scripts/competitor_xlsx_to_shoplazza_xlsx.py renamed to scripts/data_import/competitor_xlsx_to_shoplazza_xlsx.py
@@ -6,7 +6,7 @@ The input `data/mai_jia_jing_ling/products_data/*.xlsx` files are Amazon-format @@ -6,7 +6,7 @@ The input `data/mai_jia_jing_ling/products_data/*.xlsx` files are Amazon-format
6 (Parent/Child ASIN), not “competitor data”. 6 (Parent/Child ASIN), not “competitor data”.
7 7
8 Please use: 8 Please use:
9 - - `scripts/amazon_xlsx_to_shoplazza_xlsx.py` 9 + - `scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py`
10 10
11 This wrapper simply forwards all CLI args to the correctly named script, so you 11 This wrapper simply forwards all CLI args to the correctly named script, so you
12 automatically get the latest performance improvements (fast read/write). 12 automatically get the latest performance improvements (fast read/write).
@@ -15,13 +15,12 @@ automatically get the latest performance improvements (fast read/write). @@ -15,13 +15,12 @@ automatically get the latest performance improvements (fast read/write).
15 import sys 15 import sys
16 from pathlib import Path 16 from pathlib import Path
17 17
18 -# Allow running as `python scripts/xxx.py` without installing as a package  
19 -sys.path.insert(0, str(Path(__file__).resolve().parent)) 18 +REPO_ROOT = Path(__file__).resolve().parents[2]
  19 +sys.path.insert(0, str(REPO_ROOT))
20 20
21 -from amazon_xlsx_to_shoplazza_xlsx import main as amazon_main 21 +from scripts.data_import.amazon_xlsx_to_shoplazza_xlsx import main as amazon_main
22 22
23 23
24 if __name__ == "__main__": 24 if __name__ == "__main__":
25 amazon_main() 25 amazon_main()
26 26
27 -  
scripts/csv_to_excel.py renamed to scripts/data_import/csv_to_excel.py
@@ -22,12 +22,12 @@ from openpyxl import load_workbook @@ -22,12 +22,12 @@ from openpyxl import load_workbook
22 from openpyxl.styles import Font, Alignment 22 from openpyxl.styles import Font, Alignment
23 from openpyxl.utils import get_column_letter 23 from openpyxl.utils import get_column_letter
24 24
25 -# Shared helpers (keeps template writing consistent across scripts)  
26 -from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared  
27 -from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared 25 +REPO_ROOT = Path(__file__).resolve().parents[2]
  26 +sys.path.insert(0, str(REPO_ROOT))
28 27
29 -# Add parent directory to path  
30 -sys.path.insert(0, str(Path(__file__).parent.parent)) 28 +# Shared helpers (keeps template writing consistent across scripts)
  29 +from scripts.data_import.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared
  30 +from scripts.data_import.shoplazza_import_template import generate_handle as _generate_handle_shared
31 31
32 32
33 def clean_value(value): 33 def clean_value(value):
@@ -299,4 +299,3 @@ def main(): @@ -299,4 +299,3 @@ def main():
299 299
300 if __name__ == '__main__': 300 if __name__ == '__main__':
301 main() 301 main()
302 -  
scripts/csv_to_excel_multi_variant.py renamed to scripts/data_import/csv_to_excel_multi_variant.py
@@ -22,12 +22,12 @@ import itertools @@ -22,12 +22,12 @@ import itertools
22 from openpyxl import load_workbook 22 from openpyxl import load_workbook
23 from openpyxl.styles import Alignment 23 from openpyxl.styles import Alignment
24 24
25 -# Shared helpers (keeps template writing consistent across scripts)  
26 -from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared  
27 -from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared 25 +REPO_ROOT = Path(__file__).resolve().parents[2]
  26 +sys.path.insert(0, str(REPO_ROOT))
28 27
29 -# Add parent directory to path  
30 -sys.path.insert(0, str(Path(__file__).parent.parent)) 28 +# Shared helpers (keeps template writing consistent across scripts)
  29 +from scripts.data_import.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared
  30 +from scripts.data_import.shoplazza_import_template import generate_handle as _generate_handle_shared
31 31
32 # Color definitions 32 # Color definitions
33 COLORS = [ 33 COLORS = [
@@ -562,4 +562,3 @@ def main(): @@ -562,4 +562,3 @@ def main():
562 562
563 if __name__ == '__main__': 563 if __name__ == '__main__':
564 main() 564 main()
565 -  
scripts/shoplazza_excel_template.py renamed to scripts/data_import/shoplazza_excel_template.py
scripts/shoplazza_import_template.py renamed to scripts/data_import/shoplazza_import_template.py
scripts/tenant3__csv_to_shoplazza_xlsx.sh renamed to scripts/data_import/tenant3_csv_to_shoplazza_xlsx.sh
@@ -5,16 +5,16 @@ cd "$(dirname "$0")/.." @@ -5,16 +5,16 @@
5 source ./activate.sh 5 source ./activate.sh
6 6
7 # # Basic usage (generate all data) 7 # # Basic usage (generate all data)
8 -# python scripts/csv_to_excel.py 8 +# python scripts/data_import/csv_to_excel.py
9 9
10 # # Specify the output file 10 # # Specify the output file
11 -# python scripts/csv_to_excel.py --output tenant3_imports.xlsx 11 +# python scripts/data_import/csv_to_excel.py --output tenant3_imports.xlsx
12 12
13 # # Limit the number of rows processed (for testing) 13 # # Limit the number of rows processed (for testing)
14 -# python scripts/csv_to_excel.py --limit 100 14 +# python scripts/data_import/csv_to_excel.py --limit 100
15 15
16 # Specify the CSV file and template file 16 # Specify the CSV file and template file
17 -python scripts/csv_to_excel.py \ 17 +python scripts/data_import/csv_to_excel.py \
18 --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ 18 --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \
19 --template docs/商品导入模板.xlsx \ 19 --template docs/商品导入模板.xlsx \
20 - --output tenant3_imports.xlsx  
21 \ No newline at end of file 20 \ No newline at end of file
  21 + --output tenant3_imports.xlsx
scripts/trace_indexer_calls.sh renamed to scripts/debug/trace_indexer_calls.sh
1 #!/bin/bash 1 #!/bin/bash
2 # 2 #
3 # Script for tracing who is calling the indexer service 3 # Script for tracing who is calling the indexer service
4 -# Usage: ./scripts/trace_indexer_calls.sh 4 +# Usage: ./scripts/debug/trace_indexer_calls.sh
5 # 5 #
6 6
7 set -euo pipefail 7 set -euo pipefail
scripts/download_translation_models.py 100755 → 100644
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 -"""Download local translation models declared in services.translation.capabilities.""" 2 +"""Backward-compatible entrypoint for translation model downloads."""
3 3
4 from __future__ import annotations 4 from __future__ import annotations
5 5
6 -import argparse  
7 -import os 6 +import runpy
8 from pathlib import Path 7 from pathlib import Path
9 -import shutil  
10 -import subprocess  
11 -import sys  
12 -from typing import Iterable  
13 -  
14 -from huggingface_hub import snapshot_download  
15 -  
16 -PROJECT_ROOT = Path(__file__).resolve().parent.parent  
17 -if str(PROJECT_ROOT) not in sys.path:  
18 - sys.path.insert(0, str(PROJECT_ROOT))  
19 -os.environ.setdefault("HF_HUB_DISABLE_XET", "1")  
20 -  
21 -from config.services_config import get_translation_config  
22 -  
23 -  
24 -LOCAL_BACKENDS = {"local_nllb", "local_marian"}  
25 -  
26 -  
27 -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:  
28 - cfg = get_translation_config()  
29 - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}  
30 - for name, capability in capabilities.items():  
31 - backend = str(capability.get("backend") or "").strip().lower()  
32 - if backend not in LOCAL_BACKENDS:  
33 - continue  
34 - if selected and name not in selected:  
35 - continue  
36 - yield name, capability  
37 -  
38 -  
39 -def _compute_ct2_output_dir(capability: dict) -> Path:  
40 - custom = str(capability.get("ct2_model_dir") or "").strip()  
41 - if custom:  
42 - return Path(custom).expanduser()  
43 - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()  
44 - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower()  
45 - normalized = compute_type.replace("_", "-")  
46 - return model_dir / f"ctranslate2-{normalized}"  
47 -  
48 -  
49 -def _resolve_converter_binary() -> str:  
50 - candidate = shutil.which("ct2-transformers-converter")  
51 - if candidate:  
52 - return candidate  
53 - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"  
54 - if venv_candidate.exists():  
55 - return str(venv_candidate)  
56 - raise RuntimeError(  
57 - "ct2-transformers-converter was not found. "  
58 - "Install ctranslate2 in the active Python environment first."  
59 - )  
60 -  
61 -  
62 -def convert_to_ctranslate2(name: str, capability: dict) -> None:  
63 - model_id = str(capability.get("model_id") or "").strip()  
64 - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()  
65 - model_source = str(model_dir if model_dir.exists() else model_id)  
66 - output_dir = _compute_ct2_output_dir(capability)  
67 - if (output_dir / "model.bin").exists():  
68 - print(f"[skip-convert] {name} -> {output_dir}")  
69 - return  
70 - quantization = str(  
71 - capability.get("ct2_conversion_quantization")  
72 - or capability.get("ct2_compute_type")  
73 - or capability.get("torch_dtype")  
74 - or "default"  
75 - ).strip()  
76 - output_dir.parent.mkdir(parents=True, exist_ok=True)  
77 - print(f"[convert] {name} -> {output_dir} ({quantization})")  
78 - subprocess.run(  
79 - [  
80 - _resolve_converter_binary(),  
81 - "--model",  
82 - model_source,  
83 - "--output_dir",  
84 - str(output_dir),  
85 - "--quantization",  
86 - quantization,  
87 - ],  
88 - check=True,  
89 - )  
90 - print(f"[converted] {name}")  
91 -  
92 -  
93 -def main() -> None:  
94 - parser = argparse.ArgumentParser(description="Download local translation models")  
95 - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")  
96 - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")  
97 - parser.add_argument(  
98 - "--convert-ctranslate2",  
99 - action="store_true",  
100 - help="Also convert the downloaded Hugging Face models into CTranslate2 format",  
101 - )  
102 - args = parser.parse_args()  
103 -  
104 - selected = {item.strip().lower() for item in args.models if item.strip()} or None  
105 - if not args.all_local and not selected:  
106 - parser.error("pass --all-local or --models <name> ...")  
107 -  
108 - for name, capability in iter_local_capabilities(selected):  
109 - model_id = str(capability.get("model_id") or "").strip()  
110 - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()  
111 - if not model_id or not model_dir:  
112 - raise ValueError(f"Capability '{name}' must define model_id and model_dir")  
113 - model_dir.parent.mkdir(parents=True, exist_ok=True)  
114 - print(f"[download] {name} -> {model_dir} ({model_id})")  
115 - snapshot_download(  
116 - repo_id=model_id,  
117 - local_dir=str(model_dir),  
118 - )  
119 - print(f"[done] {name}")  
120 - if args.convert_ctranslate2:  
121 - convert_to_ctranslate2(name, capability)  
122 8
123 9
124 if __name__ == "__main__": 10 if __name__ == "__main__":
125 - main() 11 + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py"
  12 + runpy.run_path(str(target), run_name="__main__")
scripts/evaluation/README.md
@@ -127,8 +127,8 @@ This framework now follows graded ranking evaluation closer to e-commerce best p @@ -127,8 +127,8 @@ This framework now follows graded ranking evaluation closer to e-commerce best p
127 - **Composite tuning score: `Primary_Metric_Score`** 127 - **Composite tuning score: `Primary_Metric_Score`**
128 For experiment ranking we compute the mean of the primary scorecard after normalizing `Avg_Grade@10` by the max grade (`3`). 128 For experiment ranking we compute the mean of the primary scorecard after normalizing `Avg_Grade@10` by the max grade (`3`).
129 - **Gain scheme** 129 - **Gain scheme**
130 - `Fully Relevant=7`, `Mostly Relevant=3`, `Weakly Relevant=1`, `Irrelevant=0`  
131 - The gains come from rel grades `3/2/1/0` with `gain = 2^rel - 1`, a standard `NDCG` setup. 130 + `Fully Relevant=3`, `Mostly Relevant=2`, `Weakly Relevant=1`, `Irrelevant=0`
  131 + We keep the rel grades `3/2/1/0`, but the current implementation uses the grade values directly as gains, so the gap between `Fully Relevant` and `Mostly Relevant` is less aggressive than under the standard `2^rel - 1` scheme.
132 - **Why this is better** 132 - **Why this is better**
133 `NDCG` differentiates “exact”, “strong substitute”, and “weak substitute”, so swapping an `Fully Relevant` with a `Weakly Relevant` item is penalized more than swapping `Mostly Relevant` with `Weakly Relevant`. 133 `NDCG` differentiates “exact”, “strong substitute”, and “weak substitute”, so swapping an `Fully Relevant` with a `Weakly Relevant` item is penalized more than swapping `Mostly Relevant` with `Weakly Relevant`.
134 134
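The effect of the linear-gain change described above can be sanity-checked with a small standalone sketch. The grade values (`3/2/1/0`) and label names come from this diff; the `GRADES`, `dcg`, and `ndcg` helpers below are hypothetical illustrations, not the framework's own code:

```python
import math

# Grade values from the evaluation constants: under the linear scheme the
# gain IS the grade; under the standard NDCG scheme gain = 2^rel - 1.
GRADES = {"Fully Relevant": 3, "Mostly Relevant": 2,
          "Weakly Relevant": 1, "Irrelevant": 0}

def dcg(gains):
    # Discounted cumulative gain with the usual log2(rank + 1) discount.
    return sum(g / math.log2(i + 2) for i, g in enumerate(gains))

def ndcg(labels, gain_fn):
    gains = [gain_fn(GRADES[label]) for label in labels]
    ideal = sorted(gains, reverse=True)
    return dcg(gains) / dcg(ideal) if dcg(ideal) else 0.0

# One swap: the Fully Relevant item is ranked below a Mostly Relevant one.
ranking = ["Mostly Relevant", "Fully Relevant", "Weakly Relevant", "Irrelevant"]
linear = ndcg(ranking, lambda rel: rel)           # gains 3/2/1/0
exponential = ndcg(ranking, lambda rel: 2**rel - 1)  # gains 7/3/1/0
```

The same swap costs more under the exponential scheme than under the linear one, which is exactly the "less aggressive exact/high gap" trade-off the README describes.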
@@ -174,6 +174,22 @@ Features: query list from `queries.txt`, single-query and batch evaluation, batc @@ -174,6 +174,22 @@ Features: query list from `queries.txt`, single-query and batch evaluation, batc
174 174
175 Each run stores aggregate and per-query metrics, label distribution, timestamp, metric context (including gain scheme and primary metric), and an `/admin/config` snapshot, as Markdown and JSON under `batch_reports/`. 175 Each run stores aggregate and per-query metrics, label distribution, timestamp, metric context (including gain scheme and primary metric), and an `/admin/config` snapshot, as Markdown and JSON under `batch_reports/`.
176 176
  177 +To make later case analysis reproducible without digging through backend logs, each per-query record in the batch JSON now also includes:
  178 +
  179 +- `request_id` — the exact `X-Request-ID` sent by the evaluator for that live search call
  180 +- `top_label_sequence_top10` / `top_label_sequence_top20` — compact label sequence strings such as `1:L3 | 2:L1 | 3:L2`
  181 +- `top_results` — a lightweight snapshot of up to the top 20 results with `rank`, `spu_id`, `label`, title fields, and `relevance_score`
  182 +
  183 +The Markdown report now surfaces the same case context in a lighter human-readable form:
  184 +
  185 +- request id
  186 +- top-10 / top-20 label sequence
  187 +- top 5 result snapshot for quick scanning
  188 +
  189 +This means a bad case can usually be reconstructed directly from the batch artifact itself, without replaying logs or joining SQLite tables by hand.
  190 +
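Reconstructing a bad case from the artifact alone can be sketched as a few lines over the batch JSON. The field names (`per_query`, `request_id`, `top_label_sequence_top10`, `metrics`) follow this change; the exact metric key (`"NDCG@10"`) and the `worst_queries` helper are assumptions for illustration:

```python
import json

def worst_queries(batch_payload, metric_key="NDCG@10", n=3):
    """Rank per-query records by a chosen metric, worst first."""
    rows = batch_payload.get("per_query", [])
    ranked = sorted(rows, key=lambda r: (r.get("metrics") or {}).get(metric_key, 0.0))
    return [
        {
            "query": r.get("query"),
            "score": (r.get("metrics") or {}).get(metric_key),
            "request_id": r.get("request_id"),
            "labels": r.get("top_label_sequence_top10"),
        }
        for r in ranked[:n]
    ]

# In practice this payload would be json.load()-ed from batch_reports/.
payload = {
    "per_query": [
        {"query": "遥控车", "metrics": {"NDCG@10": 0.91}, "request_id": "a1b2c3d4",
         "top_label_sequence_top10": "1:L3 | 2:L2 | 3:L1"},
        {"query": "喷雾玩具", "metrics": {"NDCG@10": 0.42}, "request_id": "e5f6a7b8",
         "top_label_sequence_top10": "1:L1 | 2:L0 | 3:L0"},
    ]
}
print(json.dumps(worst_queries(payload, n=1), ensure_ascii=False, indent=2))
```

The `request_id` from each row can then be grepped against backend logs directly, without joining SQLite tables by hand.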
  191 +The web history endpoint intentionally returns a compact summary only (aggregate metrics plus query count), so adding richer per-query snapshots to the batch payload does not bloat the history list UI.
  192 +
177 ## Ranking debug and LTR prep 193 ## Ranking debug and LTR prep
178 194
179 `debug_info` now exposes two extra layers that are useful for tuning and future learning-to-rank work: 195 `debug_info` now exposes two extra layers that are useful for tuning and future learning-to-rank work:
scripts/evaluation/eval_framework/__init__.py
@@ -14,10 +14,10 @@ from .constants import ( # noqa: E402 @@ -14,10 +14,10 @@ from .constants import ( # noqa: E402
14 DEFAULT_ARTIFACT_ROOT, 14 DEFAULT_ARTIFACT_ROOT,
15 DEFAULT_QUERY_FILE, 15 DEFAULT_QUERY_FILE,
16 PROJECT_ROOT, 16 PROJECT_ROOT,
17 - RELEVANCE_EXACT,  
18 - RELEVANCE_HIGH,  
19 - RELEVANCE_IRRELEVANT,  
20 - RELEVANCE_LOW, 17 + RELEVANCE_LV0,
  18 + RELEVANCE_LV1,
  19 + RELEVANCE_LV2,
  20 + RELEVANCE_LV3,
21 RELEVANCE_NON_IRRELEVANT, 21 RELEVANCE_NON_IRRELEVANT,
22 VALID_LABELS, 22 VALID_LABELS,
23 ) 23 )
@@ -39,10 +39,10 @@ __all__ = [ @@ -39,10 +39,10 @@ __all__ = [
39 "EvalStore", 39 "EvalStore",
40 "PROJECT_ROOT", 40 "PROJECT_ROOT",
41 "QueryBuildResult", 41 "QueryBuildResult",
42 - "RELEVANCE_EXACT",  
43 - "RELEVANCE_HIGH",  
44 - "RELEVANCE_IRRELEVANT",  
45 - "RELEVANCE_LOW", 42 + "RELEVANCE_LV0",
  43 + "RELEVANCE_LV1",
  44 + "RELEVANCE_LV2",
  45 + "RELEVANCE_LV3",
46 "RELEVANCE_NON_IRRELEVANT", 46 "RELEVANCE_NON_IRRELEVANT",
47 "SearchEvaluationFramework", 47 "SearchEvaluationFramework",
48 "VALID_LABELS", 48 "VALID_LABELS",
scripts/evaluation/eval_framework/clients.py
@@ -157,6 +157,7 @@ class SearchServiceClient: @@ -157,6 +157,7 @@ class SearchServiceClient:
157 return self._request_json("GET", path, timeout=timeout) 157 return self._request_json("GET", path, timeout=timeout)
158 158
159 def search(self, query: str, size: int, from_: int = 0, language: str = "en", *, debug: bool = False) -> Dict[str, Any]: 159 def search(self, query: str, size: int, from_: int = 0, language: str = "en", *, debug: bool = False) -> Dict[str, Any]:
  160 + request_id = uuid.uuid4().hex[:8]
160 payload: Dict[str, Any] = { 161 payload: Dict[str, Any] = {
161 "query": query, 162 "query": query,
162 "size": size, 163 "size": size,
@@ -165,13 +166,19 @@ class SearchServiceClient: @@ -165,13 +166,19 @@ class SearchServiceClient:
165 } 166 }
166 if debug: 167 if debug:
167 payload["debug"] = True 168 payload["debug"] = True
168 - return self._request_json( 169 + response = self._request_json(
169 "POST", 170 "POST",
170 "/search/", 171 "/search/",
171 timeout=120, 172 timeout=120,
172 - headers={"Content-Type": "application/json", "X-Tenant-ID": self.tenant_id}, 173 + headers={
  174 + "Content-Type": "application/json",
  175 + "X-Tenant-ID": self.tenant_id,
  176 + "X-Request-ID": request_id,
  177 + },
173 json_payload=payload, 178 json_payload=payload,
174 ) 179 )
  180 + response["_eval_request_id"] = request_id
  181 + return response
175 182
176 183
177 class RerankServiceClient: 184 class RerankServiceClient:
scripts/evaluation/eval_framework/constants.py
@@ -7,24 +7,24 @@ _SCRIPTS_EVAL_DIR = _PKG_DIR.parent @@ -7,24 +7,24 @@ _SCRIPTS_EVAL_DIR = _PKG_DIR.parent
7 PROJECT_ROOT = _SCRIPTS_EVAL_DIR.parents[1] 7 PROJECT_ROOT = _SCRIPTS_EVAL_DIR.parents[1]
8 8
9 # Canonical English labels (must match LLM prompt output in prompts._CLASSIFY_TEMPLATE_EN) 9 # Canonical English labels (must match LLM prompt output in prompts._CLASSIFY_TEMPLATE_EN)
10 -RELEVANCE_EXACT = "Fully Relevant"  
11 -RELEVANCE_HIGH = "Mostly Relevant"  
12 -RELEVANCE_LOW = "Weakly Relevant"  
13 -RELEVANCE_IRRELEVANT = "Irrelevant" 10 +RELEVANCE_LV3 = "Fully Relevant"
  11 +RELEVANCE_LV2 = "Mostly Relevant"
  12 +RELEVANCE_LV1 = "Weakly Relevant"
  13 +RELEVANCE_LV0 = "Irrelevant"
14 14
15 -VALID_LABELS = frozenset({RELEVANCE_EXACT, RELEVANCE_HIGH, RELEVANCE_LOW, RELEVANCE_IRRELEVANT}) 15 +VALID_LABELS = frozenset({RELEVANCE_LV3, RELEVANCE_LV2, RELEVANCE_LV1, RELEVANCE_LV0})
16 16
17 # Useful label sets for binary diagnostic slices layered on top of graded ranking metrics. 17 # Useful label sets for binary diagnostic slices layered on top of graded ranking metrics.
18 -RELEVANCE_NON_IRRELEVANT = frozenset({RELEVANCE_EXACT, RELEVANCE_HIGH, RELEVANCE_LOW})  
19 -RELEVANCE_STRONG = frozenset({RELEVANCE_EXACT, RELEVANCE_HIGH}) 18 +RELEVANCE_NON_IRRELEVANT = frozenset({RELEVANCE_LV3, RELEVANCE_LV2, RELEVANCE_LV1})
  19 +RELEVANCE_STRONG = frozenset({RELEVANCE_LV3, RELEVANCE_LV2})
20 20
21 # Graded relevance for ranking evaluation. 21 # Graded relevance for ranking evaluation.
22 # We use rel grades 3/2/1/0 and gain = 2^rel - 1, which is standard for NDCG-style metrics. 22 # We use rel grades 3/2/1/0 and gain = 2^rel - 1, which is standard for NDCG-style metrics.
23 RELEVANCE_GRADE_MAP = { 23 RELEVANCE_GRADE_MAP = {
24 - RELEVANCE_EXACT: 3,  
25 - RELEVANCE_HIGH: 2,  
26 - RELEVANCE_LOW: 1,  
27 - RELEVANCE_IRRELEVANT: 0, 24 + RELEVANCE_LV3: 3,
  25 + RELEVANCE_LV2: 2,
  26 + RELEVANCE_LV1: 1,
  27 + RELEVANCE_LV0: 0,
28 } 28 }
29 # Standard gain formula: 2^rel - 1 29 # Standard gain formula: 2^rel - 1
30 # But because annotation quality is not especially precise, we deliberately reduce the separation between exact and high 30 # But because annotation quality is not especially precise, we deliberately reduce the separation between exact and high
@@ -35,11 +35,12 @@ RELEVANCE_GAIN_MAP = { @@ -35,11 +35,12 @@ RELEVANCE_GAIN_MAP = {
35 } 35 }
36 36
37 # P(stop | relevance) for ERR (Expected Reciprocal Rank); cascade model (Chapelle et al., 2009). 37 # P(stop | relevance) for ERR (Expected Reciprocal Rank); cascade model (Chapelle et al., 2009).
  38 +# p(t) = (2^t - 1) / 2^{max_grade}; with max_grade = 3 this gives 0.875 / 0.375 / 0.125 / 0.0
38 STOP_PROB_MAP = { 39 STOP_PROB_MAP = {
39 - RELEVANCE_EXACT: 0.99,  
40 - RELEVANCE_HIGH: 0.8,  
41 - RELEVANCE_LOW: 0.1,  
42 - RELEVANCE_IRRELEVANT: 0.0, 40 + RELEVANCE_LV3: 0.875,
  41 + RELEVANCE_LV2: 0.375,
  42 + RELEVANCE_LV1: 0.125,
  43 + RELEVANCE_LV0: 0.0,
43 } 44 }
44 45
45 DEFAULT_ARTIFACT_ROOT = PROJECT_ROOT / "artifacts" / "search_evaluation" 46 DEFAULT_ARTIFACT_ROOT = PROJECT_ROOT / "artifacts" / "search_evaluation"
@@ -78,7 +79,7 @@ DEFAULT_REBUILD_MAX_LLM_BATCHES = 40 @@ -78,7 +79,7 @@ DEFAULT_REBUILD_MAX_LLM_BATCHES = 40
78 # A batch is "bad" when **both** hold (strict inequalities; see ``framework._annotate_rebuild_batches``): 79 # A batch is "bad" when **both** hold (strict inequalities; see ``framework._annotate_rebuild_batches``):
79 # - irrelevant_ratio > DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO (default 93.9%), 80 # - irrelevant_ratio > DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO (default 93.9%),
80 # - (Irrelevant + Weakly Relevant) / n > DEFAULT_REBUILD_IRREL_LOW_COMBINED_STOP_RATIO (default 95.9%). 81 # - (Irrelevant + Weakly Relevant) / n > DEFAULT_REBUILD_IRREL_LOW_COMBINED_STOP_RATIO (default 95.9%).
81 -# ``irrelevant_ratio`` = Irrelevant count / n; weak relevance is ``RELEVANCE_LOW`` ("Weakly Relevant"). 82 +# ``irrelevant_ratio`` = Irrelevant count / n; weak relevance is ``RELEVANCE_LV1`` ("Weakly Relevant").
82 # Increment streak on consecutive bad batches; reset on any non-bad batch. Stop when streak 83 # Increment streak on consecutive bad batches; reset on any non-bad batch. Stop when streak
83 # reaches ``DEFAULT_REBUILD_IRRELEVANT_STOP_STREAK`` (default 3). 84 # reaches ``DEFAULT_REBUILD_IRRELEVANT_STOP_STREAK`` (default 3).
84 DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO = 0.799 85 DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO = 0.799
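The new `STOP_PROB_MAP` values plug into ERR's cascade model. A minimal sketch, using the stop probabilities from this diff (the `expected_reciprocal_rank` helper is hypothetical, not the framework's implementation):

```python
# P(stop | relevance) from constants.py: p(t) = (2^t - 1) / 2^max_grade, max_grade = 3.
STOP_PROB = {"Fully Relevant": 0.875, "Mostly Relevant": 0.375,
             "Weakly Relevant": 0.125, "Irrelevant": 0.0}

def expected_reciprocal_rank(labels):
    """ERR under the cascade model (Chapelle et al., 2009)."""
    err = 0.0
    p_continue = 1.0  # probability the user scans down to this rank
    for rank, label in enumerate(labels, start=1):
        p_stop = STOP_PROB.get(label, 0.0)
        err += p_continue * p_stop / rank
        p_continue *= 1.0 - p_stop
    return err

err_good = expected_reciprocal_rank(["Fully Relevant", "Mostly Relevant", "Weakly Relevant"])
err_bad = expected_reciprocal_rank(["Weakly Relevant", "Irrelevant", "Fully Relevant"])
# A Fully Relevant hit at rank 1 dominates the score.
```

Compared with the previous hand-picked probabilities (0.99 / 0.8 / 0.1 / 0.0), the derived values lower the stop probability for `Mostly Relevant` substantially, so ERR now leans harder on getting a fully relevant item to rank 1.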
scripts/evaluation/eval_framework/framework.py
@@ -25,14 +25,14 @@ from .constants import ( @@ -25,14 +25,14 @@ from .constants import (
25 DEFAULT_RERANK_HIGH_SKIP_COUNT, 25 DEFAULT_RERANK_HIGH_SKIP_COUNT,
26 DEFAULT_RERANK_HIGH_THRESHOLD, 26 DEFAULT_RERANK_HIGH_THRESHOLD,
27 DEFAULT_SEARCH_RECALL_TOP_K, 27 DEFAULT_SEARCH_RECALL_TOP_K,
28 - RELEVANCE_EXACT,  
29 RELEVANCE_GAIN_MAP, 28 RELEVANCE_GAIN_MAP,
30 - RELEVANCE_HIGH,  
31 - STOP_PROB_MAP,  
32 - RELEVANCE_IRRELEVANT,  
33 - RELEVANCE_LOW, 29 + RELEVANCE_LV0,
  30 + RELEVANCE_LV1,
  31 + RELEVANCE_LV2,
  32 + RELEVANCE_LV3,
34 RELEVANCE_NON_IRRELEVANT, 33 RELEVANCE_NON_IRRELEVANT,
35 VALID_LABELS, 34 VALID_LABELS,
  35 + STOP_PROB_MAP,
36 ) 36 )
37 from .metrics import ( 37 from .metrics import (
38 PRIMARY_METRIC_GRADE_NORMALIZER, 38 PRIMARY_METRIC_GRADE_NORMALIZER,
@@ -96,6 +96,16 @@ def _zh_titles_from_debug_per_result(debug_info: Any) -&gt; Dict[str, str]: @@ -96,6 +96,16 @@ def _zh_titles_from_debug_per_result(debug_info: Any) -&gt; Dict[str, str]:
96 return out 96 return out
97 97
98 98
  99 +def _encode_label_sequence(items: Sequence[Dict[str, Any]], limit: int) -> str:
  100 + parts: List[str] = []
  101 + for item in items[:limit]:
  102 + rank = int(item.get("rank") or 0)
  103 + label = str(item.get("label") or "")
  104 + grade = RELEVANCE_GAIN_MAP.get(label)
  105 + parts.append(f"{rank}:L{grade}" if grade is not None else f"{rank}:?")
  106 + return " | ".join(parts)
  107 +
  108 +
99 class SearchEvaluationFramework: 109 class SearchEvaluationFramework:
100 def __init__( 110 def __init__(
101 self, 111 self,
@@ -168,7 +178,7 @@ class SearchEvaluationFramework: @@ -168,7 +178,7 @@ class SearchEvaluationFramework:
168 ) -> Dict[str, Any]: 178 ) -> Dict[str, Any]:
169 live = self.evaluate_live_query(query=query, top_k=top_k, auto_annotate=auto_annotate, language=language) 179 live = self.evaluate_live_query(query=query, top_k=top_k, auto_annotate=auto_annotate, language=language)
170 labels = [ 180 labels = [
171 - item["label"] if item["label"] in VALID_LABELS else RELEVANCE_IRRELEVANT 181 + item["label"] if item["label"] in VALID_LABELS else RELEVANCE_LV0
172 for item in live["results"] 182 for item in live["results"]
173 ] 183 ]
174 return { 184 return {
@@ -432,7 +442,7 @@ class SearchEvaluationFramework: @@ -432,7 +442,7 @@ class SearchEvaluationFramework:
432 442
433 - ``#(Irrelevant)/n > irrelevant_stop_ratio`` (default 0.939), and 443 - ``#(Irrelevant)/n > irrelevant_stop_ratio`` (default 0.939), and
434 - ``( #(Irrelevant) + #(Weakly Relevant) ) / n > irrelevant_low_combined_stop_ratio`` 444 - ``( #(Irrelevant) + #(Weakly Relevant) ) / n > irrelevant_low_combined_stop_ratio``
435 - (default 0.959; weak relevance = ``RELEVANCE_LOW``). 445 + (default 0.959; weak relevance = ``RELEVANCE_LV1``).
436 446
437 Maintain a streak of consecutive *bad* batches; any non-bad batch resets the streak to 0. 447 Maintain a streak of consecutive *bad* batches; any non-bad batch resets the streak to 0.
438 Stop labeling when ``streak >= stop_streak`` (default 3) or when ``max_batches`` is reached 448 Stop labeling when ``streak >= stop_streak`` (default 3) or when ``max_batches`` is reached
@@ -474,9 +484,9 @@ class SearchEvaluationFramework: @@ -474,9 +484,9 @@ class SearchEvaluationFramework:
474 time.sleep(0.1) 484 time.sleep(0.1)
475 485
476 n = len(batch_docs) 486 n = len(batch_docs)
477 - exact_n = sum(1 for doc in batch_docs if labels.get(str(doc.get("spu_id"))) == RELEVANCE_EXACT)  
478 - irrel_n = sum(1 for doc in batch_docs if labels.get(str(doc.get("spu_id"))) == RELEVANCE_IRRELEVANT)  
479 - low_n = sum(1 for doc in batch_docs if labels.get(str(doc.get("spu_id"))) == RELEVANCE_LOW) 487 + exact_n = sum(1 for doc in batch_docs if labels.get(str(doc.get("spu_id"))) == RELEVANCE_LV3)
  488 + irrel_n = sum(1 for doc in batch_docs if labels.get(str(doc.get("spu_id"))) == RELEVANCE_LV0)
  489 + low_n = sum(1 for doc in batch_docs if labels.get(str(doc.get("spu_id"))) == RELEVANCE_LV1)
480 exact_ratio = exact_n / n if n else 0.0 490 exact_ratio = exact_n / n if n else 0.0
481 irrelevant_ratio = irrel_n / n if n else 0.0 491 irrelevant_ratio = irrel_n / n if n else 0.0
482 low_ratio = low_n / n if n else 0.0 492 low_ratio = low_n / n if n else 0.0
@@ -633,7 +643,7 @@ class SearchEvaluationFramework: @@ -633,7 +643,7 @@ class SearchEvaluationFramework:
633 ) 643 )
634 644
635 top100_labels = [ 645 top100_labels = [
636 - item["label"] if item["label"] in VALID_LABELS else RELEVANCE_IRRELEVANT 646 + item["label"] if item["label"] in VALID_LABELS else RELEVANCE_LV0
637 for item in search_labeled_results[:100] 647 for item in search_labeled_results[:100]
638 ] 648 ]
639 metrics = compute_query_metrics(top100_labels, ideal_labels=list(labels.values())) 649 metrics = compute_query_metrics(top100_labels, ideal_labels=list(labels.values()))
@@ -843,7 +853,7 @@ class SearchEvaluationFramework: @@ -843,7 +853,7 @@ class SearchEvaluationFramework:
843 ) 853 )
844 854
845 top100_labels = [ 855 top100_labels = [
846 - item["label"] if item["label"] in VALID_LABELS else RELEVANCE_IRRELEVANT 856 + item["label"] if item["label"] in VALID_LABELS else RELEVANCE_LV0
847 for item in search_labeled_results[:100] 857 for item in search_labeled_results[:100]
848 ] 858 ]
849 metrics = compute_query_metrics(top100_labels, ideal_labels=list(labels.values())) 859 metrics = compute_query_metrics(top100_labels, ideal_labels=list(labels.values()))
@@ -920,16 +930,17 @@ class SearchEvaluationFramework: @@ -920,16 +930,17 @@ class SearchEvaluationFramework:
920 "title_zh": title_zh if title_zh and title_zh != primary_title else "", 930 "title_zh": title_zh if title_zh and title_zh != primary_title else "",
921 "image_url": doc.get("image_url"), 931 "image_url": doc.get("image_url"),
922 "label": label, 932 "label": label,
  933 + "relevance_score": doc.get("relevance_score"),
923 "option_values": list(compact_option_values(doc.get("skus") or [])), 934 "option_values": list(compact_option_values(doc.get("skus") or [])),
924 "product": compact_product_payload(doc), 935 "product": compact_product_payload(doc),
925 } 936 }
926 ) 937 )
927 metric_labels = [ 938 metric_labels = [
928 - item["label"] if item["label"] in VALID_LABELS else RELEVANCE_IRRELEVANT 939 + item["label"] if item["label"] in VALID_LABELS else RELEVANCE_LV0
929 for item in labeled 940 for item in labeled
930 ] 941 ]
931 ideal_labels = [ 942 ideal_labels = [
932 - label if label in VALID_LABELS else RELEVANCE_IRRELEVANT 943 + label if label in VALID_LABELS else RELEVANCE_LV0
933 for label in labels.values() 944 for label in labels.values()
934 ] 945 ]
935 label_stats = self.store.get_query_label_stats(self.tenant_id, query) 946 label_stats = self.store.get_query_label_stats(self.tenant_id, query)
@@ -960,10 +971,10 @@ class SearchEvaluationFramework: @@ -960,10 +971,10 @@ class SearchEvaluationFramework:
960 } 971 }
961 ) 972 )
962 label_order = { 973 label_order = {
963 - RELEVANCE_EXACT: 0,  
964 - RELEVANCE_HIGH: 1,  
965 - RELEVANCE_LOW: 2,  
966 - RELEVANCE_IRRELEVANT: 3, 974 + RELEVANCE_LV3: 0,
  975 + RELEVANCE_LV2: 1,
  976 + RELEVANCE_LV1: 2,
  977 + RELEVANCE_LV0: 3,
967 } 978 }
968 missing_relevant.sort( 979 missing_relevant.sort(
969 key=lambda item: ( 980 key=lambda item: (
@@ -989,6 +1000,7 @@ class SearchEvaluationFramework: @@ -989,6 +1000,7 @@ class SearchEvaluationFramework:
989 "top_k": top_k, 1000 "top_k": top_k,
990 "metrics": compute_query_metrics(metric_labels, ideal_labels=ideal_labels), 1001 "metrics": compute_query_metrics(metric_labels, ideal_labels=ideal_labels),
991 "metric_context": _metric_context_payload(), 1002 "metric_context": _metric_context_payload(),
  1003 + "request_id": str(search_payload.get("_eval_request_id") or ""),
992 "results": labeled, 1004 "results": labeled,
993 "missing_relevant": missing_relevant, 1005 "missing_relevant": missing_relevant,
994 "label_stats": { 1006 "label_stats": {
@@ -996,9 +1008,9 @@ class SearchEvaluationFramework: @@ -996,9 +1008,9 @@ class SearchEvaluationFramework:
996 "unlabeled_hits_treated_irrelevant": unlabeled_hits, 1008 "unlabeled_hits_treated_irrelevant": unlabeled_hits,
997 "recalled_hits": len(labeled), 1009 "recalled_hits": len(labeled),
998 "missing_relevant_count": len(missing_relevant), 1010 "missing_relevant_count": len(missing_relevant),
999 - "missing_exact_count": sum(1 for item in missing_relevant if item["label"] == RELEVANCE_EXACT),  
1000 - "missing_high_count": sum(1 for item in missing_relevant if item["label"] == RELEVANCE_HIGH),  
1001 - "missing_low_count": sum(1 for item in missing_relevant if item["label"] == RELEVANCE_LOW), 1011 + "missing_exact_count": sum(1 for item in missing_relevant if item["label"] == RELEVANCE_LV3),
  1012 + "missing_high_count": sum(1 for item in missing_relevant if item["label"] == RELEVANCE_LV2),
  1013 + "missing_low_count": sum(1 for item in missing_relevant if item["label"] == RELEVANCE_LV1),
1002 }, 1014 },
1003 "tips": tips, 1015 "tips": tips,
1004 "total": int(search_payload.get("total") or 0), 1016 "total": int(search_payload.get("total") or 0),
@@ -1014,6 +1026,7 @@ class SearchEvaluationFramework: @@ -1014,6 +1026,7 @@ class SearchEvaluationFramework:
1014 force_refresh_labels: bool = False, 1026 force_refresh_labels: bool = False,
1015 ) -> Dict[str, Any]: 1027 ) -> Dict[str, Any]:
1016 per_query = [] 1028 per_query = []
  1029 + case_snapshot_top_n = min(max(int(top_k), 1), 20)
1017 total_q = len(queries) 1030 total_q = len(queries)
1018 _log.info("[batch-eval] starting %s queries top_k=%s auto_annotate=%s", total_q, top_k, auto_annotate) 1031 _log.info("[batch-eval] starting %s queries top_k=%s auto_annotate=%s", total_q, top_k, auto_annotate)
1019 for q_index, query in enumerate(queries, start=1): 1032 for q_index, query in enumerate(queries, start=1):
@@ -1025,7 +1038,7 @@ class SearchEvaluationFramework: @@ -1025,7 +1038,7 @@ class SearchEvaluationFramework:
1025 force_refresh_labels=force_refresh_labels, 1038 force_refresh_labels=force_refresh_labels,
1026 ) 1039 )
1027 labels = [ 1040 labels = [
1028 - item["label"] if item["label"] in VALID_LABELS else RELEVANCE_IRRELEVANT 1041 + item["label"] if item["label"] in VALID_LABELS else RELEVANCE_LV0
1029 for item in live["results"] 1042 for item in live["results"]
1030 ] 1043 ]
1031 per_query.append( 1044 per_query.append(
@@ -1036,6 +1049,21 @@ class SearchEvaluationFramework: @@ -1036,6 +1049,21 @@ class SearchEvaluationFramework:
1036 "metrics": live["metrics"], 1049 "metrics": live["metrics"],
1037 "distribution": label_distribution(labels), 1050 "distribution": label_distribution(labels),
1038 "total": live["total"], 1051 "total": live["total"],
  1052 + "request_id": live.get("request_id") or "",
  1053 + "case_snapshot_top_n": case_snapshot_top_n,
  1054 + "top_label_sequence_top10": _encode_label_sequence(live["results"], 10),
  1055 + "top_label_sequence_top20": _encode_label_sequence(live["results"], case_snapshot_top_n),
  1056 + "top_results": [
  1057 + {
  1058 + "rank": int(item.get("rank") or 0),
  1059 + "spu_id": str(item.get("spu_id") or ""),
  1060 + "label": item.get("label"),
  1061 + "title": item.get("title"),
  1062 + "title_zh": item.get("title_zh"),
  1063 + "relevance_score": item.get("relevance_score"),
  1064 + }
  1065 + for item in live["results"][:case_snapshot_top_n]
  1066 + ],
1039 } 1067 }
1040 ) 1068 )
1041 m = live["metrics"] 1069 m = live["metrics"]
@@ -1055,10 +1083,10 @@ class SearchEvaluationFramework: @@ -1055,10 +1083,10 @@ class SearchEvaluationFramework:
1055 ) 1083 )
1056 aggregate = aggregate_metrics([item["metrics"] for item in per_query]) 1084 aggregate = aggregate_metrics([item["metrics"] for item in per_query])
1057 aggregate_distribution = { 1085 aggregate_distribution = {
1058 - RELEVANCE_EXACT: sum(item["distribution"][RELEVANCE_EXACT] for item in per_query),  
1059 - RELEVANCE_HIGH: sum(item["distribution"][RELEVANCE_HIGH] for item in per_query),  
1060 - RELEVANCE_LOW: sum(item["distribution"][RELEVANCE_LOW] for item in per_query),  
1061 - RELEVANCE_IRRELEVANT: sum(item["distribution"][RELEVANCE_IRRELEVANT] for item in per_query), 1086 + RELEVANCE_LV3: sum(item["distribution"][RELEVANCE_LV3] for item in per_query),
  1087 + RELEVANCE_LV2: sum(item["distribution"][RELEVANCE_LV2] for item in per_query),
  1088 + RELEVANCE_LV1: sum(item["distribution"][RELEVANCE_LV1] for item in per_query),
  1089 + RELEVANCE_LV0: sum(item["distribution"][RELEVANCE_LV0] for item in per_query),
1062 } 1090 }
1063 batch_id = f"batch_{utc_timestamp()}_{sha1_text(self.tenant_id + '|' + '|'.join(queries))[:10]}" 1091 batch_id = f"batch_{utc_timestamp()}_{sha1_text(self.tenant_id + '|' + '|'.join(queries))[:10]}"
1064 report_dir = ensure_dir(self.artifact_root / "batch_reports") 1092 report_dir = ensure_dir(self.artifact_root / "batch_reports")
scripts/evaluation/eval_framework/metrics.py
@@ -6,12 +6,12 @@ import math @@ -6,12 +6,12 @@ import math
6 from typing import Dict, Iterable, Sequence 6 from typing import Dict, Iterable, Sequence
7 7
8 from .constants import ( 8 from .constants import (
9 - RELEVANCE_EXACT,  
10 RELEVANCE_GAIN_MAP, 9 RELEVANCE_GAIN_MAP,
11 RELEVANCE_GRADE_MAP, 10 RELEVANCE_GRADE_MAP,
12 - RELEVANCE_HIGH,  
13 - RELEVANCE_IRRELEVANT,  
14 - RELEVANCE_LOW, 11 + RELEVANCE_LV0,
  12 + RELEVANCE_LV1,
  13 + RELEVANCE_LV2,
  14 + RELEVANCE_LV3,
15 RELEVANCE_NON_IRRELEVANT, 15 RELEVANCE_NON_IRRELEVANT,
16 RELEVANCE_STRONG, 16 RELEVANCE_STRONG,
17 STOP_PROB_MAP, 17 STOP_PROB_MAP,
@@ -33,7 +33,7 @@ PRIMARY_METRIC_GRADE_NORMALIZER = float(max(RELEVANCE_GRADE_MAP.values()) or 1.0 @@ -33,7 +33,7 @@ PRIMARY_METRIC_GRADE_NORMALIZER = float(max(RELEVANCE_GRADE_MAP.values()) or 1.0
33 def _normalize_label(label: str) -> str: 33 def _normalize_label(label: str) -> str:
34 if label in RELEVANCE_GRADE_MAP: 34 if label in RELEVANCE_GRADE_MAP:
35 return label 35 return label
36 - return RELEVANCE_IRRELEVANT 36 + return RELEVANCE_LV0
37 37
38 38
39 def _gains_for_labels(labels: Sequence[str]) -> list[float]: 39 def _gains_for_labels(labels: Sequence[str]) -> list[float]:
@@ -135,7 +135,7 @@ def compute_query_metrics( @@ -135,7 +135,7 @@ def compute_query_metrics(
135 ideal = list(ideal_labels) if ideal_labels is not None else list(labels) 135 ideal = list(ideal_labels) if ideal_labels is not None else list(labels)
136 metrics: Dict[str, float] = {} 136 metrics: Dict[str, float] = {}
137 137
138 - exact_hits = _binary_hits(labels, [RELEVANCE_EXACT]) 138 + exact_hits = _binary_hits(labels, [RELEVANCE_LV3])
139 strong_hits = _binary_hits(labels, RELEVANCE_STRONG) 139 strong_hits = _binary_hits(labels, RELEVANCE_STRONG)
140 useful_hits = _binary_hits(labels, RELEVANCE_NON_IRRELEVANT) 140 useful_hits = _binary_hits(labels, RELEVANCE_NON_IRRELEVANT)
141 141
@@ -183,8 +183,8 @@ def aggregate_metrics(metric_items: Sequence[Dict[str, float]]) -&gt; Dict[str, flo @@ -183,8 +183,8 @@ def aggregate_metrics(metric_items: Sequence[Dict[str, float]]) -&gt; Dict[str, flo
183 183
184 def label_distribution(labels: Sequence[str]) -> Dict[str, int]: 184 def label_distribution(labels: Sequence[str]) -> Dict[str, int]:
185 return { 185 return {
186 - RELEVANCE_EXACT: sum(1 for label in labels if label == RELEVANCE_EXACT),  
187 - RELEVANCE_HIGH: sum(1 for label in labels if label == RELEVANCE_HIGH),  
188 - RELEVANCE_LOW: sum(1 for label in labels if label == RELEVANCE_LOW),  
189 - RELEVANCE_IRRELEVANT: sum(1 for label in labels if label == RELEVANCE_IRRELEVANT), 186 + RELEVANCE_LV3: sum(1 for label in labels if label == RELEVANCE_LV3),
  187 + RELEVANCE_LV2: sum(1 for label in labels if label == RELEVANCE_LV2),
  188 + RELEVANCE_LV1: sum(1 for label in labels if label == RELEVANCE_LV1),
  189 + RELEVANCE_LV0: sum(1 for label in labels if label == RELEVANCE_LV0),
190 } 190 }
scripts/evaluation/eval_framework/reports.py
@@ -4,7 +4,7 @@ from __future__ import annotations @@ -4,7 +4,7 @@ from __future__ import annotations
4 4
5 from typing import Any, Dict 5 from typing import Any, Dict
6 6
7 -from .constants import RELEVANCE_EXACT, RELEVANCE_HIGH, RELEVANCE_IRRELEVANT, RELEVANCE_LOW 7 +from .constants import RELEVANCE_GAIN_MAP, RELEVANCE_LV0, RELEVANCE_LV1, RELEVANCE_LV2, RELEVANCE_LV3
8 from .metrics import PRIMARY_METRIC_KEYS 8 from .metrics import PRIMARY_METRIC_KEYS
9 9
10 10
@@ -25,6 +25,38 @@ def _append_metric_block(lines: list[str], metrics: Dict[str, Any]) -&gt; None: @@ -25,6 +25,38 @@ def _append_metric_block(lines: list[str], metrics: Dict[str, Any]) -&gt; None:
25 lines.append(f"- {key}: {value}") 25 lines.append(f"- {key}: {value}")
26 26
27 27
  28 +def _label_level_code(label: str) -> str:
  29 + grade = RELEVANCE_GAIN_MAP.get(label)
  30 + return f"L{grade}" if grade is not None else "?"
  31 +
  32 +
  33 +def _append_case_snapshot(lines: list[str], item: Dict[str, Any]) -> None:
  34 + request_id = str(item.get("request_id") or "").strip()
  35 + if request_id:
  36 + lines.append(f"- Request ID: `{request_id}`")
  37 + seq10 = str(item.get("top_label_sequence_top10") or "").strip()
  38 + if seq10:
  39 + lines.append(f"- Top-10 Labels: `{seq10}`")
  40 + seq20 = str(item.get("top_label_sequence_top20") or "").strip()
  41 + if seq20 and seq20 != seq10:
  42 + lines.append(f"- Top-20 Labels: `{seq20}`")
  43 + top_results = item.get("top_results") or []
  44 + if not top_results:
  45 + return
  46 + lines.append("- Case Snapshot:")
  47 + for result in top_results[:5]:
  48 + rank = int(result.get("rank") or 0)
  49 + label = _label_level_code(str(result.get("label") or ""))
  50 + spu_id = str(result.get("spu_id") or "")
  51 + title = str(result.get("title") or "")
  52 + title_zh = str(result.get("title_zh") or "")
  53 + relevance_score = result.get("relevance_score")
  54 + score_suffix = f" (rel={relevance_score})" if relevance_score not in (None, "") else ""
  55 + lines.append(f" - #{rank} [{label}] spu={spu_id} {title}{score_suffix}")
  56 + if title_zh:
  57 + lines.append(f" zh: {title_zh}")
  58 +
  59 +
28 def render_batch_report_markdown(payload: Dict[str, Any]) -> str: 60 def render_batch_report_markdown(payload: Dict[str, Any]) -> str:
29 lines = [ 61 lines = [
30 "# Search Batch Evaluation", 62 "# Search Batch Evaluation",
@@ -56,10 +88,10 @@ def render_batch_report_markdown(payload: Dict[str, Any]) -&gt; str: @@ -56,10 +88,10 @@ def render_batch_report_markdown(payload: Dict[str, Any]) -&gt; str:
56 "", 88 "",
57 "## Label Distribution", 89 "## Label Distribution",
58 "", 90 "",
59 - f"- Fully Relevant: {distribution.get(RELEVANCE_EXACT, 0)}",  
60 - f"- Mostly Relevant: {distribution.get(RELEVANCE_HIGH, 0)}",  
61 - f"- Weakly Relevant: {distribution.get(RELEVANCE_LOW, 0)}",  
62 - f"- Irrelevant: {distribution.get(RELEVANCE_IRRELEVANT, 0)}", 91 + f"- Fully Relevant: {distribution.get(RELEVANCE_LV3, 0)}",
  92 + f"- Mostly Relevant: {distribution.get(RELEVANCE_LV2, 0)}",
  93 + f"- Weakly Relevant: {distribution.get(RELEVANCE_LV1, 0)}",
  94 + f"- Irrelevant: {distribution.get(RELEVANCE_LV0, 0)}",
63 ] 95 ]
64 ) 96 )
65 lines.extend(["", "## Per Query", ""]) 97 lines.extend(["", "## Per Query", ""])
@@ -68,9 +100,10 @@ def render_batch_report_markdown(payload: Dict[str, Any]) -> str: @@ -68,9 +100,10 @@ def render_batch_report_markdown(payload: Dict[str, Any]) -> str:
68 lines.append("") 100 lines.append("")
69 _append_metric_block(lines, item.get("metrics") or {}) 101 _append_metric_block(lines, item.get("metrics") or {})
70 distribution = item.get("distribution") or {} 102 distribution = item.get("distribution") or {}
71 - lines.append(f"- Fully Relevant: {distribution.get(RELEVANCE_EXACT, 0)}")  
72 - lines.append(f"- Mostly Relevant: {distribution.get(RELEVANCE_HIGH, 0)}")  
73 - lines.append(f"- Weakly Relevant: {distribution.get(RELEVANCE_LOW, 0)}")  
74 - lines.append(f"- Irrelevant: {distribution.get(RELEVANCE_IRRELEVANT, 0)}") 103 + lines.append(f"- Fully Relevant: {distribution.get(RELEVANCE_LV3, 0)}")
  104 + lines.append(f"- Mostly Relevant: {distribution.get(RELEVANCE_LV2, 0)}")
  105 + lines.append(f"- Weakly Relevant: {distribution.get(RELEVANCE_LV1, 0)}")
  106 + lines.append(f"- Irrelevant: {distribution.get(RELEVANCE_LV0, 0)}")
  107 + _append_case_snapshot(lines, item)
75 lines.append("") 108 lines.append("")
76 return "\n".join(lines) 109 return "\n".join(lines)
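Since the diff only shows the new helper out of context, here is a minimal, self-contained sketch of how `_append_case_snapshot` is expected to render a per-query snapshot. The `_label_level_code` stand-in and the item shape are assumptions for illustration, not the repo's actual implementation:

```python
from typing import Any, Dict, List

def _label_level_code(label: str) -> str:
    # Hypothetical stand-in for the real helper: map verbose labels to LV codes.
    return {"fully": "LV3", "mostly": "LV2", "weakly": "LV1", "irrelevant": "LV0"}.get(label, label)

def append_case_snapshot(lines: List[str], item: Dict[str, Any]) -> None:
    # Mirrors the hunk: emit the request id, then up to five ranked results.
    request_id = str(item.get("request_id") or "").strip()
    if request_id:
        lines.append(f"- Request ID: `{request_id}`")
    for result in (item.get("top_results") or [])[:5]:
        rank = int(result.get("rank") or 0)
        label = _label_level_code(str(result.get("label") or ""))
        lines.append(f"  - #{rank} [{label}] spu={result.get('spu_id', '')} {result.get('title', '')}")

lines: List[str] = []
append_case_snapshot(lines, {
    "request_id": "req-1",
    "top_results": [{"rank": 1, "label": "fully", "spu_id": "42", "title": "RC car"}],
})
# lines → ["- Request ID: `req-1`", "  - #1 [LV3] spu=42 RC car"]
```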
scripts/evaluation/eval_framework/static/eval_web.js
@@ -190,7 +190,7 @@ async function loadQueries() { @@ -190,7 +190,7 @@ async function loadQueries() {
190 190
191 function historySummaryHtml(meta) { 191 function historySummaryHtml(meta) {
192 const m = meta && meta.aggregate_metrics; 192 const m = meta && meta.aggregate_metrics;
193 - const nq = (meta && meta.queries && meta.queries.length) || (meta && meta.per_query && meta.per_query.length) || null; 193 + const nq = (meta && meta.query_count) || (meta && meta.queries && meta.queries.length) || (meta && meta.per_query && meta.per_query.length) || null;
194 const parts = []; 194 const parts = [];
195 if (nq != null) parts.push(`<span>Queries</span> ${nq}`); 195 if (nq != null) parts.push(`<span>Queries</span> ${nq}`);
196 if (m && m["Primary_Metric_Score"] != null) parts.push(`<span>Primary</span> ${fmtNumber(m["Primary_Metric_Score"])}`); 196 if (m && m["Primary_Metric_Score"] != null) parts.push(`<span>Primary</span> ${fmtNumber(m["Primary_Metric_Score"])}`);
scripts/evaluation/eval_framework/store.py
@@ -23,6 +23,18 @@ class QueryBuildResult: @@ -23,6 +23,18 @@ class QueryBuildResult:
23 output_json_path: Path 23 output_json_path: Path
24 24
25 25
  26 +def _compact_batch_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
  27 +    return {
  28 +        "batch_id": metadata.get("batch_id"),
  29 +        "created_at": metadata.get("created_at"),
  30 +        "tenant_id": metadata.get("tenant_id"),
  31 +        "top_k": metadata.get("top_k"),
  32 +        "query_count": len(metadata.get("queries") or []),
  33 +        "aggregate_metrics": dict(metadata.get("aggregate_metrics") or {}),
  34 +        "metric_context": dict(metadata.get("metric_context") or {}),
  35 +    }
  36 +
  37 +
26 class EvalStore: 38 class EvalStore:
27 def __init__(self, db_path: Path): 39 def __init__(self, db_path: Path):
28 self.db_path = db_path 40 self.db_path = db_path
@@ -339,6 +351,7 @@ class EvalStore: @@ -339,6 +351,7 @@ class EvalStore:
339 ).fetchall() 351 ).fetchall()
340 items: List[Dict[str, Any]] = [] 352 items: List[Dict[str, Any]] = []
341 for row in rows: 353 for row in rows:
  354 +            metadata = json.loads(row["metadata_json"])
342 items.append( 355 items.append(
343 { 356 {
344 "batch_id": row["batch_id"], 357 "batch_id": row["batch_id"],
@@ -346,7 +359,7 @@ class EvalStore: @@ -346,7 +359,7 @@ class EvalStore:
346 "output_json_path": row["output_json_path"], 359 "output_json_path": row["output_json_path"],
347 "report_markdown_path": row["report_markdown_path"], 360 "report_markdown_path": row["report_markdown_path"],
348 "config_snapshot_path": row["config_snapshot_path"], 361 "config_snapshot_path": row["config_snapshot_path"],
349 - "metadata": json.loads(row["metadata_json"]), 362 + "metadata": _compact_batch_metadata(metadata),
350 "created_at": row["created_at"], 363 "created_at": row["created_at"],
351 } 364 }
352 ) 365 )
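The effect of the compaction is that history listings no longer carry the heavy per-query payload. A reduced sketch, assuming `metadata` is the full batch record shown in the hunk (field subset chosen for brevity):

```python
from typing import Any, Dict

def compact_batch_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
    # Keep only summary fields for history listings; replace the per-query
    # list with its length so the UI can still show a query count.
    return {
        "batch_id": metadata.get("batch_id"),
        "query_count": len(metadata.get("queries") or []),
        "aggregate_metrics": dict(metadata.get("aggregate_metrics") or {}),
    }

full = {
    "batch_id": "b1",
    "queries": [{"q": "遥控车"}, {"q": "玩具车"}],
    "aggregate_metrics": {"NDCG@10": 0.7},
}
compact = compact_batch_metadata(full)
```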
scripts/evaluation/offline_ltr_fit.py
@@ -23,11 +23,11 @@ if str(PROJECT_ROOT) not in sys.path: @@ -23,11 +23,11 @@ if str(PROJECT_ROOT) not in sys.path:
23 23
24 from scripts.evaluation.eval_framework.constants import ( 24 from scripts.evaluation.eval_framework.constants import (
25 DEFAULT_ARTIFACT_ROOT, 25 DEFAULT_ARTIFACT_ROOT,
26 - RELEVANCE_EXACT,  
27 RELEVANCE_GRADE_MAP, 26 RELEVANCE_GRADE_MAP,
28 - RELEVANCE_HIGH,  
29 - RELEVANCE_IRRELEVANT,  
30 - RELEVANCE_LOW, 27 + RELEVANCE_LV0,
  28 + RELEVANCE_LV1,
  29 + RELEVANCE_LV2,
  30 + RELEVANCE_LV3,
31 ) 31 )
32 from scripts.evaluation.eval_framework.metrics import aggregate_metrics, compute_query_metrics 32 from scripts.evaluation.eval_framework.metrics import aggregate_metrics, compute_query_metrics
33 from scripts.evaluation.eval_framework.store import EvalStore 33 from scripts.evaluation.eval_framework.store import EvalStore
@@ -35,10 +35,10 @@ from scripts.evaluation.eval_framework.utils import ensure_dir, utc_timestamp @@ -35,10 +35,10 @@ from scripts.evaluation.eval_framework.utils import ensure_dir, utc_timestamp
35 35
36 36
37 LABELS_BY_GRADE = { 37 LABELS_BY_GRADE = {
38 - 3: RELEVANCE_EXACT,  
39 - 2: RELEVANCE_HIGH,  
40 - 1: RELEVANCE_LOW,  
41 - 0: RELEVANCE_IRRELEVANT, 38 + 3: RELEVANCE_LV3,
  39 + 2: RELEVANCE_LV2,
  40 + 1: RELEVANCE_LV1,
  41 + 0: RELEVANCE_LV0,
42 } 42 }
43 43
44 44
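The rename keeps the numeric grades stable while switching label constants to the LV naming. A small sketch of the grade/label round trip (the string values here are placeholders; the real values live in `constants.py`):

```python
# Hypothetical constant values standing in for the renamed LV0..LV3 labels.
RELEVANCE_LV3, RELEVANCE_LV2, RELEVANCE_LV1, RELEVANCE_LV0 = "lv3", "lv2", "lv1", "lv0"

LABELS_BY_GRADE = {
    3: RELEVANCE_LV3,
    2: RELEVANCE_LV2,
    1: RELEVANCE_LV1,
    0: RELEVANCE_LV0,
}
# Inverse map, analogous to what RELEVANCE_GRADE_MAP presumably provides.
GRADE_BY_LABEL = {label: grade for grade, label in LABELS_BY_GRADE.items()}
```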
scripts/frontend/frontend_server.py 0 → 100755
@@ -0,0 +1,278 @@ @@ -0,0 +1,278 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Simple HTTP server for saas-search frontend.
  4 +"""
  5 +
  6 +import http.server
  7 +import socketserver
  8 +import os
  9 +import sys
  10 +import logging
  11 +import time
  12 +import urllib.request
  13 +import urllib.error
  14 +from collections import defaultdict, deque
  15 +from pathlib import Path
  16 +from dotenv import load_dotenv
  17 +
  18 +# Load .env file
  19 +project_root = Path(__file__).resolve().parents[2]
  20 +load_dotenv(project_root / '.env')
  21 +
  22 +# Get API_BASE_URL from the environment (not injected by default, so a stale .env cannot override the same-origin policy).
  23 +# window.API_BASE_URL is injected only when FRONTEND_INJECT_API_BASE_URL=1 is set explicitly.
  24 +API_BASE_URL = os.getenv('API_BASE_URL') or None
  25 +INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'
  26 +# Backend proxy target for same-origin API forwarding
  27 +BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')
  28 +
  29 +# Change to frontend directory
  30 +frontend_dir = os.path.join(project_root, 'frontend')
  31 +os.chdir(frontend_dir)
  32 +
  33 +# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.
  34 +PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))
  35 +
  36 +# Configure logging to suppress scanner noise
  37 +logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
  38 +
  39 +class RateLimitingMixin:
  40 +    """Mixin for rate limiting requests by IP address."""
  41 +    request_counts = defaultdict(deque)
  42 +    rate_limit = 100  # requests per minute
  43 +    window = 60  # seconds
  44 +
  45 +    @classmethod
  46 +    def is_rate_limited(cls, ip):
  47 +        now = time.time()
  48 +
  49 +        # Clean old requests
  50 +        while cls.request_counts[ip] and cls.request_counts[ip][0] < now - cls.window:
  51 +            cls.request_counts[ip].popleft()
  52 +
  53 +        # Check rate limit
  54 +        if len(cls.request_counts[ip]) > cls.rate_limit:
  55 +            return True
  56 +
  57 +        cls.request_counts[ip].append(now)
  58 +        return False
  59 +
  60 +class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
  61 +    """Custom request handler with CORS support and robust error handling."""
  62 +
  63 +    _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"
  64 +
  65 +    def _is_proxy_path(self, path: str) -> bool:
  66 +        """Return True for API paths that should be forwarded to backend service."""
  67 +        return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')
  68 +
  69 +    def _proxy_to_backend(self):
  70 +        """Proxy current request to backend service on the GPU server."""
  71 +        target_url = f"{BACKEND_PROXY_URL}{self.path}"
  72 +        method = self.command.upper()
  73 +
  74 +        try:
  75 +            content_length = int(self.headers.get('Content-Length', '0'))
  76 +        except ValueError:
  77 +            content_length = 0
  78 +        body = self.rfile.read(content_length) if content_length > 0 else None
  79 +
  80 +        forward_headers = {}
  81 +        for key, value in self.headers.items():
  82 +            lk = key.lower()
  83 +            if lk in ('host', 'content-length', 'connection'):
  84 +                continue
  85 +            forward_headers[key] = value
  86 +
  87 +        req = urllib.request.Request(
  88 +            target_url,
  89 +            data=body,
  90 +            headers=forward_headers,
  91 +            method=method,
  92 +        )
  93 +
  94 +        try:
  95 +            with urllib.request.urlopen(req, timeout=30) as resp:
  96 +                resp_body = resp.read()
  97 +                self.send_response(resp.getcode())
  98 +                for header, value in resp.getheaders():
  99 +                    lh = header.lower()
  100 +                    if lh in ('transfer-encoding', 'connection', 'content-length'):
  101 +                        continue
  102 +                    self.send_header(header, value)
  103 +                self.end_headers()
  104 +                self.wfile.write(resp_body)
  105 +        except urllib.error.HTTPError as e:
  106 +            err_body = e.read() if hasattr(e, 'read') else b''
  107 +            self.send_response(e.code)
  108 +            if e.headers:
  109 +                for header, value in e.headers.items():
  110 +                    lh = header.lower()
  111 +                    if lh in ('transfer-encoding', 'connection', 'content-length'):
  112 +                        continue
  113 +                    self.send_header(header, value)
  114 +            self.end_headers()
  115 +            if err_body:
  116 +                self.wfile.write(err_body)
  117 +        except Exception as e:
  118 +            logging.error(f"Backend proxy error for {method} {self.path}: {e}")
  119 +            self.send_response(502)
  120 +            self.send_header('Content-Type', 'application/json; charset=utf-8')
  121 +            self.end_headers()
  122 +            self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')
  123 +
  124 +    def do_GET(self):
  125 +        """Handle GET requests with API config injection."""
  126 +        path = self.path.split('?')[0]
  127 +
  128 +        # Proxy API paths to backend first
  129 +        if self._is_proxy_path(path):
  130 +            self._proxy_to_backend()
  131 +            return
  132 +
  133 +        # Route / to index.html
  134 +        if path == '/' or path == '':
  135 +            self.path = '/index.html' + (self.path.split('?', 1)[1] if '?' in self.path else '')
  136 +
  137 +        # Inject API config for HTML files
  138 +        if self.path.endswith('.html'):
  139 +            self._serve_html_with_config()
  140 +        else:
  141 +            super().do_GET()
  142 +
  143 +    def _serve_html_with_config(self):
  144 +        """Serve HTML with optional API_BASE_URL injected."""
  145 +        try:
  146 +            file_path = self.path.lstrip('/')
  147 +            if not os.path.exists(file_path):
  148 +                self.send_error(404)
  149 +                return
  150 +
  151 +            with open(file_path, 'r', encoding='utf-8') as f:
  152 +                html = f.read()
  153 +
  154 +            # API_BASE_URL is not injected by default, so a legacy .env (e.g. http://xx:6002) cannot override same-origin calls.
  155 +            # Inject only when FRONTEND_INJECT_API_BASE_URL=1 and API_BASE_URL is set.
  156 +            if INJECT_API_BASE_URL and API_BASE_URL:
  157 +                config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n    '
  158 +                html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)
  159 +
  160 +            self.send_response(200)
  161 +            self.send_header('Content-Type', 'text/html; charset=utf-8')
  162 +            self.end_headers()
  163 +            self.wfile.write(html.encode('utf-8'))
  164 +        except Exception as e:
  165 +            logging.error(f"Error serving HTML: {e}")
  166 +            self.send_error(500)
  167 +
  168 +    def do_POST(self):
  169 +        """Handle POST requests. Proxy API requests to backend."""
  170 +        path = self.path.split('?')[0]
  171 +        if self._is_proxy_path(path):
  172 +            self._proxy_to_backend()
  173 +            return
  174 +        self.send_error(405, "Method Not Allowed")
  175 +
  176 +    def setup(self):
  177 +        """Setup with error handling."""
  178 +        try:
  179 +            super().setup()
  180 +        except Exception:
  181 +            pass  # Silently handle setup errors from scanners
  182 +
  183 +    def handle_one_request(self):
  184 +        """Handle single request with error catching."""
  185 +        try:
  186 +            # Check rate limiting
  187 +            client_ip = self.client_address[0]
  188 +            if self.is_rate_limited(client_ip):
  189 +                logging.warning(f"Rate limiting IP: {client_ip}")
  190 +                self.send_error(429, "Too Many Requests")
  191 +                return
  192 +
  193 +            super().handle_one_request()
  194 +        except (ConnectionResetError, BrokenPipeError):
  195 +            # Client disconnected prematurely - common with scanners
  196 +            pass
  197 +        except UnicodeDecodeError:
  198 +            # Binary data received - not HTTP
  199 +            pass
  200 +        except Exception as e:
  201 +            # Log unexpected errors but don't crash
  202 +            logging.debug(f"Request handling error: {e}")
  203 +
  204 +    def log_message(self, format, *args):
  205 +        """Suppress logging for malformed requests from scanners."""
  206 +        message = format % args
  207 +        # Filter out scanner noise
  208 +        noise_patterns = [
  209 +            "code 400",
  210 +            "Bad request",
  211 +            "Bad request version",
  212 +            "Bad HTTP/0.9 request type",
  213 +            "Bad request syntax"
  214 +        ]
  215 +        if any(pattern in message for pattern in noise_patterns):
  216 +            return
  217 +        # Only log legitimate requests
  218 +        if message and not message.startswith(" ") and len(message) > 10:
  219 +            super().log_message(format, *args)
  220 +
  221 +    def end_headers(self):
  222 +        # Add CORS headers
  223 +        self.send_header('Access-Control-Allow-Origin', '*')
  224 +        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
  225 +        self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
  226 +        # Add security headers
  227 +        self.send_header('X-Content-Type-Options', 'nosniff')
  228 +        self.send_header('X-Frame-Options', 'DENY')
  229 +        self.send_header('X-XSS-Protection', '1; mode=block')
  230 +        super().end_headers()
  231 +
  232 +    def do_OPTIONS(self):
  233 +        """Handle OPTIONS requests."""
  234 +        try:
  235 +            path = self.path.split('?')[0]
  236 +            if self._is_proxy_path(path):
  237 +                self.send_response(204)
  238 +                self.end_headers()
  239 +                return
  240 +            self.send_response(200)
  241 +            self.end_headers()
  242 +        except Exception:
  243 +            pass
  244 +
  245 +class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
  246 +    """Threaded TCP server with better error handling."""
  247 +    allow_reuse_address = True
  248 +    daemon_threads = True
  249 +
  250 +if __name__ == '__main__':
  251 +    # Check if port is already in use
  252 +    import socket
  253 +    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  254 +    try:
  255 +        sock.bind(("", PORT))
  256 +        sock.close()
  257 +    except OSError:
  258 +        print(f"ERROR: Port {PORT} is already in use.")
  259 +        print(f"Please stop the existing server or use a different port.")
  260 +        print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
  261 +        sys.exit(1)
  262 +
  263 +    # Create threaded server for better concurrency
  264 +    with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
  265 +        print(f"Frontend server started at http://localhost:{PORT}")
  266 +        print(f"Serving files from: {os.getcwd()}")
  267 +        print("\nPress Ctrl+C to stop the server")
  268 +
  269 +        try:
  270 +            httpd.serve_forever()
  271 +        except KeyboardInterrupt:
  272 +            print("\nShutting down server...")
  273 +            httpd.shutdown()
  274 +            print("Server stopped")
  275 +            sys.exit(0)
  276 +        except Exception as e:
  277 +            print(f"Server error: {e}")
  278 +            sys.exit(1)
scripts/frontend_server.py 100755 → 100644
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 -"""  
3 -Simple HTTP server for saas-search frontend.  
4 -""" 2 +"""Backward-compatible frontend server entrypoint."""
5 3
6 -import http.server  
7 -import socketserver  
8 -import os  
9 -import sys  
10 -import logging  
11 -import time  
12 -import urllib.request  
13 -import urllib.error  
14 -from collections import defaultdict, deque  
15 -from pathlib import Path  
16 -from dotenv import load_dotenv  
17 -  
18 -# Load .env file  
19 -project_root = Path(__file__).parent.parent  
20 -load_dotenv(project_root / '.env')  
21 -  
22 -# Get API_BASE_URL from the environment (not injected by default, so a stale .env cannot override the same-origin policy).  
23 -# window.API_BASE_URL is injected only when FRONTEND_INJECT_API_BASE_URL=1 is set explicitly.  
24 -API_BASE_URL = os.getenv('API_BASE_URL') or None  
25 -INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'  
26 -# Backend proxy target for same-origin API forwarding  
27 -BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')  
28 -  
29 -# Change to frontend directory  
30 -frontend_dir = os.path.join(os.path.dirname(__file__), '../frontend')  
31 -os.chdir(frontend_dir)  
32 -  
33 -# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.  
34 -PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))  
35 -  
36 -# Configure logging to suppress scanner noise  
37 -logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')  
38 -  
39 -class RateLimitingMixin:  
40 - """Mixin for rate limiting requests by IP address."""  
41 - request_counts = defaultdict(deque)  
42 - rate_limit = 100 # requests per minute  
43 - window = 60 # seconds  
44 -  
45 - @classmethod  
46 - def is_rate_limited(cls, ip):  
47 - now = time.time()  
48 -  
49 - # Clean old requests  
50 - while cls.request_counts[ip] and cls.request_counts[ip][0] < now - cls.window:  
51 - cls.request_counts[ip].popleft()  
52 -  
53 - # Check rate limit  
54 - if len(cls.request_counts[ip]) > cls.rate_limit:  
55 - return True  
56 -  
57 - cls.request_counts[ip].append(now)  
58 - return False  
59 -  
60 -class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):  
61 - """Custom request handler with CORS support and robust error handling."""  
62 -  
63 - def _is_proxy_path(self, path: str) -> bool:  
64 - """Return True for API paths that should be forwarded to backend service."""  
65 - return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')  
66 -  
67 - def _proxy_to_backend(self):  
68 - """Proxy current request to backend service on the GPU server."""  
69 - target_url = f"{BACKEND_PROXY_URL}{self.path}"  
70 - method = self.command.upper()  
71 -  
72 - try:  
73 - content_length = int(self.headers.get('Content-Length', '0'))  
74 - except ValueError:  
75 - content_length = 0  
76 - body = self.rfile.read(content_length) if content_length > 0 else None 4 +from __future__ import annotations
77 5
78 - forward_headers = {}  
79 - for key, value in self.headers.items():  
80 - lk = key.lower()  
81 - if lk in ('host', 'content-length', 'connection'):  
82 - continue  
83 - forward_headers[key] = value  
84 -  
85 - req = urllib.request.Request(  
86 - target_url,  
87 - data=body,  
88 - headers=forward_headers,  
89 - method=method,  
90 - )  
91 -  
92 - try:  
93 - with urllib.request.urlopen(req, timeout=30) as resp:  
94 - resp_body = resp.read()  
95 - self.send_response(resp.getcode())  
96 - for header, value in resp.getheaders():  
97 - lh = header.lower()  
98 - if lh in ('transfer-encoding', 'connection', 'content-length'):  
99 - continue  
100 - self.send_header(header, value)  
101 - self.end_headers()  
102 - self.wfile.write(resp_body)  
103 - except urllib.error.HTTPError as e:  
104 - err_body = e.read() if hasattr(e, 'read') else b''  
105 - self.send_response(e.code)  
106 - if e.headers:  
107 - for header, value in e.headers.items():  
108 - lh = header.lower()  
109 - if lh in ('transfer-encoding', 'connection', 'content-length'):  
110 - continue  
111 - self.send_header(header, value)  
112 - self.end_headers()  
113 - if err_body:  
114 - self.wfile.write(err_body)  
115 - except Exception as e:  
116 - logging.error(f"Backend proxy error for {method} {self.path}: {e}")  
117 - self.send_response(502)  
118 - self.send_header('Content-Type', 'application/json; charset=utf-8')  
119 - self.end_headers()  
120 - self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')  
121 -  
122 - def do_GET(self):  
123 - """Handle GET requests with API config injection."""  
124 - path = self.path.split('?')[0]  
125 -  
126 - # Proxy API paths to backend first  
127 - if self._is_proxy_path(path):  
128 - self._proxy_to_backend()  
129 - return  
130 -  
131 - # Route / to index.html  
132 - if path == '/' or path == '':  
133 - self.path = '/index.html' + (self.path.split('?', 1)[1] if '?' in self.path else '')  
134 -  
135 - # Inject API config for HTML files  
136 - if self.path.endswith('.html'):  
137 - self._serve_html_with_config()  
138 - else:  
139 - super().do_GET()  
140 -  
141 - def _serve_html_with_config(self):  
142 - """Serve HTML with optional API_BASE_URL injected."""  
143 - try:  
144 - file_path = self.path.lstrip('/')  
145 - if not os.path.exists(file_path):  
146 - self.send_error(404)  
147 - return  
148 -  
149 - with open(file_path, 'r', encoding='utf-8') as f:  
150 - html = f.read()  
151 -  
152 - # API_BASE_URL is not injected by default, so a legacy .env (e.g. http://xx:6002) cannot override same-origin calls.  
153 - # Inject only when FRONTEND_INJECT_API_BASE_URL=1 and API_BASE_URL is set.  
154 - if INJECT_API_BASE_URL and API_BASE_URL:  
155 - config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n '  
156 - html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)  
157 -  
158 - self.send_response(200)  
159 - self.send_header('Content-Type', 'text/html; charset=utf-8')  
160 - self.end_headers()  
161 - self.wfile.write(html.encode('utf-8'))  
162 - except Exception as e:  
163 - logging.error(f"Error serving HTML: {e}")  
164 - self.send_error(500)  
165 -  
166 - def do_POST(self):  
167 - """Handle POST requests. Proxy API requests to backend."""  
168 - path = self.path.split('?')[0]  
169 - if self._is_proxy_path(path):  
170 - self._proxy_to_backend()  
171 - return  
172 - self.send_error(405, "Method Not Allowed")  
173 -  
174 - def setup(self):  
175 - """Setup with error handling."""  
176 - try:  
177 - super().setup()  
178 - except Exception:  
179 - pass # Silently handle setup errors from scanners  
180 -  
181 - def handle_one_request(self):  
182 - """Handle single request with error catching."""  
183 - try:  
184 - # Check rate limiting  
185 - client_ip = self.client_address[0]  
186 - if self.is_rate_limited(client_ip):  
187 - logging.warning(f"Rate limiting IP: {client_ip}")  
188 - self.send_error(429, "Too Many Requests")  
189 - return  
190 -  
191 - super().handle_one_request()  
192 - except (ConnectionResetError, BrokenPipeError):  
193 - # Client disconnected prematurely - common with scanners  
194 - pass  
195 - except UnicodeDecodeError:  
196 - # Binary data received - not HTTP  
197 - pass  
198 - except Exception as e:  
199 - # Log unexpected errors but don't crash  
200 - logging.debug(f"Request handling error: {e}")  
201 -  
202 - def log_message(self, format, *args):  
203 - """Suppress logging for malformed requests from scanners."""  
204 - message = format % args  
205 - # Filter out scanner noise  
206 - noise_patterns = [  
207 - "code 400",  
208 - "Bad request",  
209 - "Bad request version",  
210 - "Bad HTTP/0.9 request type",  
211 - "Bad request syntax"  
212 - ]  
213 - if any(pattern in message for pattern in noise_patterns):  
214 - return  
215 - # Only log legitimate requests  
216 - if message and not message.startswith(" ") and len(message) > 10:  
217 - super().log_message(format, *args)  
218 -  
219 - def end_headers(self):  
220 - # Add CORS headers  
221 - self.send_header('Access-Control-Allow-Origin', '*')  
222 - self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')  
223 - self.send_header('Access-Control-Allow-Headers', 'Content-Type')  
224 - # Add security headers  
225 - self.send_header('X-Content-Type-Options', 'nosniff')  
226 - self.send_header('X-Frame-Options', 'DENY')  
227 - self.send_header('X-XSS-Protection', '1; mode=block')  
228 - super().end_headers()  
229 -  
230 - def do_OPTIONS(self):  
231 - """Handle OPTIONS requests."""  
232 - try:  
233 - path = self.path.split('?')[0]  
234 - if self._is_proxy_path(path):  
235 - self.send_response(204)  
236 - self.end_headers()  
237 - return  
238 - self.send_response(200)  
239 - self.end_headers()  
240 - except Exception:  
241 - pass  
242 -  
243 -class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):  
244 - """Threaded TCP server with better error handling."""  
245 - allow_reuse_address = True  
246 - daemon_threads = True 6 +import runpy
  7 +from pathlib import Path
247 8
248 -if __name__ == '__main__':  
249 - # Check if port is already in use  
250 - import socket  
251 - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  
252 - try:  
253 - sock.bind(("", PORT))  
254 - sock.close()  
255 - except OSError:  
256 - print(f"ERROR: Port {PORT} is already in use.")  
257 - print(f"Please stop the existing server or use a different port.")  
258 - print(f"To stop existing server: kill $(lsof -t -i:{PORT})")  
259 - sys.exit(1)  
260 -  
261 - # Create threaded server for better concurrency  
262 - with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:  
263 - print(f"Frontend server started at http://localhost:{PORT}")  
264 - print(f"Serving files from: {os.getcwd()}")  
265 - print("\nPress Ctrl+C to stop the server")  
266 9
267 - try:  
268 - httpd.serve_forever()  
269 - except KeyboardInterrupt:  
270 - print("\nShutting down server...")  
271 - httpd.shutdown()  
272 - print("Server stopped")  
273 - sys.exit(0)  
274 - except Exception as e:  
275 - print(f"Server error: {e}")  
276 - sys.exit(1) 10 +if __name__ == "__main__":
  11 +    target = Path(__file__).resolve().parent / "frontend" / "frontend_server.py"
  12 +    runpy.run_path(str(target), run_name="__main__")
scripts/inspect/README.md 0 → 100644
@@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
  1 +# Inspect Scripts
  2 +
  3 +These scripts are used for one-off diagnostics, index inspection, and data verification:
  4 +
  5 +- `check_data_source.py`
  6 +- `check_es_data.py`
  7 +- `check_index_mapping.py`
  8 +- `compare_index_mappings.py`
  9 +
  10 +They depend on a real DB / ES environment and are not part of CI tests or benchmarks.
scripts/check_data_source.py renamed to scripts/inspect/check_data_source.py
@@ -14,8 +14,8 @@ import argparse @@ -14,8 +14,8 @@ import argparse
14 from pathlib import Path 14 from pathlib import Path
15 from sqlalchemy import create_engine, text 15 from sqlalchemy import create_engine, text
16 16
17 -# Add parent directory to path  
18 -sys.path.insert(0, str(Path(__file__).parent.parent)) 17 +# Add repo root to path
  18 +sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
19 19
20 from utils.db_connector import create_db_connection 20 from utils.db_connector import create_db_connection
21 21
@@ -298,4 +298,3 @@ def main(): @@ -298,4 +298,3 @@ def main():
298 298
299 if __name__ == '__main__': 299 if __name__ == '__main__':
300 sys.exit(main()) 300 sys.exit(main())
301 -  
scripts/check_es_data.py renamed to scripts/inspect/check_es_data.py
@@ -8,7 +8,7 @@ import os @@ -8,7 +8,7 @@ import os
8 import argparse 8 import argparse
9 from pathlib import Path 9 from pathlib import Path
10 10
11 -sys.path.insert(0, str(Path(__file__).parent.parent)) 11 +sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
12 12
13 from utils.es_client import ESClient 13 from utils.es_client import ESClient
14 14
@@ -265,4 +265,3 @@ def main(): @@ -265,4 +265,3 @@ def main():
265 265
266 if __name__ == '__main__': 266 if __name__ == '__main__':
267 sys.exit(main()) 267 sys.exit(main())
268 -  
scripts/check_index_mapping.py renamed to scripts/inspect/check_index_mapping.py
@@ -8,7 +8,7 @@ import sys @@ -8,7 +8,7 @@ import sys
8 import json 8 import json
9 from pathlib import Path 9 from pathlib import Path
10 10
11 -sys.path.insert(0, str(Path(__file__).parent.parent)) 11 +sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
12 12
13 from utils.es_client import get_es_client_from_env 13 from utils.es_client import get_es_client_from_env
14 from indexer.mapping_generator import get_tenant_index_name 14 from indexer.mapping_generator import get_tenant_index_name
scripts/compare_index_mappings.py renamed to scripts/inspect/compare_index_mappings.py
@@ -9,7 +9,7 @@ import json @@ -9,7 +9,7 @@ import json
9 from pathlib import Path 9 from pathlib import Path
10 from typing import Dict, Any 10 from typing import Dict, Any
11 11
12 -sys.path.insert(0, str(Path(__file__).parent.parent)) 12 +sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
13 13
14 from utils.es_client import get_es_client_from_env 14 from utils.es_client import get_es_client_from_env
15 15
@@ -186,4 +186,3 @@ def main(): @@ -186,4 +186,3 @@ def main():
186 186
187 if __name__ == '__main__': 187 if __name__ == '__main__':
188 sys.exit(main()) 188 sys.exit(main())
189 -  
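The recurring `parents[2]` change in these renames follows from the scripts moving one directory deeper: with `scripts/<subdir>/<script>.py`, the repo root is now two levels above the script's directory. A quick illustration (the `/repo` path is hypothetical):

```python
from pathlib import Path

# After the move into scripts/inspect/, parents[0] is the script's directory,
# parents[1] is scripts/, and parents[2] is the repo root.
p = Path("/repo/scripts/inspect/check_es_data.py")
repo_root = p.parents[2]
old_style = p.parent.parent  # would now point at scripts/, not the repo root
```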
scripts/temp_embed_tenant_image_urls.py renamed to scripts/maintenance/embed_tenant_image_urls.py
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 5
6 Usage: 6 Usage:
7 source activate.sh # loads .env, providing ES_HOST / ES_USERNAME / ES_PASSWORD 7 source activate.sh # loads .env, providing ES_HOST / ES_USERNAME / ES_PASSWORD
8 - python scripts/temp_embed_tenant_image_urls.py 8 + python scripts/maintenance/embed_tenant_image_urls.py
9 9
10 If not sourced, the script will also try to load .env from the project root. 10 If not sourced, the script will also try to load .env from the project root.
11 """ 11 """
@@ -30,7 +30,7 @@ from elasticsearch.helpers import scan @@ -30,7 +30,7 @@ from elasticsearch.helpers import scan
30 try: 30 try:
31 from dotenv import load_dotenv 31 from dotenv import load_dotenv
32 32
33 - _ROOT = Path(__file__).resolve().parents[1] 33 + _ROOT = Path(__file__).resolve().parents[2]
34 load_dotenv(_ROOT / ".env") 34 load_dotenv(_ROOT / ".env")
35 except ImportError: 35 except ImportError:
36 pass 36 pass
scripts/ops/README.md 0 → 100644
@@ -0,0 +1,8 @@ @@ -0,0 +1,8 @@
  1 +# Ops Scripts
  2 +
  3 +These are helper scripts used during service orchestration:
  4 +
  5 +- `daily_log_router.sh`: rotates logs by day
  6 +- `wechat_alert.py`: sends monitoring alerts
  7 +
  8 +If other startup scripts reference these files, they should use the fixed paths here rather than copying out new duplicates of the same tools.
scripts/daily_log_router.sh renamed to scripts/ops/daily_log_router.sh
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 # Route incoming log stream into per-day files. 3 # Route incoming log stream into per-day files.
4 # 4 #
5 # Usage: 5 # Usage:
6 -# command 2>&1 | ./scripts/daily_log_router.sh <service> <log_dir> [retention_days] 6 +# command 2>&1 | ./scripts/ops/daily_log_router.sh <service> <log_dir> [retention_days]
7 # 7 #
8 8
9 set -euo pipefail 9 set -euo pipefail
scripts/wechat_alert.py renamed to scripts/ops/wechat_alert.py
@@ -6,7 +6,7 @@ This module is intentionally small and focused so that Bash-based monitors @@ -6,7 +6,7 @@ This module is intentionally small and focused so that Bash-based monitors
6 can invoke it without pulling in the full application stack. 6 can invoke it without pulling in the full application stack.
7 7
8 Usage example: 8 Usage example:
9 - python scripts/wechat_alert.py --service backend --level error --message "backend restarted" 9 + python scripts/ops/wechat_alert.py --service backend --level error --message "backend restarted"
10 """ 10 """
11 11
12 import argparse 12 import argparse
@@ -101,4 +101,3 @@ def main(argv: list[str] | None = None) -> int: @@ -101,4 +101,3 @@ def main(argv: list[str] | None = None) -> int:
101 101
102 if __name__ == "__main__": 102 if __name__ == "__main__":
103 raise SystemExit(main()) 103 raise SystemExit(main())
104 -  
scripts/monitor_eviction.py renamed to scripts/redis/monitor_eviction.py
@@ -12,7 +12,7 @@ from pathlib import Path @@ -12,7 +12,7 @@ from pathlib import Path
12 from datetime import datetime 12 from datetime import datetime
13 13
14 # Add project root to sys.path 14 # Add project root to sys.path
15 -project_root = Path(__file__).parent.parent 15 +project_root = Path(__file__).resolve().parents[2]
16 sys.path.insert(0, str(project_root)) 16 sys.path.insert(0, str(project_root))
17 17
18 from config.env_config import REDIS_CONFIG 18 from config.env_config import REDIS_CONFIG
scripts/service_ctl.sh
@@ -20,6 +20,7 @@ CORE_SERVICES=("backend" "indexer" "frontend" "eval-web") @@ -20,6 +20,7 @@ CORE_SERVICES=("backend" "indexer" "frontend" "eval-web")
20 OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker") 20 OPTIONAL_SERVICES=("tei" "cnclip" "embedding" "embedding-image" "translator" "reranker")
21 FULL_SERVICES=("${OPTIONAL_SERVICES[@]}" "${CORE_SERVICES[@]}") 21 FULL_SERVICES=("${OPTIONAL_SERVICES[@]}" "${CORE_SERVICES[@]}")
22 STOP_ORDER_SERVICES=("frontend" "eval-web" "indexer" "backend" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei") 22 STOP_ORDER_SERVICES=("frontend" "eval-web" "indexer" "backend" "reranker" "translator" "embedding-image" "embedding" "cnclip" "tei")
  23 +declare -Ag SERVICE_ENABLED_CACHE=()
23 24
24 all_services() { 25 all_services() {
25 echo "${FULL_SERVICES[@]}" 26 echo "${FULL_SERVICES[@]}"
@@ -33,6 +34,72 @@ config_python_bin() { @@ -33,6 +34,72 @@ config_python_bin() {
33 fi 34 fi
34 } 35 }
35 36
  37 +service_enabled_by_config() {
  38 + local service="$1"
  39 + case "${service}" in
  40 + reranker|reranker-fine|translator)
  41 + ;;
  42 + *)
  43 + return 0
  44 + ;;
  45 + esac
  46 +
  47 + if [ -n "${SERVICE_ENABLED_CACHE[${service}]+x}" ]; then
  48 + [ "${SERVICE_ENABLED_CACHE[${service}]}" = "1" ]
  49 + return
  50 + fi
  51 +
  52 + local pybin
  53 + pybin="$(config_python_bin)"
  54 +
  55 + local enabled
  56 + if ! enabled="$(
  57 + SERVICE_NAME="${service}" \
  58 + PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" \
  59 + "${pybin}" - <<'PY'
  60 +from config.loader import get_app_config
  61 +import os
  62 +
  63 +service = os.environ["SERVICE_NAME"]
  64 +cfg = get_app_config()
  65 +
  66 +enabled = True
  67 +if service == "reranker":
  68 + enabled = bool(cfg.search.rerank.enabled)
  69 +elif service == "reranker-fine":
  70 + enabled = bool(cfg.search.fine_rank.enabled)
  71 +elif service == "translator":
  72 + capabilities = dict(cfg.services.translation.capabilities or {})
  73 + enabled = any(bool((value or {}).get("enabled", True)) for value in capabilities.values())
  74 +
  75 +print("1" if enabled else "0")
  76 +PY
  77 + )"; then
  78 + echo "[warn] failed to read config state for ${service}; defaulting to enabled" >&2
  79 + enabled="1"
  80 + fi
  81 +
  82 + SERVICE_ENABLED_CACHE["${service}"]="${enabled}"
  83 + [ "${enabled}" = "1" ]
  84 +}
  85 +
  86 +filter_disabled_targets() {
  87 + local targets="$1"
  88 + local verbose="${2:-quiet}"
  89 + local out=""
  90 + local svc
  91 +
  92 + for svc in ${targets}; do
  93 + if service_enabled_by_config "${svc}"; then
  94 + out="${out} ${svc}"
  95 + elif [ "${verbose}" = "verbose" ]; then
  96 + echo "[skip] ${svc} disabled by config" >&2
  97 + fi
  98 + done
  99 +
  100 + echo "${out# }"
  101 +}
  102 +
36 reranker_instance_for_service() { 103 reranker_instance_for_service() {
37 local service="$1" 104 local service="$1"
38 case "${service}" in 105 case "${service}" in
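The config gate that the heredoc above embeds can be isolated as a standalone sketch. The dict below stands in for `get_app_config()` (mock data, not the real config schema), but the decision rules mirror the diff: only `reranker`, `reranker-fine`, and `translator` are config-gated, and the translator counts as enabled if any capability is enabled.

```python
# Standalone sketch of the enable/disable gate from the heredoc above.
# `cfg` is a plain dict used as mock configuration, not the real loader output.
def service_enabled(service: str, cfg: dict) -> bool:
    if service == "reranker":
        return bool(cfg["search"]["rerank"]["enabled"])
    if service == "reranker-fine":
        return bool(cfg["search"]["fine_rank"]["enabled"])
    if service == "translator":
        capabilities = cfg["services"]["translation"].get("capabilities") or {}
        # Enabled if at least one capability is enabled (default True per entry).
        return any(bool((v or {}).get("enabled", True)) for v in capabilities.values())
    return True  # every other service is always enabled

cfg = {
    "search": {"rerank": {"enabled": True}, "fine_rank": {"enabled": False}},
    "services": {"translation": {"capabilities": {"zh-en": {"enabled": False}}}},
}
print(service_enabled("reranker", cfg))       # True
print(service_enabled("reranker-fine", cfg))  # False
print(service_enabled("translator", cfg))     # False
print(service_enabled("backend", cfg))        # True
```

The shell side caches this answer per service in `SERVICE_ENABLED_CACHE` so the Python interpreter is spawned at most once per gated service per invocation.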
@@ -334,7 +401,7 @@ monitor_services() { @@ -334,7 +401,7 @@ monitor_services() {
334 local fail_threshold="${MONITOR_FAIL_THRESHOLD:-3}" 401 local fail_threshold="${MONITOR_FAIL_THRESHOLD:-3}"
335 local restart_cooldown_sec="${MONITOR_RESTART_COOLDOWN_SEC:-30}" 402 local restart_cooldown_sec="${MONITOR_RESTART_COOLDOWN_SEC:-30}"
336 local max_restarts_per_hour="${MONITOR_MAX_RESTARTS_PER_HOUR:-6}" 403 local max_restarts_per_hour="${MONITOR_MAX_RESTARTS_PER_HOUR:-6}"
337 - local wechat_alert_py="${PROJECT_ROOT}/scripts/wechat_alert.py" 404 + local wechat_alert_py="${PROJECT_ROOT}/scripts/ops/wechat_alert.py"
338 405
339 require_positive_int "MONITOR_INTERVAL_SEC" "${interval_sec}" 406 require_positive_int "MONITOR_INTERVAL_SEC" "${interval_sec}"
340 require_positive_int "MONITOR_FAIL_THRESHOLD" "${fail_threshold}" 407 require_positive_int "MONITOR_FAIL_THRESHOLD" "${fail_threshold}"
@@ -468,6 +535,16 @@ stop_monitor_daemon() { @@ -468,6 +535,16 @@ stop_monitor_daemon() {
468 535
469 start_monitor_daemon() { 536 start_monitor_daemon() {
470 local targets="$1" 537 local targets="$1"
  538 + if [ -z "${targets}" ]; then
  539 + if is_monitor_daemon_running; then
  540 + echo "[info] no enabled services to monitor; stopping monitor daemon"
  541 + stop_monitor_daemon
  542 + else
  543 + echo "[info] no enabled services to monitor"
  544 + fi
  545 + return 0
  546 + fi
  547 +
471 local pf 548 local pf
472 pf="$(monitor_pid_file)" 549 pf="$(monitor_pid_file)"
473 local tf 550 local tf
@@ -581,6 +658,10 @@ wait_for_startup_health() { @@ -581,6 +658,10 @@ wait_for_startup_health() {
581 start_one() { 658 start_one() {
582 local service="$1" 659 local service="$1"
583 cd "${PROJECT_ROOT}" 660 cd "${PROJECT_ROOT}"
  661 + if ! service_enabled_by_config "${service}"; then
  662 + echo "[skip] ${service} disabled by config"
  663 + return 0
  664 + fi
584 local cmd 665 local cmd
585 if ! cmd="$(service_start_cmd "${service}")"; then 666 if ! cmd="$(service_start_cmd "${service}")"; then
586 echo "[error] unknown service: ${service}" >&2 667 echo "[error] unknown service: ${service}" >&2
@@ -953,6 +1034,7 @@ main() { @@ -953,6 +1034,7 @@ main() {
953 1034
954 load_env_file "${PROJECT_ROOT}/.env" 1035 load_env_file "${PROJECT_ROOT}/.env"
955 local targets="" 1036 local targets=""
  1037 + local effective_targets=""
956 local monitor_was_running=0 1038 local monitor_was_running=0
957 local monitor_prev_targets="" 1039 local monitor_prev_targets=""
958 local auto_monitor_on_start="${SERVICE_CTL_AUTO_MONITOR_ON_START:-1}" 1040 local auto_monitor_on_start="${SERVICE_CTL_AUTO_MONITOR_ON_START:-1}"
@@ -976,12 +1058,23 @@ main() { @@ -976,12 +1058,23 @@ main() {
976 ;; 1058 ;;
977 esac 1059 esac
978 1060
  1061 + effective_targets="${targets}"
  1062 + case "${action}" in
  1063 + up|start|restart|monitor|monitor-start)
  1064 + effective_targets="$(filter_disabled_targets "${targets}" "verbose")"
  1065 + ;;
  1066 + esac
  1067 +
979 case "${action}" in 1068 case "${action}" in
980 up) 1069 up)
981 - for svc in ${targets}; do 1070 + if [ -z "${effective_targets}" ]; then
  1071 + echo "[info] no enabled services in target set"
  1072 + exit 0
  1073 + fi
  1074 + for svc in ${effective_targets}; do
982 start_one "${svc}" 1075 start_one "${svc}"
983 done 1076 done
984 - start_monitor_daemon "${targets}" 1077 + start_monitor_daemon "${effective_targets}"
985 ;; 1078 ;;
986 down) 1079 down)
987 stop_monitor_daemon 1080 stop_monitor_daemon
@@ -990,11 +1083,15 @@ main() { @@ -990,11 +1083,15 @@ main() {
990 done 1083 done
991 ;; 1084 ;;
992 start) 1085 start)
993 - for svc in ${targets}; do 1086 + if [ -z "${effective_targets}" ]; then
  1087 + echo "[info] no enabled services in target set"
  1088 + exit 0
  1089 + fi
  1090 + for svc in ${effective_targets}; do
994 start_one "${svc}" 1091 start_one "${svc}"
995 done 1092 done
996 if [ "${auto_monitor_on_start}" = "1" ]; then 1093 if [ "${auto_monitor_on_start}" = "1" ]; then
997 - start_monitor_daemon "$(merge_targets "$(monitor_current_targets)" "${targets}")" 1094 + start_monitor_daemon "$(merge_targets "$(monitor_current_targets)" "${effective_targets}")"
998 fi 1095 fi
999 ;; 1096 ;;
1000 stop) 1097 stop)
@@ -1025,16 +1122,17 @@ main() { @@ -1025,16 +1122,17 @@ main() {
1025 for svc in ${restart_stop_targets}; do 1122 for svc in ${restart_stop_targets}; do
1026 stop_one "${svc}" 1123 stop_one "${svc}"
1027 done 1124 done
1028 - for svc in ${targets}; do 1125 + for svc in ${effective_targets}; do
1029 start_one "${svc}" 1126 start_one "${svc}"
1030 done 1127 done
1031 if [ "${monitor_was_running}" -eq 1 ]; then 1128 if [ "${monitor_was_running}" -eq 1 ]; then
1032 monitor_prev_targets="$(normalize_targets "${monitor_prev_targets}")" 1129 monitor_prev_targets="$(normalize_targets "${monitor_prev_targets}")"
  1130 + monitor_prev_targets="$(filter_disabled_targets "${monitor_prev_targets}" "quiet")"
1033 monitor_prev_targets="$(apply_target_order monitor "${monitor_prev_targets}")" 1131 monitor_prev_targets="$(apply_target_order monitor "${monitor_prev_targets}")"
1034 - [ -z "${monitor_prev_targets}" ] && monitor_prev_targets="${targets}" 1132 + [ -z "${monitor_prev_targets}" ] && monitor_prev_targets="${effective_targets}"
1035 start_monitor_daemon "${monitor_prev_targets}" 1133 start_monitor_daemon "${monitor_prev_targets}"
1036 elif [ "${auto_monitor_on_start}" = "1" ]; then 1134 elif [ "${auto_monitor_on_start}" = "1" ]; then
1037 - start_monitor_daemon "$(merge_targets "$(monitor_current_targets)" "${targets}")" 1135 + start_monitor_daemon "$(merge_targets "$(monitor_current_targets)" "${effective_targets}")"
1038 fi 1136 fi
1039 ;; 1137 ;;
1040 status) 1138 status)
@@ -1044,10 +1142,14 @@ main() { @@ -1044,10 +1142,14 @@ main() {
1044 monitor_daemon_status 1142 monitor_daemon_status
1045 ;; 1143 ;;
1046 monitor) 1144 monitor)
1047 - monitor_services "${targets}" 1145 + if [ -z "${effective_targets}" ]; then
  1146 + echo "[info] no enabled services in target set"
  1147 + exit 0
  1148 + fi
  1149 + monitor_services "${effective_targets}"
1048 ;; 1150 ;;
1049 monitor-start) 1151 monitor-start)
1050 - start_monitor_daemon "${targets}" 1152 + start_monitor_daemon "${effective_targets}"
1051 ;; 1153 ;;
1052 monitor-stop) 1154 monitor-stop)
1053 stop_monitor_daemon 1155 stop_monitor_daemon
scripts/setup_translator_venv.sh
@@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
8 cd "${PROJECT_ROOT}" 8 cd "${PROJECT_ROOT}"
9 9
10 VENV_DIR="${PROJECT_ROOT}/.venv-translator" 10 VENV_DIR="${PROJECT_ROOT}/.venv-translator"
11 -PYTHON_BIN="${PYTHON_BIN:-python3}"  
12 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" 11 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}"
  12 +MIN_PYTHON_MAJOR=3
  13 +MIN_PYTHON_MINOR=10
  14 +
  15 +python_meets_minimum() {
  16 + local bin="$1"
  17 + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}"
  18 +import sys
  19 +
  20 +required = tuple(int(value) for value in sys.argv[1:])
  21 +sys.exit(0 if sys.version_info[:2] >= required else 1)
  22 +PY
  23 +}
  24 +
  25 +discover_python_bin() {
  26 + local candidates=()
  27 +
  28 + if [[ -n "${PYTHON_BIN:-}" ]]; then
  29 + candidates+=("${PYTHON_BIN}")
  30 + fi
  31 + candidates+=("python3.12" "python3.11" "python3.10" "python3")
  32 +
  33 + local candidate
  34 + for candidate in "${candidates[@]}"; do
  35 + if ! command -v "${candidate}" >/dev/null 2>&1; then
  36 + continue
  37 + fi
  38 + if python_meets_minimum "${candidate}"; then
  39 + echo "${candidate}"
  40 + return 0
  41 + fi
  42 + done
  43 +
  44 + return 1
  45 +}
  46 +
  47 +if ! PYTHON_BIN="$(discover_python_bin)"; then
  48 + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2
  49 + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2
  50 + exit 1
  51 +fi
13 52
14 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then 53 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
15 echo "ERROR: python not found: ${PYTHON_BIN}" >&2 54 echo "ERROR: python not found: ${PYTHON_BIN}" >&2
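The heredoc in `python_meets_minimum` relies on `sys.version_info` comparing lexicographically as a tuple, which is why the floor check is a single comparison. A minimal illustration:

```python
import sys

# sys.version_info compares element-wise as a tuple, so a (major, minor)
# floor check is one comparison -- the same test the heredoc above runs.
def meets_minimum(version: tuple, required: tuple = (3, 10)) -> bool:
    return version[:2] >= required

print(meets_minimum((3, 12, 1)))  # True
print(meets_minimum((3, 9, 18)))  # False: (3, 9) < (3, 10), unlike a string compare
print(meets_minimum(sys.version_info))
```

Note the string pitfall this avoids: `"3.9" >= "3.10"` is true lexicographically, while the tuple comparison gets it right.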
@@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}"
32 export TMPDIR="${TMP_DIR}" 71 export TMPDIR="${TMP_DIR}"
33 PIP_ARGS=(--no-cache-dir) 72 PIP_ARGS=(--no-cache-dir)
34 73
  74 +echo "Using Python=${PYTHON_BIN}"
35 echo "Using TMPDIR=${TMPDIR}" 75 echo "Using TMPDIR=${TMPDIR}"
36 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel 76 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel
37 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt 77 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt
@@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}" @@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}"
39 echo 79 echo
40 echo "Done." 80 echo "Done."
41 echo "Translator venv: ${VENV_DIR}" 81 echo "Translator venv: ${VENV_DIR}"
42 -echo "Download local models: ./.venv-translator/bin/python scripts/download_translation_models.py --all-local" 82 +echo "Download local models: ./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local"
43 echo "Start service: ./scripts/start_translator.sh" 83 echo "Start service: ./scripts/start_translator.sh"
scripts/start_cnclip_service.sh
@@ -61,7 +61,7 @@ LOG_DIR=&quot;${PROJECT_ROOT}/logs&quot; @@ -61,7 +61,7 @@ LOG_DIR=&quot;${PROJECT_ROOT}/logs&quot;
61 PID_FILE="${LOG_DIR}/cnclip.pid" 61 PID_FILE="${LOG_DIR}/cnclip.pid"
62 LOG_LINK="${LOG_DIR}/cnclip.log" 62 LOG_LINK="${LOG_DIR}/cnclip.log"
63 LOG_FILE="${LOG_DIR}/cnclip-$(date +%F).log" 63 LOG_FILE="${LOG_DIR}/cnclip-$(date +%F).log"
64 -LOG_ROUTER_SCRIPT="${PROJECT_ROOT}/scripts/daily_log_router.sh" 64 +LOG_ROUTER_SCRIPT="${PROJECT_ROOT}/scripts/ops/daily_log_router.sh"
65 65
66 # Help message 66 # Help message
67 show_help() { 67 show_help() {
scripts/start_frontend.sh
@@ -27,4 +27,4 @@ echo -e " ${GREEN}http://localhost:${API_PORT}${NC}" @@ -27,4 +27,4 @@ echo -e " ${GREEN}http://localhost:${API_PORT}${NC}"
27 echo "" 27 echo ""
28 28
29 export FRONTEND_PORT API_PORT PORT 29 export FRONTEND_PORT API_PORT PORT
30 -exec python scripts/frontend_server.py 30 +exec python scripts/frontend/frontend_server.py
scripts/translation/download_translation_models.py 0 → 100755
@@ -0,0 +1,100 @@ @@ -0,0 +1,100 @@
  1 +#!/usr/bin/env python3
  2 +"""Download local translation models declared in services.translation.capabilities."""
  3 +
  4 +from __future__ import annotations
  5 +
  6 +import argparse
  7 +import os
  8 +from pathlib import Path
  9 +import sys
  10 +from typing import Iterable
  11 +
  12 +from huggingface_hub import snapshot_download
  13 +
  14 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
  15 +if str(PROJECT_ROOT) not in sys.path:
  16 + sys.path.insert(0, str(PROJECT_ROOT))
  17 +os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
  18 +
  19 +from config.services_config import get_translation_config
  20 +from translation.ct2_conversion import convert_transformers_model
  21 +
  22 +
  23 +LOCAL_BACKENDS = {"local_nllb", "local_marian"}
  24 +
  25 +
  26 +def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:
  27 + cfg = get_translation_config()
  28 + capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}
  29 + for name, capability in capabilities.items():
  30 + backend = str(capability.get("backend") or "").strip().lower()
  31 + if backend not in LOCAL_BACKENDS:
  32 + continue
  33 + if selected and name not in selected:
  34 + continue
  35 + yield name, capability
  36 +
  37 +
  38 +def _compute_ct2_output_dir(capability: dict) -> Path:
  39 + custom = str(capability.get("ct2_model_dir") or "").strip()
  40 + if custom:
  41 + return Path(custom).expanduser()
  42 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  43 + compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower()
  44 + normalized = compute_type.replace("_", "-")
  45 + return model_dir / f"ctranslate2-{normalized}"
  46 +
  47 +
  48 +def convert_to_ctranslate2(name: str, capability: dict) -> None:
  49 + model_id = str(capability.get("model_id") or "").strip()
  50 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  51 + model_source = str(model_dir if model_dir.exists() else model_id)
  52 + output_dir = _compute_ct2_output_dir(capability)
  53 + if (output_dir / "model.bin").exists():
  54 + print(f"[skip-convert] {name} -> {output_dir}")
  55 + return
  56 + quantization = str(
  57 + capability.get("ct2_conversion_quantization")
  58 + or capability.get("ct2_compute_type")
  59 + or capability.get("torch_dtype")
  60 + or "default"
  61 + ).strip()
  62 + output_dir.parent.mkdir(parents=True, exist_ok=True)
  63 + print(f"[convert] {name} -> {output_dir} ({quantization})")
  64 + convert_transformers_model(model_source, str(output_dir), quantization)
  65 + print(f"[converted] {name}")
  66 +
  67 +
  68 +def main() -> None:
  69 + parser = argparse.ArgumentParser(description="Download local translation models")
  70 + parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")
  71 + parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")
  72 + parser.add_argument(
  73 + "--convert-ctranslate2",
  74 + action="store_true",
  75 + help="Also convert the downloaded Hugging Face models into CTranslate2 format",
  76 + )
  77 + args = parser.parse_args()
  78 +
  79 + selected = {item.strip().lower() for item in args.models if item.strip()} or None
  80 + if not args.all_local and not selected:
  81 + parser.error("pass --all-local or --models <name> ...")
  82 +
  83 + for name, capability in iter_local_capabilities(selected):
  84 + model_id = str(capability.get("model_id") or "").strip()
  85 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  86 + if not model_id or not model_dir:
  87 + raise ValueError(f"Capability '{name}' must define model_id and model_dir")
  88 + model_dir.parent.mkdir(parents=True, exist_ok=True)
  89 + print(f"[download] {name} -> {model_dir} ({model_id})")
  90 + snapshot_download(
  91 + repo_id=model_id,
  92 + local_dir=str(model_dir),
  93 + )
  94 + print(f"[done] {name}")
  95 + if args.convert_ctranslate2:
  96 + convert_to_ctranslate2(name, capability)
  97 +
  98 +
  99 +if __name__ == "__main__":
  100 + main()
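The output-dir rule in `_compute_ct2_output_dir` above can be exercised on its own: an explicit `ct2_model_dir` wins, otherwise the directory is derived from `model_dir` plus the compute type with underscores normalized to hyphens. The capability dicts below are mock data.

```python
from pathlib import Path

# Standalone sketch of _compute_ct2_output_dir above (capability dicts are mock data).
def ct2_output_dir(capability: dict) -> Path:
    custom = str(capability.get("ct2_model_dir") or "").strip()
    if custom:
        # An explicit CT2 directory always takes precedence.
        return Path(custom).expanduser()
    model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
    compute_type = str(
        capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default"
    ).strip().lower()
    # Underscores become hyphens so int8_float16 -> ctranslate2-int8-float16.
    return model_dir / f"ctranslate2-{compute_type.replace('_', '-')}"

print(ct2_output_dir({"model_dir": "/models/nllb", "ct2_compute_type": "int8_float16"}))
# /models/nllb/ctranslate2-int8-float16
print(ct2_output_dir({"model_dir": "/models/nllb", "ct2_model_dir": "/fast/ct2"}))
# /fast/ct2
```

Because the derived path encodes the compute type, switching `ct2_compute_type` yields a separate conversion directory instead of silently overwriting a model converted with different quantization.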
search/es_query_builder.py
@@ -8,6 +8,7 @@ Simplified architecture: @@ -8,6 +8,7 @@ Simplified architecture:
8 - function_score wrapper for boosting fields 8 - function_score wrapper for boosting fields
9 """ 9 """
10 10
  11 +from dataclasses import dataclass
11 from typing import Dict, Any, List, Optional, Tuple 12 from typing import Dict, Any, List, Optional, Tuple
12 13
13 import numpy as np 14 import numpy as np
@@ -114,6 +115,171 @@ class ESQueryBuilder: @@ -114,6 +115,171 @@ class ESQueryBuilder:
114 self.phrase_match_tie_breaker = float(phrase_match_tie_breaker) 115 self.phrase_match_tie_breaker = float(phrase_match_tie_breaker)
115 self.phrase_match_boost = float(phrase_match_boost) 116 self.phrase_match_boost = float(phrase_match_boost)
116 117
  118 + @dataclass(frozen=True)
  119 + class KNNClausePlan:
  120 + field: str
  121 + boost: float
  122 + k: Optional[int] = None
  123 + num_candidates: Optional[int] = None
  124 + nested_path: Optional[str] = None
  125 +
  126 + @staticmethod
  127 + def _vector_to_list(vector: Any) -> List[float]:
  128 + if vector is None:
  129 + return []
  130 + if hasattr(vector, "tolist"):
  131 + values = vector.tolist()
  132 + else:
  133 + values = list(vector)
  134 + return [float(v) for v in values]
  135 +
  136 + @staticmethod
  137 + def _query_token_count(parsed_query: Optional[Any]) -> int:
  138 + if parsed_query is None:
  139 + return 0
  140 + query_tokens = getattr(parsed_query, "query_tokens", None) or []
  141 + return len(query_tokens)
  142 +
  143 + def get_text_knn_plan(self, parsed_query: Optional[Any] = None) -> Optional[KNNClausePlan]:
  144 + if not self.text_embedding_field:
  145 + return None
  146 + boost = self.knn_text_boost
  147 + final_knn_k = self.knn_text_k
  148 + final_knn_num_candidates = self.knn_text_num_candidates
  149 + if self._query_token_count(parsed_query) >= 5:
  150 + final_knn_k = self.knn_text_k_long
  151 + final_knn_num_candidates = self.knn_text_num_candidates_long
  152 + boost = self.knn_text_boost * 1.4
  153 + return self.KNNClausePlan(
  154 + field=str(self.text_embedding_field),
  155 + boost=float(boost),
  156 + k=int(final_knn_k),
  157 + num_candidates=int(final_knn_num_candidates),
  158 + )
  159 +
  160 + def get_image_knn_plan(self) -> Optional[KNNClausePlan]:
  161 + if not self.image_embedding_field:
  162 + return None
  163 + nested_path, _, _ = str(self.image_embedding_field).rpartition(".")
  164 + return self.KNNClausePlan(
  165 + field=str(self.image_embedding_field),
  166 + boost=float(self.knn_image_boost),
  167 + k=int(self.knn_image_k),
  168 + num_candidates=int(self.knn_image_num_candidates),
  169 + nested_path=nested_path or None,
  170 + )
  171 +
  172 + def build_text_knn_clause(
  173 + self,
  174 + query_vector: Any,
  175 + *,
  176 + parsed_query: Optional[Any] = None,
  177 + query_name: str = "knn_query",
  178 + ) -> Optional[Dict[str, Any]]:
  179 + plan = self.get_text_knn_plan(parsed_query)
  180 + if plan is None or query_vector is None:
  181 + return None
  182 + return {
  183 + "knn": {
  184 + "field": plan.field,
  185 + "query_vector": self._vector_to_list(query_vector),
  186 + "k": plan.k,
  187 + "num_candidates": plan.num_candidates,
  188 + "boost": plan.boost,
  189 + "_name": query_name,
  190 + }
  191 + }
  192 +
  193 + def build_image_knn_clause(
  194 + self,
  195 + image_query_vector: Any,
  196 + *,
  197 + query_name: str = "image_knn_query",
  198 + ) -> Optional[Dict[str, Any]]:
  199 + plan = self.get_image_knn_plan()
  200 + if plan is None or image_query_vector is None:
  201 + return None
  202 + image_knn_query = {
  203 + "field": plan.field,
  204 + "query_vector": self._vector_to_list(image_query_vector),
  205 + "k": plan.k,
  206 + "num_candidates": plan.num_candidates,
  207 + "boost": plan.boost,
  208 + }
  209 + if plan.nested_path:
  210 + return {
  211 + "nested": {
  212 + "path": plan.nested_path,
  213 + "_name": query_name,
  214 + "query": {"knn": image_knn_query},
  215 + "score_mode": "max",
  216 + }
  217 + }
  218 + return {
  219 + "knn": {
  220 + **image_knn_query,
  221 + "_name": query_name,
  222 + }
  223 + }
  224 +
  225 + def build_exact_text_knn_rescore_clause(
  226 + self,
  227 + query_vector: Any,
  228 + *,
  229 + parsed_query: Optional[Any] = None,
  230 + query_name: str = "exact_text_knn_query",
  231 + ) -> Optional[Dict[str, Any]]:
  232 + plan = self.get_text_knn_plan(parsed_query)
  233 + if plan is None or query_vector is None:
  234 + return None
  235 + return {
  236 + "script_score": {
  237 + "_name": query_name,
  238 + "query": {"exists": {"field": plan.field}},
  239 + "script": {
  240 + "source": (
  241 + f"((dotProduct(params.query_vector, '{plan.field}') + 1.0) / 2.0) * params.boost"
  242 + ),
  243 + "params": {
  244 + "query_vector": self._vector_to_list(query_vector),
  245 + "boost": float(plan.boost),
  246 + },
  247 + },
  248 + }
  249 + }
  250 +
  251 + def build_exact_image_knn_rescore_clause(
  252 + self,
  253 + image_query_vector: Any,
  254 + *,
  255 + query_name: str = "exact_image_knn_query",
  256 + ) -> Optional[Dict[str, Any]]:
  257 + plan = self.get_image_knn_plan()
  258 + if plan is None or image_query_vector is None:
  259 + return None
  260 + script_score_query = {
  261 + "query": {"exists": {"field": plan.field}},
  262 + "script": {
  263 + "source": (
  264 + f"((dotProduct(params.query_vector, '{plan.field}') + 1.0) / 2.0) * params.boost"
  265 + ),
  266 + "params": {
  267 + "query_vector": self._vector_to_list(image_query_vector),
  268 + "boost": float(plan.boost),
  269 + },
  270 + },
  271 + }
  272 + if plan.nested_path:
  273 + return {
  274 + "nested": {
  275 + "path": plan.nested_path,
  276 + "_name": query_name,
  277 + "score_mode": "max",
  278 + "query": {"script_score": script_score_query},
  279 + }
  280 + }
  281 + return {"script_score": {"_name": query_name, **script_score_query}}
  282 +
117 def _apply_source_filter(self, es_query: Dict[str, Any]) -> None: 283 def _apply_source_filter(self, es_query: Dict[str, Any]) -> None:
118 """ 284 """
119 Apply tri-state _source semantics: 285 Apply tri-state _source semantics:
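The long-query switch inside `get_text_knn_plan` above (token count >= 5 selects the `*_long` parameters and a 1.4x boost) reduces to a small pure function. The numeric defaults below are example values, not the project's actual config:

```python
# Sketch of the long-query switch in get_text_knn_plan above.
# k/candidates/boost defaults are illustrative placeholders, not real config values.
def text_knn_params(token_count: int, *, k=50, k_long=100,
                    candidates=200, candidates_long=400, boost=1.0):
    # Queries with >= 5 tokens widen the candidate pool and raise the KNN boost 1.4x,
    # since lexical recall degrades on long multi-intent queries.
    if token_count >= 5:
        return {"k": k_long, "num_candidates": candidates_long, "boost": boost * 1.4}
    return {"k": k, "num_candidates": candidates, "boost": boost}

print(text_knn_params(3))  # {'k': 50, 'num_candidates': 200, 'boost': 1.0}
print(text_knn_params(6))  # {'k': 100, 'num_candidates': 400, 'boost': 1.4}
```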
@@ -250,52 +416,21 @@ class ESQueryBuilder: @@ -250,52 +416,21 @@ class ESQueryBuilder:
250 # 3. Add KNN search clauses alongside lexical clauses under the same bool.should 416 # 3. Add KNN search clauses alongside lexical clauses under the same bool.should
251 # Text KNN: k / num_candidates from config; long queries use *_long and higher boost 417 # Text KNN: k / num_candidates from config; long queries use *_long and higher boost
252 if has_embedding: 418 if has_embedding:
253 - text_knn_boost = self.knn_text_boost  
254 - final_knn_k = self.knn_text_k  
255 - final_knn_num_candidates = self.knn_text_num_candidates  
256 - if parsed_query:  
257 - query_tokens = getattr(parsed_query, 'query_tokens', None) or []  
258 - token_count = len(query_tokens)  
259 - if token_count >= 5:  
260 - final_knn_k = self.knn_text_k_long  
261 - final_knn_num_candidates = self.knn_text_num_candidates_long  
262 - text_knn_boost = self.knn_text_boost * 1.4  
263 - recall_clauses.append({  
264 - "knn": {  
265 - "field": self.text_embedding_field,  
266 - "query_vector": query_vector.tolist(),  
267 - "k": final_knn_k,  
268 - "num_candidates": final_knn_num_candidates,  
269 - "boost": text_knn_boost,  
270 - "_name": "knn_query",  
271 - }  
272 - }) 419 + text_knn_clause = self.build_text_knn_clause(
  420 + query_vector,
  421 + parsed_query=parsed_query,
  422 + query_name="knn_query",
  423 + )
  424 + if text_knn_clause:
  425 + recall_clauses.append(text_knn_clause)
273 426
274 if has_image_embedding: 427 if has_image_embedding:
275 - nested_path, _, _ = str(self.image_embedding_field).rpartition(".")  
276 - image_knn_query = {  
277 - "field": self.image_embedding_field,  
278 - "query_vector": image_query_vector.tolist(),  
279 - "k": self.knn_image_k,  
280 - "num_candidates": self.knn_image_num_candidates,  
281 - "boost": self.knn_image_boost,  
282 - }  
283 - if nested_path:  
284 - recall_clauses.append({  
285 - "nested": {  
286 - "path": nested_path,  
287 - "_name": "image_knn_query",  
288 - "query": {"knn": image_knn_query},  
289 - "score_mode": "max",  
290 - }  
291 - })  
292 - else:  
293 - recall_clauses.append({  
294 - "knn": {  
295 - **image_knn_query,  
296 - "_name": "image_knn_query",  
297 - }  
298 - }) 428 + image_knn_clause = self.build_image_knn_clause(
  429 + image_query_vector,
  430 + query_name="image_knn_query",
  431 + )
  432 + if image_knn_clause:
  433 + recall_clauses.append(image_knn_clause)
299 434
300 # 4. Build main query structure: filters and recall 435 # 4. Build main query structure: filters and recall
301 if recall_clauses: 436 if recall_clauses:
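The exact-rescore script added above computes `((dotProduct + 1.0) / 2.0) * boost`. For unit-normalized vectors the dot product is cosine similarity in [-1, 1], so this maps the score into [0, boost] and keeps it non-negative, which `script_score` requires. A quick check of the mapping:

```python
# The exact-rescore script above evaluates ((dotProduct + 1) / 2) * boost.
# With unit vectors, dot product == cosine similarity in [-1, 1],
# so the result lands in [0, boost] and is never negative.
def rescore(query_vec, doc_vec, boost=1.0):
    dot = sum(q * d for q, d in zip(query_vec, doc_vec))
    return ((dot + 1.0) / 2.0) * boost

a = [1.0, 0.0]
print(rescore(a, [1.0, 0.0]))   # identical vectors  -> 1.0
print(rescore(a, [-1.0, 0.0]))  # opposite vectors   -> 0.0
print(rescore(a, [0.0, 1.0]))   # orthogonal vectors -> 0.5
```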
search/rerank_client.py
@@ -153,12 +153,59 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float: @@ -153,12 +153,59 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float:
153 return 0.0 153 return 0.0
154 154
155 155
  156 +def _resolve_named_query_score(
  157 + matched_queries: Any,
  158 + *,
  159 + preferred_names: List[str],
  160 + fallback_names: List[str],
  161 +) -> Tuple[float, Optional[str], float, Optional[str]]:
  162 + preferred_score = 0.0
  163 + preferred_name: Optional[str] = None
  164 + for name in preferred_names:
  165 + score = _extract_named_query_score(matched_queries, name)
  166 + if score > 0.0:
  167 + preferred_score = score
  168 + preferred_name = name
  169 + break
  170 +
  171 + fallback_score = 0.0
  172 + fallback_name: Optional[str] = None
  173 + for name in fallback_names:
  174 + score = _extract_named_query_score(matched_queries, name)
  175 + if score > 0.0:
  176 + fallback_score = score
  177 + fallback_name = name
  178 + break
  179 +
  180 + if preferred_name is None and preferred_names:
  181 + preferred_name = preferred_names[0]
  182 + preferred_score = _extract_named_query_score(matched_queries, preferred_name)
  183 + if fallback_name is None and fallback_names:
  184 + fallback_name = fallback_names[0]
  185 + fallback_score = _extract_named_query_score(matched_queries, fallback_name)
  186 + if preferred_score > 0.0:
  187 + return preferred_score, preferred_name, fallback_score, fallback_name
  188 + return fallback_score, fallback_name, preferred_score, preferred_name
  189 +
  190 +
156 def _collect_knn_score_components( 191 def _collect_knn_score_components(
157 matched_queries: Any, 192 matched_queries: Any,
158 fusion: RerankFusionConfig, 193 fusion: RerankFusionConfig,
159 ) -> Dict[str, float]: 194 ) -> Dict[str, float]:
160 - text_knn_score = _extract_named_query_score(matched_queries, "knn_query")  
161 - image_knn_score = _extract_named_query_score(matched_queries, "image_knn_query") 195 + text_knn_score, text_knn_source, _, _ = _resolve_named_query_score(
  196 + matched_queries,
  197 + preferred_names=["exact_text_knn_query"],
  198 + fallback_names=["knn_query"],
  199 + )
  200 + image_knn_score, image_knn_source, _, _ = _resolve_named_query_score(
  201 + matched_queries,
  202 + preferred_names=["exact_image_knn_query"],
  203 + fallback_names=["image_knn_query"],
  204 + )
  205 + exact_text_knn_score = _extract_named_query_score(matched_queries, "exact_text_knn_query")
  206 + exact_image_knn_score = _extract_named_query_score(matched_queries, "exact_image_knn_query")
  207 + approx_text_knn_score = _extract_named_query_score(matched_queries, "knn_query")
  208 + approx_image_knn_score = _extract_named_query_score(matched_queries, "image_knn_query")
162 209
163 weighted_text_knn_score = text_knn_score * float(fusion.knn_text_weight) 210 weighted_text_knn_score = text_knn_score * float(fusion.knn_text_weight)
164 weighted_image_knn_score = image_knn_score * float(fusion.knn_image_weight) 211 weighted_image_knn_score = image_knn_score * float(fusion.knn_image_weight)
@@ -171,6 +218,14 @@ def _collect_knn_score_components( @@ -171,6 +218,14 @@ def _collect_knn_score_components(
171 return { 218 return {
172 "text_knn_score": text_knn_score, 219 "text_knn_score": text_knn_score,
173 "image_knn_score": image_knn_score, 220 "image_knn_score": image_knn_score,
  221 + "exact_text_knn_score": exact_text_knn_score,
  222 + "exact_image_knn_score": exact_image_knn_score,
  223 + "approx_text_knn_score": approx_text_knn_score,
  224 + "approx_image_knn_score": approx_image_knn_score,
  225 + "text_knn_source": text_knn_source,
  226 + "image_knn_source": image_knn_source,
  227 + "approx_text_knn_source": "knn_query",
  228 + "approx_image_knn_source": "image_knn_query",
174 "weighted_text_knn_score": weighted_text_knn_score, 229 "weighted_text_knn_score": weighted_text_knn_score,
175 "weighted_image_knn_score": weighted_image_knn_score, 230 "weighted_image_knn_score": weighted_image_knn_score,
176 "primary_knn_score": primary_knn_score, 231 "primary_knn_score": primary_knn_score,
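The core of the preferred/fallback resolution in `_resolve_named_query_score` above is: take the exact-KNN named query's score when it is positive, otherwise fall back to the approximate one. A simplified sketch (a flat score dict stands in for the matched-queries payload):

```python
# Simplified sketch of _resolve_named_query_score above: prefer the exact-KNN
# named query, fall back to the approximate one when the exact score is absent
# or zero. `matched` is a plain {name: score} dict used as mock input.
def resolve_score(matched: dict, preferred: str, fallback: str):
    preferred_score = float(matched.get(preferred, 0.0))
    fallback_score = float(matched.get(fallback, 0.0))
    if preferred_score > 0.0:
        return preferred_score, preferred
    return fallback_score, fallback

print(resolve_score({"exact_text_knn_query": 0.9, "knn_query": 0.4},
                    "exact_text_knn_query", "knn_query"))  # (0.9, 'exact_text_knn_query')
print(resolve_score({"knn_query": 0.4},
                    "exact_text_knn_query", "knn_query"))  # (0.4, 'knn_query')
```

Keeping both raw scores (as the full function does) lets downstream LTR features observe the exact and approximate signals separately even when only one drives the fused score.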
@@ -322,6 +377,10 @@ def _build_ltr_feature_block( @@ -322,6 +377,10 @@ def _build_ltr_feature_block(
322 "text_support_score": float(text_components["support_text_score"]), 377 "text_support_score": float(text_components["support_text_score"]),
323 "text_knn_score": text_knn_score, 378 "text_knn_score": text_knn_score,
324 "image_knn_score": image_knn_score, 379 "image_knn_score": image_knn_score,
  380 + "exact_text_knn_score": float(knn_components["exact_text_knn_score"]),
  381 + "exact_image_knn_score": float(knn_components["exact_image_knn_score"]),
  382 + "approx_text_knn_score": float(knn_components["approx_text_knn_score"]),
  383 + "approx_image_knn_score": float(knn_components["approx_image_knn_score"]),
325 "knn_primary_score": float(knn_components["primary_knn_score"]), 384 "knn_primary_score": float(knn_components["primary_knn_score"]),
326 "knn_support_score": float(knn_components["support_knn_score"]), 385 "knn_support_score": float(knn_components["support_knn_score"]),
327 "has_text_match": source_score > 0.0, 386 "has_text_match": source_score > 0.0,
@@ -337,12 +396,50 @@ def _build_ltr_feature_block( @@ -337,12 +396,50 @@ def _build_ltr_feature_block(
337 } 396 }
338 397
339 398
  399 +def _maybe_append_weighted_knn_terms(
  400 + *,
  401 + term_rows: List[Dict[str, Any]],
  402 + fusion: CoarseRankFusionConfig | RerankFusionConfig,
  403 + knn_components: Optional[Dict[str, Any]],
  404 +) -> None:
  405 + if not knn_components:
  406 + return
  407 +
  408 + weighted_text_knn_score = _to_score(knn_components.get("weighted_text_knn_score"))
  409 + weighted_image_knn_score = _to_score(knn_components.get("weighted_image_knn_score"))
  410 +
  411 + if float(getattr(fusion, "knn_text_exponent", 0.0)) != 0.0:
  412 + text_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias))
  413 + term_rows.append(
  414 + {
  415 + "name": "weighted_text_knn_score",
  416 + "raw_score": weighted_text_knn_score,
  417 + "bias": text_bias,
  418 + "exponent": float(fusion.knn_text_exponent),
  419 + "factor": (max(weighted_text_knn_score, 0.0) + text_bias) ** float(fusion.knn_text_exponent),
  420 + }
  421 + )
  422 + if float(getattr(fusion, "knn_image_exponent", 0.0)) != 0.0:
  423 + image_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias))
  424 + term_rows.append(
  425 + {
  426 + "name": "weighted_image_knn_score",
  427 + "raw_score": weighted_image_knn_score,
  428 + "bias": image_bias,
  429 + "exponent": float(fusion.knn_image_exponent),
  430 + "factor": (max(weighted_image_knn_score, 0.0) + image_bias)
  431 + ** float(fusion.knn_image_exponent),
  432 + }
  433 + )
  434 +
  435 +
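For readers following the factor math in `_maybe_append_weighted_knn_terms`, a minimal standalone sketch of the per-term computation (hypothetical helper name, not part of the diff): a term only enters the fused product when its exponent is non-zero, which is why the helper skips exponent-0 terms — they would contribute a neutral factor of 1.0 anyway.

```python
# Hypothetical standalone form of the per-term fusion factor used above.
def fusion_factor(score: float, bias: float, exponent: float) -> float:
    # Negative scores are clamped to 0.0 before the bias is added;
    # an exponent of 0.0 makes the term neutral (factor == 1.0).
    return (max(score, 0.0) + bias) ** exponent
```

For example, `fusion_factor(0.8, 0.2, 1.0)` is `1.0`, while any score with exponent `0.0` collapses to the neutral `1.0`.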
340 def _compute_multiplicative_fusion( 436 def _compute_multiplicative_fusion(
341 *, 437 *,
342 es_score: float, 438 es_score: float,
343 text_score: float, 439 text_score: float,
344 knn_score: float, 440 knn_score: float,
345 fusion: RerankFusionConfig, 441 fusion: RerankFusionConfig,
  442 + knn_components: Optional[Dict[str, Any]] = None,
346 rerank_score: Optional[float] = None, 443 rerank_score: Optional[float] = None,
347 fine_score: Optional[float] = None, 444 fine_score: Optional[float] = None,
348 style_boost: float = 1.0, 445 style_boost: float = 1.0,
@@ -368,6 +465,7 @@ def _compute_multiplicative_fusion( @@ -368,6 +465,7 @@ def _compute_multiplicative_fusion(
368 _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) 465 _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent)
369 _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) 466 _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent)
370 _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent) 467 _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent)
  468 + _maybe_append_weighted_knn_terms(term_rows=term_rows, fusion=fusion, knn_components=knn_components)
371 469
372 fused = 1.0 470 fused = 1.0
373 factors: Dict[str, float] = {} 471 factors: Dict[str, float] = {}
@@ -391,12 +489,30 @@ def _multiply_coarse_fusion_factors( @@ -391,12 +489,30 @@ def _multiply_coarse_fusion_factors(
391 es_score: float, 489 es_score: float,
392 text_score: float, 490 text_score: float,
393 knn_score: float, 491 knn_score: float,
  492 + knn_components: Dict[str, Any],
394 fusion: CoarseRankFusionConfig, 493 fusion: CoarseRankFusionConfig,
395 -) -> Tuple[float, float, float, float]: 494 +) -> Tuple[float, float, float, float, float, float]:
396 es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent 495 es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent
397 text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent 496 text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
398 knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent 497 knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
399 - return es_factor, text_factor, knn_factor, es_factor * text_factor * knn_factor 498 + text_knn_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias))
  499 + image_knn_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias))
  500 + text_knn_factor = (
  501 + (max(_to_score(knn_components.get("weighted_text_knn_score")), 0.0) + text_knn_bias)
  502 + ** float(getattr(fusion, "knn_text_exponent", 0.0))
  503 + )
  504 + image_knn_factor = (
  505 + (max(_to_score(knn_components.get("weighted_image_knn_score")), 0.0) + image_knn_bias)
  506 + ** float(getattr(fusion, "knn_image_exponent", 0.0))
  507 + )
  508 + return (
  509 + es_factor,
  510 + text_factor,
  511 + knn_factor,
  512 + text_knn_factor,
  513 + image_knn_factor,
  514 + es_factor * text_factor * knn_factor * text_knn_factor * image_knn_factor,
  515 + )
400 516
401 517
402 def _has_selected_sku(hit: Dict[str, Any]) -> bool: 518 def _has_selected_sku(hit: Dict[str, Any]) -> bool:
@@ -422,10 +538,18 @@ def coarse_resort_hits( @@ -422,10 +538,18 @@ def coarse_resort_hits(
422 knn_components = signal_bundle["knn_components"] 538 knn_components = signal_bundle["knn_components"]
423 text_score = signal_bundle["text_score"] 539 text_score = signal_bundle["text_score"]
424 knn_score = signal_bundle["knn_score"] 540 knn_score = signal_bundle["knn_score"]
425 - es_factor, text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors( 541 + (
  542 + es_factor,
  543 + text_factor,
  544 + knn_factor,
  545 + text_knn_factor,
  546 + image_knn_factor,
  547 + coarse_score,
  548 + ) = _multiply_coarse_fusion_factors(
426 es_score=es_score, 549 es_score=es_score,
427 text_score=text_score, 550 text_score=text_score,
428 knn_score=knn_score, 551 knn_score=knn_score,
  552 + knn_components=knn_components,
429 fusion=f, 553 fusion=f,
430 ) 554 )
431 555
@@ -433,6 +557,8 @@ def coarse_resort_hits( @@ -433,6 +557,8 @@ def coarse_resort_hits(
433 hit["_knn_score"] = knn_score 557 hit["_knn_score"] = knn_score
434 hit["_text_knn_score"] = knn_components["text_knn_score"] 558 hit["_text_knn_score"] = knn_components["text_knn_score"]
435 hit["_image_knn_score"] = knn_components["image_knn_score"] 559 hit["_image_knn_score"] = knn_components["image_knn_score"]
  560 + hit["_exact_text_knn_score"] = knn_components["exact_text_knn_score"]
  561 + hit["_exact_image_knn_score"] = knn_components["exact_image_knn_score"]
436 hit["_coarse_score"] = coarse_score 562 hit["_coarse_score"] = coarse_score
437 563
438 if debug: 564 if debug:
@@ -460,6 +586,12 @@ def coarse_resort_hits( @@ -460,6 +586,12 @@ def coarse_resort_hits(
460 ), 586 ),
461 "text_knn_score": knn_components["text_knn_score"], 587 "text_knn_score": knn_components["text_knn_score"],
462 "image_knn_score": knn_components["image_knn_score"], 588 "image_knn_score": knn_components["image_knn_score"],
  589 + "exact_text_knn_score": knn_components["exact_text_knn_score"],
  590 + "exact_image_knn_score": knn_components["exact_image_knn_score"],
  591 + "approx_text_knn_score": knn_components["approx_text_knn_score"],
  592 + "approx_image_knn_score": knn_components["approx_image_knn_score"],
  593 + "text_knn_source": knn_components["text_knn_source"],
  594 + "image_knn_source": knn_components["image_knn_source"],
463 "weighted_text_knn_score": knn_components["weighted_text_knn_score"], 595 "weighted_text_knn_score": knn_components["weighted_text_knn_score"],
464 "weighted_image_knn_score": knn_components["weighted_image_knn_score"], 596 "weighted_image_knn_score": knn_components["weighted_image_knn_score"],
465 "knn_primary_score": knn_components["primary_knn_score"], 597 "knn_primary_score": knn_components["primary_knn_score"],
@@ -468,6 +600,8 @@ def coarse_resort_hits( @@ -468,6 +600,8 @@ def coarse_resort_hits(
468 "coarse_es_factor": es_factor, 600 "coarse_es_factor": es_factor,
469 "coarse_text_factor": text_factor, 601 "coarse_text_factor": text_factor,
470 "coarse_knn_factor": knn_factor, 602 "coarse_knn_factor": knn_factor,
  603 + "coarse_text_knn_factor": text_knn_factor,
  604 + "coarse_image_knn_factor": image_knn_factor,
471 "coarse_score": coarse_score, 605 "coarse_score": coarse_score,
472 "matched_queries": matched_queries, 606 "matched_queries": matched_queries,
473 "ltr_features": ltr_features, 607 "ltr_features": ltr_features,
@@ -509,7 +643,7 @@ def fuse_scores_and_resort( @@ -509,7 +643,7 @@ def fuse_scores_and_resort(
509 - _rerank_score: score returned by the rerank service 643 - _rerank_score: score returned by the rerank service
510 - _fused_score: fused score 644 - _fused_score: fused score
511 - _text_score: text relevance score (prefers the base_query score from named queries) 645 - _text_score: text relevance score (prefers the base_query score from named queries)
512 - - _knn_score: KNN score (prefers the knn_query score from named queries) 646 + - _knn_score: KNN score (prefers exact named queries, falls back to ANN named queries when missing)

513 647
514 Args: 648 Args:
515 es_hits: ES hits 列表(会被原地修改) 649 es_hits: ES hits 列表(会被原地修改)
@@ -545,6 +679,7 @@ def fuse_scores_and_resort( @@ -545,6 +679,7 @@ def fuse_scores_and_resort(
545 text_score=text_score, 679 text_score=text_score,
546 knn_score=knn_score, 680 knn_score=knn_score,
547 fusion=f, 681 fusion=f,
  682 + knn_components=knn_components,
548 style_boost=style_boost, 683 style_boost=style_boost,
549 ) 684 )
550 fused = fusion_result["score"] 685 fused = fusion_result["score"]
@@ -557,6 +692,8 @@ def fuse_scores_and_resort( @@ -557,6 +692,8 @@ def fuse_scores_and_resort(
557 hit["_knn_score"] = knn_score 692 hit["_knn_score"] = knn_score
558 hit["_text_knn_score"] = knn_components["text_knn_score"] 693 hit["_text_knn_score"] = knn_components["text_knn_score"]
559 hit["_image_knn_score"] = knn_components["image_knn_score"] 694 hit["_image_knn_score"] = knn_components["image_knn_score"]
  695 + hit["_exact_text_knn_score"] = knn_components["exact_text_knn_score"]
  696 + hit["_exact_image_knn_score"] = knn_components["exact_image_knn_score"]
560 hit["_fused_score"] = fused 697 hit["_fused_score"] = fused
561 hit["_style_intent_selected_sku_boost"] = style_boost 698 hit["_style_intent_selected_sku_boost"] = style_boost
562 699
@@ -589,6 +726,12 @@ def fuse_scores_and_resort( @@ -589,6 +726,12 @@ def fuse_scores_and_resort(
589 "text_support_score": text_components["support_text_score"], 726 "text_support_score": text_components["support_text_score"],
590 "text_knn_score": knn_components["text_knn_score"], 727 "text_knn_score": knn_components["text_knn_score"],
591 "image_knn_score": knn_components["image_knn_score"], 728 "image_knn_score": knn_components["image_knn_score"],
  729 + "exact_text_knn_score": knn_components["exact_text_knn_score"],
  730 + "exact_image_knn_score": knn_components["exact_image_knn_score"],
  731 + "approx_text_knn_score": knn_components["approx_text_knn_score"],
  732 + "approx_image_knn_score": knn_components["approx_image_knn_score"],
  733 + "text_knn_source": knn_components["text_knn_source"],
  734 + "image_knn_source": knn_components["image_knn_source"],
592 "weighted_text_knn_score": knn_components["weighted_text_knn_score"], 735 "weighted_text_knn_score": knn_components["weighted_text_knn_score"],
593 "weighted_image_knn_score": knn_components["weighted_image_knn_score"], 736 "weighted_image_knn_score": knn_components["weighted_image_knn_score"],
594 "knn_primary_score": knn_components["primary_knn_score"], 737 "knn_primary_score": knn_components["primary_knn_score"],
@@ -603,6 +746,8 @@ def fuse_scores_and_resort( @@ -603,6 +746,8 @@ def fuse_scores_and_resort(
603 "es_factor": fusion_result["factors"].get("es_score"), 746 "es_factor": fusion_result["factors"].get("es_score"),
604 "text_factor": fusion_result["factors"].get("text_score"), 747 "text_factor": fusion_result["factors"].get("text_score"),
605 "knn_factor": fusion_result["factors"].get("knn_score"), 748 "knn_factor": fusion_result["factors"].get("knn_score"),
  749 + "text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"),
  750 + "image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"),
606 "style_intent_selected_sku": sku_selected, 751 "style_intent_selected_sku": sku_selected,
607 "style_intent_selected_sku_boost": style_boost, 752 "style_intent_selected_sku_boost": style_boost,
608 "matched_queries": signal_bundle["matched_queries"], 753 "matched_queries": signal_bundle["matched_queries"],
@@ -735,6 +880,7 @@ def run_lightweight_rerank( @@ -735,6 +880,7 @@ def run_lightweight_rerank(
735 text_score=text_score, 880 text_score=text_score,
736 knn_score=knn_score, 881 knn_score=knn_score,
737 fusion=f, 882 fusion=f,
  883 + knn_components=signal_bundle["knn_components"],
738 style_boost=style_boost, 884 style_boost=style_boost,
739 ) 885 )
740 886
@@ -744,6 +890,8 @@ def run_lightweight_rerank( @@ -744,6 +890,8 @@ def run_lightweight_rerank(
744 hit["_knn_score"] = knn_score 890 hit["_knn_score"] = knn_score
745 hit["_text_knn_score"] = signal_bundle["knn_components"]["text_knn_score"] 891 hit["_text_knn_score"] = signal_bundle["knn_components"]["text_knn_score"]
746 hit["_image_knn_score"] = signal_bundle["knn_components"]["image_knn_score"] 892 hit["_image_knn_score"] = signal_bundle["knn_components"]["image_knn_score"]
  893 + hit["_exact_text_knn_score"] = signal_bundle["knn_components"]["exact_text_knn_score"]
  894 + hit["_exact_image_knn_score"] = signal_bundle["knn_components"]["exact_image_knn_score"]
747 hit["_style_intent_selected_sku_boost"] = style_boost 895 hit["_style_intent_selected_sku_boost"] = style_boost
748 896
749 if debug: 897 if debug:
@@ -769,6 +917,8 @@ def run_lightweight_rerank( @@ -769,6 +917,8 @@ def run_lightweight_rerank(
769 "es_factor": fusion_result["factors"].get("es_score"), 917 "es_factor": fusion_result["factors"].get("es_score"),
770 "text_factor": fusion_result["factors"].get("text_score"), 918 "text_factor": fusion_result["factors"].get("text_score"),
771 "knn_factor": fusion_result["factors"].get("knn_score"), 919 "knn_factor": fusion_result["factors"].get("knn_score"),
  920 + "text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"),
  921 + "image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"),
772 "style_intent_selected_sku": sku_selected, 922 "style_intent_selected_sku": sku_selected,
773 "style_intent_selected_sku_boost": style_boost, 923 "style_intent_selected_sku_boost": style_boost,
774 "ltr_features": ltr_features, 924 "ltr_features": ltr_features,
search/searcher.py
@@ -236,6 +236,81 @@ class Searcher: @@ -236,6 +236,81 @@ class Searcher:
236 return 236 return
237 es_query["_source"] = {"includes": self.source_fields} 237 es_query["_source"] = {"includes": self.source_fields}
238 238
  239 + def _resolve_exact_knn_rescore_window(self) -> int:
  240 + configured = int(self.config.rerank.exact_knn_rescore_window)
  241 + if configured > 0:
  242 + return configured
  243 + return int(self.config.rerank.rerank_window)
  244 +
  245 + def _build_exact_knn_rescore(
  246 + self,
  247 + *,
  248 + query_vector: Any,
  249 + image_query_vector: Any,
  250 + parsed_query: Optional[ParsedQuery] = None,
  251 + ) -> Optional[Dict[str, Any]]:
  252 + clauses: List[Dict[str, Any]] = []
  253 +
  254 + text_clause = self.query_builder.build_exact_text_knn_rescore_clause(
  255 + query_vector,
  256 + parsed_query=parsed_query,
  257 + query_name="exact_text_knn_query",
  258 + )
  259 + if text_clause:
  260 + clauses.append(text_clause)
  261 +
  262 + image_clause = self.query_builder.build_exact_image_knn_rescore_clause(
  263 + image_query_vector,
  264 + query_name="exact_image_knn_query",
  265 + )
  266 + if image_clause:
  267 + clauses.append(image_clause)
  268 +
  269 + if not clauses:
  270 + return None
  271 +
  272 + return {
  273 + "window_size": self._resolve_exact_knn_rescore_window(),
  274 + "query": {
  275 + # Phase 1: only compute exact vector scores and expose them in matched_queries.
  276 + "score_mode": "total",
  277 + "query_weight": 1.0,
  278 + "rescore_query_weight": 0.0,
  279 + "rescore_query": {
  280 + "bool": {
  281 + "should": clauses,
  282 + "minimum_should_match": 1,
  283 + }
  284 + },
  285 + },
  286 + }
  287 +
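As a sanity check on the `query_weight: 1.0` / `rescore_query_weight: 0.0` choice above: with Elasticsearch's `total` score mode the rescored value is a weighted sum of the original query score and the rescore-query score, so these weights leave the ranking untouched while the exact-KNN clauses still surface through `matched_queries`. A minimal sketch (hypothetical helper name):

```python
# Sketch of Elasticsearch's "total" score_mode for a rescored hit:
# final = query_weight * query_score + rescore_query_weight * rescore_score.
def rescored(query_score: float, rescore_score: float,
             query_weight: float = 1.0, rescore_query_weight: float = 0.0) -> float:
    # With the defaults used in Phase 1 (1.0 / 0.0), the rescore clauses
    # are evaluated for their matched_queries scores but contribute
    # nothing to the final ranking.
    return query_weight * query_score + rescore_query_weight * rescore_score
```

`rescored(3.2, 9.9)` stays `3.2`; raising `rescore_query_weight` later would blend the exact-KNN score into the ranking.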
  288 + def _attach_exact_knn_rescore(
  289 + self,
  290 + es_query: Dict[str, Any],
  291 + *,
  292 + in_rank_window: bool,
  293 + query_vector: Any,
  294 + image_query_vector: Any,
  295 + parsed_query: Optional[ParsedQuery] = None,
  296 + ) -> None:
  297 + if not in_rank_window or not self.config.rerank.exact_knn_rescore_enabled:
  298 + return
  299 + rescore = self._build_exact_knn_rescore(
  300 + query_vector=query_vector,
  301 + image_query_vector=image_query_vector,
  302 + parsed_query=parsed_query,
  303 + )
  304 + if not rescore:
  305 + return
  306 + existing = es_query.get("rescore")
  307 + if existing is None:
  308 + es_query["rescore"] = rescore
  309 + elif isinstance(existing, list):
  310 + es_query["rescore"] = [*existing, rescore]
  311 + else:
  312 + es_query["rescore"] = [existing, rescore]
  313 +
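The attachment logic above normalizes `es_query["rescore"]` to a list whenever an entry already exists, since Elasticsearch accepts either a single rescore object or a list of them. A self-contained sketch of the same merge (hypothetical function name):

```python
# Append a rescore entry to an ES query body without clobbering an
# existing one; mirrors the None / list / single-object handling above.
def attach_rescore(es_query: dict, rescore: dict) -> None:
    existing = es_query.get("rescore")
    if existing is None:
        es_query["rescore"] = rescore
    elif isinstance(existing, list):
        es_query["rescore"] = [*existing, rescore]
    else:
        es_query["rescore"] = [existing, rescore]
```

Attaching to a query that already carries one rescore yields a two-element list, preserving the original entry's position (Elasticsearch applies list entries in order).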
239 def _resolve_rerank_source_filter( 314 def _resolve_rerank_source_filter(
240 self, 315 self,
241 doc_template: str, 316 doc_template: str,
@@ -401,7 +476,9 @@ class Searcher: @@ -401,7 +476,9 @@ class Searcher:
401 language: Response / field selection language hint (e.g. zh, en) 476 language: Response / field selection language hint (e.g. zh, en)
402 sku_filter_dimension: SKU grouping dimensions for per-SPU variant pick 477 sku_filter_dimension: SKU grouping dimensions for per-SPU variant pick
403 enable_rerank: If None, use ``config.rerank.enabled``; if set, overrides 478 enable_rerank: If None, use ``config.rerank.enabled``; if set, overrides
404 - whether the rerank provider is invoked (subject to rerank window). 479 + whether the final rerank provider is invoked (subject to rank window).
  480 + When false, the ranking pipeline still runs and the rerank stage
  481 + becomes a pass-through.
405 rerank_query_template: Override for rerank query text template; None uses 482 rerank_query_template: Override for rerank query text template; None uses
406 ``config.rerank.rerank_query_template`` (e.g. ``"{query}"``). 483 ``config.rerank.rerank_query_template`` (e.g. ``"{query}"``).
407 rerank_doc_template: Override for per-hit document text passed to rerank; 484 rerank_doc_template: Override for per-hit document text passed to rerank;
@@ -430,15 +507,16 @@ class Searcher: @@ -430,15 +507,16 @@ class Searcher:
430 # Rerank switch precedence: explicit request value > server-side config (enabled by default) 507 # Rerank switch precedence: explicit request value > server-side config (enabled by default)
431 rerank_enabled_by_config = bool(rc.enabled) 508 rerank_enabled_by_config = bool(rc.enabled)
432 do_rerank = rerank_enabled_by_config if enable_rerank is None else bool(enable_rerank) 509 do_rerank = rerank_enabled_by_config if enable_rerank is None else bool(enable_rerank)
  510 + fine_enabled = bool(fine_cfg.enabled)
433 rerank_window = rc.rerank_window 511 rerank_window = rc.rerank_window
434 coarse_input_window = max(rerank_window, int(coarse_cfg.input_window)) 512 coarse_input_window = max(rerank_window, int(coarse_cfg.input_window))
435 coarse_output_window = max(rerank_window, int(coarse_cfg.output_window)) 513 coarse_output_window = max(rerank_window, int(coarse_cfg.output_window))
436 fine_input_window = max(rerank_window, int(fine_cfg.input_window)) 514 fine_input_window = max(rerank_window, int(fine_cfg.input_window))
437 fine_output_window = max(rerank_window, int(fine_cfg.output_window)) 515 fine_output_window = max(rerank_window, int(fine_cfg.output_window))
438 - # If rerank is enabled and the requested range fits the window: fetch the top rerank_window hits from ES, rerank, then paginate by from/size; otherwise skip rerank and query ES with the original from/size
439 - in_rerank_window = do_rerank and (from_ + size) <= rerank_window  
440 - es_fetch_from = 0 if in_rerank_window else from_  
441 - es_fetch_size = coarse_input_window if in_rerank_window else size 516 + # The multi-stage ranking window is independent of the final rerank switch: the coarse/fine pipeline still runs even when final rerank is disabled.
  517 + in_rank_window = (from_ + size) <= rerank_window
  518 + es_fetch_from = 0 if in_rank_window else from_
  519 + es_fetch_size = coarse_input_window if in_rank_window else size
442 520
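The window logic above can be restated as a pure function (hypothetical name) to show how paging interacts with the rank window: requests that land inside the window fetch the full coarse input from offset 0 and paginate after re-ranking, while deep pages bypass the pipeline and query ES with the caller's `from`/`size` directly.

```python
# Sketch of the fetch-window decision used above.
def fetch_plan(from_: int, size: int, rerank_window: int, coarse_input_window: int):
    # The multi-stage window no longer depends on do_rerank in the new code.
    in_rank_window = (from_ + size) <= rerank_window
    es_fetch_from = 0 if in_rank_window else from_
    es_fetch_size = coarse_input_window if in_rank_window else size
    return in_rank_window, es_fetch_from, es_fetch_size
```

For instance, page one of a 20-result request against a window of 100 fetches the whole coarse input window; a request starting at offset 120 falls outside and is passed through unchanged.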
443 es_score_normalization_factor: Optional[float] = None 521 es_score_normalization_factor: Optional[float] = None
444 initial_ranks_by_doc: Dict[str, int] = {} 522 initial_ranks_by_doc: Dict[str, int] = {}
@@ -455,7 +533,8 @@ class Searcher: @@ -455,7 +533,8 @@ class Searcher:
455 context.logger.info( 533 context.logger.info(
456 f"开始搜索请求 | 查询: '{query}' | 参数: size={size}, from_={from_}, " 534 f"开始搜索请求 | 查询: '{query}' | 参数: size={size}, from_={from_}, "
457 f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, " 535 f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, "
458 - f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, " 536 + f"fine_enabled(config)={fine_enabled}, "
  537 + f"enable_rerank(effective)={do_rerank}, in_rank_window={in_rank_window}, "
459 f"es_fetch=({es_fetch_from},{es_fetch_size}) | " 538 f"es_fetch=({es_fetch_from},{es_fetch_size}) | "
460 f"index_languages={index_langs} | " 539 f"index_languages={index_langs} | "
461 f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}", 540 f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}",
@@ -468,8 +547,9 @@ class Searcher: @@ -468,8 +547,9 @@ class Searcher:
468 'from_': from_, 547 'from_': from_,
469 'es_fetch_from': es_fetch_from, 548 'es_fetch_from': es_fetch_from,
470 'es_fetch_size': es_fetch_size, 549 'es_fetch_size': es_fetch_size,
471 - 'in_rerank_window': in_rerank_window, 550 + 'in_rank_window': in_rank_window,
472 'rerank_enabled_by_config': rerank_enabled_by_config, 551 'rerank_enabled_by_config': rerank_enabled_by_config,
  552 + 'fine_enabled': fine_enabled,
473 'enable_rerank_request': enable_rerank, 553 'enable_rerank_request': enable_rerank,
474 'rerank_query_template': effective_query_template, 554 'rerank_query_template': effective_query_template,
475 'rerank_doc_template': effective_doc_template, 555 'rerank_doc_template': effective_doc_template,
@@ -494,6 +574,7 @@ class Searcher: @@ -494,6 +574,7 @@ class Searcher:
494 context.metadata['feature_flags'] = { 574 context.metadata['feature_flags'] = {
495 'translation_enabled': enable_translation, 575 'translation_enabled': enable_translation,
496 'embedding_enabled': enable_embedding, 576 'embedding_enabled': enable_embedding,
  577 + 'fine_enabled': fine_enabled,
497 'rerank_enabled': do_rerank, 578 'rerank_enabled': do_rerank,
498 'style_intent_enabled': bool(self.style_intent_registry.enabled), 579 'style_intent_enabled': bool(self.style_intent_registry.enabled),
499 } 580 }
@@ -526,7 +607,7 @@ class Searcher: @@ -526,7 +607,7 @@ class Searcher:
526 f"语言: {parsed_query.detected_language} | " 607 f"语言: {parsed_query.detected_language} | "
527 f"关键词: {parsed_query.keywords_queries} | " 608 f"关键词: {parsed_query.keywords_queries} | "
528 f"文本向量: {'是' if parsed_query.query_vector is not None else '否'} | " 609 f"文本向量: {'是' if parsed_query.query_vector is not None else '否'} | "
529 - f"图片向量: {'是' if getattr(parsed_query, 'image_query_vector', None) is not None else '否'}", 610 + f"图片向量: {'是' if parsed_query.image_query_vector is not None else '否'}",
530 extra={'reqid': context.reqid, 'uid': context.uid} 611 extra={'reqid': context.reqid, 'uid': context.uid}
531 ) 612 )
532 except Exception as e: 613 except Exception as e:
@@ -545,17 +626,16 @@ class Searcher: @@ -545,17 +626,16 @@ class Searcher:
545 # Generate tenant-specific index name 626 # Generate tenant-specific index name
546 index_name = get_tenant_index_name(tenant_id) 627 index_name = get_tenant_index_name(tenant_id)
547 # index_name = "search_products" 628 # index_name = "search_products"
548 - 629 +
549 # No longer need to add tenant_id to filters since each tenant has its own index 630 # No longer need to add tenant_id to filters since each tenant has its own index
  631 + image_query_vector = None
  632 + if enable_embedding:
  633 + image_query_vector = parsed_query.image_query_vector
550 634
551 es_query = self.query_builder.build_query( 635 es_query = self.query_builder.build_query(
552 query_text=parsed_query.rewritten_query or parsed_query.query_normalized, 636 query_text=parsed_query.rewritten_query or parsed_query.query_normalized,
553 query_vector=parsed_query.query_vector if enable_embedding else None, 637 query_vector=parsed_query.query_vector if enable_embedding else None,
554 - image_query_vector=(  
555 - getattr(parsed_query, "image_query_vector", None)  
556 - if enable_embedding  
557 - else None  
558 - ), 638 + image_query_vector=image_query_vector,
559 filters=filters, 639 filters=filters,
560 range_filters=range_filters, 640 range_filters=range_filters,
561 facet_configs=facets, 641 facet_configs=facets,
@@ -563,11 +643,18 @@ class Searcher: @@ -563,11 +643,18 @@ class Searcher:
563 from_=es_fetch_from, 643 from_=es_fetch_from,
564 enable_knn=enable_embedding and ( 644 enable_knn=enable_embedding and (
565 parsed_query.query_vector is not None 645 parsed_query.query_vector is not None
566 - or getattr(parsed_query, "image_query_vector", None) is not None 646 + or image_query_vector is not None
567 ), 647 ),
568 min_score=min_score, 648 min_score=min_score,
569 parsed_query=parsed_query, 649 parsed_query=parsed_query,
570 ) 650 )
  651 + self._attach_exact_knn_rescore(
  652 + es_query,
  653 + in_rank_window=in_rank_window,
  654 + query_vector=parsed_query.query_vector if enable_embedding else None,
  655 + image_query_vector=image_query_vector,
  656 + parsed_query=parsed_query,
  657 + )
571 658
572 # Add facets for faceted search 659 # Add facets for faceted search
573 if facets: 660 if facets:
@@ -587,8 +674,7 @@ class Searcher: @@ -587,8 +674,7 @@ class Searcher:
587 674
588 # In multi-stage rank window, first pass only needs score signals for coarse rank. 675 # In multi-stage rank window, first pass only needs score signals for coarse rank.
589 es_query_for_fetch = es_query 676 es_query_for_fetch = es_query
590 - rerank_prefetch_source = None  
591 - if in_rerank_window: 677 + if in_rank_window:
592 es_query_for_fetch = dict(es_query) 678 es_query_for_fetch = dict(es_query)
593 es_query_for_fetch["_source"] = False 679 es_query_for_fetch["_source"] = False
594 680
@@ -597,31 +683,28 @@ class Searcher: @@ -597,31 +683,28 @@ class Searcher:
597 683
598 # Store ES query in context 684 # Store ES query in context
599 context.store_intermediate_result('es_query', es_query) 685 context.store_intermediate_result('es_query', es_query)
600 - if in_rerank_window and rerank_prefetch_source is not None:  
601 - context.store_intermediate_result('es_query_rerank_prefetch_source', rerank_prefetch_source)  
602 # Serialize ES query to compute a compact size + stable digest for correlation 686 # Serialize ES query to compute a compact size + stable digest for correlation
603 es_query_compact = json.dumps(es_query_for_fetch, ensure_ascii=False, separators=(",", ":")) 687 es_query_compact = json.dumps(es_query_for_fetch, ensure_ascii=False, separators=(",", ":"))
604 es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16] 688 es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16]
605 knn_enabled = bool(enable_embedding and ( 689 knn_enabled = bool(enable_embedding and (
606 parsed_query.query_vector is not None 690 parsed_query.query_vector is not None
607 - or getattr(parsed_query, "image_query_vector", None) is not None 691 + or image_query_vector is not None
608 )) 692 ))
609 vector_dims = int(len(parsed_query.query_vector)) if parsed_query.query_vector is not None else 0 693 vector_dims = int(len(parsed_query.query_vector)) if parsed_query.query_vector is not None else 0
610 image_vector_dims = ( 694 image_vector_dims = (
611 - int(len(parsed_query.image_query_vector))  
612 - if getattr(parsed_query, "image_query_vector", None) is not None 695 + int(len(image_query_vector))
  696 + if image_query_vector is not None
613 else 0 697 else 0
614 ) 698 )
615 699
616 context.logger.info( 700 context.logger.info(
617 - "ES query built | size: %s chars | digest: %s | KNN: %s | vector_dims: %s | image_vector_dims: %s | facets: %s | rerank_prefetch_source: %s", 701 + "ES query built | size: %s chars | digest: %s | KNN: %s | vector_dims: %s | image_vector_dims: %s | facets: %s",
618 len(es_query_compact), 702 len(es_query_compact),
619 es_query_digest, 703 es_query_digest,
620 "yes" if knn_enabled else "no", 704 "yes" if knn_enabled else "no",
621 vector_dims, 705 vector_dims,
622 image_vector_dims, 706 image_vector_dims,
623 "yes" if facets else "no", 707 "yes" if facets else "no",
624 - rerank_prefetch_source,  
625 extra={'reqid': context.reqid, 'uid': context.uid} 708 extra={'reqid': context.reqid, 'uid': context.uid}
626 ) 709 )
627 _log_backend_verbose({ 710 _log_backend_verbose({
@@ -656,7 +739,7 @@ class Searcher: @@ -656,7 +739,7 @@ class Searcher:
656 body=body_for_es, 739 body=body_for_es,
657 size=es_fetch_size, 740 size=es_fetch_size,
658 from_=es_fetch_from, 741 from_=es_fetch_from,
659 - include_named_queries_score=bool(do_rerank and in_rerank_window), 742 + include_named_queries_score=bool(in_rank_window),
660 ) 743 )
661 744
662 # Store ES response in context 745 # Store ES response in context
@@ -698,10 +781,177 @@ class Searcher: @@ -698,10 +781,177 @@ class Searcher:
698 context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY) 781 context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY)
699 782
700 style_intent_decisions: Dict[str, SkuSelectionDecision] = {} 783 style_intent_decisions: Dict[str, SkuSelectionDecision] = {}
701 - if do_rerank and in_rerank_window: 784 + if in_rank_window:
702 from dataclasses import asdict 785 from dataclasses import asdict
703 from config.services_config import get_rerank_backend_config, get_rerank_service_url 786 from config.services_config import get_rerank_backend_config, get_rerank_service_url
704 from .rerank_client import coarse_resort_hits, run_lightweight_rerank, run_rerank 787 from .rerank_client import coarse_resort_hits, run_lightweight_rerank, run_rerank
  788 + coarse_fusion_debug = asdict(coarse_cfg.fusion)
  789 + stage_fusion_debug = asdict(rc.fusion)
  790 +
  791 + def _rank_map(stage_hits: List[Dict[str, Any]]) -> Dict[str, int]:
  792 + return {
  793 + str(hit.get("_id")): rank
  794 + for rank, hit in enumerate(stage_hits, 1)
  795 + if hit.get("_id") is not None
  796 + }
  797 +
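`_rank_map` builds 1-based ranks keyed by stringified `_id`, silently dropping hits that carry no id. An equivalent standalone sketch (hypothetical name, untyped for brevity):

```python
# 1-based ranks keyed by document id, skipping hits without an _id.
def rank_map(stage_hits):
    return {
        str(hit.get("_id")): rank
        for rank, hit in enumerate(stage_hits, 1)
        if hit.get("_id") is not None
    }
```

A hit list of `[{"_id": "a"}, {"_id": "b"}, {"_score": 1.0}]` maps to `{"a": 1, "b": 2}` — the id-less hit is excluded rather than ranked.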
  798 + def _stage_debug_info(
  799 + *,
  800 + enabled: bool,
  801 + applied: bool,
  802 + skipped_reason: Optional[str],
  803 + service_profile: Optional[str],
  804 + query_template: str,
  805 + doc_template: str,
  806 + docs_in: int,
  807 + docs_out: int,
  808 + top_n: int,
  809 + meta: Optional[Dict[str, Any]] = None,
  810 + backend: Optional[str] = None,
  811 + backend_model_name: Optional[str] = None,
  812 + service_url: Optional[str] = None,
  813 + model: Optional[str] = None,
  814 + fusion: Optional[Dict[str, Any]] = None,
  815 + ) -> Dict[str, Any]:
  816 + return {
  817 + "enabled": enabled,
  818 + "applied": applied,
  819 + "passthrough": not applied,
  820 + "skipped_reason": skipped_reason,
  821 + "service_profile": service_profile,
  822 + "service_url": service_url,
  823 + "backend": backend,
  824 + "model": model,
  825 + "backend_model_name": backend_model_name,
  826 + "query_template": query_template,
  827 + "doc_template": doc_template,
  828 + "query_text": str(query_template).format_map({"query": rerank_query}),
  829 + "docs_in": docs_in,
  830 + "docs_out": docs_out,
  831 + "top_n": top_n,
  832 + "meta": meta,
  833 + "fusion": fusion,
  834 + }
  835 +
  836 + def _run_optional_stage(
  837 + *,
  838 + stage: RequestContextStage,
  839 + stage_label: str,
  840 + enabled: bool,
  841 + stage_hits: List[Dict[str, Any]],
  842 + input_limit: int,
  843 + output_limit: int,
  844 + service_profile: Optional[str],
  845 + query_template: str,
  846 + doc_template: str,
  847 + top_n: int,
  848 + debug_key: Optional[str],
  849 + runner,
  850 + ) -> tuple[List[Dict[str, Any]], Dict[str, int], Optional[Dict[str, Any]]]:
  851 + context.start_stage(stage)
  852 + try:
  853 + input_hits = list(stage_hits[:input_limit])
  854 + output_hits = list(stage_hits[:output_limit])
  855 + applied = False
  856 + skip_reason: Optional[str] = None
  857 + meta: Optional[Dict[str, Any]] = None
  858 + debug_rows: Optional[List[Dict[str, Any]]] = None
  859 +
  860 + if enabled and input_hits:
  861 + output_hits_candidate, applied, meta, debug_rows = runner(input_hits)
  862 + if applied:
  863 + output_hits = list((output_hits_candidate or input_hits)[:output_limit])
  864 + else:
  865 + skip_reason = "service_returned_none"
  866 + else:
  867 + skip_reason = "disabled" if not enabled else "no_hits"
  868 +
  869 + ranks = _rank_map(output_hits) if debug else {}
  870 + stage_info = None
  871 + if debug:
  872 + if applied:
  873 + backend_name, backend_cfg = get_rerank_backend_config(service_profile)
  874 + stage_info = _stage_debug_info(
  875 + enabled=True,
  876 + applied=True,
  877 + skipped_reason=None,
  878 + service_profile=service_profile,
  879 + service_url=get_rerank_service_url(profile=service_profile),
  880 + backend=backend_name,
  881 + backend_model_name=backend_cfg.get("model_name"),
  882 + model=meta.get("model") if isinstance(meta, dict) else None,
  883 + query_template=query_template,
  884 + doc_template=doc_template,
  885 + docs_in=len(input_hits),
  886 + docs_out=len(output_hits),
  887 + top_n=top_n,
  888 + meta=meta,
  889 + fusion=stage_fusion_debug,
  890 + )
  891 + if debug_key is not None and debug_rows is not None:
  892 + context.store_intermediate_result(debug_key, debug_rows)
  893 + else:
  894 + stage_info = _stage_debug_info(
  895 + enabled=enabled,
  896 + applied=False,
  897 + skipped_reason=skip_reason,
  898 + service_profile=service_profile,
  899 + query_template=query_template,
  900 + doc_template=doc_template,
  901 + docs_in=len(input_hits),
  902 + docs_out=len(output_hits),
  903 + top_n=top_n,
  904 + fusion=stage_fusion_debug,
  905 + )
  906 +
  907 + if applied:
  908 + context.logger.info(
  909 + "%s完成 | docs=%s | top_n=%s | meta=%s",
  910 + stage_label,
  911 + len(output_hits),
  912 + top_n,
  913 + meta,
  914 + extra={'reqid': context.reqid, 'uid': context.uid}
  915 + )
  916 + else:
  917 + context.logger.info(
  918 + "%s透传 | reason=%s | docs=%s | top_n=%s",
  919 + stage_label,
  920 + skip_reason,
  921 + len(output_hits),
  922 + top_n,
  923 + extra={'reqid': context.reqid, 'uid': context.uid}
  924 + )
  925 + return output_hits, ranks, stage_info
  926 + except Exception as e:
  927 + output_hits = list(stage_hits[:output_limit])
  928 + ranks = _rank_map(output_hits) if debug else {}
  929 + stage_info = None
  930 + if debug:
  931 + stage_info = _stage_debug_info(
  932 + enabled=enabled,
  933 + applied=False,
  934 + skipped_reason="error",
  935 + service_profile=service_profile,
  936 + query_template=query_template,
  937 + doc_template=doc_template,
  938 + docs_in=min(len(stage_hits), input_limit),
  939 + docs_out=len(output_hits),
  940 + top_n=top_n,
  941 + meta={"error": str(e)},
  942 + fusion=stage_fusion_debug,
  943 + )
  944 + context.add_warning(f"{stage_label} failed: {e}")
  945 + context.logger.warning(
  946 + "调用%s服务失败 | error: %s",
  947 + stage_label,
  948 + e,
  949 + extra={'reqid': context.reqid, 'uid': context.uid},
  950 + exc_info=True,
  951 + )
  952 + return output_hits, ranks, stage_info
  953 + finally:
  954 + context.end_stage(stage)
705 955
706 rerank_query = parsed_query.text_for_rerank() if parsed_query else query 956 rerank_query = parsed_query.text_for_rerank() if parsed_query else query
707 hits = es_response.get("hits", {}).get("hits") or [] 957 hits = es_response.get("hits", {}).get("hits") or []
@@ -716,17 +966,12 @@ class Searcher: @@ -716,17 +966,12 @@ class Searcher:
716 hits = hits[:coarse_output_window] 966 hits = hits[:coarse_output_window]
717 es_response.setdefault("hits", {})["hits"] = hits 967 es_response.setdefault("hits", {})["hits"] = hits
718 if debug: 968 if debug:
719 - coarse_ranks_by_doc = {  
720 - str(hit.get("_id")): rank  
721 - for rank, hit in enumerate(hits, 1)  
722 - if hit.get("_id") is not None 969 + coarse_ranks_by_doc = _rank_map(hits)
  970 + coarse_debug_info = {
  971 + "docs_in": es_fetch_size,
  972 + "docs_out": len(hits),
  973 + "fusion": coarse_fusion_debug,
723 } 974 }
724 - if debug:  
725 - coarse_debug_info = {  
726 - "docs_in": es_fetch_size,  
727 - "docs_out": len(hits),  
728 - "fusion": asdict(coarse_cfg.fusion),  
729 - }  
730 context.store_intermediate_result("coarse_rank_scores", coarse_debug) 975 context.store_intermediate_result("coarse_rank_scores", coarse_debug)
731 context.logger.info( 976 context.logger.info(
732 "粗排完成 | docs_in=%s | docs_out=%s", 977 "粗排完成 | docs_in=%s | docs_out=%s",
@@ -777,72 +1022,42 @@ class Searcher: @@ -777,72 +1022,42 @@ class Searcher:
777 extra={'reqid': context.reqid, 'uid': context.uid} 1022 extra={'reqid': context.reqid, 'uid': context.uid}
778 ) 1023 )
779 1024
780 - fine_scores: Optional[List[float]] = None  
781 - hits = es_response.get("hits", {}).get("hits") or []  
782 - if fine_cfg.enabled and hits:  
783 - context.start_stage(RequestContextStage.FINE_RANKING)  
784 - try:  
785 - fine_scores, fine_meta, fine_debug_rows = run_lightweight_rerank(  
786 - query=rerank_query,  
787 - es_hits=hits[:fine_input_window],  
788 - language=language,  
789 - timeout_sec=fine_cfg.timeout_sec,  
790 - rerank_query_template=fine_query_template,  
791 - rerank_doc_template=fine_doc_template,  
792 - top_n=fine_output_window,  
793 - debug=debug,  
794 - fusion=rc.fusion,  
795 - style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,  
796 - service_profile=fine_cfg.service_profile,  
797 - )  
798 - if fine_scores is not None:  
799 - hits = hits[:fine_output_window]  
800 - es_response["hits"]["hits"] = hits  
801 - if debug:  
802 - fine_ranks_by_doc = {  
803 - str(hit.get("_id")): rank  
804 - for rank, hit in enumerate(hits, 1)  
805 - if hit.get("_id") is not None  
806 - }  
807 - fine_backend_name, fine_backend_cfg = get_rerank_backend_config(fine_cfg.service_profile)  
808 - fine_debug_info = {  
809 - "service_profile": fine_cfg.service_profile,  
810 - "service_url": get_rerank_service_url(profile=fine_cfg.service_profile),  
811 - "backend": fine_backend_name,  
812 - "model": fine_meta.get("model") if isinstance(fine_meta, dict) else None,  
813 - "backend_model_name": fine_backend_cfg.get("model_name"),  
814 - "query_template": fine_query_template,  
815 - "doc_template": fine_doc_template,  
816 - "query_text": str(fine_query_template).format_map({"query": rerank_query}),  
817 - "docs_in": min(len(fine_scores), fine_input_window),  
818 - "docs_out": len(hits),  
819 - "top_n": fine_output_window,  
820 - "meta": fine_meta,  
821 - "fusion": asdict(rc.fusion),  
822 - }  
823 - context.store_intermediate_result("fine_rank_scores", fine_debug_rows)  
824 - context.logger.info(  
825 - "精排完成 | docs=%s | top_n=%s | meta=%s",  
826 - len(hits),  
827 - fine_output_window,  
828 - fine_meta,  
829 - extra={'reqid': context.reqid, 'uid': context.uid}  
830 - )  
831 - except Exception as e:  
832 - context.add_warning(f"Fine rerank failed: {e}")  
833 - context.logger.warning(  
834 - f"调用精排服务失败 | error: {e}",  
835 - extra={'reqid': context.reqid, 'uid': context.uid},  
836 - exc_info=True,  
837 - )  
838 - finally:  
839 - context.end_stage(RequestContextStage.FINE_RANKING) 1025 + def _run_fine_stage(stage_input: List[Dict[str, Any]]):
  1026 + fine_scores, fine_meta, fine_debug_rows = run_lightweight_rerank(
  1027 + query=rerank_query,
  1028 + es_hits=stage_input,
  1029 + language=language,
  1030 + timeout_sec=fine_cfg.timeout_sec,
  1031 + rerank_query_template=fine_query_template,
  1032 + rerank_doc_template=fine_doc_template,
  1033 + top_n=fine_output_window,
  1034 + debug=debug,
  1035 + fusion=rc.fusion,
  1036 + style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
  1037 + service_profile=fine_cfg.service_profile,
  1038 + )
  1039 + return stage_input, fine_scores is not None, fine_meta, fine_debug_rows
  1040 +
  1041 + hits, fine_ranks_by_doc, fine_debug_info = _run_optional_stage(
  1042 + stage=RequestContextStage.FINE_RANKING,
  1043 + stage_label="精排",
  1044 + enabled=fine_enabled,
  1045 + stage_hits=es_response.get("hits", {}).get("hits") or [],
  1046 + input_limit=fine_input_window,
  1047 + output_limit=fine_output_window,
  1048 + service_profile=fine_cfg.service_profile,
  1049 + query_template=fine_query_template,
  1050 + doc_template=fine_doc_template,
  1051 + top_n=fine_output_window,
  1052 + debug_key="fine_rank_scores",
  1053 + runner=_run_fine_stage,
  1054 + )
  1055 + es_response["hits"]["hits"] = hits
840 1056
841 - context.start_stage(RequestContextStage.RERANKING)  
842 - try:  
843 - final_hits = es_response.get("hits", {}).get("hits") or []  
844 - final_input = final_hits[:rerank_window]  
845 - es_response["hits"]["hits"] = final_input 1057 + def _run_rerank_stage(stage_input: List[Dict[str, Any]]):
  1058 + nonlocal es_response
  1059 +
  1060 + es_response["hits"]["hits"] = stage_input
846 es_response, rerank_meta, fused_debug = run_rerank( 1061 es_response, rerank_meta, fused_debug = run_rerank(
847 query=rerank_query, 1062 query=rerank_query,
848 es_response=es_response, 1063 es_response=es_response,
@@ -858,48 +1073,31 @@ class Searcher: @@ -858,48 +1073,31 @@ class Searcher:
858 service_profile=rc.service_profile, 1073 service_profile=rc.service_profile,
859 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost, 1074 style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
860 ) 1075 )
861 -  
862 - if rerank_meta is not None:  
863 - if debug:  
864 - rerank_ranks_by_doc = {  
865 - str(hit.get("_id")): rank  
866 - for rank, hit in enumerate(es_response.get("hits", {}).get("hits") or [], 1)  
867 - if hit.get("_id") is not None  
868 - }  
869 - rerank_backend_name, rerank_backend_cfg = get_rerank_backend_config(rc.service_profile)  
870 - rerank_debug_info = {  
871 - "service_profile": rc.service_profile,  
872 - "service_url": get_rerank_service_url(profile=rc.service_profile),  
873 - "backend": rerank_backend_name,  
874 - "model": rerank_meta.get("model") if isinstance(rerank_meta, dict) else None,  
875 - "backend_model_name": rerank_backend_cfg.get("model_name"),  
876 - "query_template": effective_query_template,  
877 - "doc_template": effective_doc_template,  
878 - "query_text": str(effective_query_template).format_map({"query": rerank_query}),  
879 - "docs_in": len(final_input),  
880 - "docs_out": len(es_response.get("hits", {}).get("hits") or []),  
881 - "top_n": from_ + size,  
882 - "meta": rerank_meta,  
883 - "fusion": asdict(rc.fusion),  
884 - }  
885 - context.store_intermediate_result("rerank_scores", fused_debug)  
886 - context.logger.info(  
887 - f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | "  
888 - f"top_n={from_ + size} | meta={rerank_meta}",  
889 - extra={'reqid': context.reqid, 'uid': context.uid}  
890 - )  
891 - except Exception as e:  
892 - context.add_warning(f"Rerank failed: {e}")  
893 - context.logger.warning(  
894 - f"调用重排服务失败 | error: {e}",  
895 - extra={'reqid': context.reqid, 'uid': context.uid},  
896 - exc_info=True, 1076 + return (
  1077 + es_response.get("hits", {}).get("hits") or [],
  1078 + rerank_meta is not None,
  1079 + rerank_meta,
  1080 + fused_debug,
897 ) 1081 )
898 - finally:  
899 - context.end_stage(RequestContextStage.RERANKING)  
900 1082
901 - # When this request is inside the rerank window: multi-stage ranking has produced the top rerank_window hits; slice by the requested from/size for pagination
902 - if in_rerank_window: 1083 + hits, rerank_ranks_by_doc, rerank_debug_info = _run_optional_stage(
  1084 + stage=RequestContextStage.RERANKING,
  1085 + stage_label="重排",
  1086 + enabled=do_rerank,
  1087 + stage_hits=es_response.get("hits", {}).get("hits") or [],
  1088 + input_limit=rerank_window,
  1089 + output_limit=rerank_window,
  1090 + service_profile=rc.service_profile,
  1091 + query_template=effective_query_template,
  1092 + doc_template=effective_doc_template,
  1093 + top_n=from_ + size,
  1094 + debug_key="rerank_scores",
  1095 + runner=_run_rerank_stage,
  1096 + )
  1097 + es_response["hits"]["hits"] = hits
  1098 +
  1099 + # When this request is inside the ranking window: multi-stage ranking has produced the top rerank_window hits; slice by the requested from/size for pagination
  1100 + if in_rank_window:
903 hits = es_response.get("hits", {}).get("hits") or [] 1101 hits = es_response.get("hits", {}).get("hits") or []
904 sliced = hits[from_ : from_ + size] 1102 sliced = hits[from_ : from_ + size]
905 es_response.setdefault("hits", {})["hits"] = sliced 1103 es_response.setdefault("hits", {})["hits"] = sliced
@@ -961,12 +1159,12 @@ class Searcher: @@ -961,12 +1159,12 @@ class Searcher:
961 context.end_stage(RequestContextStage.ELASTICSEARCH_PAGE_FILL) 1159 context.end_stage(RequestContextStage.ELASTICSEARCH_PAGE_FILL)
962 1160
963 context.logger.info( 1161 context.logger.info(
964 - f"重排分页切片 | from={from_}, size={size}, 返回={len(sliced)}条", 1162 + f"排序窗口分页切片 | from={from_}, size={size}, 返回={len(sliced)}条",
965 extra={'reqid': context.reqid, 'uid': context.uid} 1163 extra={'reqid': context.reqid, 'uid': context.uid}
966 ) 1164 )
967 1165
968 # Outside the rerank window: run style intent before result_processing so it can be timed separately and dovetail with the ES recall stage 1166
969 - if self._has_style_intent(parsed_query) and not in_rerank_window: 1167 + if self._has_style_intent(parsed_query) and not in_rank_window:
970 es_hits_pre = es_response.get("hits", {}).get("hits") or [] 1168 es_hits_pre = es_response.get("hits", {}).get("hits") or []
971 style_intent_decisions = self._apply_style_intent_to_hits( 1169 style_intent_decisions = self._apply_style_intent_to_hits(
972 es_hits_pre, 1170 es_hits_pre,
@@ -1259,7 +1457,7 @@ class Searcher: @@ -1259,7 +1457,7 @@ class Searcher:
1259 # Collect debug information if requested 1457 # Collect debug information if requested
1260 debug_info = None 1458 debug_info = None
1261 if debug: 1459 if debug:
1262 - query_tokens = getattr(parsed_query, "query_tokens", []) if parsed_query else [] 1460 + query_tokens = parsed_query.query_tokens if parsed_query else []
1263 token_count = len(query_tokens) 1461 token_count = len(query_tokens)
1264 text_knn_is_long = token_count >= 5 1462 text_knn_is_long = token_count >= 5
1265 text_knn_k = self.query_builder.knn_text_k_long if text_knn_is_long else self.query_builder.knn_text_k 1463 text_knn_k = self.query_builder.knn_text_k_long if text_knn_is_long else self.query_builder.knn_text_k
@@ -1279,7 +1477,7 @@ class Searcher: @@ -1279,7 +1477,7 @@ class Searcher:
1279 "translations": context.query_analysis.translations, 1477 "translations": context.query_analysis.translations,
1280 "keywords_queries": context.query_analysis.keywords_queries, 1478 "keywords_queries": context.query_analysis.keywords_queries,
1281 "has_vector": context.query_analysis.query_vector is not None, 1479 "has_vector": context.query_analysis.query_vector is not None,
1282 - "has_image_vector": getattr(parsed_query, "image_query_vector", None) is not None, 1480 + "has_image_vector": parsed_query.image_query_vector is not None,
1283 "query_tokens": query_tokens, 1481 "query_tokens": query_tokens,
1284 "intent_detection": context.get_intermediate_result("style_intent_profile"), 1482 "intent_detection": context.get_intermediate_result("style_intent_profile"),
1285 }, 1483 },
@@ -1298,9 +1496,10 @@ class Searcher: @@ -1298,9 +1496,10 @@ class Searcher:
1298 }, 1496 },
1299 "image_knn": { 1497 "image_knn": {
1300 "enabled": bool( 1498 "enabled": bool(
1301 - enable_embedding 1499 + self.image_embedding_field
  1500 + and enable_embedding
1302 and parsed_query 1501 and parsed_query
1303 - and getattr(parsed_query, "image_query_vector", None) is not None 1502 + and image_query_vector is not None
1304 ), 1503 ),
1305 "k": self.query_builder.knn_image_k, 1504 "k": self.query_builder.knn_image_k,
1306 "num_candidates": self.query_builder.knn_image_num_candidates, 1505 "num_candidates": self.query_builder.knn_image_num_candidates,
@@ -1311,9 +1510,14 @@ class Searcher: @@ -1311,9 +1510,14 @@ class Searcher:
1311 "es_query_context": { 1510 "es_query_context": {
1312 "es_fetch_from": es_fetch_from, 1511 "es_fetch_from": es_fetch_from,
1313 "es_fetch_size": es_fetch_size, 1512 "es_fetch_size": es_fetch_size,
1314 - "in_rerank_window": in_rerank_window,  
1315 - "rerank_prefetch_source": context.get_intermediate_result('es_query_rerank_prefetch_source'),  
1316 - "include_named_queries_score": bool(do_rerank and in_rerank_window), 1513 + "in_rank_window": in_rank_window,
  1514 + "include_named_queries_score": bool(in_rank_window),
  1515 + "exact_knn_rescore_enabled": bool(rc.exact_knn_rescore_enabled and in_rank_window),
  1516 + "exact_knn_rescore_window": (
  1517 + self._resolve_exact_knn_rescore_window()
  1518 + if rc.exact_knn_rescore_enabled and in_rank_window
  1519 + else None
  1520 + ),
1317 }, 1521 },
1318 "es_response": { 1522 "es_response": {
1319 "took_ms": es_response.get('took', 0), 1523 "took_ms": es_response.get('took', 0),
@@ -1369,10 +1573,10 @@ class Searcher: @@ -1369,10 +1573,10 @@ class Searcher:
1369 "retrieval_plan": debug_info["retrieval_plan"], 1573 "retrieval_plan": debug_info["retrieval_plan"],
1370 "ranking_windows": { 1574 "ranking_windows": {
1371 "es_fetch_size": es_fetch_size, 1575 "es_fetch_size": es_fetch_size,
1372 - "coarse_output_window": coarse_output_window if do_rerank and in_rerank_window else None,  
1373 - "fine_input_window": fine_input_window if do_rerank and in_rerank_window else None,  
1374 - "fine_output_window": fine_output_window if do_rerank and in_rerank_window else None,  
1375 - "rerank_window": rerank_window if do_rerank and in_rerank_window else None, 1576 + "coarse_output_window": coarse_output_window if in_rank_window else None,
  1577 + "fine_input_window": fine_input_window if in_rank_window else None,
  1578 + "fine_output_window": fine_output_window if in_rank_window else None,
  1579 + "rerank_window": rerank_window if in_rank_window else None,
1376 "page_from": from_, 1580 "page_from": from_,
1377 "page_size": size, 1581 "page_size": size,
1378 }, 1582 },
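The fine-ranking and rerank blocks above are now funneled through a single `_run_optional_stage` helper that either applies a stage runner to the head of the hit list or passes hits through unchanged. A minimal, dependency-free sketch of that passthrough-or-apply pattern (names and signature simplified and hypothetical, without the debug/logging plumbing):

```python
from typing import Callable, List, Optional, Tuple


def run_optional_stage(
    hits: List[dict],
    *,
    enabled: bool,
    input_limit: int,
    output_limit: int,
    runner: Callable[[List[dict]], Optional[List[dict]]],
) -> Tuple[List[dict], bool, Optional[str]]:
    """Return (output_hits, applied, skip_reason)."""
    input_hits = hits[:input_limit]
    output_hits = hits[:output_limit]  # passthrough default
    if not enabled:
        return output_hits, False, "disabled"
    if not input_hits:
        return output_hits, False, "no_hits"
    try:
        reordered = runner(input_hits)
    except Exception as e:
        # Degrade to passthrough on any stage failure
        return output_hits, False, f"error: {e}"
    if reordered is None:
        return output_hits, False, "service_returned_none"
    return reordered[:output_limit], True, None
```

Centralizing the passthrough, skip-reason, and error handling this way keeps each stage's runner down to "call the service, return reordered hits or None", which is what `_run_fine_stage` and `_run_rerank_stage` reduce to in the diff.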
suggestion/builder.py
@@ -366,7 +366,8 @@ class SuggestionIndexBuilder: @@ -366,7 +366,8 @@ class SuggestionIndexBuilder:
366 366
367 index_name = get_tenant_index_name(tenant_id) 367 index_name = get_tenant_index_name(tenant_id)
368 search_after: Optional[List[Any]] = None 368 search_after: Optional[List[Any]] = None
369 - 369 + logger.debug("Using index %s for tenant %s", index_name, tenant_id)
  370 + total_processed = 0
370 while True: 371 while True:
371 body: Dict[str, Any] = { 372 body: Dict[str, Any] = {
372 "size": batch_size, 373 "size": batch_size,
@@ -385,10 +386,13 @@ class SuggestionIndexBuilder: @@ -385,10 +386,13 @@ class SuggestionIndexBuilder:
385 if not hits: 386 if not hits:
386 break 387 break
387 for hit in hits: 388 for hit in hits:
  389 + total_processed += 1
388 yield hit 390 yield hit
389 search_after = hits[-1].get("sort") 391 search_after = hits[-1].get("sort")
390 if len(hits) < batch_size: 392 if len(hits) < batch_size:
391 break 393 break
  394 + logger.debug("Processed %s products total for tenant %s", total_processed, tenant_id)
  395 +
392 396
393 def _iter_query_log_rows( 397 def _iter_query_log_rows(
394 self, 398 self,
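`_iter_products` pages through the tenant index with `search_after`, sorting on a stable tiebreaker and resuming each page from the last hit's `sort` values. A self-contained sketch of that loop, assuming a client object with a `search(index=..., body=...)` method (a hypothetical stand-in for the real Elasticsearch client):

```python
from typing import Any, Dict, Iterator, List, Optional


def iter_all_hits(client, index: str, batch_size: int = 500) -> Iterator[Dict[str, Any]]:
    search_after: Optional[List[Any]] = None
    while True:
        body: Dict[str, Any] = {
            "size": batch_size,
            # A stable sort key is required so search_after resumes deterministically
            "sort": [{"spu_id": {"order": "asc", "missing": "_last"}}],
            "query": {"match_all": {}},
        }
        if search_after is not None:
            body["search_after"] = search_after
        hits = client.search(index=index, body=body).get("hits", {}).get("hits", []) or []
        if not hits:
            break
        yield from hits
        # Resume the next page from the last hit's sort values
        search_after = hits[-1].get("sort")
        if len(hits) < batch_size:  # short page means this was the last page
            break
```

Unlike `from`/`size` paging, this avoids the `index.max_result_window` cap and scans the whole index in constant memory per page.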
suggestion/builder.py.bak 0 → 100644
@@ -0,0 +1,1014 @@ @@ -0,0 +1,1014 @@
  1 +"""
  2 +Suggestion index builder (Phase 2).
  3 +
  4 +Capabilities:
  5 +- Full rebuild to versioned index
  6 +- Atomic alias publish
  7 +- Incremental update from query logs with watermark
  8 +"""
  9 +
  10 +import json
  11 +import logging
  12 +import math
  13 +import re
  14 +import unicodedata
  15 +from dataclasses import dataclass, field
  16 +from datetime import datetime, timedelta, timezone
  17 +from typing import Any, Dict, Iterator, List, Optional, Tuple
  18 +
  19 +from sqlalchemy import text
  20 +
  21 +from config.loader import get_app_config
  22 +from config.tenant_config_loader import get_tenant_config_loader
  23 +from query.query_parser import detect_text_language_for_suggestions
  24 +from suggestion.mapping import build_suggestion_mapping
  25 +from utils.es_client import ESClient
  26 +
  27 +logger = logging.getLogger(__name__)
  28 +
  29 +
  30 +def _index_prefix() -> str:
  31 + return get_app_config().runtime.index_namespace or ""
  32 +
  33 +
  34 +def get_suggestion_alias_name(tenant_id: str) -> str:
  35 + """Read alias for suggestion index (single source of truth)."""
  36 + return f"{_index_prefix()}search_suggestions_tenant_{tenant_id}_current"
  37 +
  38 +
  39 +def get_suggestion_versioned_index_name(tenant_id: str, build_at: Optional[datetime] = None) -> str:
  40 + """Versioned suggestion index name."""
  41 + ts = (build_at or datetime.now(timezone.utc)).strftime("%Y%m%d%H%M%S%f")
  42 + return f"{_index_prefix()}search_suggestions_tenant_{tenant_id}_v{ts}"
  43 +
  44 +
  45 +def get_suggestion_versioned_index_pattern(tenant_id: str) -> str:
  46 + return f"{_index_prefix()}search_suggestions_tenant_{tenant_id}_v*"
  47 +
  48 +
  49 +def get_suggestion_meta_index_name() -> str:
  50 + return f"{_index_prefix()}search_suggestions_meta"
  51 +
  52 +
  53 +@dataclass
  54 +class SuggestionCandidate:
  55 + text: str
  56 + text_norm: str
  57 + lang: str
  58 + sources: set = field(default_factory=set)
  59 + title_spu_ids: set = field(default_factory=set)
  60 + qanchor_spu_ids: set = field(default_factory=set)
  61 + tag_spu_ids: set = field(default_factory=set)
  62 + query_count_7d: int = 0
  63 + query_count_30d: int = 0
  64 + lang_confidence: float = 1.0
  65 + lang_source: str = "default"
  66 + lang_conflict: bool = False
  67 +
  68 + def add_product(self, source: str, spu_id: str) -> None:
  69 + self.sources.add(source)
  70 + if source == "title":
  71 + self.title_spu_ids.add(spu_id)
  72 + elif source == "qanchor":
  73 + self.qanchor_spu_ids.add(spu_id)
  74 + elif source == "tag":
  75 + self.tag_spu_ids.add(spu_id)
  76 +
  77 + def add_query_log(self, is_7d: bool) -> None:
  78 + self.sources.add("query_log")
  79 + self.query_count_30d += 1
  80 + if is_7d:
  81 + self.query_count_7d += 1
  82 +
  83 +
  84 +@dataclass
  85 +class QueryDelta:
  86 + tenant_id: str
  87 + lang: str
  88 + text: str
  89 + text_norm: str
  90 + delta_7d: int = 0
  91 + delta_30d: int = 0
  92 + lang_confidence: float = 1.0
  93 + lang_source: str = "default"
  94 + lang_conflict: bool = False
  95 +
  96 +
  97 +class SuggestionIndexBuilder:
  98 + """Build and update suggestion index."""
  99 +
  100 + def __init__(self, es_client: ESClient, db_engine: Any):
  101 + self.es_client = es_client
  102 + self.db_engine = db_engine
  103 +
  104 + def _format_allocation_failure(self, index_name: str) -> str:
  105 + health = self.es_client.wait_for_index_ready(index_name=index_name, timeout="5s")
  106 + explain = self.es_client.get_allocation_explain(index_name=index_name)
  107 +
  108 + parts = [
  109 + f"Suggestion index '{index_name}' was created but is not allocatable/readable yet",
  110 + f"health_status={health.get('status')}",
  111 + f"timed_out={health.get('timed_out')}",
  112 + ]
  113 + if health.get("error"):
  114 + parts.append(f"health_error={health['error']}")
  115 +
  116 + if explain:
  117 + unassigned = explain.get("unassigned_info") or {}
  118 + if unassigned.get("reason"):
  119 + parts.append(f"unassigned_reason={unassigned['reason']}")
  120 + if unassigned.get("last_allocation_status"):
  121 + parts.append(f"last_allocation_status={unassigned['last_allocation_status']}")
  122 +
  123 + for node in explain.get("node_allocation_decisions") or []:
  124 + node_name = node.get("node_name") or node.get("node_id") or "unknown-node"
  125 + for decider in node.get("deciders") or []:
  126 + if decider.get("decision") == "NO":
  127 + parts.append(
  128 + f"{node_name}:{decider.get('decider')}={decider.get('explanation')}"
  129 + )
  130 + return "; ".join(parts)
  131 +
  132 + return "; ".join(parts)
  133 +
  134 + def _create_fresh_versioned_index(
  135 + self,
  136 + tenant_id: str,
  137 + mapping: Dict[str, Any],
  138 + max_attempts: int = 5,
  139 + ) -> str:
  140 + for attempt in range(1, max_attempts + 1):
  141 + index_name = get_suggestion_versioned_index_name(tenant_id)
  142 + if self.es_client.index_exists(index_name):
  143 + logger.warning(
  144 + "Suggestion index name collision before create for tenant=%s index=%s attempt=%s/%s",
  145 + tenant_id,
  146 + index_name,
  147 + attempt,
  148 + max_attempts,
  149 + )
  150 + continue
  151 +
  152 + if self.es_client.create_index(index_name, mapping):
  153 + return index_name
  154 +
  155 + if self.es_client.index_exists(index_name):
  156 + logger.warning(
  157 + "Suggestion index name collision during create for tenant=%s index=%s attempt=%s/%s",
  158 + tenant_id,
  159 + index_name,
  160 + attempt,
  161 + max_attempts,
  162 + )
  163 + continue
  164 +
  165 + raise RuntimeError(f"Failed to create suggestion index: {index_name}")
  166 +
  167 + raise RuntimeError(
  168 + f"Failed to allocate a unique suggestion index name for tenant={tenant_id} after {max_attempts} attempts"
  169 + )
  170 +
  171 + def _ensure_new_index_ready(self, index_name: str) -> None:
  172 + health = self.es_client.wait_for_index_ready(index_name=index_name, timeout="5s")
  173 + if health.get("ok"):
  174 + return
  175 + raise RuntimeError(self._format_allocation_failure(index_name))
  176 +
  177 + @staticmethod
  178 + def _to_utc(dt: Any) -> Optional[datetime]:
  179 + if dt is None:
  180 + return None
  181 + if isinstance(dt, datetime):
  182 + if dt.tzinfo is None:
  183 + return dt.replace(tzinfo=timezone.utc)
  184 + return dt.astimezone(timezone.utc)
  185 + return None
  186 +
  187 + @staticmethod
  188 + def _normalize_text(value: str) -> str:
  189 + text_value = unicodedata.normalize("NFKC", (value or "")).strip().lower()
  190 + text_value = re.sub(r"\s+", " ", text_value)
  191 + return text_value
  192 +
  193 + @staticmethod
  194 + def _prepare_title_for_suggest(title: str, max_len: int = 120) -> str:
  195 + """
  196 + Keep title-derived suggestions concise:
  197 + - keep raw title when short enough
  198 + - for long titles, keep the leading phrase before common separators
  199 + - fallback to hard truncate
  200 + """
  201 + raw = str(title or "").strip()
  202 + if not raw:
  203 + return ""
  204 + if len(raw) <= max_len:
  205 + return raw
  206 +
  207 + head = re.split(r"[,,;;|/\\((\[【]", raw, maxsplit=1)[0].strip()
  208 + if 1 < len(head) <= max_len:
  209 + return head
  210 +
  211 + truncated = raw[:max_len].rstrip(" ,,;;|/\\-—–()()[]【】")
  212 + return truncated or raw[:max_len]
  213 +
  214 + @staticmethod
  215 + def _split_qanchors(value: Any) -> List[str]:
  216 + if value is None:
  217 + return []
  218 + if isinstance(value, list):
  219 + return [str(x).strip() for x in value if str(x).strip()]
  220 + raw = str(value).strip()
  221 + if not raw:
  222 + return []
  223 + parts = re.split(r"[,、,;|/\n\t]+", raw)
  224 + out = [p.strip() for p in parts if p and p.strip()]
  225 + if not out:
  226 + return [raw]
  227 + return out
  228 +
  229 + @staticmethod
  230 + def _iter_product_tags(raw: Any) -> List[str]:
  231 + if raw is None:
  232 + return []
  233 + if isinstance(raw, list):
  234 + return [str(x).strip() for x in raw if str(x).strip()]
  235 + s = str(raw).strip()
  236 + if not s:
  237 + return []
  238 + parts = re.split(r"[,、,;|/\n\t]+", s)
  239 + out = [p.strip() for p in parts if p and p.strip()]
  240 + return out if out else [s]
  241 +
  242 + def _iter_multilang_product_tags(
  243 + self,
  244 + raw: Any,
  245 + index_languages: List[str],
  246 + primary_language: str,
  247 + ) -> List[Tuple[str, str]]:
  248 + if isinstance(raw, dict):
  249 + pairs: List[Tuple[str, str]] = []
  250 + for lang in index_languages:
  251 + for tag in self._iter_product_tags(raw.get(lang)):
  252 + pairs.append((lang, tag))
  253 + return pairs
  254 +
  255 + pairs = []
  256 + for tag in self._iter_product_tags(raw):
  257 + tag_lang, _, _ = detect_text_language_for_suggestions(
  258 + tag,
  259 + index_languages=index_languages,
  260 + primary_language=primary_language,
  261 + )
  262 + pairs.append((tag_lang, tag))
  263 + return pairs
  264 +
  265 + @staticmethod
  266 + def _looks_noise(text_value: str) -> bool:
  267 + if not text_value:
  268 + return True
  269 + if len(text_value) > 120:
  270 + return True
  271 + if re.fullmatch(r"[\W_]+", text_value):
  272 + return True
  273 + return False
  274 +
  275 + @staticmethod
  276 + def _normalize_lang(lang: Optional[str]) -> Optional[str]:
  277 + if not lang:
  278 + return None
  279 + token = str(lang).strip().lower().replace("-", "_")
  280 + if not token:
  281 + return None
  282 + if token in {"zh_tw", "pt_br"}:
  283 + return token
  284 + return token.split("_")[0]
  285 +
  286 + @staticmethod
  287 + def _parse_request_params_language(raw: Any) -> Optional[str]:
  288 + if raw is None:
  289 + return None
  290 + if isinstance(raw, dict):
  291 + return raw.get("language")
  292 + text_raw = str(raw).strip()
  293 + if not text_raw:
  294 + return None
  295 + try:
  296 + obj = json.loads(text_raw)
  297 + if isinstance(obj, dict):
  298 + return obj.get("language")
  299 + except Exception:
  300 + return None
  301 + return None
  302 +
  303 + def _resolve_query_language(
  304 + self,
  305 + query: str,
  306 + log_language: Optional[str],
  307 + request_params: Any,
  308 + index_languages: List[str],
  309 + primary_language: str,
  310 + ) -> Tuple[str, float, str, bool]:
  311 + """Resolve lang with priority: log field > request_params > script/model."""
  312 + langs_set = set(index_languages or [])
  313 + primary = self._normalize_lang(primary_language) or "en"
  314 + if primary not in langs_set and langs_set:
  315 + primary = index_languages[0]
  316 +
  317 + log_lang = self._normalize_lang(log_language)
  318 + req_lang = self._normalize_lang(self._parse_request_params_language(request_params))
  319 + conflict = bool(log_lang and req_lang and log_lang != req_lang)
  320 +
  321 + if log_lang and (not langs_set or log_lang in langs_set):
  322 + return log_lang, 1.0, "log_field", conflict
  323 +
  324 + if req_lang and (not langs_set or req_lang in langs_set):
  325 + return req_lang, 1.0, "request_params", conflict
  326 +
  327 + det_lang, conf, det_source = detect_text_language_for_suggestions(
  328 + query,
  329 + index_languages=index_languages,
  330 + primary_language=primary,
  331 + )
  332 + if det_lang and (not langs_set or det_lang in langs_set):
  333 + return det_lang, conf, det_source, conflict
  334 +
  335 + return primary, 0.3, "default", conflict
  336 +
  337 + @staticmethod
  338 + def _compute_rank_score(
  339 + query_count_30d: int,
  340 + query_count_7d: int,
  341 + qanchor_doc_count: int,
  342 + title_doc_count: int,
  343 + tag_doc_count: int = 0,
  344 + ) -> float:
  345 + return (
  346 + 1.8 * math.log1p(max(query_count_30d, 0))
  347 + + 1.2 * math.log1p(max(query_count_7d, 0))
  348 + + 1.0 * math.log1p(max(qanchor_doc_count, 0))
  349 + + 0.85 * math.log1p(max(tag_doc_count, 0))
  350 + + 0.6 * math.log1p(max(title_doc_count, 0))
  351 + )
  352 +
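The weighted `log1p` blend above dampens raw counts so that popularity signals grow sub-linearly. A standalone sketch (weights copied from the method; the sample counts are hypothetical) shows that query-log volume dominates title-only coverage:

```python
import math

def compute_rank_score(q30: int, q7: int, qanchor: int, title: int, tag: int = 0) -> float:
    # Same weighted log1p blend as _compute_rank_score above.
    return (
        1.8 * math.log1p(max(q30, 0))
        + 1.2 * math.log1p(max(q7, 0))
        + 1.0 * math.log1p(max(qanchor, 0))
        + 0.85 * math.log1p(max(tag, 0))
        + 0.6 * math.log1p(max(title, 0))
    )

# A frequently searched term outranks one that only appears in product titles.
hot_query = compute_rank_score(q30=200, q7=40, qanchor=3, title=1)
title_only = compute_rank_score(q30=0, q7=0, qanchor=0, title=10)
assert hot_query > title_only
```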
  353 + @classmethod
  354 + def _compute_rank_score_from_candidate(cls, c: SuggestionCandidate) -> float:
  355 + return cls._compute_rank_score(
  356 + query_count_30d=c.query_count_30d,
  357 + query_count_7d=c.query_count_7d,
  358 + qanchor_doc_count=len(c.qanchor_spu_ids),
  359 + title_doc_count=len(c.title_spu_ids),
  360 + tag_doc_count=len(c.tag_spu_ids),
  361 + )
  362 +
  363 + def _iter_products(self, tenant_id: str, batch_size: int = 500) -> Iterator[Dict[str, Any]]:
  364 + """Stream product docs from tenant index using search_after."""
  365 + from indexer.mapping_generator import get_tenant_index_name
  366 +
  367 + index_name = get_tenant_index_name(tenant_id)
  368 + search_after: Optional[List[Any]] = None
  369 +
  370 + while True:
  371 + body: Dict[str, Any] = {
  372 + "size": batch_size,
  373 + "_source": ["id", "spu_id", "title", "qanchors", "enriched_tags"],
  374 + "sort": [
  375 + {"spu_id": {"order": "asc", "missing": "_last"}},
  376 + {"id.keyword": {"order": "asc", "missing": "_last"}},
  377 + ],
  378 + "query": {"match_all": {}},
  379 + }
  380 + if search_after is not None:
  381 + body["search_after"] = search_after
  382 +
  383 + resp = self.es_client.client.search(index=index_name, body=body)
  384 + hits = resp.get("hits", {}).get("hits", []) or []
  385 + if not hits:
  386 + break
  387 + for hit in hits:
  388 + yield hit
  389 + search_after = hits[-1].get("sort")
  390 + if len(hits) < batch_size:
  391 + break
  392 +
  393 + def _iter_query_log_rows(
  394 + self,
  395 + tenant_id: str,
  396 + since: datetime,
  397 + until: datetime,
  398 + fetch_size: int = 2000,
  399 + ) -> Iterator[Any]:
  400 + """Stream search logs from MySQL with bounded time range."""
  401 + query_sql = text(
  402 + """
  403 + SELECT query, language, request_params, create_time
  404 + FROM shoplazza_search_log
  405 + WHERE tenant_id = :tenant_id
  406 + AND deleted = 0
  407 + AND query IS NOT NULL
  408 + AND query <> ''
  409 + AND create_time >= :since_time
  410 + AND create_time < :until_time
  411 + ORDER BY create_time ASC
  412 + """
  413 + )
  414 +
  415 + with self.db_engine.connect().execution_options(stream_results=True) as conn:
  416 + result = conn.execute(
  417 + query_sql,
  418 + {
  419 + "tenant_id": int(tenant_id),
  420 + "since_time": since,
  421 + "until_time": until,
  422 + },
  423 + )
  424 + while True:
  425 + rows = result.fetchmany(fetch_size)
  426 + if not rows:
  427 + break
  428 + for row in rows:
  429 + yield row
  430 +
  431 + def _ensure_meta_index(self) -> str:
  432 + meta_index = get_suggestion_meta_index_name()
  433 + if self.es_client.index_exists(meta_index):
  434 + return meta_index
  435 + body = {
  436 + "settings": {
  437 + "number_of_shards": 1,
  438 + "number_of_replicas": 0,
  439 + "refresh_interval": "1s",
  440 + },
  441 + "mappings": {
  442 + "properties": {
  443 + "tenant_id": {"type": "keyword"},
  444 + "active_alias": {"type": "keyword"},
  445 + "active_index": {"type": "keyword"},
  446 + "last_full_build_at": {"type": "date"},
  447 + "last_incremental_build_at": {"type": "date"},
  448 + "last_incremental_watermark": {"type": "date"},
  449 + "updated_at": {"type": "date"},
  450 + }
  451 + },
  452 + }
  453 + if not self.es_client.create_index(meta_index, body):
  454 + raise RuntimeError(f"Failed to create suggestion meta index: {meta_index}")
  455 + return meta_index
  456 +
  457 + def _get_meta(self, tenant_id: str) -> Dict[str, Any]:
  458 + meta_index = self._ensure_meta_index()
  459 + try:
  460 + resp = self.es_client.client.get(index=meta_index, id=str(tenant_id))
  461 + return resp.get("_source", {}) or {}
  462 + except Exception:
  463 + return {}
  464 +
  465 + def _upsert_meta(self, tenant_id: str, patch: Dict[str, Any]) -> None:
  466 + meta_index = self._ensure_meta_index()
  467 + current = self._get_meta(tenant_id)
  468 + now_iso = datetime.now(timezone.utc).isoformat()
  469 + merged = {
  470 + "tenant_id": str(tenant_id),
  471 + **current,
  472 + **patch,
  473 + "updated_at": now_iso,
  474 + }
  475 + self.es_client.client.index(index=meta_index, id=str(tenant_id), document=merged, refresh="wait_for")
  476 +
  477 + def _cleanup_old_versions(self, tenant_id: str, keep_versions: int, protected_indices: Optional[List[str]] = None) -> List[str]:
  478 + if keep_versions < 1:
  479 + keep_versions = 1
  480 + protected = set(protected_indices or [])
  481 + pattern = get_suggestion_versioned_index_pattern(tenant_id)
  482 + all_indices = self.es_client.list_indices(pattern)
  483 + if len(all_indices) <= keep_versions:
  484 + return []
  485 +
  486 + # Names are timestamp-ordered by suffix; keep newest N.
  487 + kept = set(sorted(all_indices)[-keep_versions:])
  488 + dropped: List[str] = []
  489 + for idx in sorted(all_indices):
  490 + if idx in kept or idx in protected:
  491 + continue
  492 + if self.es_client.delete_index(idx):
  493 + dropped.append(idx)
  494 + return dropped
  495 +
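The retention rule relies on versioned index names sorting lexicographically by their timestamp suffix, and never drops a protected index. A quick sketch of the same selection logic with hypothetical index names:

```python
from typing import Iterable, List

def pick_indices_to_drop(
    all_indices: List[str],
    keep_versions: int = 2,
    protected: Iterable[str] = (),
) -> List[str]:
    # Keep the lexicographically newest N (timestamp suffix); never drop protected names.
    keep_versions = max(keep_versions, 1)
    kept = set(sorted(all_indices)[-keep_versions:])
    protected_set = set(protected)
    return [i for i in sorted(all_indices) if i not in kept and i not in protected_set]

names = [
    "suggest_162_v20240101",
    "suggest_162_v20240201",
    "suggest_162_v20240301",
]
assert pick_indices_to_drop(names, keep_versions=2) == ["suggest_162_v20240101"]
assert pick_indices_to_drop(names, keep_versions=2,
                            protected=("suggest_162_v20240101",)) == []
```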
  496 + def _publish_alias(self, tenant_id: str, index_name: str, keep_versions: int = 2) -> Dict[str, Any]:
  497 + alias_name = get_suggestion_alias_name(tenant_id)
  498 + current_indices = self.es_client.get_alias_indices(alias_name)
  499 +
  500 + actions: List[Dict[str, Any]] = []
  501 + for idx in current_indices:
  502 + actions.append({"remove": {"index": idx, "alias": alias_name}})
  503 + actions.append({"add": {"index": index_name, "alias": alias_name}})
  504 +
  505 + if not self.es_client.update_aliases(actions):
  506 + raise RuntimeError(f"Failed to publish alias {alias_name} -> {index_name}")
  507 +
  508 + dropped = self._cleanup_old_versions(
  509 + tenant_id=tenant_id,
  510 + keep_versions=keep_versions,
  511 + protected_indices=[index_name],
  512 + )
  513 +
  514 + self._upsert_meta(
  515 + tenant_id,
  516 + {
  517 + "active_alias": alias_name,
  518 + "active_index": index_name,
  519 + },
  520 + )
  521 +
  522 + return {
  523 + "alias": alias_name,
  524 + "previous_indices": current_indices,
  525 + "current_index": index_name,
  526 + "dropped_old_indices": dropped,
  527 + }
  528 +
  529 + def _resolve_incremental_target_index(self, tenant_id: str) -> Optional[str]:
  530 + """Resolve active suggestion index for incremental updates (alias only)."""
  531 + alias_name = get_suggestion_alias_name(tenant_id)
  532 + aliased = self.es_client.get_alias_indices(alias_name)
  533 + if aliased:
  534 + # alias should map to one index in this design
  535 + return sorted(aliased)[-1]
  536 + return None
  537 +
  538 + def _build_full_candidates(
  539 + self,
  540 + tenant_id: str,
  541 + index_languages: List[str],
  542 + primary_language: str,
  543 + days: int,
  544 + batch_size: int,
  545 + min_query_len: int,
  546 + ) -> Dict[Tuple[str, str], SuggestionCandidate]:
  547 + key_to_candidate: Dict[Tuple[str, str], SuggestionCandidate] = {}
  548 +
  549 + # Step 1: product title/qanchors
  550 + for hit in self._iter_products(tenant_id, batch_size=batch_size):
  551 + src = hit.get("_source", {}) or {}
  552 + product_id = str(src.get("spu_id") or src.get("id") or hit.get("_id") or "")
  553 + if not product_id:
  554 + continue
  555 + title_obj = src.get("title") or {}
  556 + qanchor_obj = src.get("qanchors") or {}
  557 +
  558 + for lang in index_languages:
  559 + title = ""
  560 + if isinstance(title_obj, dict):
  561 + title = self._prepare_title_for_suggest(title_obj.get(lang) or "")
  562 + if title:
  563 + text_norm = self._normalize_text(title)
  564 + if not self._looks_noise(text_norm):
  565 + key = (lang, text_norm)
  566 + c = key_to_candidate.get(key)
  567 + if c is None:
  568 + c = SuggestionCandidate(text=title, text_norm=text_norm, lang=lang)
  569 + key_to_candidate[key] = c
  570 + c.add_product("title", spu_id=product_id)
  571 +
  572 + q_raw = None
  573 + if isinstance(qanchor_obj, dict):
  574 + q_raw = qanchor_obj.get(lang)
  575 + for q_text in self._split_qanchors(q_raw):
  576 + text_norm = self._normalize_text(q_text)
  577 + if self._looks_noise(text_norm):
  578 + continue
  579 + key = (lang, text_norm)
  580 + c = key_to_candidate.get(key)
  581 + if c is None:
  582 + c = SuggestionCandidate(text=q_text, text_norm=text_norm, lang=lang)
  583 + key_to_candidate[key] = c
  584 + c.add_product("qanchor", spu_id=product_id)
  585 +
  586 + for tag_lang, tag in self._iter_multilang_product_tags(
  587 + src.get("enriched_tags"),
  588 + index_languages=index_languages,
  589 + primary_language=primary_language,
  590 + ):
  591 + text_norm = self._normalize_text(tag)
  592 + if self._looks_noise(text_norm):
  593 + continue
  594 + key = (tag_lang, text_norm)
  595 + c = key_to_candidate.get(key)
  596 + if c is None:
  597 + c = SuggestionCandidate(text=tag, text_norm=text_norm, lang=tag_lang)
  598 + key_to_candidate[key] = c
  599 + c.add_product("tag", spu_id=product_id)
  600 +
  601 + # Step 2: query logs
  602 + now = datetime.now(timezone.utc)
  603 + since = now - timedelta(days=days)
  604 + since_7d = now - timedelta(days=7)
  605 +
  606 + for row in self._iter_query_log_rows(tenant_id=tenant_id, since=since, until=now):
  607 + q = str(row.query or "").strip()
  608 + if len(q) < min_query_len:
  609 + continue
  610 +
  611 + lang, conf, source, conflict = self._resolve_query_language(
  612 + query=q,
  613 + log_language=getattr(row, "language", None),
  614 + request_params=getattr(row, "request_params", None),
  615 + index_languages=index_languages,
  616 + primary_language=primary_language,
  617 + )
  618 + text_norm = self._normalize_text(q)
  619 + if self._looks_noise(text_norm):
  620 + continue
  621 +
  622 + key = (lang, text_norm)
  623 + c = key_to_candidate.get(key)
  624 + if c is None:
  625 + c = SuggestionCandidate(text=q, text_norm=text_norm, lang=lang)
  626 + key_to_candidate[key] = c
  627 +
  628 + c.lang_confidence = max(c.lang_confidence, conf)
  629 + c.lang_source = source if c.lang_source == "default" else c.lang_source
  630 + c.lang_conflict = c.lang_conflict or conflict
  631 +
  632 + created_at = self._to_utc(getattr(row, "create_time", None))
  633 + is_7d = bool(created_at and created_at >= since_7d)
  634 + c.add_query_log(is_7d=is_7d)
  635 +
  636 + return key_to_candidate
  637 +
  638 + def _candidate_to_doc(self, tenant_id: str, c: SuggestionCandidate, now_iso: str) -> Dict[str, Any]:
  639 + rank_score = self._compute_rank_score_from_candidate(c)
  640 + completion_obj = {c.lang: {"input": [c.text], "weight": int(max(rank_score, 1.0) * 100)}}
  641 + sat_obj = {c.lang: c.text}
  642 + return {
  643 + "_id": f"{tenant_id}|{c.lang}|{c.text_norm}",
  644 + "tenant_id": str(tenant_id),
  645 + "lang": c.lang,
  646 + "text": c.text,
  647 + "text_norm": c.text_norm,
  648 + "sources": sorted(c.sources),
  649 + "title_doc_count": len(c.title_spu_ids),
  650 + "qanchor_doc_count": len(c.qanchor_spu_ids),
  651 + "tag_doc_count": len(c.tag_spu_ids),
  652 + "query_count_7d": c.query_count_7d,
  653 + "query_count_30d": c.query_count_30d,
  654 + "rank_score": float(rank_score),
  655 + "lang_confidence": float(c.lang_confidence),
  656 + "lang_source": c.lang_source,
  657 + "lang_conflict": bool(c.lang_conflict),
  658 + "status": 1,
  659 + "updated_at": now_iso,
  660 + "completion": completion_obj,
  661 + "sat": sat_obj,
  662 + }
  663 +
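The completion-weight mapping in `_candidate_to_doc` floors the rank score at 1.0 before scaling, so even a zero-signal candidate gets a positive integer weight. A minimal sketch of just that mapping:

```python
def completion_weight(rank_score: float) -> int:
    # Floor at 1.0 so a brand-new candidate still gets a usable weight of 100.
    return int(max(rank_score, 1.0) * 100)

assert completion_weight(0.0) == 100
assert completion_weight(2.5) == 250
assert completion_weight(0.7) == 100
```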
  664 + def rebuild_tenant_index(
  665 + self,
  666 + tenant_id: str,
  667 + days: int = 365,
  668 + batch_size: int = 500,
  669 + min_query_len: int = 1,
  670 + publish_alias: bool = True,
  671 + keep_versions: int = 2,
  672 + ) -> Dict[str, Any]:
  673 + """
  674 + Full rebuild.
  675 +
  676 + Phase2 default behavior:
  677 + - write to versioned index
  678 + - atomically publish alias
  679 + """
  680 + tenant_loader = get_tenant_config_loader()
  681 + tenant_cfg = tenant_loader.get_tenant_config(tenant_id)
  682 + index_languages: List[str] = tenant_cfg.get("index_languages") or ["en", "zh"]
  683 + primary_language: str = tenant_cfg.get("primary_language") or "en"
  684 +
  685 + alias_publish: Optional[Dict[str, Any]] = None
  686 + index_name: Optional[str] = None
  687 + try:
  688 + mapping = build_suggestion_mapping(index_languages=index_languages)
  689 + index_name = self._create_fresh_versioned_index(
  690 + tenant_id=tenant_id,
  691 + mapping=mapping,
  692 + )
  693 + self._ensure_new_index_ready(index_name)
  694 +
  695 + key_to_candidate = self._build_full_candidates(
  696 + tenant_id=tenant_id,
  697 + index_languages=index_languages,
  698 + primary_language=primary_language,
  699 + days=days,
  700 + batch_size=batch_size,
  701 + min_query_len=min_query_len,
  702 + )
  703 +
  704 + now_iso = datetime.now(timezone.utc).isoformat()
  705 + docs = [self._candidate_to_doc(tenant_id, c, now_iso) for c in key_to_candidate.values()]
  706 +
  707 + if docs:
  708 + bulk_result = self.es_client.bulk_index(index_name=index_name, docs=docs)
  709 + self.es_client.refresh(index_name)
  710 + else:
  711 + bulk_result = {"success": 0, "failed": 0, "errors": []}
  712 +
  713 + if publish_alias:
  714 + alias_publish = self._publish_alias(
  715 + tenant_id=tenant_id,
  716 + index_name=index_name,
  717 + keep_versions=keep_versions,
  718 + )
  719 +
  720 + now_utc = datetime.now(timezone.utc).isoformat()
  721 + meta_patch: Dict[str, Any] = {
  722 + "last_full_build_at": now_utc,
  723 + "last_incremental_watermark": now_utc,
  724 + }
  725 + if publish_alias:
  726 + meta_patch["active_index"] = index_name
  727 + meta_patch["active_alias"] = get_suggestion_alias_name(tenant_id)
  728 + self._upsert_meta(tenant_id, meta_patch)
  729 +
  730 + return {
  731 + "mode": "full",
  732 + "tenant_id": str(tenant_id),
  733 + "index_name": index_name,
  734 + "alias_published": bool(alias_publish),
  735 + "alias_publish": alias_publish,
  736 + "total_candidates": len(key_to_candidate),
  737 + "indexed_docs": len(docs),
  738 + "bulk_result": bulk_result,
  739 + }
  740 + except Exception:
  741 + if index_name and not alias_publish:
  742 + self.es_client.delete_index(index_name)
  743 + raise
  744 +
  745 + def _build_incremental_deltas(
  746 + self,
  747 + tenant_id: str,
  748 + index_languages: List[str],
  749 + primary_language: str,
  750 + since: datetime,
  751 + until: datetime,
  752 + min_query_len: int,
  753 + ) -> Dict[Tuple[str, str], QueryDelta]:
  754 + now = datetime.now(timezone.utc)
  755 + since_7d = now - timedelta(days=7)
  756 + deltas: Dict[Tuple[str, str], QueryDelta] = {}
  757 +
  758 + for row in self._iter_query_log_rows(tenant_id=tenant_id, since=since, until=until):
  759 + q = str(row.query or "").strip()
  760 + if len(q) < min_query_len:
  761 + continue
  762 +
  763 + lang, conf, source, conflict = self._resolve_query_language(
  764 + query=q,
  765 + log_language=getattr(row, "language", None),
  766 + request_params=getattr(row, "request_params", None),
  767 + index_languages=index_languages,
  768 + primary_language=primary_language,
  769 + )
  770 + text_norm = self._normalize_text(q)
  771 + if self._looks_noise(text_norm):
  772 + continue
  773 +
  774 + key = (lang, text_norm)
  775 + item = deltas.get(key)
  776 + if item is None:
  777 + item = QueryDelta(
  778 + tenant_id=str(tenant_id),
  779 + lang=lang,
  780 + text=q,
  781 + text_norm=text_norm,
  782 + lang_confidence=conf,
  783 + lang_source=source,
  784 + lang_conflict=conflict,
  785 + )
  786 + deltas[key] = item
  787 +
  788 + created_at = self._to_utc(getattr(row, "create_time", None))
  789 + item.delta_30d += 1
  790 + if created_at and created_at >= since_7d:
  791 + item.delta_7d += 1
  792 +
  793 + if conf > item.lang_confidence:
  794 + item.lang_confidence = conf
  795 + item.lang_source = source
  796 + item.lang_conflict = item.lang_conflict or conflict
  797 +
  798 + return deltas
  799 +
  800 + def _delta_to_upsert_doc(self, delta: QueryDelta, now_iso: str) -> Dict[str, Any]:
  801 + rank_score = self._compute_rank_score(
  802 + query_count_30d=delta.delta_30d,
  803 + query_count_7d=delta.delta_7d,
  804 + qanchor_doc_count=0,
  805 + title_doc_count=0,
  806 + tag_doc_count=0,
  807 + )
  808 + return {
  809 + "tenant_id": delta.tenant_id,
  810 + "lang": delta.lang,
  811 + "text": delta.text,
  812 + "text_norm": delta.text_norm,
  813 + "sources": ["query_log"],
  814 + "title_doc_count": 0,
  815 + "qanchor_doc_count": 0,
  816 + "tag_doc_count": 0,
  817 + "query_count_7d": delta.delta_7d,
  818 + "query_count_30d": delta.delta_30d,
  819 + "rank_score": float(rank_score),
  820 + "lang_confidence": float(delta.lang_confidence),
  821 + "lang_source": delta.lang_source,
  822 + "lang_conflict": bool(delta.lang_conflict),
  823 + "status": 1,
  824 + "updated_at": now_iso,
  825 + "completion": {
  826 + delta.lang: {
  827 + "input": [delta.text],
  828 + "weight": int(max(rank_score, 1.0) * 100),
  829 + }
  830 + },
  831 + "sat": {delta.lang: delta.text},
  832 + }
  833 +
  834 + @staticmethod
  835 + def _build_incremental_update_script() -> str:
  836 + return """
  837 + if (ctx._source == null || ctx._source.isEmpty()) {
  838 + ctx._source = params.upsert;
  839 + return;
  840 + }
  841 +
  842 + if (ctx._source.query_count_30d == null) { ctx._source.query_count_30d = 0; }
  843 + if (ctx._source.query_count_7d == null) { ctx._source.query_count_7d = 0; }
  844 + if (ctx._source.qanchor_doc_count == null) { ctx._source.qanchor_doc_count = 0; }
  845 + if (ctx._source.title_doc_count == null) { ctx._source.title_doc_count = 0; }
  846 + if (ctx._source.tag_doc_count == null) { ctx._source.tag_doc_count = 0; }
  847 +
  848 + ctx._source.query_count_30d += params.delta_30d;
  849 + ctx._source.query_count_7d += params.delta_7d;
  850 +
  851 + if (ctx._source.sources == null) { ctx._source.sources = new ArrayList(); }
  852 + if (!ctx._source.sources.contains('query_log')) { ctx._source.sources.add('query_log'); }
  853 +
  854 + if (ctx._source.lang_conflict == null) { ctx._source.lang_conflict = false; }
  855 + ctx._source.lang_conflict = ctx._source.lang_conflict || params.lang_conflict;
  856 +
  857 + if (ctx._source.lang_confidence == null || params.lang_confidence > ctx._source.lang_confidence) {
  858 + ctx._source.lang_confidence = params.lang_confidence;
  859 + ctx._source.lang_source = params.lang_source;
  860 + }
  861 +
  862 + int q30 = ctx._source.query_count_30d;
  863 + int q7 = ctx._source.query_count_7d;
  864 + int qa = ctx._source.qanchor_doc_count;
  865 + int td = ctx._source.title_doc_count;
  866 + int tg = ctx._source.tag_doc_count;
  867 +
  868 + double score = 1.8 * Math.log(1 + q30)
  869 + + 1.2 * Math.log(1 + q7)
  870 + + 1.0 * Math.log(1 + qa)
  871 + + 0.85 * Math.log(1 + tg)
  872 + + 0.6 * Math.log(1 + td);
  873 + ctx._source.rank_score = score;
  874 + ctx._source.status = 1;
  875 + ctx._source.updated_at = params.now_iso;
  876 + ctx._source.text = params.text;
  877 + ctx._source.lang = params.lang;
  878 + ctx._source.text_norm = params.text_norm;
  879 +
  880 + if (ctx._source.completion == null) { ctx._source.completion = new HashMap(); }
  881 + Map c = new HashMap();
  882 + c.put('input', params.completion_input);
  883 + c.put('weight', params.completion_weight);
  884 + ctx._source.completion.put(params.lang, c);
  885 +
  886 + if (ctx._source.sat == null) { ctx._source.sat = new HashMap(); }
  887 + ctx._source.sat.put(params.lang, params.text);
  888 + """
  889 +
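One invariant worth checking offline is that the Painless recomputation above stays numerically aligned with `_compute_rank_score`: Painless uses `Math.log(1 + x)` while the Python side uses `math.log1p(x)`, which agree to floating-point precision. A minimal parity sketch (weights copied from both snippets):

```python
import math

# Per-term weights: q30, q7, qanchor, tag, title (same order as both snippets).
WEIGHTS = (1.8, 1.2, 1.0, 0.85, 0.6)

def painless_style(q30: int, q7: int, qa: int, tg: int, td: int) -> float:
    # Mirrors the Painless expression: Math.log(1 + x) per term.
    terms = (q30, q7, qa, tg, td)
    return sum(w * math.log(1 + t) for w, t in zip(WEIGHTS, terms))

def python_style(q30: int, q7: int, qa: int, tg: int, td: int) -> float:
    # Mirrors _compute_rank_score: math.log1p per term.
    terms = (q30, q7, qa, tg, td)
    return sum(w * math.log1p(t) for w, t in zip(WEIGHTS, terms))

for args in [(0, 0, 0, 0, 0), (12, 3, 5, 2, 7), (1000, 200, 0, 0, 1)]:
    assert abs(painless_style(*args) - python_style(*args)) < 1e-9
```

If either side's weights change, this kind of check catches full-rebuild and incremental-update scores drifting apart.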
  890 + def _build_incremental_actions(self, target_index: str, deltas: Dict[Tuple[str, str], QueryDelta]) -> List[Dict[str, Any]]:
  891 + now_iso = datetime.now(timezone.utc).isoformat()
  892 + script_source = self._build_incremental_update_script()
  893 + actions: List[Dict[str, Any]] = []
  894 +
  895 + for delta in deltas.values():
  896 + upsert_doc = self._delta_to_upsert_doc(delta=delta, now_iso=now_iso)
  897 + upsert_rank = float(upsert_doc.get("rank_score") or 0.0)
  898 + action = {
  899 + "_op_type": "update",
  900 + "_index": target_index,
  901 + "_id": f"{delta.tenant_id}|{delta.lang}|{delta.text_norm}",
  902 + "scripted_upsert": True,
  903 + "script": {
  904 + "lang": "painless",
  905 + "source": script_source,
  906 + "params": {
  907 + "delta_30d": int(delta.delta_30d),
  908 + "delta_7d": int(delta.delta_7d),
  909 + "lang_confidence": float(delta.lang_confidence),
  910 + "lang_source": delta.lang_source,
  911 + "lang_conflict": bool(delta.lang_conflict),
  912 + "now_iso": now_iso,
  913 + "lang": delta.lang,
  914 + "text": delta.text,
  915 + "text_norm": delta.text_norm,
  916 + "completion_input": [delta.text],
  917 + "completion_weight": int(max(upsert_rank, 1.0) * 100),
  918 + "upsert": upsert_doc,
  919 + },
  920 + },
  921 + "upsert": upsert_doc,
  922 + }
  923 + actions.append(action)
  924 +
  925 + return actions
  926 +
  927 + def incremental_update_tenant_index(
  928 + self,
  929 + tenant_id: str,
  930 + min_query_len: int = 1,
  931 + fallback_days: int = 7,
  932 + overlap_minutes: int = 30,
  933 + bootstrap_if_missing: bool = True,
  934 + bootstrap_days: int = 30,
  935 + batch_size: int = 500,
  936 + ) -> Dict[str, Any]:
  937 + tenant_loader = get_tenant_config_loader()
  938 + tenant_cfg = tenant_loader.get_tenant_config(tenant_id)
  939 + index_languages: List[str] = tenant_cfg.get("index_languages") or ["en", "zh"]
  940 + primary_language: str = tenant_cfg.get("primary_language") or "en"
  941 +
  942 + target_index = self._resolve_incremental_target_index(tenant_id)
  943 + if not target_index:
  944 + if not bootstrap_if_missing:
  945 + raise RuntimeError(
  946 + f"No active suggestion index for tenant={tenant_id}. "
  947 + "Run full rebuild first or enable bootstrap_if_missing."
  948 + )
  949 + full_result = self.rebuild_tenant_index(
  950 + tenant_id=tenant_id,
  951 + days=bootstrap_days,
  952 + batch_size=batch_size,
  953 + min_query_len=min_query_len,
  954 + publish_alias=True
  955 + )
  956 + return {
  957 + "mode": "incremental",
  958 + "tenant_id": str(tenant_id),
  959 + "bootstrapped": True,
  960 + "bootstrap_result": full_result,
  961 + }
  962 +
  963 + meta = self._get_meta(tenant_id)
  964 + watermark_raw = meta.get("last_incremental_watermark") or meta.get("last_full_build_at")
  965 + now = datetime.now(timezone.utc)
  966 + default_since = now - timedelta(days=fallback_days)
  967 + since = None
  968 + if isinstance(watermark_raw, str) and watermark_raw.strip():
  969 + try:
  970 + since = self._to_utc(datetime.fromisoformat(watermark_raw.replace("Z", "+00:00")))
  971 + except Exception:
  972 + since = None
  973 + if since is None:
  974 + since = default_since
  975 + since = since - timedelta(minutes=max(overlap_minutes, 0))
  976 + if since < default_since:
  977 + since = default_since
  978 +
  979 + deltas = self._build_incremental_deltas(
  980 + tenant_id=tenant_id,
  981 + index_languages=index_languages,
  982 + primary_language=primary_language,
  983 + since=since,
  984 + until=now,
  985 + min_query_len=min_query_len,
  986 + )
  987 +
  988 + actions = self._build_incremental_actions(target_index=target_index, deltas=deltas)
  989 + bulk_result = self.es_client.bulk_actions(actions)
  990 + self.es_client.refresh(target_index)
  991 +
  992 + now_iso = now.isoformat()
  993 + self._upsert_meta(
  994 + tenant_id,
  995 + {
  996 + "last_incremental_build_at": now_iso,
  997 + "last_incremental_watermark": now_iso,
  998 + "active_index": target_index,
  999 + "active_alias": get_suggestion_alias_name(tenant_id),
  1000 + },
  1001 + )
  1002 +
  1003 + return {
  1004 + "mode": "incremental",
  1005 + "tenant_id": str(tenant_id),
  1006 + "target_index": target_index,
  1007 + "query_window": {
  1008 + "since": since.isoformat(),
  1009 + "until": now_iso,
  1010 + "overlap_minutes": int(overlap_minutes),
  1011 + },
  1012 + "updated_terms": len(deltas),
  1013 + "bulk_result": bulk_result,
  1014 + }
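The watermark arithmetic in `incremental_update_tenant_index` (parse the stored watermark, subtract the overlap so boundary rows are re-read, clamp to the fallback window) is easy to verify in isolation. This sketch replicates it with plain datetimes:

```python
from datetime import datetime, timedelta, timezone
from typing import Optional

def compute_since(
    watermark_iso: Optional[str],
    now: datetime,
    fallback_days: int = 7,
    overlap_minutes: int = 30,
) -> datetime:
    """Window start: watermark minus overlap, never earlier than now - fallback_days."""
    default_since = now - timedelta(days=fallback_days)
    since = None
    if watermark_iso:
        try:
            since = datetime.fromisoformat(watermark_iso.replace("Z", "+00:00"))
        except ValueError:
            since = None
    if since is None:
        since = default_since
    since -= timedelta(minutes=max(overlap_minutes, 0))
    return max(since, default_since)

now = datetime(2024, 6, 15, 12, 0, tzinfo=timezone.utc)
# A recent watermark is pushed back by the overlap to re-read boundary rows.
assert compute_since("2024-06-15T10:00:00+00:00", now) == \
    datetime(2024, 6, 15, 9, 30, tzinfo=timezone.utc)
# An unparsable or very old watermark is clamped to the bounded fallback window.
assert compute_since("not-a-date", now) == now - timedelta(days=7)
assert compute_since("2024-01-01T00:00:00Z", now) == now - timedelta(days=7)
```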
tests/ci/test_service_api_contracts.py
@@ -345,8 +345,15 @@ def test_indexer_build_docs_from_db_contract(indexer_client: TestClient):
345 def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch): 345 def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch):
346 import indexer.product_enrich as process_products 346 import indexer.product_enrich as process_products
347 347
348 - def _fake_build_index_content_fields(items: List[Dict[str, str]], tenant_id: str | None = None): 348 + def _fake_build_index_content_fields(
  349 + items: List[Dict[str, str]],
  350 + tenant_id: str | None = None,
  351 + enrichment_scopes: List[str] | None = None,
  352 + category_taxonomy_profile: str = "apparel",
  353 + ):
349 assert tenant_id == "162" 354 assert tenant_id == "162"
  355 + assert enrichment_scopes == ["generic", "category_taxonomy"]
  356 + assert category_taxonomy_profile == "apparel"
350 return [ 357 return [
351 { 358 {
352 "id": p["spu_id"], 359 "id": p["spu_id"],
@@ -358,6 +365,9 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
358 "enriched_attributes": [ 365 "enriched_attributes": [
359 {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}}, 366 {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}},
360 ], 367 ],
  368 + "enriched_taxonomy_attributes": [
  369 + {"name": "Product Type", "value": {"zh": ["T恤"], "en": ["t-shirt"]}},
  370 + ],
361 } 371 }
362 for p in items 372 for p in items
363 ] 373 ]
@@ -368,6 +378,8 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
368 "/indexer/enrich-content", 378 "/indexer/enrich-content",
369 json={ 379 json={
370 "tenant_id": "162", 380 "tenant_id": "162",
  381 + "enrichment_scopes": ["generic", "category_taxonomy"],
  382 + "category_taxonomy_profile": "apparel",
371 "items": [ 383 "items": [
372 {"spu_id": "1001", "title": "T-shirt"}, 384 {"spu_id": "1001", "title": "T-shirt"},
373 {"spu_id": "1002", "title": "Toy"}, 385 {"spu_id": "1002", "title": "Toy"},
@@ -377,6 +389,8 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
377 assert response.status_code == 200 389 assert response.status_code == 200
378 data = response.json() 390 data = response.json()
379 assert data["tenant_id"] == "162" 391 assert data["tenant_id"] == "162"
  392 + assert data["enrichment_scopes"] == ["generic", "category_taxonomy"]
  393 + assert data["category_taxonomy_profile"] == "apparel"
380 assert data["total"] == 2 394 assert data["total"] == 2
381 assert len(data["results"]) == 2 395 assert len(data["results"]) == 2
382 assert data["results"][0]["spu_id"] == "1001" 396 assert data["results"][0]["spu_id"] == "1001"
@@ -388,6 +402,102 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
388 "name": "enriched_tags", 402 "name": "enriched_tags",
389 "value": {"zh": ["tag1"], "en": ["tag1"]}, 403 "value": {"zh": ["tag1"], "en": ["tag1"]},
390 } 404 }
  405 + assert data["results"][0]["enriched_taxonomy_attributes"][0] == {
  406 + "name": "Product Type",
  407 + "value": {"zh": ["T恤"], "en": ["t-shirt"]},
  408 + }
  409 +
  410 +
  411 +def test_indexer_enrich_content_contract_accepts_deprecated_analysis_kinds(indexer_client: TestClient, monkeypatch):
  412 + import indexer.product_enrich as process_products
  413 +
  414 + seen: Dict[str, Any] = {}
  415 +
  416 + def _fake_build_index_content_fields(
  417 + items: List[Dict[str, str]],
  418 + tenant_id: str | None = None,
  419 + enrichment_scopes: List[str] | None = None,
  420 + category_taxonomy_profile: str = "apparel",
  421 + ):
  422 + seen["tenant_id"] = tenant_id
  423 + seen["enrichment_scopes"] = enrichment_scopes
  424 + seen["category_taxonomy_profile"] = category_taxonomy_profile
  425 + return [
  426 + {
  427 + "id": items[0]["spu_id"],
  428 + "qanchors": {},
  429 + "enriched_tags": {},
  430 + "enriched_attributes": [],
  431 + "enriched_taxonomy_attributes": [],
  432 + }
  433 + ]
  434 +
  435 + monkeypatch.setattr(process_products, "build_index_content_fields", _fake_build_index_content_fields)
  436 +
  437 + response = indexer_client.post(
  438 + "/indexer/enrich-content",
  439 + json={
  440 + "tenant_id": "162",
  441 + "analysis_kinds": ["taxonomy"],
  442 + "items": [{"spu_id": "1001", "title": "T-shirt"}],
  443 + },
  444 + )
  445 +
  446 + assert response.status_code == 200
  447 + data = response.json()
  448 + assert seen == {
  449 + "tenant_id": "162",
  450 + "enrichment_scopes": ["category_taxonomy"],
  451 + "category_taxonomy_profile": "apparel",
  452 + }
  453 + assert data["enrichment_scopes"] == ["category_taxonomy"]
  454 + assert data["category_taxonomy_profile"] == "apparel"
  455 +
  456 +
  457 +def test_indexer_enrich_content_contract_supports_non_apparel_taxonomy_profiles(indexer_client: TestClient, monkeypatch):
  458 + import indexer.product_enrich as process_products
  459 +
  460 + def _fake_build_index_content_fields(
  461 + items: List[Dict[str, str]],
  462 + tenant_id: str | None = None,
  463 + enrichment_scopes: List[str] | None = None,
  464 + category_taxonomy_profile: str = "apparel",
  465 + ):
  466 + assert tenant_id == "162"
  467 + assert enrichment_scopes == ["category_taxonomy"]
  468 + assert category_taxonomy_profile == "toys"
  469 + return [
  470 + {
  471 + "id": items[0]["spu_id"],
  472 + "qanchors": {},
  473 + "enriched_tags": {},
  474 + "enriched_attributes": [],
  475 + "enriched_taxonomy_attributes": [
  476 + {"name": "Product Type", "value": {"en": ["doll set"]}},
  477 + {"name": "Age Group", "value": {"en": ["kids"]}},
  478 + ],
  479 + }
  480 + ]
  481 +
  482 + monkeypatch.setattr(process_products, "build_index_content_fields", _fake_build_index_content_fields)
  483 +
  484 + response = indexer_client.post(
  485 + "/indexer/enrich-content",
  486 + json={
  487 + "tenant_id": "162",
  488 + "enrichment_scopes": ["category_taxonomy"],
  489 + "category_taxonomy_profile": "toys",
  490 + "items": [{"spu_id": "1001", "title": "Toy"}],
  491 + },
  492 + )
  493 +
  494 + assert response.status_code == 200
  495 + data = response.json()
  496 + assert data["category_taxonomy_profile"] == "toys"
  497 + assert data["results"][0]["enriched_taxonomy_attributes"] == [
  498 + {"name": "Product Type", "value": {"en": ["doll set"]}},
  499 + {"name": "Age Group", "value": {"en": ["kids"]}},
  500 + ]
391 501
392 502
393 def test_indexer_documents_contract(indexer_client: TestClient): 503 def test_indexer_documents_contract(indexer_client: TestClient):
tests/manual/README.md 0 → 100644
@@ -0,0 +1,5 @@
  1 +# Manual Tests
  2 +
  3 +`tests/manual/` holds trial-run scripts that require manually starting dependent services, observing results by hand, or relying on a real external environment.
  4 +
  5 +These scripts are outside the automated `pytest` regression scope and should not be lumped in with the contract tests under `tests/ci`.
scripts/test_build_docs_api.py renamed to tests/manual/test_build_docs_api.py
@@ -4,9 +4,9 @@
4 4
5 Usage: 5
6 1. Start the Indexer service first: ./scripts/start_indexer.sh (or uvicorn api.indexer_app:app --port 6004) 6 1. Start the Indexer service first: ./scripts/start_indexer.sh (or uvicorn api.indexer_app:app --port 6004)
7 - 2. Run: python scripts/test_build_docs_api.py 7 + 2. Run: python tests/manual/test_build_docs_api.py
8 8
9 - You can also specify the address: INDEXER_URL=http://localhost:6004 python scripts/test_build_docs_api.py 9 + You can also specify the address: INDEXER_URL=http://localhost:6004 python tests/manual/test_build_docs_api.py
10 """ 10 """
11 11
12 import json 12 import json
@@ -15,7 +15,7 @@ import sys @@ -15,7 +15,7 @@ import sys
15 from datetime import datetime, timezone 15 from datetime import datetime, timezone
16 16
17 # Project root directory 17 # Project root directory
18 -ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 18 +ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19 sys.path.insert(0, ROOT) 19 sys.path.insert(0, ROOT)
20 20
21 # Call the real service via requests by default; fall back to TestClient if it is not installed 21 # Call the real service via requests by default; fall back to TestClient if it is not installed
@@ -122,7 +122,7 @@ def main(): @@ -122,7 +122,7 @@ def main():
122 print("\n[Error] Unable to connect to the Indexer service:", e) 122 print("\n[Error] Unable to connect to the Indexer service:", e)
123 print("Start it first: ./scripts/start_indexer.sh or uvicorn api.indexer_app:app --port 6004") 123 print("Start it first: ./scripts/start_indexer.sh or uvicorn api.indexer_app:app --port 6004")
124 if HAS_REQUESTS: 124 if HAS_REQUESTS:
125 - print("Or use an in-process test: USE_TEST_CLIENT=1 python scripts/test_build_docs_api.py") 125 + print("Or use an in-process test: USE_TEST_CLIENT=1 python tests/manual/test_build_docs_api.py")
126 sys.exit(1) 126 sys.exit(1)
127 else: 127 else:
128 if not use_http and not HAS_REQUESTS: 128 if not use_http and not HAS_REQUESTS:
tests/test_embedding_pipeline.py
  1 +from dataclasses import asdict
1 from typing import Any, Dict, List, Optional 2 from typing import Any, Dict, List, Optional
2 3
3 import numpy as np 4 import numpy as np
tests/test_es_query_builder.py
@@ -208,3 +208,36 @@ def test_image_knn_clause_is_added_alongside_base_translation_and_text_knn(): @@ -208,3 +208,36 @@ def test_image_knn_clause_is_added_alongside_base_translation_and_text_knn():
208 assert image_knn["path"] == "image_embedding" 208 assert image_knn["path"] == "image_embedding"
209 assert image_knn["score_mode"] == "max" 209 assert image_knn["score_mode"] == "max"
210 assert image_knn["query"]["knn"]["field"] == "image_embedding.vector" 210 assert image_knn["query"]["knn"]["field"] == "image_embedding.vector"
  211 +
  212 +
  213 +def test_text_knn_plan_is_reused_for_ann_and_exact_rescore():
  214 + qb = _builder()
  215 + parsed_query = SimpleNamespace(query_tokens=["a", "b", "c", "d", "e"])
  216 +
  217 + ann_clause = qb.build_text_knn_clause(
  218 + np.array([0.1, 0.2, 0.3]),
  219 + parsed_query=parsed_query,
  220 + )
  221 + exact_clause = qb.build_exact_text_knn_rescore_clause(
  222 + np.array([0.1, 0.2, 0.3]),
  223 + parsed_query=parsed_query,
  224 + )
  225 +
  226 + assert ann_clause is not None
  227 + assert exact_clause is not None
  228 + assert ann_clause["knn"]["k"] == qb.knn_text_k_long
  229 + assert ann_clause["knn"]["num_candidates"] == qb.knn_text_num_candidates_long
  230 + assert ann_clause["knn"]["boost"] == qb.knn_text_boost * 1.4
  231 + assert exact_clause["script_score"]["script"]["params"]["boost"] == qb.knn_text_boost * 1.4
  232 +
  233 +
  234 +def test_image_knn_plan_is_reused_for_ann_and_exact_rescore():
  235 + qb = _builder()
  236 +
  237 + ann_clause = qb.build_image_knn_clause(np.array([0.4, 0.5, 0.6]))
  238 + exact_clause = qb.build_exact_image_knn_rescore_clause(np.array([0.4, 0.5, 0.6]))
  239 +
  240 + assert ann_clause is not None
  241 + assert exact_clause is not None
  242 + assert ann_clause["nested"]["query"]["knn"]["boost"] == qb.knn_image_boost
  243 + assert exact_clause["nested"]["query"]["script_score"]["script"]["params"]["boost"] == qb.knn_image_boost
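The two plan-reuse tests above pin a single invariant: the ANN `knn` clause and the exact `script_score` rescore clause must be derived from one shared plan so that `k`, `num_candidates`, and `boost` can never drift apart. A minimal sketch of that idea, using the constants the tests reference (120/160, 400/500, 20.0) but with illustrative helper names, not the project's actual builder API:

```python
# Sketch: derive both clauses from one plan. Function names are hypothetical.
def make_text_knn_plan(vector, *, long_query: bool):
    return {
        "field": "text_embedding",
        "vector": list(vector),
        "k": 160 if long_query else 120,                 # knn_text_k_long / knn_text_k
        "num_candidates": 500 if long_query else 400,
        "boost": 20.0 * (1.4 if long_query else 1.0),    # long queries get a 1.4x boost
    }

def build_ann_clause(plan):
    # Approximate kNN: an Elasticsearch-style `knn` query.
    return {"knn": {
        "field": plan["field"], "query_vector": plan["vector"],
        "k": plan["k"], "num_candidates": plan["num_candidates"],
        "boost": plan["boost"],
    }}

def build_exact_rescore_clause(plan):
    # Exact rescore: script_score over the same field, reusing the same boost.
    return {"script_score": {
        "query": {"exists": {"field": plan["field"]}},
        "script": {
            "source": "cosineSimilarity(params.vector, '" + plan["field"] + "') * params.boost",
            "params": {"vector": plan["vector"], "boost": plan["boost"]},
        },
    }}

plan = make_text_knn_plan([0.1, 0.2, 0.3], long_query=True)
ann = build_ann_clause(plan)
exact = build_exact_rescore_clause(plan)
# Because both clauses read the same plan, the boosts agree by construction.
assert ann["knn"]["boost"] == exact["script_score"]["script"]["params"]["boost"]
```

Deriving both clauses from one plan is what makes assertions like `ann_clause["knn"]["boost"] == qb.knn_text_boost * 1.4 == exact script params boost` hold without duplicated tuning constants.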
tests/test_llm_enrichment_batch_fill.py
@@ -10,8 +10,14 @@ from indexer.document_transformer import SPUDocumentTransformer @@ -10,8 +10,14 @@ from indexer.document_transformer import SPUDocumentTransformer
10 def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): 10 def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
11 seen_calls: List[Dict[str, Any]] = [] 11 seen_calls: List[Dict[str, Any]] = []
12 12
13 - def _fake_build_index_content_fields(items, tenant_id=None):  
14 - seen_calls.append({"n": len(items), "tenant_id": tenant_id}) 13 + def _fake_build_index_content_fields(items, tenant_id=None, category_taxonomy_profile=None):
  14 + seen_calls.append(
  15 + {
  16 + "n": len(items),
  17 + "tenant_id": tenant_id,
  18 + "category_taxonomy_profile": category_taxonomy_profile,
  19 + }
  20 + )
15 return [ 21 return [
16 { 22 {
17 "id": item["id"], 23 "id": item["id"],
@@ -19,10 +25,13 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): @@ -19,10 +25,13 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
19 "zh": [f"zh-anchor-{item['id']}"], 25 "zh": [f"zh-anchor-{item['id']}"],
20 "en": [f"en-anchor-{item['id']}"], 26 "en": [f"en-anchor-{item['id']}"],
21 }, 27 },
22 - "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]}, 28 + "enriched_tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
23 "enriched_attributes": [ 29 "enriched_attributes": [
24 {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}}, 30 {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
25 ], 31 ],
  32 + "enriched_taxonomy_attributes": [
  33 + {"name": "Product Type", "value": {"zh": ["连衣裙"], "en": ["dress"]}},
  34 + ],
26 } 35 }
27 for item in items 36 for item in items
28 ] 37 ]
@@ -50,10 +59,14 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): @@ -50,10 +59,14 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
50 59
51 transformer.fill_llm_attributes_batch(docs, rows) 60 transformer.fill_llm_attributes_batch(docs, rows)
52 61
53 - assert seen_calls == [{"n": 45, "tenant_id": "162"}] 62 + assert seen_calls == [{"n": 45, "tenant_id": "162", "category_taxonomy_profile": "apparel"}]
54 63
55 assert docs[0]["qanchors"]["zh"] == ["zh-anchor-0"] 64 assert docs[0]["qanchors"]["zh"] == ["zh-anchor-0"]
56 assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] 65 assert docs[0]["qanchors"]["en"] == ["en-anchor-0"]
57 - assert docs[0]["tags"]["zh"] == ["t1", "t2"]  
58 - assert docs[0]["tags"]["en"] == ["t1", "t2"] 66 + assert docs[0]["enriched_tags"]["zh"] == ["t1", "t2"]
  67 + assert docs[0]["enriched_tags"]["en"] == ["t1", "t2"]
59 assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"] 68 assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"]
  69 + assert {
  70 + "name": "Product Type",
  71 + "value": {"zh": ["连衣裙"], "en": ["dress"]},
  72 + } in docs[0]["enriched_taxonomy_attributes"]
tests/test_process_products_batching.py
@@ -13,7 +13,15 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch): @@ -13,7 +13,15 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch):
13 monkeypatch.setattr(process_products, "API_KEY", "fake-key") 13 monkeypatch.setattr(process_products, "API_KEY", "fake-key")
14 seen_batch_sizes: List[int] = [] 14 seen_batch_sizes: List[int] = []
15 15
16 - def _fake_process_batch(batch_data: List[Dict[str, str]], batch_num: int, target_lang: str = "zh"): 16 + def _fake_process_batch(
  17 + batch_data: List[Dict[str, str]],
  18 + batch_num: int,
  19 + target_lang: str = "zh",
  20 + analysis_kind: str = "content",
  21 + category_taxonomy_profile=None,
  22 + ):
  23 + assert analysis_kind == "content"
  24 + assert category_taxonomy_profile is None
17 seen_batch_sizes.append(len(batch_data)) 25 seen_batch_sizes.append(len(batch_data))
18 return [ 26 return [
19 { 27 {
@@ -35,7 +43,7 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch): @@ -35,7 +43,7 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch):
35 ] 43 ]
36 44
37 monkeypatch.setattr(process_products, "process_batch", _fake_process_batch) 45 monkeypatch.setattr(process_products, "process_batch", _fake_process_batch)
38 - monkeypatch.setattr(process_products, "_set_cached_anchor_result", lambda *args, **kwargs: None) 46 + monkeypatch.setattr(process_products, "_set_cached_analysis_result", lambda *args, **kwargs: None)
39 47
40 out = process_products.analyze_products( 48 out = process_products.analyze_products(
41 products=_mk_products(45), 49 products=_mk_products(45),
@@ -53,7 +61,15 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch): @@ -53,7 +61,15 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch):
53 monkeypatch.setattr(process_products, "API_KEY", "fake-key") 61 monkeypatch.setattr(process_products, "API_KEY", "fake-key")
54 seen_batch_sizes: List[int] = [] 62 seen_batch_sizes: List[int] = []
55 63
56 - def _fake_process_batch(batch_data: List[Dict[str, str]], batch_num: int, target_lang: str = "zh"): 64 + def _fake_process_batch(
  65 + batch_data: List[Dict[str, str]],
  66 + batch_num: int,
  67 + target_lang: str = "zh",
  68 + analysis_kind: str = "content",
  69 + category_taxonomy_profile=None,
  70 + ):
  71 + assert analysis_kind == "content"
  72 + assert category_taxonomy_profile is None
57 seen_batch_sizes.append(len(batch_data)) 73 seen_batch_sizes.append(len(batch_data))
58 return [ 74 return [
59 { 75 {
@@ -75,7 +91,7 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch): @@ -75,7 +91,7 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch):
75 ] 91 ]
76 92
77 monkeypatch.setattr(process_products, "process_batch", _fake_process_batch) 93 monkeypatch.setattr(process_products, "process_batch", _fake_process_batch)
78 - monkeypatch.setattr(process_products, "_set_cached_anchor_result", lambda *args, **kwargs: None) 94 + monkeypatch.setattr(process_products, "_set_cached_analysis_result", lambda *args, **kwargs: None)
79 95
80 out = process_products.analyze_products( 96 out = process_products.analyze_products(
81 products=_mk_products(3), 97 products=_mk_products(3),
tests/test_product_enrich_partial_mode.py
@@ -74,6 +74,28 @@ def test_create_prompt_splits_shared_context_and_localized_tail(): @@ -74,6 +74,28 @@ def test_create_prompt_splits_shared_context_and_localized_tail():
74 assert prefix_en.startswith("| No. | Product title | Category path |") 74 assert prefix_en.startswith("| No. | Product title | Category path |")
75 75
76 76
  77 +def test_create_prompt_supports_taxonomy_analysis_kind():
  78 + products = [{"id": "1", "title": "linen dress"}]
  79 +
  80 + shared_zh, user_zh, prefix_zh = product_enrich.create_prompt(
  81 + products,
  82 + target_lang="zh",
  83 + analysis_kind="taxonomy",
  84 + )
  85 + shared_fr, user_fr, prefix_fr = product_enrich.create_prompt(
  86 + products,
  87 + target_lang="fr",
  88 + analysis_kind="taxonomy",
  89 + )
  90 +
  91 + assert "apparel attribute taxonomy" in shared_zh
  92 + assert "1. linen dress" in shared_zh
  93 + assert "Language: Chinese" in user_zh
  94 + assert "Language: French" in user_fr
  95 + assert prefix_zh.startswith("| 序号 | 品类 | 目标性别 |")
  96 + assert prefix_fr.startswith("| No. | Product Type | Target Gender |")
  97 +
  98 +
77 def test_call_llm_logs_shared_context_once_and_verbose_contains_full_requests(): 99 def test_call_llm_logs_shared_context_once_and_verbose_contains_full_requests():
78 payloads = [] 100 payloads = []
79 response_bodies = [ 101 response_bodies = [
@@ -228,6 +250,38 @@ def test_process_batch_reads_result_and_validates_expected_fields(): @@ -228,6 +250,38 @@ def test_process_batch_reads_result_and_validates_expected_fields():
228 assert row["anchor_text"] == "法式收腰连衣裙" 250 assert row["anchor_text"] == "法式收腰连衣裙"
229 251
230 252
  253 +def test_process_batch_reads_taxonomy_result_with_schema_specific_fields():
  254 + merged_markdown = """| 序号 | 品类 | 目标性别 | 年龄段 | 适用季节 | 版型 | 廓形 | 领型 | 袖长类型 | 袖型 | 肩带设计 | 腰型 | 裤型 | 裙型 | 长度类型 | 闭合方式 | 设计细节 | 面料 | 成分 | 面料特性 | 服装特征 | 功能 | 主颜色 | 色系 | 印花 / 图案 | 适用场景 | 风格 |
  255 +|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
  256 +| 1 | 连衣裙 | 女 | 成人 | 春季,夏季 | 修身 | A字 | V领 | 无袖 | | 细肩带 | 高腰 | | A字裙 | 中长款 | 拉链 | 褶皱 | 梭织 | 聚酯纤维,氨纶 | 轻薄,透气 | 有内衬 | 易打理 | 酒红色 | 红色 | 纯色 | 约会,度假 | 浪漫 |
  257 +"""
  258 +
  259 + with mock.patch.object(
  260 + product_enrich,
  261 + "call_llm",
  262 + return_value=(merged_markdown, json.dumps({"choices": [{"message": {"content": "stub"}}]})),
  263 + ):
  264 + results = product_enrich.process_batch(
  265 + [{"id": "sku-1", "title": "dress"}],
  266 + batch_num=1,
  267 + target_lang="zh",
  268 + analysis_kind="taxonomy",
  269 + )
  270 +
  271 + assert len(results) == 1
  272 + row = results[0]
  273 + assert row["id"] == "sku-1"
  274 + assert row["lang"] == "zh"
  275 + assert row["title_input"] == "dress"
  276 + assert row["product_type"] == "连衣裙"
  277 + assert row["target_gender"] == "女"
  278 + assert row["age_group"] == "成人"
  279 + assert row["sleeve_length_type"] == "无袖"
  280 + assert row["material_composition"] == "聚酯纤维,氨纶"
  281 + assert row["occasion_end_use"] == "约会,度假"
  282 + assert row["style_aesthetic"] == "浪漫"
  283 +
  284 +
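The taxonomy test above feeds a one-row pipe-delimited markdown table back through `process_batch` and expects schema-specific fields. A minimal sketch of the kind of table parsing involved; the header-to-field mapping here is illustrative only, not the project's real schema:

```python
# Sketch: parse a markdown pipe table into per-row dicts, renaming localized
# headers to schema field keys. HEADER_TO_FIELD is a hypothetical mapping.
HEADER_TO_FIELD = {"序号": "row_no", "品类": "product_type", "目标性别": "target_gender"}

def parse_markdown_table(markdown: str):
    lines = [l for l in markdown.strip().splitlines() if l.strip().startswith("|")]
    headers = [c.strip() for c in lines[0].strip().strip("|").split("|")]
    rows = []
    for line in lines[2:]:  # skip the header row and the |----| separator row
        cells = [c.strip() for c in line.strip().strip("|").split("|")]
        rows.append({HEADER_TO_FIELD.get(h, h): v for h, v in zip(headers, cells)})
    return rows

rows = parse_markdown_table(
    "| 序号 | 品类 | 目标性别 |\n|----|----|----|\n| 1 | 连衣裙 | 女 |"
)
assert rows == [{"row_no": "1", "product_type": "连衣裙", "target_gender": "女"}]
```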
231 def test_analyze_products_uses_product_level_cache_across_batch_requests(): 285 def test_analyze_products_uses_product_level_cache_across_batch_requests():
232 cache_store = {} 286 cache_store = {}
233 process_calls = [] 287 process_calls = []
@@ -241,13 +295,36 @@ def test_analyze_products_uses_product_level_cache_across_batch_requests(): @@ -241,13 +295,36 @@ def test_analyze_products_uses_product_level_cache_across_batch_requests():
241 product.get("image_url", ""), 295 product.get("image_url", ""),
242 ) 296 )
243 297
244 - def fake_get_cached_anchor_result(product, target_lang): 298 + def fake_get_cached_analysis_result(
  299 + product,
  300 + target_lang,
  301 + analysis_kind="content",
  302 + category_taxonomy_profile=None,
  303 + ):
  304 + assert analysis_kind == "content"
  305 + assert category_taxonomy_profile is None
245 return cache_store.get(_cache_key(product, target_lang)) 306 return cache_store.get(_cache_key(product, target_lang))
246 307
247 - def fake_set_cached_anchor_result(product, target_lang, result): 308 + def fake_set_cached_analysis_result(
  309 + product,
  310 + target_lang,
  311 + result,
  312 + analysis_kind="content",
  313 + category_taxonomy_profile=None,
  314 + ):
  315 + assert analysis_kind == "content"
  316 + assert category_taxonomy_profile is None
248 cache_store[_cache_key(product, target_lang)] = result 317 cache_store[_cache_key(product, target_lang)] = result
249 318
250 - def fake_process_batch(batch_data, batch_num, target_lang="zh"): 319 + def fake_process_batch(
  320 + batch_data,
  321 + batch_num,
  322 + target_lang="zh",
  323 + analysis_kind="content",
  324 + category_taxonomy_profile=None,
  325 + ):
  326 + assert analysis_kind == "content"
  327 + assert category_taxonomy_profile is None
251 process_calls.append( 328 process_calls.append(
252 { 329 {
253 "batch_num": batch_num, 330 "batch_num": batch_num,
@@ -281,12 +358,12 @@ def test_analyze_products_uses_product_level_cache_across_batch_requests(): @@ -281,12 +358,12 @@ def test_analyze_products_uses_product_level_cache_across_batch_requests():
281 358
282 with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object( 359 with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object(
283 product_enrich, 360 product_enrich,
284 - "_get_cached_anchor_result",  
285 - side_effect=fake_get_cached_anchor_result, 361 + "_get_cached_analysis_result",
  362 + side_effect=fake_get_cached_analysis_result,
286 ), mock.patch.object( 363 ), mock.patch.object(
287 product_enrich, 364 product_enrich,
288 - "_set_cached_anchor_result",  
289 - side_effect=fake_set_cached_anchor_result, 365 + "_set_cached_analysis_result",
  366 + side_effect=fake_set_cached_analysis_result,
290 ), mock.patch.object( 367 ), mock.patch.object(
291 product_enrich, 368 product_enrich,
292 "process_batch", 369 "process_batch",
@@ -342,11 +419,12 @@ def test_analyze_products_reuses_cached_content_with_current_product_identity(): @@ -342,11 +419,12 @@ def test_analyze_products_reuses_cached_content_with_current_product_identity():
342 419
343 with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object( 420 with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object(
344 product_enrich, 421 product_enrich,
345 - "_get_cached_anchor_result",  
346 - wraps=lambda product, target_lang: product_enrich._normalize_analysis_result( 422 + "_get_cached_analysis_result",
  423 + wraps=lambda product, target_lang, analysis_kind="content", category_taxonomy_profile=None: product_enrich._normalize_analysis_result(
347 cached_result, 424 cached_result,
348 product=product, 425 product=product,
349 target_lang=target_lang, 426 target_lang=target_lang,
  427 + schema=product_enrich._get_analysis_schema("content"),
350 ), 428 ),
351 ), mock.patch.object( 429 ), mock.patch.object(
352 product_enrich, 430 product_enrich,
@@ -379,7 +457,49 @@ def test_analyze_products_reuses_cached_content_with_current_product_identity(): @@ -379,7 +457,49 @@ def test_analyze_products_reuses_cached_content_with_current_product_identity():
379 457
380 458
381 def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output(): 459 def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output():
382 - def fake_analyze_products(products, target_lang="zh", batch_size=None, tenant_id=None): 460 + def fake_analyze_products(
  461 + products,
  462 + target_lang="zh",
  463 + batch_size=None,
  464 + tenant_id=None,
  465 + analysis_kind="content",
  466 + category_taxonomy_profile=None,
  467 + ):
  468 + if analysis_kind == "taxonomy":
  469 + assert category_taxonomy_profile == "apparel"
  470 + return [
  471 + {
  472 + "id": products[0]["id"],
  473 + "lang": target_lang,
  474 + "title_input": products[0]["title"],
  475 + "product_type": f"{target_lang}-dress",
  476 + "target_gender": f"{target_lang}-women",
  477 + "age_group": "",
  478 + "season": f"{target_lang}-summer",
  479 + "fit": "",
  480 + "silhouette": "",
  481 + "neckline": "",
  482 + "sleeve_length_type": "",
  483 + "sleeve_style": "",
  484 + "strap_type": "",
  485 + "rise_waistline": "",
  486 + "leg_shape": "",
  487 + "skirt_shape": "",
  488 + "length_type": "",
  489 + "closure_type": "",
  490 + "design_details": "",
  491 + "fabric": "",
  492 + "material_composition": "",
  493 + "fabric_properties": "",
  494 + "clothing_features": "",
  495 + "functional_benefits": "",
  496 + "color": "",
  497 + "color_family": "",
  498 + "print_pattern": "",
  499 + "occasion_end_use": "",
  500 + "style_aesthetic": "",
  501 + }
  502 + ]
383 return [ 503 return [
384 { 504 {
385 "id": products[0]["id"], 505 "id": products[0]["id"],
@@ -423,8 +543,103 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() @@ -423,8 +543,103 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output()
423 }, 543 },
424 {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}}, 544 {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}},
425 ], 545 ],
  546 + "enriched_taxonomy_attributes": [
  547 + {
  548 + "name": "Product Type",
  549 + "value": {"zh": ["zh-dress"], "en": ["en-dress"]},
  550 + },
  551 + {
  552 + "name": "Target Gender",
  553 + "value": {"zh": ["zh-women"], "en": ["en-women"]},
  554 + },
  555 + {
  556 + "name": "Season",
  557 + "value": {"zh": ["zh-summer"], "en": ["en-summer"]},
  558 + },
  559 + ],
426 } 560 }
427 ] 561 ]
  562 +def test_build_index_content_fields_non_apparel_taxonomy_returns_en_only():
  563 + seen_calls = []
  564 +
  565 + def fake_analyze_products(
  566 + products,
  567 + target_lang="zh",
  568 + batch_size=None,
  569 + tenant_id=None,
  570 + analysis_kind="content",
  571 + category_taxonomy_profile=None,
  572 + ):
  573 + seen_calls.append((analysis_kind, target_lang, category_taxonomy_profile, tuple(p["id"] for p in products)))
  574 + if analysis_kind == "taxonomy":
  575 + assert category_taxonomy_profile == "toys"
  576 + assert target_lang == "en"
  577 + return [
  578 + {
  579 + "id": products[0]["id"],
  580 + "lang": "en",
  581 + "title_input": products[0]["title"],
  582 + "product_type": "doll set",
  583 + "age_group": "kids",
  584 + "character_theme": "",
  585 + "material": "",
  586 + "power_source": "",
  587 + "interactive_features": "",
  588 + "educational_play_value": "",
  589 + "piece_count_size": "",
  590 + "color": "",
  591 + "use_scenario": "",
  592 + }
  593 + ]
  594 +
  595 + return [
  596 + {
  597 + "id": product["id"],
  598 + "lang": target_lang,
  599 + "title_input": product["title"],
  600 + "title": product["title"],
  601 + "category_path": "",
  602 + "tags": f"{target_lang}-tag",
  603 + "target_audience": "",
  604 + "usage_scene": "",
  605 + "season": "",
  606 + "key_attributes": "",
  607 + "material": "",
  608 + "features": "",
  609 + "anchor_text": f"{target_lang}-anchor",
  610 + }
  611 + for product in products
  612 + ]
  613 +
  614 + with mock.patch.object(product_enrich, "analyze_products", side_effect=fake_analyze_products):
  615 + result = product_enrich.build_index_content_fields(
  616 + items=[{"spu_id": "2", "title": "toy"}],
  617 + tenant_id="170",
  618 + category_taxonomy_profile="toys",
  619 + )
  620 +
  621 + assert result == [
  622 + {
  623 + "id": "2",
  624 + "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]},
  625 + "enriched_tags": {"zh": ["zh-tag"], "en": ["en-tag"]},
  626 + "enriched_attributes": [
  627 + {
  628 + "name": "enriched_tags",
  629 + "value": {
  630 + "zh": ["zh-tag"],
  631 + "en": ["en-tag"],
  632 + },
  633 + }
  634 + ],
  635 + "enriched_taxonomy_attributes": [
  636 + {"name": "Product Type", "value": {"en": ["doll set"]}},
  637 + {"name": "Age Group", "value": {"en": ["kids"]}},
  638 + ],
  639 + }
  640 + ]
  641 + assert ("taxonomy", "zh", "toys", ("2",)) not in seen_calls
  642 + assert ("taxonomy", "en", "toys", ("2",)) in seen_calls
428 643
429 644
430 def test_anchor_cache_key_depends_on_product_input_not_identifiers(): 645 def test_anchor_cache_key_depends_on_product_input_not_identifiers():
@@ -461,6 +676,40 @@ def test_anchor_cache_key_depends_on_product_input_not_identifiers(): @@ -461,6 +676,40 @@ def test_anchor_cache_key_depends_on_product_input_not_identifiers():
461 assert key_a != key_c 676 assert key_a != key_c
462 677
463 678
  679 +def test_analysis_cache_key_isolated_by_analysis_kind():
  680 + product = {
  681 + "id": "1",
  682 + "title": "dress",
  683 + "brief": "soft cotton",
  684 + "description": "summer dress",
  685 + }
  686 +
  687 + content_key = product_enrich._make_analysis_cache_key(product, "zh", "content")
  688 + taxonomy_key = product_enrich._make_analysis_cache_key(product, "zh", "taxonomy")
  689 +
  690 + assert content_key != taxonomy_key
  691 +
  692 +
  693 +def test_analysis_cache_key_changes_when_prompt_contract_changes():
  694 + product = {
  695 + "id": "1",
  696 + "title": "dress",
  697 + "brief": "soft cotton",
  698 + "description": "summer dress",
  699 + }
  700 +
  701 + original_key = product_enrich._make_analysis_cache_key(product, "zh", "taxonomy")
  702 +
  703 + with mock.patch.object(
  704 + product_enrich,
  705 + "USER_INSTRUCTION_TEMPLATE",
  706 + "Please return JSON only. Language: {language}",
  707 + ):
  708 + changed_key = product_enrich._make_analysis_cache_key(product, "zh", "taxonomy")
  709 +
  710 + assert original_key != changed_key
  711 +
  712 +
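The two cache-key tests above fix the invariants: keys must be isolated by analysis kind, and must change when the prompt contract changes. A sketch of a key function satisfying both, hashing the analysis kind and the prompt template alongside product content; the constant and function names are stand-ins, not the project's real `_make_analysis_cache_key`:

```python
import hashlib
import json

# Stand-in for the real prompt template constant.
USER_INSTRUCTION_TEMPLATE = "Analyze the products. Language: {language}"

def make_analysis_cache_key(product, target_lang, analysis_kind):
    # Hash the analysis kind and the prompt contract together with the product
    # content, so (a) "content" and "taxonomy" results never collide, and
    # (b) editing the prompt template invalidates previously cached results.
    payload = json.dumps(
        {
            "title": product.get("title", ""),
            "brief": product.get("brief", ""),
            "description": product.get("description", ""),
            "lang": target_lang,
            "kind": analysis_kind,
            "prompt": USER_INSTRUCTION_TEMPLATE,
        },
        sort_keys=True,
        ensure_ascii=False,
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()

p = {"id": "1", "title": "dress", "brief": "soft cotton", "description": "summer dress"}
assert make_analysis_cache_key(p, "zh", "content") != make_analysis_cache_key(p, "zh", "taxonomy")
# Identifiers are deliberately excluded: same content, different id => same key.
assert make_analysis_cache_key({**p, "id": "2"}, "zh", "content") == make_analysis_cache_key(p, "zh", "content")
```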
464 def test_build_prompt_input_text_appends_brief_and_description_for_short_title(): 713 def test_build_prompt_input_text_appends_brief_and_description_for_short_title():
465 product = { 714 product = {
466 "title": "T恤", 715 "title": "T恤",
tests/test_rerank_client.py
1 from math import isclose 1 from math import isclose
2 2
3 -from config.schema import RerankFusionConfig  
4 -from search.rerank_client import fuse_scores_and_resort, run_lightweight_rerank 3 +from config.schema import CoarseRankFusionConfig, RerankFusionConfig
  4 +from search.rerank_client import coarse_resort_hits, fuse_scores_and_resort, run_lightweight_rerank
5 5
6 6
7 def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_primary(): 7 def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_primary():
@@ -172,6 +172,57 @@ def test_fuse_scores_and_resort_uses_max_of_text_and_image_knn_scores(): @@ -172,6 +172,57 @@ def test_fuse_scores_and_resort_uses_max_of_text_and_image_knn_scores():
172 assert isclose(debug[0]["image_knn_score"], 0.7, rel_tol=1e-9) 172 assert isclose(debug[0]["image_knn_score"], 0.7, rel_tol=1e-9)
173 173
174 174
  175 +def test_fuse_scores_and_resort_prefers_exact_knn_scores_over_ann_scores():
  176 + hits = [
  177 + {
  178 + "_id": "exact-mm-hit",
  179 + "_score": 1.0,
  180 + "matched_queries": {
  181 + "base_query": 1.5,
  182 + "knn_query": 0.2,
  183 + "image_knn_query": 0.7,
  184 + "exact_text_knn_query": 0.9,
  185 + "exact_image_knn_query": 0.1,
  186 + },
  187 + }
  188 + ]
  189 +
  190 + debug = fuse_scores_and_resort(hits, [0.8], debug=True)
  191 +
  192 + assert isclose(hits[0]["_knn_score"], 0.9, rel_tol=1e-9)
  193 + assert isclose(debug[0]["text_knn_score"], 0.9, rel_tol=1e-9)
  194 + assert isclose(debug[0]["image_knn_score"], 0.1, rel_tol=1e-9)
  195 + assert isclose(debug[0]["exact_text_knn_score"], 0.9, rel_tol=1e-9)
  196 + assert isclose(debug[0]["exact_image_knn_score"], 0.1, rel_tol=1e-9)
  197 + assert isclose(debug[0]["approx_text_knn_score"], 0.2, rel_tol=1e-9)
  198 + assert isclose(debug[0]["approx_image_knn_score"], 0.7, rel_tol=1e-9)
  199 + assert debug[0]["text_knn_source"] == "exact_text_knn_query"
  200 + assert debug[0]["image_knn_source"] == "exact_image_knn_query"
  201 +
  202 +
  203 +def test_fuse_scores_and_resort_falls_back_to_ann_when_exact_knn_missing():
  204 + hits = [
  205 + {
  206 + "_id": "ann-only-hit",
  207 + "_score": 1.0,
  208 + "matched_queries": {
  209 + "base_query": 1.5,
  210 + "knn_query": 0.4,
  211 + "image_knn_query": 0.5,
  212 + },
  213 + }
  214 + ]
  215 +
  216 + debug = fuse_scores_and_resort(hits, [0.8], debug=True)
  217 +
  218 + assert isclose(debug[0]["text_knn_score"], 0.4, rel_tol=1e-9)
  219 + assert isclose(debug[0]["image_knn_score"], 0.5, rel_tol=1e-9)
  220 + assert isclose(debug[0]["approx_text_knn_score"], 0.4, rel_tol=1e-9)
  221 + assert isclose(debug[0]["approx_image_knn_score"], 0.5, rel_tol=1e-9)
  222 + assert debug[0]["text_knn_source"] == "knn_query"
  223 + assert debug[0]["image_knn_source"] == "image_knn_query"
  224 +
  225 +
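The two fusion tests above encode one selection rule: prefer the exact rescore score when present, fall back to the ANN score otherwise, and record which named query won. A compact sketch of that rule (the `matched_queries` key names mirror the tests; the function itself is illustrative):

```python
def pick_knn_score(matched_queries, exact_name, ann_name):
    # Prefer the exact (script_score rescore) signal; fall back to the ANN one.
    if exact_name in matched_queries:
        return matched_queries[exact_name], exact_name
    return matched_queries.get(ann_name, 0.0), ann_name

mq = {"knn_query": 0.2, "exact_text_knn_query": 0.9, "image_knn_query": 0.7}

# Exact text score present: it wins over the ANN knn_query score.
score, source = pick_knn_score(mq, "exact_text_knn_query", "knn_query")
assert (score, source) == (0.9, "exact_text_knn_query")

# No exact image score: fall back to the ANN image_knn_query score.
score, source = pick_knn_score(mq, "exact_image_knn_query", "image_knn_query")
assert (score, source) == (0.7, "image_knn_query")
```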
175 def test_fuse_scores_and_resort_applies_knn_dismax_weights_and_tie_breaker(): 226 def test_fuse_scores_and_resort_applies_knn_dismax_weights_and_tie_breaker():
176 hits = [ 227 hits = [
177 { 228 {
@@ -206,6 +257,96 @@ def test_fuse_scores_and_resort_applies_knn_dismax_weights_and_tie_breaker(): @@ -206,6 +257,96 @@ def test_fuse_scores_and_resort_applies_knn_dismax_weights_and_tie_breaker():
206 assert isclose(debug[0]["knn_support_score"], 0.5, rel_tol=1e-9) 257 assert isclose(debug[0]["knn_support_score"], 0.5, rel_tol=1e-9)
207 258
208 259
  260 +def test_fuse_scores_and_resort_can_add_weighted_text_and_image_knn_factors():
  261 + hits = [
  262 + {
  263 + "_id": "a",
  264 + "_score": 1.0,
  265 + "matched_queries": {
  266 + "base_query": 2.0,
  267 + "knn_query": 0.4,
  268 + "image_knn_query": 0.5,
  269 + },
  270 + }
  271 + ]
  272 + fusion = RerankFusionConfig(
  273 + rerank_bias=0.0,
  274 + rerank_exponent=1.0,
  275 + text_bias=0.0,
  276 + text_exponent=1.0,
  277 + knn_text_weight=2.0,
  278 + knn_image_weight=1.0,
  279 + knn_tie_breaker=0.25,
  280 + knn_bias=0.1,
  281 + knn_exponent=1.0,
  282 + knn_text_exponent=2.0,
  283 + knn_image_exponent=3.0,
  284 + )
  285 +
  286 + debug = fuse_scores_and_resort(hits, [0.8], fusion=fusion, debug=True)
  287 +
  288 + weighted_text_knn = 0.8
  289 + weighted_image_knn = 0.5
  290 + expected_knn = weighted_text_knn + 0.25 * weighted_image_knn
  291 + expected_fused = (
  292 + 0.8
  293 + * 2.0
  294 + * (expected_knn + 0.1)
  295 + * ((weighted_text_knn + 0.1) ** 2.0)
  296 + * ((weighted_image_knn + 0.1) ** 3.0)
  297 + )
  298 +
  299 + assert isclose(hits[0]["_fused_score"], expected_fused, rel_tol=1e-9)
  300 + assert isclose(debug[0]["text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9)
  301 + assert isclose(debug[0]["image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9)
  302 + assert "weighted_text_knn_score=" in debug[0]["fusion_summary"]
  303 + assert "weighted_image_knn_score=" in debug[0]["fusion_summary"]
  304 +
  305 +
  306 +def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors():
  307 + hits = [
  308 + {
  309 + "_id": "coarse-a",
  310 + "_score": 1.0,
  311 + "matched_queries": {
  312 + "base_query": 2.0,
  313 + "knn_query": 0.4,
  314 + "image_knn_query": 0.5,
  315 + },
  316 + }
  317 + ]
  318 + fusion = CoarseRankFusionConfig(
  319 + es_bias=0.0,
  320 + es_exponent=1.0,
  321 + text_bias=0.0,
  322 + text_exponent=1.0,
  323 + knn_text_weight=2.0,
  324 + knn_image_weight=1.0,
  325 + knn_tie_breaker=0.25,
  326 + knn_bias=0.1,
  327 + knn_exponent=1.0,
  328 + knn_text_exponent=2.0,
  329 + knn_image_exponent=3.0,
  330 + )
  331 +
  332 + debug = coarse_resort_hits(hits, fusion=fusion, debug=True)
  333 +
  334 + weighted_text_knn = 0.8
  335 + weighted_image_knn = 0.5
  336 + expected_knn = weighted_text_knn + 0.25 * weighted_image_knn
  337 + expected_coarse = (
  338 + 1.0
  339 + * 2.0
  340 + * (expected_knn + 0.1)
  341 + * ((weighted_text_knn + 0.1) ** 2.0)
  342 + * ((weighted_image_knn + 0.1) ** 3.0)
  343 + )
  344 +
  345 + assert isclose(hits[0]["_coarse_score"], expected_coarse, rel_tol=1e-9)
  346 + assert isclose(debug[0]["coarse_text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9)
  347 + assert isclose(debug[0]["coarse_image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9)
  348 +
  349 +
209 def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch): 350 def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch):
210 hits = [ 351 hits = [
211 { 352 {
tests/test_search_rerank_window.py
1 from __future__ import annotations 1 from __future__ import annotations
2 2
3 -from dataclasses import dataclass 3 +from dataclasses import dataclass, field
4 from pathlib import Path 4 from pathlib import Path
5 from types import SimpleNamespace 5 from types import SimpleNamespace
6 from typing import Any, Dict, List 6 from typing import Any, Dict, List
@@ -30,7 +30,10 @@ class _FakeParsedQuery: @@ -30,7 +30,10 @@ class _FakeParsedQuery:
30 rewritten_query: str 30 rewritten_query: str
31 detected_language: str = "en" 31 detected_language: str = "en"
32 translations: Dict[str, str] = None 32 translations: Dict[str, str] = None
  33 + keywords_queries: Dict[str, str] = field(default_factory=dict)
33 query_vector: Any = None 34 query_vector: Any = None
  35 + image_query_vector: Any = None
  36 + query_tokens: List[str] = field(default_factory=list)
34 style_intent_profile: Any = None 37 style_intent_profile: Any = None
35 38
36 def text_for_rerank(self) -> str: 39 def text_for_rerank(self) -> str:
@@ -89,6 +92,15 @@ class _FakeQueryParser: @@ -89,6 +92,15 @@ class _FakeQueryParser:
89 92
90 93
91 class _FakeQueryBuilder: 94 class _FakeQueryBuilder:
  95 + knn_text_k = 120
  96 + knn_text_k_long = 160
  97 + knn_text_num_candidates = 400
  98 + knn_text_num_candidates_long = 500
  99 + knn_text_boost = 20.0
  100 + knn_image_k = 120
  101 + knn_image_num_candidates = 400
  102 + knn_image_boost = 20.0
  103 +
92 def build_query(self, **kwargs): 104 def build_query(self, **kwargs):
93 return { 105 return {
94 "query": {"match_all": {}}, 106 "query": {"match_all": {}},
@@ -185,13 +197,24 @@ class _FakeESClient: @@ -185,13 +197,24 @@ class _FakeESClient:
185 } 197 }
186 198
187 199
188 -def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 384): 200 +def _build_search_config(
  201 + *,
  202 + rerank_enabled: bool = True,
  203 + rerank_window: int = 384,
  204 + exact_knn_rescore_enabled: bool = False,
  205 + exact_knn_rescore_window: int = 0,
  206 +):
189 return SearchConfig( 207 return SearchConfig(
190 field_boosts={"title.en": 3.0}, 208 field_boosts={"title.en": 3.0},
191 indexes=[IndexConfig(name="default", label="default", fields=["title.en"])], 209 indexes=[IndexConfig(name="default", label="default", fields=["title.en"])],
192 query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False), 210 query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False),
193 function_score=FunctionScoreConfig(), 211 function_score=FunctionScoreConfig(),
194 - rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), 212 + rerank=RerankConfig(
  213 + enabled=rerank_enabled,
  214 + rerank_window=rerank_window,
  215 + exact_knn_rescore_enabled=exact_knn_rescore_enabled,
  216 + exact_knn_rescore_window=exact_knn_rescore_window,
  217 + ),
195 spu_config=SPUConfig(enabled=False), 218 spu_config=SPUConfig(enabled=False),
196 es_index_name="test_products", 219 es_index_name="test_products",
197 es_settings={}, 220 es_settings={},
@@ -289,7 +312,11 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): @@ -289,7 +312,11 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
289 }, 312 },
290 "spu_config": {"enabled": False}, 313 "spu_config": {"enabled": False},
291 "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []}, 314 "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []},
292 - "rerank": {"rerank_window": 384}, 315 + "rerank": {
  316 + "rerank_window": 384,
  317 + "exact_knn_rescore_enabled": True,
  318 + "exact_knn_rescore_window": 160,
  319 + },
293 } 320 }
294 config_path = tmp_path / "config.yaml" 321 config_path = tmp_path / "config.yaml"
295 config_path.write_text(yaml.safe_dump(config_data), encoding="utf-8") 322 config_path.write_text(yaml.safe_dump(config_data), encoding="utf-8")
@@ -298,6 +325,8 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): @@ -298,6 +325,8 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
298 loaded = loader.load_config(validate=False) 325 loaded = loader.load_config(validate=False)
299 326
300 assert loaded.rerank.enabled is True 327 assert loaded.rerank.enabled is True
  328 + assert loaded.rerank.exact_knn_rescore_enabled is True
  329 + assert loaded.rerank.exact_knn_rescore_window == 160
301 330
302 331
303 def test_config_loader_parses_named_rerank_instances(tmp_path: Path): 332 def test_config_loader_parses_named_rerank_instances(tmp_path: Path):
@@ -583,7 +612,7 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a @@ -583,7 +612,7 @@ def test_searcher_rerank_prefetch_source_includes_sku_fields_when_style_intent_a
583 } 612 }
584 613
585 614
586 -def test_searcher_skips_rerank_when_request_explicitly_false(monkeypatch): 615 +def test_searcher_keeps_previous_stage_order_when_request_explicitly_disables_rerank(monkeypatch):
587 es_client = _FakeESClient() 616 es_client = _FakeESClient()
588 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client) 617 searcher = _build_searcher(_build_search_config(rerank_enabled=True), es_client)
589 context = create_request_context(reqid="t2", uid="u2") 618 context = create_request_context(reqid="t2", uid="u2")
@@ -593,28 +622,95 @@ def test_searcher_skips_rerank_when_request_explicitly_false(monkeypatch): @@ -593,28 +622,95 @@ def test_searcher_skips_rerank_when_request_explicitly_false(monkeypatch):
593 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), 622 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
594 ) 623 )
595 624
596 - called: Dict[str, int] = {"count": 0} 625 + called: Dict[str, int] = {"count": 0, "fine": 0}
  626 +
  627 + def _fake_run_lightweight_rerank(**kwargs):
  628 + called["fine"] += 1
  629 + hits = kwargs["es_hits"]
  630 + for idx, hit in enumerate(hits):
  631 + hit["_fine_score"] = float(idx + 1)
  632 + hits.reverse()
  633 + return [hit["_fine_score"] for hit in hits], {"stage": "fine"}, []
597 634
598 def _fake_run_rerank(**kwargs): 635 def _fake_run_rerank(**kwargs):
599 called["count"] += 1 636 called["count"] += 1
600 return kwargs["es_response"], None, [] 637 return kwargs["es_response"], None, []
601 638
  639 + monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _fake_run_lightweight_rerank)
602 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank) 640 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
603 641
604 - searcher.search( 642 + result = searcher.search(
605 query="toy", 643 query="toy",
606 tenant_id="162", 644 tenant_id="162",
607 from_=20, 645 from_=20,
608 size=10, 646 size=10,
609 context=context, 647 context=context,
610 enable_rerank=False, 648 enable_rerank=False,
  649 + debug=True,
611 ) 650 )
612 651
613 assert called["count"] == 0 652 assert called["count"] == 0
614 - assert es_client.calls[0]["from_"] == 20  
615 - assert es_client.calls[0]["size"] == 10  
616 - assert es_client.calls[0]["include_named_queries_score"] is False  
617 - assert len(es_client.calls) == 1 653 + assert called["fine"] == 1
  654 + assert es_client.calls[0]["from_"] == 0
  655 + assert es_client.calls[0]["size"] == searcher.config.coarse_rank.input_window
  656 + assert es_client.calls[0]["include_named_queries_score"] is True
  657 + assert len(es_client.calls) == 3
  658 + assert es_client.calls[2]["body"]["query"]["ids"]["values"] == [str(i) for i in range(363, 353, -1)]
  659 + assert len(result.results) == 10
  660 + assert [item.spu_id for item in result.results[:3]] == ["363", "362", "361"]
  661 + assert result.debug_info["rerank"]["enabled"] is False
  662 + assert result.debug_info["rerank"]["applied"] is False
  663 + assert result.debug_info["rerank"]["skipped_reason"] == "disabled"
  664 + assert result.debug_info["per_result"][0]["ranking_funnel"]["rerank"]["rank"] == 21
  665 +
  666 +
  667 +def test_searcher_keeps_previous_stage_order_when_config_disables_rerank(monkeypatch):
  668 + es_client = _FakeESClient()
  669 + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
  670 + context = create_request_context(reqid="t2b", uid="u2b")
  671 +
  672 + monkeypatch.setattr(
  673 + "search.searcher.get_tenant_config_loader",
  674 + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
  675 + )
  676 +
  677 + called: Dict[str, int] = {"count": 0, "fine": 0}
  678 +
  679 + def _fake_run_lightweight_rerank(**kwargs):
  680 + called["fine"] += 1
  681 + hits = kwargs["es_hits"]
  682 + hits.reverse()
  683 + for idx, hit in enumerate(hits):
  684 + hit["_fine_score"] = float(len(hits) - idx)
  685 + return [hit["_fine_score"] for hit in hits], {"stage": "fine"}, []
  686 +
  687 + def _fake_run_rerank(**kwargs):
  688 + called["count"] += 1
  689 + return kwargs["es_response"], None, []
  690 +
  691 + monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _fake_run_lightweight_rerank)
  692 + monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
  693 +
  694 + result = searcher.search(
  695 + query="toy",
  696 + tenant_id="162",
  697 + from_=0,
  698 + size=5,
  699 + context=context,
  700 + enable_rerank=None,
  701 + debug=True,
  702 + )
  703 +
  704 + assert called["count"] == 0
  705 + assert called["fine"] == 1
  706 + assert es_client.calls[0]["from_"] == 0
  707 + assert es_client.calls[0]["size"] == searcher.config.coarse_rank.input_window
  708 + assert es_client.calls[0]["include_named_queries_score"] is True
  709 + assert len(result.results) == 5
  710 + assert [item.spu_id for item in result.results] == ["383", "382", "381", "380", "379"]
  711 + assert result.debug_info["rerank"]["enabled"] is False
  712 + assert result.debug_info["rerank"]["applied"] is False
  713 + assert result.debug_info["rerank"]["skipped_reason"] == "disabled"
618 714
619 715
620 def test_searcher_skips_rerank_when_page_exceeds_window(monkeypatch): 716 def test_searcher_skips_rerank_when_page_exceeds_window(monkeypatch):
@@ -919,7 +1015,8 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc @@ -919,7 +1015,8 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
919 1015
920 def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeypatch): 1016 def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeypatch):
921 es_client = _FakeESClient(total_hits=3) 1017 es_client = _FakeESClient(total_hits=3)
922 - searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) 1018 + cfg = _build_search_config(rerank_enabled=False)
  1019 + searcher = _build_searcher(cfg, es_client)
923 context = create_request_context(reqid="dbg", uid="u-dbg") 1020 context = create_request_context(reqid="dbg", uid="u-dbg")
924 1021
925 monkeypatch.setattr( 1022 monkeypatch.setattr(
@@ -939,7 +1036,8 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp @@ -939,7 +1036,8 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp
939 1036
940 assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"] 1037 assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"]
941 assert result.debug_info["query_analysis"]["query_tokens"] == [] 1038 assert result.debug_info["query_analysis"]["query_tokens"] == []
942 - assert result.debug_info["es_query_context"]["es_fetch_size"] == 2 1039 + expected_es_fetch = max(cfg.rerank.rerank_window, cfg.coarse_rank.input_window)
  1040 + assert result.debug_info["es_query_context"]["es_fetch_size"] == expected_es_fetch
943 assert result.debug_info["es_response"]["es_score_normalization_factor"] == 3.0 1041 assert result.debug_info["es_response"]["es_score_normalization_factor"] == 3.0
944 assert result.debug_info["per_result"][0]["initial_rank"] == 1 1042 assert result.debug_info["per_result"][0]["initial_rank"] == 1
945 assert result.debug_info["per_result"][0]["final_rank"] == 1 1043 assert result.debug_info["per_result"][0]["final_rank"] == 1
@@ -947,6 +1045,166 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp @@ -947,6 +1045,166 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp
947 assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0 1045 assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0
948 1046
949 1047
  1048 +def test_searcher_attaches_exact_knn_rescore_for_rank_window(monkeypatch):
  1049 + class _VectorQueryParser:
  1050 + def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any, target_languages: Any = None):
  1051 + return _FakeParsedQuery(
  1052 + original_query=query,
  1053 + query_normalized=query,
  1054 + rewritten_query=query,
  1055 + translations={},
  1056 + query_vector=np.array([0.1, 0.2, 0.3], dtype=np.float32),
  1057 + image_query_vector=np.array([0.4, 0.5, 0.6], dtype=np.float32),
  1058 + query_tokens=["dress", "formal", "spring", "summer", "floral"],
  1059 + )
  1060 +
  1061 + es_client = _FakeESClient(total_hits=5)
  1062 + base = _build_search_config(
  1063 + rerank_enabled=True,
  1064 + rerank_window=5,
  1065 + exact_knn_rescore_enabled=True,
  1066 + exact_knn_rescore_window=3,
  1067 + )
  1068 + config = SearchConfig(
  1069 + field_boosts=base.field_boosts,
  1070 + indexes=base.indexes,
  1071 + query_config=QueryConfig(
  1072 + enable_text_embedding=True,
  1073 + enable_query_rewrite=False,
  1074 + text_embedding_field="title_embedding",
  1075 + image_embedding_field="image_embedding.vector",
  1076 + ),
  1077 + function_score=base.function_score,
  1078 + coarse_rank=base.coarse_rank,
  1079 + fine_rank=FineRankConfig(enabled=False, input_window=5, output_window=5),
  1080 + rerank=base.rerank,
  1081 + spu_config=base.spu_config,
  1082 + es_index_name=base.es_index_name,
  1083 + es_settings=base.es_settings,
  1084 + )
  1085 + searcher = Searcher(
  1086 + es_client=es_client,
  1087 + config=config,
  1088 + query_parser=_VectorQueryParser(),
  1089 + image_encoder=SimpleNamespace(),
  1090 + )
  1091 + context = create_request_context(reqid="exact-rescore", uid="u-exact")
  1092 +
  1093 + monkeypatch.setattr(
  1094 + "search.searcher.get_tenant_config_loader",
  1095 + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
  1096 + )
  1097 +
  1098 + searcher.search(
  1099 + query="dress",
  1100 + tenant_id="162",
  1101 + from_=0,
  1102 + size=2,
  1103 + context=context,
  1104 + enable_rerank=False,
  1105 + debug=True,
  1106 + )
  1107 +
  1108 + body = es_client.calls[0]["body"]
  1109 + assert body["rescore"]["window_size"] == 3
  1110 + assert body["rescore"]["query"]["score_mode"] == "total"
  1111 + assert body["rescore"]["query"]["rescore_query_weight"] == 0.0
  1112 + should = body["rescore"]["query"]["rescore_query"]["bool"]["should"]
  1113 + names = []
  1114 + for clause in should:
  1115 + if "script_score" in clause:
  1116 + names.append(clause["script_score"]["_name"])
  1117 + elif "nested" in clause:
  1118 + names.append(clause["nested"]["_name"])
  1119 + assert names == ["exact_text_knn_query", "exact_image_knn_query"]
  1120 + recall_query = body["query"]
  1121 + if "bool" in recall_query and recall_query["bool"].get("must"):
  1122 + recall_query = recall_query["bool"]["must"][0]
  1123 + if "function_score" in recall_query:
  1124 + recall_query = recall_query["function_score"]["query"]
  1125 + recall_should = recall_query["bool"]["should"]
  1126 + text_knn_clause = next(
  1127 + clause["knn"]
  1128 + for clause in recall_should
  1129 + if clause.get("knn", {}).get("_name") == "knn_query"
  1130 + )
  1131 + image_knn_clause = next(
  1132 + clause["nested"]["query"]["knn"]
  1133 + for clause in recall_should
  1134 + if clause.get("nested", {}).get("_name") == "image_knn_query"
  1135 + )
  1136 + exact_text_clause = next(
  1137 + clause["script_score"]
  1138 + for clause in should
  1139 + if clause.get("script_score", {}).get("_name") == "exact_text_knn_query"
  1140 + )
  1141 + exact_image_clause = next(
  1142 + clause["nested"]["query"]["script_score"]
  1143 + for clause in should
  1144 + if clause.get("nested", {}).get("_name") == "exact_image_knn_query"
  1145 + )
  1146 + assert text_knn_clause["boost"] == 28.0
  1147 + assert exact_text_clause["script"]["params"]["boost"] == text_knn_clause["boost"]
  1148 + assert image_knn_clause["boost"] == 20.0
  1149 + assert exact_image_clause["script"]["params"]["boost"] == image_knn_clause["boost"]
  1150 +
  1151 +
  1152 +def test_searcher_skips_exact_knn_rescore_outside_rank_window(monkeypatch):
  1153 + class _VectorQueryParser:
  1154 + def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any, target_languages: Any = None):
  1155 + return _FakeParsedQuery(
  1156 + original_query=query,
  1157 + query_normalized=query,
  1158 + rewritten_query=query,
  1159 + translations={},
  1160 + query_vector=np.array([0.1, 0.2, 0.3], dtype=np.float32),
  1161 + )
  1162 +
  1163 + es_client = _FakeESClient(total_hits=20)
  1164 + base = _build_search_config(
  1165 + rerank_enabled=True,
  1166 + rerank_window=5,
  1167 + exact_knn_rescore_enabled=True,
  1168 + exact_knn_rescore_window=4,
  1169 + )
  1170 + config = SearchConfig(
  1171 + field_boosts=base.field_boosts,
  1172 + indexes=base.indexes,
  1173 + query_config=QueryConfig(
  1174 + enable_text_embedding=True,
  1175 + enable_query_rewrite=False,
  1176 + text_embedding_field="title_embedding",
  1177 + ),
  1178 + function_score=base.function_score,
  1179 + coarse_rank=base.coarse_rank,
  1180 + fine_rank=FineRankConfig(enabled=False, input_window=5, output_window=5),
  1181 + rerank=base.rerank,
  1182 + spu_config=base.spu_config,
  1183 + es_index_name=base.es_index_name,
  1184 + es_settings=base.es_settings,
  1185 + )
  1186 + searcher = _build_searcher(config, es_client)
  1187 + searcher.query_parser = _VectorQueryParser()
  1188 + context = create_request_context(reqid="exact-rescore-off", uid="u-exact-off")
  1189 +
  1190 + monkeypatch.setattr(
  1191 + "search.searcher.get_tenant_config_loader",
  1192 + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
  1193 + )
  1194 +
  1195 + searcher.search(
  1196 + query="dress",
  1197 + tenant_id="162",
  1198 + from_=5,
  1199 + size=2,
  1200 + context=context,
  1201 + enable_rerank=False,
  1202 + )
  1203 +
  1204 + body = es_client.calls[0]["body"]
  1205 + assert "rescore" not in body
  1206 +
  1207 +
950 def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disabled(monkeypatch): 1208 def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disabled(monkeypatch):
951 es_client = _FakeESClient(total_hits=5) 1209 es_client = _FakeESClient(total_hits=5)
952 config = _build_search_config(rerank_enabled=True, rerank_window=5) 1210 config = _build_search_config(rerank_enabled=True, rerank_window=5)
@@ -970,6 +1228,12 @@ def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disable @@ -970,6 +1228,12 @@ def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disable
970 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), 1228 lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
971 ) 1229 )
972 1230
  1231 + fine_called: Dict[str, int] = {"count": 0}
  1232 +
  1233 + def _fake_run_lightweight_rerank(**kwargs):
  1234 + fine_called["count"] += 1
  1235 + return [], {"stage": "fine"}, []
  1236 +
973 def _fake_run_rerank(**kwargs): 1237 def _fake_run_rerank(**kwargs):
974 hits = kwargs["es_response"]["hits"]["hits"] 1238 hits = kwargs["es_response"]["hits"]["hits"]
975 hits.reverse() 1239 hits.reverse()
@@ -994,6 +1258,7 @@ def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disable @@ -994,6 +1258,7 @@ def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disable
994 ) 1258 )
995 return kwargs["es_response"], {"model": "final-reranker"}, fused_debug 1259 return kwargs["es_response"], {"model": "final-reranker"}, fused_debug
996 1260
  1261 + monkeypatch.setattr("search.rerank_client.run_lightweight_rerank", _fake_run_lightweight_rerank)
997 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank) 1262 monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
998 1263
999 result = searcher.search( 1264 result = searcher.search(
@@ -1008,7 +1273,12 @@ def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disable @@ -1008,7 +1273,12 @@ def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disable
1008 1273
1009 per_result = {row["spu_id"]: row for row in result.debug_info["per_result"]} 1274 per_result = {row["spu_id"]: row for row in result.debug_info["per_result"]}
1010 moved = per_result["4"]["ranking_funnel"] 1275 moved = per_result["4"]["ranking_funnel"]
1011 - assert moved["fine_rank"]["rank"] is None 1276 + assert fine_called["count"] == 0
  1277 + assert result.debug_info["fine_rank"]["enabled"] is False
  1278 + assert result.debug_info["fine_rank"]["applied"] is False
  1279 + assert result.debug_info["fine_rank"]["skipped_reason"] == "disabled"
  1280 + assert moved["fine_rank"]["rank"] == 5
  1281 + assert moved["fine_rank"]["rank_change"] == 0
1012 assert moved["rerank"]["rank"] == 1 1282 assert moved["rerank"]["rank"] == 1
1013 assert moved["rerank"]["rank_change"] == 4 1283 assert moved["rerank"]["rank_change"] == 4
1014 assert moved["final_page"]["rank_change"] == 0 1284 assert moved["final_page"]["rank_change"] == 0
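The two exact-kNN rescore tests above pin down a specific request shape. A minimal sketch of a builder that produces that shape follows; the function name `build_exact_knn_rescore`, the `exists` filter queries, and the script sources are illustrative assumptions, not the real query builder:

```python
from typing import Any, Dict, List


def build_exact_knn_rescore(
    query_vector: List[float],
    image_vector: List[float],
    *,
    window_size: int,
    text_boost: float,
    image_boost: float,
) -> Dict[str, Any]:
    # score_mode "total" with rescore_query_weight 0.0 leaves the original
    # ranking contribution intact while the named exact-kNN clauses are still
    # evaluated over the top window_size hits.
    text_clause = {
        "script_score": {
            "_name": "exact_text_knn_query",
            "query": {"exists": {"field": "title_embedding"}},
            "script": {
                "source": "cosineSimilarity(params.vector, 'title_embedding') * params.boost",
                "params": {"vector": query_vector, "boost": text_boost},
            },
        }
    }
    image_clause = {
        "nested": {
            "_name": "exact_image_knn_query",
            "path": "image_embedding",
            "query": {
                "script_score": {
                    "query": {"exists": {"field": "image_embedding.vector"}},
                    "script": {
                        "source": "cosineSimilarity(params.vector, 'image_embedding.vector') * params.boost",
                        "params": {"vector": image_vector, "boost": image_boost},
                    },
                }
            },
        }
    }
    return {
        "window_size": window_size,
        "query": {
            "score_mode": "total",
            "query_weight": 1.0,
            "rescore_query_weight": 0.0,
            "rescore_query": {"bool": {"should": [text_clause, image_clause]}},
        },
    }
```

The assertions in `test_searcher_attaches_exact_knn_rescore_for_rank_window` walk exactly this structure: `window_size`, `score_mode`, `rescore_query_weight`, and the `_name` of each `should` clause.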
tests/test_translation_converter_resolution.py 0 → 100644
@@ -0,0 +1,85 @@ @@ -0,0 +1,85 @@
  1 +from __future__ import annotations
  2 +
  3 +import sys
  4 +import types
  5 +
  6 +import pytest
  7 +
  8 +import translation.ct2_conversion as ct2_conversion
  9 +
  10 +
  11 +class _FakeTransformersConverter:
  12 + def __init__(self, model_name_or_path):
  13 + self.model_name_or_path = model_name_or_path
  14 + self.load_calls = []
  15 +
  16 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  17 + self.load_calls.append(
  18 + {
  19 + "model_class": model_class,
  20 + "resolved_model_name_or_path": resolved_model_name_or_path,
  21 + "kwargs": dict(kwargs),
  22 + }
  23 + )
  24 + if "dtype" in kwargs or "torch_dtype" in kwargs:
  25 + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'")
  26 + return {"loaded": True, "path": resolved_model_name_or_path}
  27 +
  28 + def convert(self, output_dir, quantization=None, force=False):
  29 + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32")
  30 + return {
  31 + "loaded": loaded,
  32 + "output_dir": output_dir,
  33 + "quantization": quantization,
  34 + "force": force,
  35 + "load_calls": list(self.load_calls),
  36 + }
  37 +
  38 +
  39 +def _install_fake_ctranslate2(monkeypatch, base_converter):
  40 + converters_module = types.ModuleType("ctranslate2.converters")
  41 + converters_module.TransformersConverter = base_converter
  42 + ctranslate2_module = types.ModuleType("ctranslate2")
  43 + ctranslate2_module.converters = converters_module
  44 +
  45 + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module)
  46 + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module)
  47 +
  48 +
  49 +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch):
  50 + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter)
  51 + fake_transformers = types.ModuleType("transformers")
  52 + fake_transformers.AutoConfig = types.SimpleNamespace(
  53 + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path)
  54 + )
  55 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  56 +
  57 + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
  58 +
  59 + assert result["loaded"] == {"loaded": True, "path": "fake-model"}
  60 + assert result["output_dir"] == "/tmp/out"
  61 + assert result["quantization"] == "float16"
  62 + assert result["force"] is False
  63 + assert len(result["load_calls"]) == 2
  64 + assert result["load_calls"][0] == {
  65 + "model_class": "FakeModel",
  66 + "resolved_model_name_or_path": "fake-model",
  67 + "kwargs": {"dtype": "float32"},
  68 + }
  69 + assert result["load_calls"][1]["model_class"] == "FakeModel"
  70 + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model"
  71 + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None
  72 +
  73 +
  74 +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch):
  75 + class _AlwaysFailingConverter(_FakeTransformersConverter):
  76 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  77 + raise TypeError("different constructor error")
  78 +
  79 + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter)
  80 + fake_transformers = types.ModuleType("transformers")
  81 + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path))
  82 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  83 +
  84 + with pytest.raises(TypeError, match="different constructor error"):
  85 + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
tests/test_translation_local_backends.py
@@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch):
201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] 201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]]
202 202
203 203
  204 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch):
  205 + wrong_dir = tmp_path / "wrong-nllb"
  206 + wrong_dir.mkdir()
  207 + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8")
  208 +
  209 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  210 +
  211 + backend = NLLBCTranslate2TranslationBackend(
  212 + name="nllb-200-distilled-600m",
  213 + model_id="facebook/nllb-200-distilled-600M",
  214 + model_dir=str(wrong_dir),
  215 + device="cpu",
  216 + torch_dtype="float32",
  217 + batch_size=1,
  218 + max_input_length=16,
  219 + max_new_tokens=16,
  220 + num_beams=1,
  221 + )
  222 +
  223 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  224 + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M"
  225 +
  226 +
  227 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch):
  228 + incomplete_dir = tmp_path / "incomplete-nllb"
  229 + incomplete_dir.mkdir()
  230 + (incomplete_dir / "ctranslate2-float16").mkdir()
  231 +
  232 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  233 +
  234 + backend = NLLBCTranslate2TranslationBackend(
  235 + name="nllb-200-distilled-600m",
  236 + model_id="facebook/nllb-200-distilled-600M",
  237 + model_dir=str(incomplete_dir),
  238 + device="cpu",
  239 + torch_dtype="float32",
  240 + batch_size=1,
  241 + max_input_length=16,
  242 + max_new_tokens=16,
  243 + num_beams=1,
  244 + )
  245 +
  246 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  247 +
  248 +
204 def test_nllb_resolves_flores_short_tags_and_iso_no(): 249 def test_nllb_resolves_flores_short_tags_and_iso_no():
205 cat = build_nllb_language_catalog(None) 250 cat = build_nllb_language_catalog(None)
206 assert resolve_nllb_language_code("ca", cat) == "cat_Latn" 251 assert resolve_nllb_language_code("ca", cat) == "cat_Latn"
tests/test_translator_failure_semantics.py
@@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog):
197 ] 197 ]
198 198
199 199
  200 +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch):
  201 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  202 +
  203 + def _fake_create_backend(self, *, name, backend_type, cfg):
  204 + del self, backend_type, cfg
  205 + if name == "broken-nllb":
  206 + raise RuntimeError("broken model dir")
  207 +
  208 + class _Backend:
  209 + model = name
  210 +
  211 + @property
  212 + def supports_batch(self):
  213 + return True
  214 +
  215 + def translate(self, text, target_lang, source_lang=None, scene=None):
  216 + del target_lang, source_lang, scene
  217 + return text
  218 +
  219 + return _Backend()
  220 +
  221 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  222 + service = TranslationService(
  223 + {
  224 + "service_url": "http://127.0.0.1:6006",
  225 + "timeout_sec": 10.0,
  226 + "default_model": "llm",
  227 + "default_scene": "general",
  228 + "capabilities": {
  229 + "llm": {
  230 + "enabled": True,
  231 + "backend": "llm",
  232 + "model": "dummy-llm",
  233 + "base_url": "https://example.com",
  234 + "timeout_sec": 10.0,
  235 + "use_cache": True,
  236 + },
  237 + "broken-nllb": {
  238 + "enabled": True,
  239 + "backend": "local_nllb",
  240 + "model_id": "dummy",
  241 + "model_dir": "dummy",
  242 + "device": "cpu",
  243 + "torch_dtype": "float32",
  244 + "batch_size": 8,
  245 + "max_input_length": 16,
  246 + "max_new_tokens": 16,
  247 + "num_beams": 1,
  248 + "use_cache": True,
  249 + },
  250 + },
  251 + "cache": {
  252 + "ttl_seconds": 60,
  253 + "sliding_expiration": True,
  254 + },
  255 + }
  256 + )
  257 +
  258 + assert service.available_models == ["llm", "broken-nllb"]
  259 + assert service.loaded_models == ["llm"]
  260 + assert service.failed_models == ["broken-nllb"]
  261 + assert service.backend_errors["broken-nllb"] == "broken model dir"
  262 +
  263 + with pytest.raises(RuntimeError, match="failed to initialize"):
  264 + service.get_backend("broken-nllb")
  265 +
  266 +
200 def test_translation_cache_probe_models_order(): 267 def test_translation_cache_probe_models_order():
201 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} 268 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}}
202 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] 269 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"]
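The failure-tolerant startup asserted in `test_service_skips_failed_backend_but_keeps_healthy_capabilities` can be sketched as a small registry. The class name `BackendRegistry` and its constructor shape are assumptions for illustration; the real `TranslationService` wires this into its config handling:

```python
from typing import Any, Callable, Dict


class BackendRegistry:
    """Initialize every enabled capability; record (rather than raise) per-
    backend failures so healthy capabilities stay usable."""

    def __init__(self, capabilities: Dict[str, Dict[str, Any]], create_backend: Callable[..., Any]):
        self.available_models = [n for n, c in capabilities.items() if c.get("enabled")]
        self.backends: Dict[str, Any] = {}
        self.backend_errors: Dict[str, str] = {}
        for name in self.available_models:
            try:
                self.backends[name] = create_backend(name=name, cfg=capabilities[name])
            except Exception as exc:  # one broken backend must not abort startup
                self.backend_errors[name] = str(exc)

    @property
    def loaded_models(self):
        return list(self.backends)

    @property
    def failed_models(self):
        return list(self.backend_errors)

    def get_backend(self, name: str):
        # Deferred failure: only requests that target the broken backend error out.
        if name in self.backend_errors:
            raise RuntimeError(f"backend {name!r} failed to initialize: {self.backend_errors[name]}")
        return self.backends[name]
```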
1 -Subproject commit 03410570d4398084f5ca5c88ad968248e0f3fc5d 1 +Subproject commit 4450c293368655449f14b5fc89e1d06e28d7f307
translation/README.md
@@ -11,9 +11,9 @@ @@ -11,9 +11,9 @@
11 Related scripts and reports: 11 Related scripts and reports:
12 - Startup script: [`scripts/start_translator.sh`](/data/saas-search/scripts/start_translator.sh) 12 - Startup script: [`scripts/start_translator.sh`](/data/saas-search/scripts/start_translator.sh)
13 - Virtual environment: [`scripts/setup_translator_venv.sh`](/data/saas-search/scripts/setup_translator_venv.sh) 13 - Virtual environment: [`scripts/setup_translator_venv.sh`](/data/saas-search/scripts/setup_translator_venv.sh)
14 -- Model download: [`scripts/download_translation_models.py`](/data/saas-search/scripts/download_translation_models.py)  
15 -- Local model benchmark: [`scripts/benchmark_translation_local_models.py`](/data/saas-search/scripts/benchmark_translation_local_models.py)  
16 -- Focused benchmark script: [`scripts/benchmark_translation_local_models_focus.py`](/data/saas-search/scripts/benchmark_translation_local_models_focus.py) 14 +- Model download: [`scripts/translation/download_translation_models.py`](/data/saas-search/scripts/translation/download_translation_models.py)
  15 +- Local model benchmark: [`benchmarks/translation/benchmark_translation_local_models.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models.py)
  16 +- Focused benchmark script: [`benchmarks/translation/benchmark_translation_local_models_focus.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models_focus.py)
17 - Baseline performance report: [`perf_reports/20260318/translation_local_models/README.md`](/data/saas-search/perf_reports/20260318/translation_local_models/README.md) 17 - Baseline performance report: [`perf_reports/20260318/translation_local_models/README.md`](/data/saas-search/perf_reports/20260318/translation_local_models/README.md)
18 - CT2 extended report: [`perf_reports/20260318/translation_local_models_ct2/README.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2/README.md) 18 - CT2 extended report: [`perf_reports/20260318/translation_local_models_ct2/README.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2/README.md)
19 - CT2 focused tuning report: [`perf_reports/20260318/translation_local_models_ct2_focus/README.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2_focus/README.md) 19 - CT2 focused tuning report: [`perf_reports/20260318/translation_local_models_ct2_focus/README.md`](/data/saas-search/perf_reports/20260318/translation_local_models_ct2_focus/README.md)
@@ -493,7 +493,7 @@ cd /data/saas-search
 下载全部本地模型:
 
 ```bash
-./.venv-translator/bin/python scripts/download_translation_models.py --all-local
+./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local
 ```
 
 下载完成后,默认目录应存在:
@@ -550,8 +550,8 @@ curl -X POST http://127.0.0.1:6006/translate \
 - 切换到 CTranslate2 后需要重新跑一轮基准,尤其关注 `nllb-200-distilled-600m` 的单条延迟、并发 tail latency 和 `opus-mt-*` 的 batch throughput。
 
 性能脚本:
-- [`scripts/benchmark_translation_local_models.py`](/data/saas-search/scripts/benchmark_translation_local_models.py)
-- [`scripts/benchmark_translation_local_models_focus.py`](/data/saas-search/scripts/benchmark_translation_local_models_focus.py)
+- [`benchmarks/translation/benchmark_translation_local_models.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models.py)
+- [`benchmarks/translation/benchmark_translation_local_models_focus.py`](/data/saas-search/benchmarks/translation/benchmark_translation_local_models_focus.py)
 
 数据集:
 - [`products_analyzed.csv`](/data/saas-search/products_analyzed.csv)
@@ -601,14 +601,14 @@ curl -X POST http://127.0.0.1:6006/translate \
 
 ```bash
 cd /data/saas-search
-./.venv-translator/bin/python scripts/benchmark_translation_local_models.py
+./.venv-translator/bin/python benchmarks/translation/benchmark_translation_local_models.py
 ```
 
 本轮扩展压测复现命令:
 
 ```bash
 cd /data/saas-search
-./.venv-translator/bin/python scripts/benchmark_translation_local_models.py \
+./.venv-translator/bin/python benchmarks/translation/benchmark_translation_local_models.py \
   --suite extended \
   --disable-cache \
   --serial-items-per-case 256 \
@@ -620,7 +620,7 @@ cd /data/saas-search
 单模型扩展压测示例:
 
 ```bash
-./.venv-translator/bin/python scripts/benchmark_translation_local_models.py \
+./.venv-translator/bin/python benchmarks/translation/benchmark_translation_local_models.py \
   --single \
   --suite extended \
   --model opus-mt-zh-en \
@@ -639,7 +639,7 @@ cd /data/saas-search
 单条请求延迟复现:
 
 ```bash
-./.venv-translator/bin/python scripts/benchmark_translation_local_models.py \
+./.venv-translator/bin/python benchmarks/translation/benchmark_translation_local_models.py \
   --single \
   --suite extended \
   --model nllb-200-distilled-600m \
translation/backends/local_ctranslate2.py
@@ -4,9 +4,7 @@ from __future__ import annotations
 
 import logging
 import os
-import shutil
-import subprocess
-import sys
+import json
 import threading
 from pathlib import Path
 from typing import Dict, List, Optional, Sequence, Union
@@ -24,6 +22,7 @@ from translation.text_splitter import (
     join_translated_segments,
     split_text_for_translation,
 )
+from translation.ct2_conversion import convert_transformers_model
 
 logger = logging.getLogger(__name__)
 
@@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str:
     return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}")
 
 
-def _resolve_converter_binary() -> str:
-    candidate = shutil.which("ct2-transformers-converter")
-    if candidate:
-        return candidate
-    venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"
-    if venv_candidate.exists():
-        return str(venv_candidate)
-    raise RuntimeError(
-        "ct2-transformers-converter was not found. "
-        "Ensure ctranslate2 is installed in the active translator environment."
-    )
+def _detect_local_model_type(model_dir: str) -> Optional[str]:
+    config_path = Path(model_dir).expanduser() / "config.json"
+    if not config_path.exists():
+        return None
+    try:
+        with open(config_path, "r", encoding="utf-8") as handle:
+            payload = json.load(handle) or {}
+    except Exception as exc:
+        logger.warning("Failed to inspect local translation config %s: %s", config_path, exc)
+        return None
+    model_type = str(payload.get("model_type") or "").strip().lower()
+    return model_type or None
 
 
 class LocalCTranslate2TranslationBackend:
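The new `_detect_local_model_type` helper reads the Hugging Face `config.json` in a local model directory and returns its `model_type`, tolerating missing or unreadable files. A minimal stdlib-only sketch of the same check (the helper name and demo payload here are illustrative, not taken from the repo):

```python
import json
import tempfile
from pathlib import Path
from typing import Optional

def detect_local_model_type(model_dir: str) -> Optional[str]:
    """Return the lowercased `model_type` from config.json, or None if absent/unreadable."""
    config_path = Path(model_dir).expanduser() / "config.json"
    if not config_path.exists():
        return None
    try:
        payload = json.loads(config_path.read_text(encoding="utf-8")) or {}
    except (OSError, ValueError):
        # Corrupt or unreadable config: fall back to "unknown" rather than crash.
        return None
    model_type = str(payload.get("model_type") or "").strip().lower()
    return model_type or None

# Demo against a throwaway directory.
with tempfile.TemporaryDirectory() as tmp:
    (Path(tmp) / "config.json").write_text(json.dumps({"model_type": "Marian"}), encoding="utf-8")
    print(detect_local_model_type(tmp))             # -> marian
print(detect_local_model_type("/nonexistent-dir"))  # -> None
```

Returning `None` instead of raising is what lets the backend fall back to `model_id` when the local directory is incomplete.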
@@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend:
         self.ct2_decoding_length_extra = int(ct2_decoding_length_extra)
         self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min))
         self._tokenizer_lock = threading.Lock()
+        self._local_model_source = self._resolve_local_model_source()
         self._load_runtime()
 
     @property
@@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend:
         return True
 
     def _tokenizer_source(self) -> str:
-        return self.model_dir if os.path.exists(self.model_dir) else self.model_id
+        return self._local_model_source or self.model_id
 
     def _model_source(self) -> str:
-        return self.model_dir if os.path.exists(self.model_dir) else self.model_id
+        return self._local_model_source or self.model_id
+
+    def _expected_local_model_types(self) -> Optional[set[str]]:
+        return None
+
+    def _resolve_local_model_source(self) -> Optional[str]:
+        model_path = Path(self.model_dir).expanduser()
+        if not model_path.exists():
+            return None
+        if not (model_path / "config.json").exists():
+            logger.warning(
+                "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id",
+                self.model,
+                model_path,
+            )
+            return None
+
+        expected_types = self._expected_local_model_types()
+        if not expected_types:
+            return str(model_path)
+
+        detected_type = _detect_local_model_type(str(model_path))
+        if detected_type is None:
+            return str(model_path)
+        if detected_type in expected_types:
+            return str(model_path)
+
+        logger.warning(
+            "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id",
+            self.model,
+            model_path,
+            detected_type,
+            sorted(expected_types),
+        )
+        return None
 
     def _tokenizer_kwargs(self) -> Dict[str, object]:
         return {}
@@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend:
         )
 
         ct2_path.parent.mkdir(parents=True, exist_ok=True)
-        converter = _resolve_converter_binary()
        logger.info(
             "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s",
             self.model,
@@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend:
             self.ct2_conversion_quantization,
         )
         try:
-            subprocess.run(
-                [
-                    converter,
-                    "--model",
-                    model_source,
-                    "--output_dir",
-                    str(ct2_path),
-                    "--quantization",
-                    self.ct2_conversion_quantization,
-                ],
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
+            convert_transformers_model(
+                model_source,
+                str(ct2_path),
+                self.ct2_conversion_quantization,
             )
-        except subprocess.CalledProcessError as exc:
-            stderr = exc.stderr.strip()
+        except Exception as exc:
             raise RuntimeError(
-                f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}"
+                f"Failed to convert model '{self.model}' to CTranslate2: {exc}"
             ) from exc
 
     def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]:
@@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
             f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}"
         )
 
+    def _expected_local_model_types(self) -> Optional[set[str]]:
+        return {"marian"}
+
 
 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
     """Local backend for NLLB models on CTranslate2."""
@@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
         if resolve_nllb_language_code(target_lang, self.language_codes) is None:
             raise ValueError(f"Unsupported NLLB target language: {target_lang}")
 
+    def _expected_local_model_types(self) -> Optional[set[str]]:
+        return {"m2m_100", "nllb_moe"}
+
     def _get_tokenizer_for_source(self, source_lang: str):
         src_code = resolve_nllb_language_code(source_lang, self.language_codes)
         if src_code is None:
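The `_expected_local_model_types` overrides above follow a template-method pattern: the base resolver asks each subclass which `model_type` values it may accept, and the base class answers `None` to mean "accept anything". A condensed sketch of that dispatch (class and method names here are illustrative stand-ins, not the repo's classes):

```python
from typing import Optional, Set

class BaseBackend:
    """Base resolver: subclasses narrow the accepted Hugging Face model types."""
    def _expected_local_model_types(self) -> Optional[Set[str]]:
        return None  # base class accepts any model_type

    def accepts(self, detected_type: Optional[str]) -> bool:
        expected = self._expected_local_model_types()
        if not expected or detected_type is None:
            return True  # nothing to validate against: keep the local directory
        return detected_type in expected

class MarianLike(BaseBackend):
    def _expected_local_model_types(self) -> Optional[Set[str]]:
        return {"marian"}

print(MarianLike().accepts("marian"))    # -> True
print(MarianLike().accepts("m2m_100"))   # -> False (falls back to model_id)
print(BaseBackend().accepts("m2m_100"))  # -> True
```

A rejected directory is only a fallback trigger, not an error, which matches the warning-then-`model_id` behavior in the diff.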
translation/ct2_conversion.py 0 → 100644
@@ -0,0 +1,52 @@
+"""Helpers for converting Hugging Face translation models to CTranslate2."""
+
+from __future__ import annotations
+
+import copy
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def convert_transformers_model(
+    model_name_or_path: str,
+    output_dir: str,
+    quantization: str,
+    *,
+    force: bool = False,
+) -> str:
+    from ctranslate2.converters import TransformersConverter
+    from transformers import AutoConfig
+
+    class _CompatibleTransformersConverter(TransformersConverter):
+        def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
+            try:
+                return super().load_model(model_class, resolved_model_name_or_path, **kwargs)
+            except TypeError as exc:
+                if "unexpected keyword argument 'dtype'" not in str(exc):
+                    raise
+                if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None:
+                    raise
+
+                logger.warning(
+                    "Retrying CTranslate2 model load without dtype hints | model=%s class=%s",
+                    resolved_model_name_or_path,
+                    getattr(model_class, "__name__", model_class),
+                )
+                retry_kwargs = dict(kwargs)
+                retry_kwargs.pop("dtype", None)
+                retry_kwargs.pop("torch_dtype", None)
+                config = retry_kwargs.get("config")
+                if config is None:
+                    config = AutoConfig.from_pretrained(resolved_model_name_or_path)
+                else:
+                    config = copy.deepcopy(config)
+                if hasattr(config, "dtype"):
+                    config.dtype = None
+                if hasattr(config, "torch_dtype"):
+                    config.torch_dtype = None
+                retry_kwargs["config"] = config
+                return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs)
+
+    converter = _CompatibleTransformersConverter(model_name_or_path)
+    return converter.convert(output_dir=output_dir, quantization=quantization, force=force)
translation/service.py
@@ -31,7 +31,12 @@ class TranslationService:
         if not self._enabled_capabilities:
             raise ValueError("No enabled translation backends found in services.translation.capabilities")
         self._translation_cache = TranslationCache(self.config["cache"])
-        self._backends = self._initialize_backends()
+        self._backends: Dict[str, TranslationBackendProtocol] = {}
+        self._backend_errors: Dict[str, str] = {}
+        self._initialize_backends()
+        if not self._backends:
+            details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error"
+            raise RuntimeError(f"No translation backends could be initialized: {details}")
 
     def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
         enabled: Dict[str, Dict[str, object]] = {}
@@ -62,24 +67,47 @@ class TranslationService:
             raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
         return factory(name=name, cfg=cfg)
 
-    def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:
-        backends: Dict[str, TranslationBackendProtocol] = {}
-        for name, capability_cfg in self._enabled_capabilities.items():
-            backend_type = str(capability_cfg["backend"])
-            logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
-            backends[name] = self._create_backend(
+    def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]:
+        capability_cfg = self._enabled_capabilities.get(name)
+        if capability_cfg is None:
+            return None
+        if name in self._backends:
+            return self._backends[name]
+
+        backend_type = str(capability_cfg["backend"])
+        logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
+        try:
+            backend = self._create_backend(
                 name=name,
                 backend_type=backend_type,
                 cfg=capability_cfg,
             )
-            logger.info(
-                "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
+        except Exception as exc:
+            error_text = str(exc).strip() or exc.__class__.__name__
+            self._backend_errors[name] = error_text
+            logger.error(
+                "Translation backend initialization failed | model=%s backend=%s error=%s",
                 name,
                 backend_type,
-                bool(capability_cfg.get("use_cache")),
-                getattr(backends[name], "model", name),
+                error_text,
+                exc_info=True,
             )
-        return backends
+            return None
+
+        self._backends[name] = backend
+        self._backend_errors.pop(name, None)
+        logger.info(
+            "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
+            name,
+            backend_type,
+            bool(capability_cfg.get("use_cache")),
+            getattr(backend, "model", name),
+        )
+        return backend
+
+    def _initialize_backends(self) -> None:
+        for name, capability_cfg in self._enabled_capabilities.items():
+            self._load_backend(name)
 
     def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
         from translation.backends.qwen_mt import QwenMTTranslationBackend
@@ -178,13 +206,27 @@ class TranslationService: @@ -178,13 +206,27 @@ class TranslationService:
178 def loaded_models(self) -> List[str]: 206 def loaded_models(self) -> List[str]:
179 return list(self._backends.keys()) 207 return list(self._backends.keys())
180 208
  209 + @property
  210 + def failed_models(self) -> List[str]:
  211 + return list(self._backend_errors.keys())
  212 +
  213 + @property
  214 + def backend_errors(self) -> Dict[str, str]:
  215 + return dict(self._backend_errors)
  216 +
181 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: 217 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
182 normalized = normalize_translation_model(self.config, model) 218 normalized = normalize_translation_model(self.config, model)
183 - backend = self._backends.get(normalized) 219 + backend = self._backends.get(normalized) or self._load_backend(normalized)
184 if backend is None: 220 if backend is None:
185 - raise ValueError(  
186 - f"Translation model '{normalized}' is not enabled. "  
187 - f"Available models: {', '.join(self.available_models) or 'none'}" 221 + if normalized not in self._enabled_capabilities:
  222 + raise ValueError(
  223 + f"Translation model '{normalized}' is not enabled. "
  224 + f"Available models: {', '.join(self.available_models) or 'none'}"
  225 + )
  226 + error_text = self._backend_errors.get(normalized) or "unknown initialization error"
  227 + raise RuntimeError(
  228 + f"Translation model '{normalized}' failed to initialize: {error_text}. "
  229 + f"Loaded models: {', '.join(self.loaded_models) or 'none'}"
188 ) 230 )
189 return backend 231 return backend
190 232
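The `service.py` changes replace eager all-or-nothing initialization with lazy loading: failures are recorded per backend and retried on the next `get_backend` lookup instead of crashing the whole service. A condensed, stdlib-only sketch of that lazy-load-with-error-cache pattern (the class and factory names below are illustrative, not the service's API):

```python
from typing import Callable, Dict, Optional

class LazyBackends:
    def __init__(self, factories: Dict[str, Callable[[], object]]):
        self._factories = factories
        self._backends: Dict[str, object] = {}  # successful initializations
        self._errors: Dict[str, str] = {}       # last failure message per backend

    def get(self, name: str) -> Optional[object]:
        if name in self._backends:
            return self._backends[name]
        factory = self._factories.get(name)
        if factory is None:
            return None  # not an enabled capability
        try:
            backend = factory()
        except Exception as exc:
            self._errors[name] = str(exc) or exc.__class__.__name__
            return None  # failure recorded; the next lookup retries
        self._backends[name] = backend
        self._errors.pop(name, None)  # clear stale error on success
        return backend

# A factory that fails once (e.g. an incomplete model download), then succeeds.
attempts = {"n": 0}
def flaky():
    attempts["n"] += 1
    if attempts["n"] == 1:
        raise RuntimeError("model download incomplete")
    return "backend-ready"

svc = LazyBackends({"nllb": flaky})
print(svc.get("nllb"))  # -> None (first attempt fails, error cached)
print(svc.get("nllb"))  # -> backend-ready (retried on the next lookup)
```

Keeping the error text around is what lets the real `get_backend` distinguish "not enabled" (`ValueError`) from "enabled but failed to initialize" (`RuntimeError` with the cached cause).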