From 2efad04b21072915abc94097289cee2cb1287888 Mon Sep 17 00:00:00 2001 From: tangwang Date: Tue, 24 Mar 2026 15:42:46 +0800 Subject: [PATCH] 意图匹配的性能优化: 上面一版实现,性能上完全无法接受。因此进行了一轮策略简化 --- context/request_context.py | 2 -- docs/TODO-意图判断.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ frontend/static/js/app.js | 44 +++++++++++++++++++++++++++++++++++++++++++- search/searcher.py | 7 ++----- search/sku_intent_selector.py | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------- tests/test_search_rerank_window.py | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 360 insertions(+), 117 deletions(-) diff --git a/context/request_context.py b/context/request_context.py index 22d6875..4539ad4 100644 --- a/context/request_context.py +++ b/context/request_context.py @@ -42,7 +42,6 @@ class QueryAnalysisResult: query_vector: Optional[List[float]] = None boolean_ast: Optional[str] = None is_simple_query: bool = True - domain: str = "default" @dataclass @@ -281,7 +280,6 @@ class RequestContext: 'query_normalized': self.query_analysis.query_normalized, 'rewritten_query': self.query_analysis.rewritten_query, 'detected_language': self.query_analysis.detected_language, - 'domain': self.query_analysis.domain, 'is_simple_query': self.query_analysis.is_simple_query }, 'performance': { diff --git a/docs/TODO-意图判断.md b/docs/TODO-意图判断.md index 4686d4d..ce70c55 100644 --- a/docs/TODO-意图判断.md +++ b/docs/TODO-意图判断.md @@ -51,3 +51,49 @@ QueryParser 负责产出统一的“款式意图分析结果”,包含:命 Searcher 只编排:解析 query -> 首次 ES 召回 -> 若有款式意图则对 rerank 窗口内全部 hits 做 SKU 预筛选并补充 rerank doc 文本 -> run_rerank -> 分页 -> page fill -> ResultFormatter。这样旧的页内 SKU 
置顶逻辑可以下沉或删除,避免前后两个阶段重复做同一件事。我再补看一下你最近写的意图草稿文档,确认有没有现成词表示例或边界条件,避免我这边把词表格式和你预期做偏。 + + + +意图匹配的性能优化: +上面一版实现,性能上完全无法接受。因此进行了一轮策略简化 + +prompt +style_sku_prepare_hits阶段耗时太长。请根据需求,思考优化的方法,给出性能优化的方案。 +1. _select_by_embedding,有缓存吗,option_value的值是有限的,之前已经算过的,就不用再算了。不仅仅是embedding相似的结果,整个option_value的匹配结果,是有包含、还是没包含,相似度多少,都不用重新计算。比如之前已经有一个sku的某个属性值叫做“卡其色”,已经算出来是否文本匹配了,那么不需要再去做文本匹配。如果已经算出来向量的相似度,那么不需要再去取向量以及计算相似度。 +2. 匹配可以适当的优化: +匹配流程简化: +1)找到第一个文本匹配的,如果有直接匹配成功。不需要考虑匹配多个的情况。 +2)如果全部都没有匹配,那么进行embedding筛选。 + +匹配规则: +option_name的匹配,直接看规范化后的option_name是不是意图维度的泛化词之一(比如颜色、color、colour),如果没有匹配的,现在应该是把所有维度都算上,这样匹配成本和比较成本太高了,去掉这些逻辑,这种情况不需要加后缀、不需要选择sku。 +option_value的匹配。意图检测的时候,有匹配的query中的命中的词,这个词被包含在属性值中,那么就算匹配。属性值被包含在query(包括翻译文本)中,也算匹配。提高匹配的覆盖率。 + +3. 这一阶段得到sku选择的结果即可(选中的sku的id,也可以为空值表示没找到匹配成功的,这种情况不需要拼接title后缀给重排输入),但是不用着急做image_url的替换和sku的置顶。等最后填充的时候判断有选中sku的时候直接做替换和置顶即可。 +请你思考如何进行设计,提高性能的时候不带来复杂度的提升,可以适当的重构以降低修改后的代码行数。 +@search/sku_intent_selector.py @query/style_intent.py + + +修改结果: +核心变化在 `search/sku_intent_selector.py` 和 `search/searcher.py`。 + +现在的策略是: +- `option_name` 只做规范化后 alias 精确匹配;任何意图维度没 resolve 到字段,就直接不选 SKU、不加 suffix。 +- `option_value` 文本匹配改成顺序扫描 SKU,找到第一个命中的就直接返回,不再收集多个候选再比较。 +- 文本匹配规则支持两种: + - query 命中的意图词被属性值包含 + - 属性值被 query/translation 包含 +- 只有当所有 SKU 都没有文本命中时,才做 embedding 选择。 +- `prepare_hits()` 现在只产出决策和 `_style_rerank_suffix`,不再提前改 `_source`;真正的 `image_url` 替换和 SKU 置顶统一后移到 `apply_precomputed_decisions()`。 +- 加了请求级缓存,避免同一个 `option_value`/`selection_text` 在一次请求里重复做文本判断、取向量和算相似度。 +- 顺手删掉了旧的 generalized match / fallback_text 路径,代码比之前更短也更直。 + +另外补了回归测试,覆盖了: +- 翻译文本命中后正确选 SKU +- 多个文本命中时取第一个 +- `option_name` 不命中 alias 时不做 SKU 选择 +- 无文本命中时走 embedding fallback + +验证过: +- `pytest tests/test_search_rerank_window.py -q` 通过 +- 变更文件 lint 无报错 diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index a8a0630..23363ce 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -885,6 +885,40 @@ function goToPage(page) { window.scrollTo({ top: 0,
behavior: 'smooth' }); } +/** Query-analysis intent block: dimensions, matched surface form, canonical value, source query variant. */ +function formatIntentDetectionHtml(intent) { + const profile = intent || null; + let block = '
intent_detection:
'; + if (!profile || typeof profile !== 'object') { + block += '
(no intent payload — style intent may be disabled or context missing)
'; + return block; + } + const active = !!profile.active; + block += `
active: ${active ? 'yes' : 'no'}
`; + const intents = Array.isArray(profile.intents) ? profile.intents : []; + if (!intents.length) { + block += '
intents: (none — no vocabulary match on query variants)
'; + return block; + } + block += '
intents:
'; + if (Array.isArray(profile.query_variants) && profile.query_variants.length > 0) { + block += '
query_variants:
'; + block += `
${escapeHtml(customStringify(profile.query_variants))}
`; + } + return block; +} + // Display debug info function displayDebugInfo(data) { const debugInfoDiv = document.getElementById('debugInfo'); @@ -916,7 +950,6 @@ function displayDebugInfo(data) { html += `
detected_language: ${escapeHtml(debugInfo.query_analysis.detected_language || 'N/A')}
`; html += `
index_languages: ${escapeHtml((debugInfo.query_analysis.index_languages || []).join(', ') || 'N/A')}
`; html += `
query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}
`; - html += `
domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}
`; html += `
is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}
`; if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) { @@ -932,6 +965,9 @@ function displayDebugInfo(data) { if (debugInfo.query_analysis.boolean_ast) { html += `
boolean_ast: ${escapeHtml(debugInfo.query_analysis.boolean_ast)}
`; } + + const intentPayload = debugInfo.query_analysis.intent_detection ?? debugInfo.query_analysis.style_intent_profile; + html += formatIntentDetectionHtml(intentPayload); html += ''; } @@ -942,6 +978,12 @@ function displayDebugInfo(data) { html += `
translation_enabled: ${debugInfo.feature_flags.translation_enabled ? 'enabled' : 'disabled'}
`; html += `
embedding_enabled: ${debugInfo.feature_flags.embedding_enabled ? 'enabled' : 'disabled'}
`; html += `
rerank_enabled: ${debugInfo.feature_flags.rerank_enabled ? 'enabled' : 'disabled'}
`; + if (debugInfo.feature_flags.style_intent_enabled !== undefined) { + html += `
style_intent_enabled: ${debugInfo.feature_flags.style_intent_enabled ? 'enabled' : 'disabled'}
`; + } + if (debugInfo.feature_flags.style_intent_active !== undefined) { + html += `
style_intent_active: ${debugInfo.feature_flags.style_intent_active ? 'yes' : 'no'}
`; + } html += ''; } diff --git a/search/searcher.py b/search/searcher.py index e3e7138..d7bccea 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -119,7 +119,6 @@ class Searcher: self.style_sku_selector = StyleSkuSelector( self.style_intent_registry, text_encoder_getter=lambda: getattr(self.query_parser, "text_encoder", None), - tokenizer_getter=lambda: getattr(self.query_parser, "_tokenizer", None), ) # Query builder - simplified single-layer architecture @@ -397,7 +396,6 @@ class Searcher: detected_language=parsed_query.detected_language, translations=parsed_query.translations, query_vector=parsed_query.query_vector.tolist() if parsed_query.query_vector is not None else None, - domain="default", is_simple_query=True ) context.metadata["feature_flags"]["style_intent_active"] = self._has_style_intent(parsed_query) @@ -732,7 +730,7 @@ class Searcher: rerank_debug_by_doc[str(doc_id)] = item if self._has_style_intent(parsed_query): - if in_rerank_window and style_intent_decisions: + if style_intent_decisions: self.style_sku_selector.apply_precomputed_decisions( es_hits, style_intent_decisions, @@ -867,8 +865,7 @@ class Searcher: "has_vector": context.query_analysis.query_vector is not None, "query_tokens": getattr(parsed_query, "query_tokens", []), "is_simple_query": context.query_analysis.is_simple_query, - "domain": context.query_analysis.domain, - "style_intent_profile": context.get_intermediate_result("style_intent_profile"), + "intent_detection": context.get_intermediate_result("style_intent_profile"), }, "es_query": context.get_intermediate_result('es_query', {}), "es_query_context": { diff --git a/search/sku_intent_selector.py b/search/sku_intent_selector.py index c832573..4f9216a 100644 --- a/search/sku_intent_selector.py +++ b/search/sku_intent_selector.py @@ -5,7 +5,7 @@ SKU selection for style-intent-aware search results. 
from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple import numpy as np @@ -39,7 +39,18 @@ class _SkuCandidate: sku_id: str sku: Dict[str, Any] selection_text: str - intent_texts: Dict[str, str] + normalized_selection_text: str + intent_values: Dict[str, str] + + +@dataclass +class _SelectionContext: + query_texts: Tuple[str, ...] + matched_terms_by_intent: Dict[str, Tuple[str, ...]] + query_vector: Optional[np.ndarray] + text_match_cache: Dict[Tuple[str, str], bool] = field(default_factory=dict) + selection_vector_cache: Dict[str, Optional[np.ndarray]] = field(default_factory=dict) + similarity_cache: Dict[str, Optional[float]] = field(default_factory=dict) class StyleSkuSelector: @@ -50,11 +61,9 @@ class StyleSkuSelector: registry: StyleIntentRegistry, *, text_encoder_getter: Optional[Callable[[], Any]] = None, - tokenizer_getter: Optional[Callable[[], Any]] = None, ) -> None: self.registry = registry self._text_encoder_getter = text_encoder_getter - self._tokenizer_getter = tokenizer_getter def prepare_hits( self, @@ -66,9 +75,7 @@ class StyleSkuSelector: if not isinstance(style_profile, StyleIntentProfile) or not style_profile.is_active: return decisions - query_texts = self._build_query_texts(parsed_query, style_profile) - query_vector = self._get_query_vector(parsed_query) - tokenizer = self._get_tokenizer() + selection_context = self._build_selection_context(parsed_query, style_profile) for hit in es_hits: source = hit.get("_source") @@ -78,16 +85,15 @@ class StyleSkuSelector: decision = self._select_for_source( source, style_profile=style_profile, - query_texts=query_texts, - query_vector=query_vector, - tokenizer=tokenizer, + selection_context=selection_context, ) if decision is None: continue - self._apply_decision_to_source(source, decision) if decision.rerank_suffix: 
hit["_style_rerank_suffix"] = decision.rerank_suffix + else: + hit.pop("_style_rerank_suffix", None) doc_id = hit.get("_id") if doc_id is not None: @@ -116,6 +122,8 @@ class StyleSkuSelector: self._apply_decision_to_source(source, decision) if decision.rerank_suffix: hit["_style_rerank_suffix"] = decision.rerank_suffix + else: + hit.pop("_style_rerank_suffix", None) def _build_query_texts( self, @@ -165,25 +173,34 @@ class StyleSkuSelector: return None return np.asarray(vectors[0], dtype=np.float32) + def _build_selection_context( + self, + parsed_query: Any, + style_profile: StyleIntentProfile, + ) -> _SelectionContext: + matched_terms_by_intent: Dict[str, List[str]] = {} + for intent in style_profile.intents: + normalized_term = normalize_query_text(intent.matched_term) + if not normalized_term: + continue + matched_terms = matched_terms_by_intent.setdefault(intent.intent_type, []) + if normalized_term not in matched_terms: + matched_terms.append(normalized_term) + + return _SelectionContext( + query_texts=tuple(self._build_query_texts(parsed_query, style_profile)), + matched_terms_by_intent={ + intent_type: tuple(terms) + for intent_type, terms in matched_terms_by_intent.items() + }, + query_vector=self._get_query_vector(parsed_query), + ) + def _get_text_encoder(self) -> Any: if self._text_encoder_getter is None: return None return self._text_encoder_getter() - def _get_tokenizer(self) -> Any: - if self._tokenizer_getter is None: - return None - return self._tokenizer_getter() - - @staticmethod - def _fallback_sku_text(sku: Dict[str, Any]) -> str: - parts = [] - for field_name in ("option1_value", "option2_value", "option3_value"): - value = str(sku.get(field_name) or "").strip() - if value: - parts.append(value) - return " ".join(parts) - def _resolve_dimensions( self, source: Dict[str, Any], @@ -212,157 +229,171 @@ class StyleSkuSelector: skus: List[Dict[str, Any]], resolved_dimensions: Dict[str, Optional[str]], ) -> List[_SkuCandidate]: + if not 
resolved_dimensions or any(not field_name for field_name in resolved_dimensions.values()): + return [] + candidates: List[_SkuCandidate] = [] for index, sku in enumerate(skus): - fallback_text = self._fallback_sku_text(sku) - intent_texts: Dict[str, str] = {} + intent_values: Dict[str, str] = {} for intent_type, field_name in resolved_dimensions.items(): - if field_name: - value = str(sku.get(field_name) or "").strip() - intent_texts[intent_type] = value or fallback_text - else: - intent_texts[intent_type] = fallback_text + if not field_name: + continue + intent_values[intent_type] = str(sku.get(field_name) or "").strip() selection_parts: List[str] = [] seen = set() - for value in intent_texts.values(): + for value in intent_values.values(): normalized = normalize_query_text(value) if not normalized or normalized in seen: continue seen.add(normalized) - selection_parts.append(str(value).strip()) + selection_parts.append(value) - selection_text = " ".join(selection_parts).strip() or fallback_text + selection_text = " ".join(selection_parts).strip() candidates.append( _SkuCandidate( index=index, sku_id=str(sku.get("sku_id") or ""), sku=sku, selection_text=selection_text, - intent_texts=intent_texts, + normalized_selection_text=normalize_query_text(selection_text), + intent_values=intent_values, ) ) return candidates @staticmethod - def _is_direct_match( - candidate: _SkuCandidate, - query_texts: Sequence[str], - ) -> bool: - if not candidate.intent_texts or not query_texts: - return False - for value in candidate.intent_texts.values(): - normalized_value = normalize_query_text(value) - if not normalized_value: - return False - if not any(normalized_value in query_text for query_text in query_texts): - return False - return True - - def _is_generalized_match( + def _empty_decision( + resolved_dimensions: Dict[str, Optional[str]], + matched_stage: str, + ) -> SkuSelectionDecision: + return SkuSelectionDecision( + selected_sku_id=None, + rerank_suffix="", + 
selected_text="", + matched_stage=matched_stage, + resolved_dimensions=dict(resolved_dimensions), + ) + + def _is_text_match( self, - candidate: _SkuCandidate, - style_profile: StyleIntentProfile, - tokenizer: Any, + intent_type: str, + value: str, + selection_context: _SelectionContext, ) -> bool: - if not candidate.intent_texts: + normalized_value = normalize_query_text(value) + if not normalized_value: return False - for intent_type, value in candidate.intent_texts.items(): - definition = self.registry.get_definition(intent_type) - if definition is None: - return False - matched_canonicals = definition.match_text(value, tokenizer=tokenizer) - if not matched_canonicals.intersection(style_profile.get_canonical_values(intent_type)): - return False - return True + cache_key = (intent_type, normalized_value) + cached = selection_context.text_match_cache.get(cache_key) + if cached is not None: + return cached + + matched_terms = selection_context.matched_terms_by_intent.get(intent_type, ()) + has_term_match = any(term in normalized_value for term in matched_terms if term) + query_contains_value = any( + normalized_value in query_text + for query_text in selection_context.query_texts + ) + matched = bool(has_term_match or query_contains_value) + selection_context.text_match_cache[cache_key] = matched + return matched + + def _find_first_text_match( + self, + candidates: Sequence[_SkuCandidate], + selection_context: _SelectionContext, + ) -> Optional[_SkuCandidate]: + for candidate in candidates: + if candidate.intent_values and all( + self._is_text_match(intent_type, value, selection_context) + for intent_type, value in candidate.intent_values.items() + ): + return candidate + return None def _select_by_embedding( self, candidates: Sequence[_SkuCandidate], - query_vector: Optional[np.ndarray], + selection_context: _SelectionContext, ) -> Tuple[Optional[_SkuCandidate], Optional[float]]: if not candidates: return None, None text_encoder = self._get_text_encoder() - if 
query_vector is None or text_encoder is None: - return candidates[0], None + if selection_context.query_vector is None or text_encoder is None: + return None, None unique_texts = list( dict.fromkeys( - normalize_query_text(candidate.selection_text) + candidate.normalized_selection_text for candidate in candidates - if normalize_query_text(candidate.selection_text) + if candidate.normalized_selection_text + and candidate.normalized_selection_text not in selection_context.selection_vector_cache ) ) - if not unique_texts: - return candidates[0], None - - vectors = text_encoder.encode(unique_texts, priority=1) - vector_map: Dict[str, np.ndarray] = {} - for key, vector in zip(unique_texts, vectors): - if vector is None: - continue - vector_map[key] = np.asarray(vector, dtype=np.float32) + if unique_texts: + vectors = text_encoder.encode(unique_texts, priority=1) + for key, vector in zip(unique_texts, vectors): + selection_context.selection_vector_cache[key] = ( + np.asarray(vector, dtype=np.float32) if vector is not None else None + ) best_candidate: Optional[_SkuCandidate] = None best_score: Optional[float] = None - query_vector_array = np.asarray(query_vector, dtype=np.float32) + query_vector_array = np.asarray(selection_context.query_vector, dtype=np.float32) for candidate in candidates: - normalized_text = normalize_query_text(candidate.selection_text) - candidate_vector = vector_map.get(normalized_text) - if candidate_vector is None: + normalized_text = candidate.normalized_selection_text + if not normalized_text: + continue + + score = selection_context.similarity_cache.get(normalized_text) + if score is None: + candidate_vector = selection_context.selection_vector_cache.get(normalized_text) + if candidate_vector is None: + selection_context.similarity_cache[normalized_text] = None + continue + score = float(np.inner(query_vector_array, candidate_vector)) + selection_context.similarity_cache[normalized_text] = score + + if score is None: continue - score = 
float(np.inner(query_vector_array, candidate_vector)) if best_score is None or score > best_score: best_candidate = candidate best_score = score - return best_candidate or candidates[0], best_score + return best_candidate, best_score def _select_for_source( self, source: Dict[str, Any], *, style_profile: StyleIntentProfile, - query_texts: Sequence[str], - query_vector: Optional[np.ndarray], - tokenizer: Any, + selection_context: _SelectionContext, ) -> Optional[SkuSelectionDecision]: skus = source.get("skus") if not isinstance(skus, list) or not skus: return None resolved_dimensions = self._resolve_dimensions(source, style_profile) + if not resolved_dimensions or any(not field_name for field_name in resolved_dimensions.values()): + return self._empty_decision(resolved_dimensions, matched_stage="unresolved") + candidates = self._build_candidates(skus, resolved_dimensions) if not candidates: - return None + return self._empty_decision(resolved_dimensions, matched_stage="no_candidates") - direct_matches = [candidate for candidate in candidates if self._is_direct_match(candidate, query_texts)] - if len(direct_matches) == 1: - chosen = direct_matches[0] - return self._build_decision(chosen, resolved_dimensions, matched_stage="direct") + text_match = self._find_first_text_match(candidates, selection_context) + if text_match is not None: + return self._build_decision(text_match, resolved_dimensions, matched_stage="text") - generalized_matches: List[_SkuCandidate] = [] - if not direct_matches: - generalized_matches = [ - candidate - for candidate in candidates - if self._is_generalized_match(candidate, style_profile, tokenizer) - ] - if len(generalized_matches) == 1: - chosen = generalized_matches[0] - return self._build_decision(chosen, resolved_dimensions, matched_stage="generalized") - - embedding_pool = direct_matches or generalized_matches or candidates - chosen, similarity_score = self._select_by_embedding(embedding_pool, query_vector) + chosen, similarity_score = 
self._select_by_embedding(candidates, selection_context) if chosen is None: - return None - stage = "embedding_from_matches" if direct_matches or generalized_matches else "embedding_from_all" + return self._empty_decision(resolved_dimensions, matched_stage="no_match") return self._build_decision( chosen, resolved_dimensions, - matched_stage=stage, + matched_stage="embedding", similarity_score=similarity_score, ) diff --git a/tests/test_search_rerank_window.py b/tests/test_search_rerank_window.py index cfb798e..2ca38f0 100644 --- a/tests/test_search_rerank_window.py +++ b/tests/test_search_rerank_window.py @@ -30,7 +30,6 @@ class _FakeParsedQuery: detected_language: str = "en" translations: Dict[str, str] = None query_vector: Any = None - domain: str = "default" style_intent_profile: Any = None def to_dict(self) -> Dict[str, Any]: @@ -40,7 +39,6 @@ class _FakeParsedQuery: "rewritten_query": self.rewritten_query, "detected_language": self.detected_language, "translations": self.translations or {}, - "domain": self.domain, "style_intent_profile": ( self.style_intent_profile.to_dict() if self.style_intent_profile is not None else None ), @@ -542,6 +540,137 @@ def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch assert result.results[0].image_url == "https://img/black.jpg" +def test_searcher_uses_first_text_match_without_comparing_all_matches(monkeypatch): + es_client = _FakeESClient(total_hits=1) + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) + context = create_request_context(reqid="sku-first-text", uid="u-sku-first-text") + + monkeypatch.setattr( + "search.searcher.get_tenant_config_loader", + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), + ) + + class _TextMatchQueryParser: + text_encoder = None + + def parse( + self, + query: str, + tenant_id: str, + generate_vector: bool, + context: Any, + target_languages: Any = None, + ): + return _FakeParsedQuery( + 
original_query=query, + query_normalized=query, + rewritten_query=query, + translations={}, + style_intent_profile=_build_style_intent_profile( + "color", "black", "color", "colors", "颜色" + ), + ) + + searcher.query_parser = _TextMatchQueryParser() + + def _full_source_with_multiple_text_matches(doc_id: str) -> Dict[str, Any]: + return { + "spu_id": doc_id, + "title": {"en": f"product-{doc_id}"}, + "brief": {"en": f"brief-{doc_id}"}, + "vendor": {"en": f"vendor-{doc_id}"}, + "option1_name": "Color", + "image_url": "https://img/default.jpg", + "skus": [ + {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, + { + "sku_id": "sku-gloss-black", + "option1_value": "Gloss Black", + "image_src": "https://img/gloss-black.jpg", + }, + {"sku_id": "sku-black", "option1_value": "Black", "image_src": "https://img/black.jpg"}, + ], + } + + monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_full_source_with_multiple_text_matches)) + + result = searcher.search( + query="black dress", + tenant_id="162", + from_=0, + size=1, + context=context, + enable_rerank=False, + ) + + assert len(result.results) == 1 + assert result.results[0].skus[0].sku_id == "sku-gloss-black" + assert result.results[0].image_url == "https://img/gloss-black.jpg" + + +def test_searcher_skips_sku_selection_when_option_name_does_not_match_dimension_alias(monkeypatch): + es_client = _FakeESClient(total_hits=1) + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) + context = create_request_context(reqid="sku-unresolved-dimension", uid="u-sku-unresolved-dimension") + + monkeypatch.setattr( + "search.searcher.get_tenant_config_loader", + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}), + ) + + class _UnresolvedDimensionQueryParser: + text_encoder = None + + def parse( + self, + query: str, + tenant_id: str, + generate_vector: bool, + context: Any, + target_languages: Any = None, + ): + return 
_FakeParsedQuery( + original_query=query, + query_normalized=query, + rewritten_query=query, + translations={"en": "black dress"}, + style_intent_profile=_build_style_intent_profile( + "color", "black", "color", "colors", "颜色" + ), + ) + + searcher.query_parser = _UnresolvedDimensionQueryParser() + + def _full_source_with_unmatched_option_name(doc_id: str) -> Dict[str, Any]: + return { + "spu_id": doc_id, + "title": {"en": f"product-{doc_id}"}, + "brief": {"en": f"brief-{doc_id}"}, + "vendor": {"en": f"vendor-{doc_id}"}, + "option1_name": "Tone", + "image_url": "https://img/default.jpg", + "skus": [ + {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, + {"sku_id": "sku-black", "option1_value": "Black", "image_src": "https://img/black.jpg"}, + ], + } + + monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_full_source_with_unmatched_option_name)) + + result = searcher.search( + query="黑色 连衣裙", + tenant_id="162", + from_=0, + size=1, + context=context, + enable_rerank=False, + ) + + assert len(result.results) == 1 + assert result.results[0].skus[0].sku_id == "sku-red" + assert result.results[0].image_url == "https://img/default.jpg" + + def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_match(monkeypatch): es_client = _FakeESClient(total_hits=1) searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) -- libgit2 0.21.2