"""Regression tests for ``StyleSkuSelector``.

Covers text matching of detected style intents against SKU option values and
SPU-level taxonomy attributes, plus image-based SKU picking from ``inner_hits``.
"""

from types import SimpleNamespace

import pytest

from config import QueryConfig
from query.style_intent import DetectedStyleIntent, StyleIntentProfile, StyleIntentRegistry
from search.sku_intent_selector import StyleSkuSelector

pytestmark = [pytest.mark.intent, pytest.mark.regression]


# ---------------------------------------------------------------------------
# Shared fixtures / builders
# ---------------------------------------------------------------------------


def _khaki_intent() -> DetectedStyleIntent:
    """Mirrors what StyleIntentDetector now emits (all_terms union of zh/en/attribute)."""
    return DetectedStyleIntent(
        intent_type="color",
        canonical_value="beige",
        matched_term="卡其色",
        matched_query_text="卡其色",
        attribute_terms=("beige", "khaki"),
        dimension_aliases=("color", "颜色"),
        all_terms=("米色", "卡其色", "beige", "khaki"),
    )


def _color_registry() -> StyleIntentRegistry:
    """Build a registry that knows a single beige/khaki colour intent."""
    return StyleIntentRegistry.from_query_config(
        QueryConfig(
            style_intent_terms={
                "color": [
                    {
                        "en_terms": ["beige", "khaki"],
                        "zh_terms": ["米色", "卡其色"],
                        "attribute_terms": ["beige", "khaki"],
                    }
                ],
            },
            style_intent_dimension_aliases={"color": ["color", "颜色"]},
        )
    )


def _khaki_query() -> SimpleNamespace:
    """Return a parsed-query stand-in whose profile holds only the khaki intent."""
    return SimpleNamespace(
        style_intent_profile=StyleIntentProfile(intents=(_khaki_intent(),))
    )


def _spu_hit(source, inner_hits=None):
    """Wrap ``source`` as a search hit with id ``spu-1``.

    The ``inner_hits`` key is only present when ``inner_hits`` is given, so
    hits without image results look exactly like a hit that never had them.
    """
    hit = {"_id": "spu-1", "_source": source}
    if inner_hits is not None:
        hit["inner_hits"] = inner_hits
    return hit


def _knn_inner_hits(query_name, *nested_hits):
    """Build the ``inner_hits`` payload for one named image-kNN query."""
    return {query_name: {"hits": {"hits": list(nested_hits)}}}


# ---------------------------------------------------------------------------
# Text matching against option values
# ---------------------------------------------------------------------------


def test_style_sku_selector_matches_first_sku_by_attribute_terms():
    registry = StyleIntentRegistry.from_query_config(
        QueryConfig(
            style_intent_terms={
                "color": [
                    {"en_terms": ["navy"], "zh_terms": ["藏青"], "attribute_terms": ["navy"]}
                ],
                "size": [
                    {"en_terms": ["xl"], "zh_terms": ["加大码"], "attribute_terms": ["x-large"]}
                ],
            },
            style_intent_dimension_aliases={
                "color": ["color", "颜色"],
                "size": ["size", "尺码"],
            },
        )
    )
    selector = StyleSkuSelector(registry)
    parsed_query = SimpleNamespace(
        style_intent_profile=StyleIntentProfile(
            intents=(
                DetectedStyleIntent(
                    intent_type="color",
                    canonical_value="navy",
                    matched_term="藏青",
                    matched_query_text="藏青",
                    attribute_terms=("navy",),
                    dimension_aliases=("color", "颜色"),
                ),
                DetectedStyleIntent(
                    intent_type="size",
                    canonical_value="x-large",
                    matched_term="xl",
                    matched_query_text="xl",
                    attribute_terms=("x-large",),
                    dimension_aliases=("size", "尺码"),
                ),
            ),
        )
    )
    source = {
        "option1_name": "Color",
        "option2_name": "Size",
        "skus": [
            {"sku_id": "1", "option1_value": "Black", "option2_value": "M"},
            {
                "sku_id": "2",
                "option1_value": "Navy Blue",
                "option2_value": "X-Large",
                "image_src": "matched.jpg",
            },
            {"sku_id": "3", "option1_value": "Navy", "option2_value": "XL"},
        ],
    }
    hits = [_spu_hit(source)]

    decisions = selector.prepare_hits(hits, parsed_query)
    decision = decisions["spu-1"]

    assert decision.selected_sku_id == "2"
    assert decision.selected_text == "Navy Blue X-Large"
    assert decision.final_source == "option"
    assert decision.matched_sources == {"color": "option", "size": "option"}
    assert decision.matched_stage == "option"  # back-compat alias

    # Applying the decision mutates the hit source in place: the selected SKU
    # moves to the front and its image becomes the SPU image.
    selector.apply_precomputed_decisions(hits, decisions)

    assert source["skus"][0]["sku_id"] == "2"
    assert source["image_url"] == "matched.jpg"


def test_style_sku_selector_returns_no_match_without_attribute_contains():
    registry = StyleIntentRegistry.from_query_config(
        QueryConfig(
            style_intent_terms={
                "color": [
                    {"en_terms": ["beige"], "zh_terms": ["米色"], "attribute_terms": ["beige"]}
                ],
            },
            style_intent_dimension_aliases={"color": ["color", "颜色"]},
        )
    )
    selector = StyleSkuSelector(registry)
    parsed_query = SimpleNamespace(
        style_intent_profile=StyleIntentProfile(
            intents=(
                DetectedStyleIntent(
                    intent_type="color",
                    canonical_value="beige",
                    matched_term="米色",
                    matched_query_text="米色",
                    attribute_terms=("beige",),
                    dimension_aliases=("color", "颜色"),
                ),
            ),
        )
    )
    hits = [
        _spu_hit(
            {
                "option1_name": "Color",
                "skus": [
                    {"sku_id": "1", "option1_value": "Khaki"},
                    {"sku_id": "2", "option1_value": "Light Brown"},
                ],
            }
        )
    ]

    decisions = selector.prepare_hits(hits, parsed_query)

    assert decisions["spu-1"].selected_sku_id is None
    assert decisions["spu-1"].final_source == "none"


def test_is_text_match_uses_token_boundaries_for_sizes():
    registry = StyleIntentRegistry.from_query_config(
        QueryConfig(
            style_intent_terms={
                "size": [{"en_terms": ["l"], "zh_terms": ["大码"], "attribute_terms": ["l"]}],
            },
            style_intent_dimension_aliases={"size": ["size", "尺码"]},
        )
    )
    selector = StyleSkuSelector(registry)
    style_profile = StyleIntentProfile(
        intents=(
            DetectedStyleIntent(
                intent_type="size",
                canonical_value="l",
                matched_term="l",
                matched_query_text="l",
                attribute_terms=("l",),
                dimension_aliases=("size", "尺码"),
            ),
        ),
    )
    selection_context = selector._build_selection_context(style_profile)

    # "l" must match only as a whole token, never as the tail of "xl" / "xxl".
    assert selector._is_text_match("size", selection_context, normalized_value="l")
    assert not selector._is_text_match("size", selection_context, normalized_value="xl")
    assert not selector._is_text_match("size", selection_context, normalized_value="xxl")


def test_is_text_match_handles_punctuation_and_descriptive_attribute_values():
    registry = StyleIntentRegistry.from_query_config(
        QueryConfig(
            style_intent_terms={
                "color": [
                    {"en_terms": ["blue"], "zh_terms": ["蓝色"], "attribute_terms": ["blue"]}
                ],
                "style": [
                    {
                        "en_terms": ["off-white"],
                        "zh_terms": ["米白"],
                        "attribute_terms": ["off-white"],
                    }
                ],
                "accessory": [
                    {
                        "en_terms": ["headscarf"],
                        "zh_terms": ["头巾"],
                        "attribute_terms": ["headscarf"],
                    }
                ],
                "size": [
                    {"en_terms": ["2xl"], "zh_terms": ["2xl"], "attribute_terms": ["2xl"]}
                ],
            },
            style_intent_dimension_aliases={
                "color": ["color", "颜色"],
                "style": ["style", "风格"],
                "accessory": ["accessory", "配饰"],
                "size": ["size", "尺码"],
            },
        )
    )
    selector = StyleSkuSelector(registry)
    style_profile = StyleIntentProfile(
        intents=(
            DetectedStyleIntent(
                intent_type="color",
                canonical_value="blue",
                matched_term="blue",
                matched_query_text="blue",
                attribute_terms=("blue",),
                dimension_aliases=("color", "颜色"),
            ),
            DetectedStyleIntent(
                intent_type="style",
                canonical_value="off-white",
                matched_term="off-white",
                matched_query_text="off-white",
                attribute_terms=("off-white",),
                dimension_aliases=("style", "风格"),
            ),
            DetectedStyleIntent(
                intent_type="accessory",
                canonical_value="headscarf",
                matched_term="headscarf",
                matched_query_text="headscarf",
                attribute_terms=("headscarf",),
                dimension_aliases=("accessory", "配饰"),
            ),
            DetectedStyleIntent(
                intent_type="size",
                canonical_value="2xl",
                matched_term="2xl",
                matched_query_text="2xl",
                attribute_terms=("2xl",),
                dimension_aliases=("size", "尺码"),
            ),
        ),
    )
    selection_context = selector._build_selection_context(style_profile)

    assert selector._is_text_match("color", selection_context, normalized_value="gray blue")
    assert selector._is_text_match(
        "style", selection_context, normalized_value="off-white/lined"
    )
    assert selector._is_text_match(
        "accessory", selection_context, normalized_value="army green + headscarf"
    )
    assert selector._is_text_match(
        "size", selection_context, normalized_value="2xl recommended 65-70kg"
    )


def test_zh_color_intent_matches_noisy_option_value():
    """Khaki query vs. a noisy option value.

    An ``option1_value`` that starts with "卡其色" (khaki) but carries a suffix
    such as a V-neck description must still be matched.
    """
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                "skus": [
                    {"sku_id": "1", "option1_value": "黑色长裙"},
                    {"sku_id": "2", "option1_value": "卡其色v领收腰长裙【常规款】"},
                ],
            }
        )
    ]

    decisions = selector.prepare_hits(hits, _khaki_query())

    assert decisions["spu-1"].selected_sku_id == "2"
    assert decisions["spu-1"].final_source == "option"


@pytest.mark.parametrize(
    "option_value",
    [
        "卡其色(无内衬)",  # ASCII parentheses
        "卡其色（无内衬）",  # full-width parentheses (was an accidental duplicate)
        "卡其色【常规款】",
        "卡其色/常规款",
        "卡其色·无内衬",
        "卡其色 - 常规",
        "卡其色,常规",
        "卡其色|常规",
        "卡其色—加厚",
    ],
)
def test_zh_color_intent_matches_various_brackets_and_separators(option_value: str):
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                "skus": [
                    {"sku_id": "441670", "option1_value": "白色(无内衬)"},
                    {"sku_id": "441679", "option1_value": option_value},
                ],
            }
        )
    ]

    decisions = selector.prepare_hits(hits, _khaki_query())

    assert decisions["spu-1"].selected_sku_id == "441679"


def test_zh_color_intent_matches_noisy_option_value_with_fullwidth_parens():
    """Full-width-parenthesis option value must be matched by the option branch.

    "卡其色（无内衬）" is the field reproduction of the earlier taxonomy-override
    bug; the option branch is now required to match it.
    """
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                # Even if SPU-level taxonomy existed, white SKU must NOT leak in.
                "enriched_taxonomy_attributes": [
                    {"name": "Color", "value": {"zh": "卡其色"}}
                ],
                "skus": [
                    {"sku_id": "441670", "option1_value": "白色（无内衬）"},
                    {"sku_id": "441679", "option1_value": "卡其色（无内衬）"},
                ],
            }
        )
    ]

    d = selector.prepare_hits(hits, _khaki_query())["spu-1"]

    assert d.selected_sku_id == "441679"
    assert d.selected_text == "卡其色（无内衬）"
    assert d.final_source == "option"
    assert d.matched_sources == {"color": "option"}


# ---------------------------------------------------------------------------
# Taxonomy attributes as an additional text-matching source
# ---------------------------------------------------------------------------


def test_taxonomy_attribute_extends_text_matching_source():
    """Taxonomy colour lets every SKU text-match when options cannot tell them apart.

    Even if ``optionN`` cannot distinguish SKUs, the ``Color`` entry in
    ``enriched_taxonomy_attributes`` lets all SKUs of the SPU pass text
    matching; an image pick (if any) then decides the concrete SKU, otherwise
    the first SKU is taken.
    """
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "Style",  # unrelated dimension → slot unresolved
                "enriched_taxonomy_attributes": [
                    {"name": "Color", "value": {"zh": "卡其色", "en": "khaki"}}
                ],
                "skus": [
                    {"sku_id": "a", "option1_value": "A"},
                    {"sku_id": "b", "option1_value": "B"},
                ],
            }
        )
    ]

    # Taxonomy matches → both SKUs text-matched; no image pick → first one wins.
    d = selector.prepare_hits(hits, _khaki_query())["spu-1"]

    assert d.selected_sku_id == "a"
    assert d.final_source == "taxonomy"
    # selected_text reflects the real matched taxonomy value, not SKU's unrelated option.
    assert d.selected_text == "卡其色"
    assert d.matched_sources == {"color": "taxonomy"}


def test_taxonomy_does_not_override_contradicting_sku_option_value():
    """SPU-level taxonomy must not promote a SKU whose own value contradicts it.

    The taxonomy says "卡其色" (khaki) but one SKU's own ``option1_value`` is
    "白色(无内衬)" (white); that SKU must not be treated as a text match.
    """
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                "enriched_taxonomy_attributes": [
                    {"name": "Color", "value": {"zh": "卡其色", "en": "khaki"}}
                ],
                "skus": [
                    {"sku_id": "white", "option1_value": "白色(无内衬)"},
                    {"sku_id": "khaki", "option1_value": "卡其色棉"},
                ],
            }
        )
    ]

    decisions = selector.prepare_hits(hits, _khaki_query())

    # Only the khaki SKU's own value matches; taxonomy must not surface the white SKU.
    assert decisions["spu-1"].selected_sku_id == "khaki"
    assert decisions["spu-1"].final_source == "option"


def test_taxonomy_fills_in_only_when_sku_self_value_is_empty():
    """Mixed case: empty option value falls back to taxonomy, a white one does not match."""
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                "enriched_taxonomy_attributes": [
                    {"name": "Color", "value": {"zh": "卡其色"}}
                ],
                "skus": [
                    {"sku_id": "no-value", "option1_value": ""},
                    {"sku_id": "white", "option1_value": "白色"},
                ],
            }
        )
    ]

    d = selector.prepare_hits(hits, _khaki_query())["spu-1"]

    assert d.selected_sku_id == "no-value"
    assert d.final_source == "taxonomy"
    assert d.selected_text == "卡其色"


# ---------------------------------------------------------------------------
# Image pick (inner_hits) behaviour
# ---------------------------------------------------------------------------


def test_image_pick_serves_as_visual_tiebreak_within_text_matched():
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                "skus": [
                    {
                        "sku_id": "khaki-cotton",
                        "option1_value": "卡其色棉",
                        "image_src": "https://cdn/x/khaki-cotton.jpg",
                    },
                    {
                        "sku_id": "khaki-linen",
                        "option1_value": "卡其色麻",
                        "image_src": "https://cdn/x/khaki-linen.jpg",
                    },
                ],
            },
            inner_hits=_knn_inner_hits(
                "exact_image_knn_query_hits",
                {"_score": 0.87, "_source": {"url": "https://cdn/x/khaki-linen.jpg"}},
            ),
        )
    ]

    decision = selector.prepare_hits(hits, _khaki_query())["spu-1"]

    # Both SKUs text-match; the image hit breaks the tie in favour of linen.
    assert decision.selected_sku_id == "khaki-linen"
    assert decision.final_source == "option"
    assert decision.image_pick_sku_id == "khaki-linen"
    assert decision.image_pick_score == 0.87


def test_image_only_selection_when_no_style_intent():
    """Without a style intent, the SKU picked by the image hit alone is promoted."""
    selector = StyleSkuSelector(_color_registry())
    parsed_query = SimpleNamespace(style_intent_profile=None)
    hits = [
        _spu_hit(
            {
                "option1_name": "Color",
                "image_url": "https://cdn/x/default.jpg",
                "skus": [
                    {
                        "sku_id": "red",
                        "option1_value": "Red",
                        "image_src": "https://cdn/x/red.jpg",
                    },
                    {
                        "sku_id": "blue",
                        "option1_value": "Blue",
                        "image_src": "https://cdn/x/blue.jpg",
                    },
                ],
            },
            inner_hits=_knn_inner_hits(
                "image_knn_query_hits",
                {"_score": 0.74, "_source": {"url": "https://cdn/x/blue.jpg"}},
            ),
        )
    ]

    decisions = selector.prepare_hits(hits, parsed_query)
    decision = decisions["spu-1"]

    assert decision.selected_sku_id == "blue"
    assert decision.final_source == "image"

    selector.apply_precomputed_decisions(hits, decisions)
    source = hits[0]["_source"]

    assert source["skus"][0]["sku_id"] == "blue"
    assert source["image_url"] == "https://cdn/x/blue.jpg"


def test_image_pick_ignored_when_text_matches_but_visual_url_not_in_text_set():
    """Text match has priority: an image pick on a non-text-matched SKU does not take over."""
    selector = StyleSkuSelector(_color_registry())
    hits = [
        _spu_hit(
            {
                "option1_name": "颜色",
                "skus": [
                    {
                        "sku_id": "khaki",
                        "option1_value": "卡其色",
                        "image_src": "https://cdn/x/khaki.jpg",
                    },
                    {
                        "sku_id": "black",
                        "option1_value": "黑色",
                        "image_src": "https://cdn/x/black.jpg",
                    },
                ],
            },
            inner_hits=_knn_inner_hits(
                "exact_image_knn_query_hits",
                {"_score": 0.9, "_source": {"url": "https://cdn/x/black.jpg"}},
            ),
        )
    ]

    decision = selector.prepare_hits(hits, _khaki_query())["spu-1"]

    # Hard text-first: khaki stays, though image pointed at black.
    assert decision.selected_sku_id == "khaki"
    assert decision.final_source == "option"
    assert decision.image_pick_sku_id == "black"


def test_image_pick_matches_when_inner_hit_url_has_query_string():
    """An inner-hit URL with a query string must align with the SKU URL that has none."""
    selector = StyleSkuSelector(_color_registry())
    parsed_query = SimpleNamespace(style_intent_profile=None)
    hits = [
        _spu_hit(
            {
                "skus": [
                    {"sku_id": "s1", "image_src": "https://cdn/img/p.jpg"},
                ],
            },
            inner_hits=_knn_inner_hits(
                "exact_image_knn_query_hits",
                {
                    "_score": 0.8,
                    "_source": {"url": "https://cdn/img/p.jpg?width=800&quality=85"},
                },
            ),
        )
    ]

    d = selector.prepare_hits(hits, parsed_query)["spu-1"]

    assert d.selected_sku_id == "s1"
    assert d.final_source == "image"


def test_image_pick_uses_nested_offset_and_image_embedding_when_needed():
    """Nested offset resolves the canonical URL when ``_source.url`` does not match.

    When the inner hit's ``_source.url`` is written differently from the SKU's
    image, the ``_nested.offset`` is used to read the URL from ``image_embedding``.
    """
    selector = StyleSkuSelector(_color_registry())
    parsed_query = SimpleNamespace(style_intent_profile=None)
    hits = [
        _spu_hit(
            {
                "image_embedding": [
                    {"url": "https://cdn/a/spu.jpg"},
                    {"url": "https://cdn/b/sku-match.jpg"},
                ],
                "skus": [
                    {"sku_id": "sku-a", "image_src": "//cdn/b/sku-match.jpg"},
                ],
            },
            inner_hits=_knn_inner_hits(
                "exact_image_knn_query_hits",
                {
                    "_score": 0.91,
                    "_nested": {"field": "image_embedding", "offset": 1},
                    "_source": {"url": "https://wrong.example/x.jpg"},
                },
            ),
        )
    ]

    d = selector.prepare_hits(hits, parsed_query)["spu-1"]

    assert d.selected_sku_id == "sku-a"
    assert d.image_pick_url == "https://cdn/b/sku-match.jpg"