# test_sku_intent_selector.py (7.52 KB)
from types import SimpleNamespace

from config import QueryConfig
from query.style_intent import DetectedStyleIntent, StyleIntentProfile, StyleIntentRegistry
from search.sku_intent_selector import StyleSkuSelector


def test_style_sku_selector_matches_first_sku_by_attribute_terms():
    """Check SKU selection for a query carrying both a color and a size intent.

    The product exposes three SKUs over the ``Color`` / ``Size`` options.
    The test expects ``prepare_hits`` to pick SKU ``"2"``
    ("Navy Blue" / "X-Large") with stage ``"text"``, and expects
    ``apply_precomputed_decisions`` to move that SKU to the front of
    ``skus`` and to set ``image_url`` to the SKU's ``image_src``.
    """
    intent_terms = {
        "color": [{"en_terms": ["navy"], "zh_terms": ["藏青"], "attribute_terms": ["navy"]}],
        "size": [{"en_terms": ["xl"], "zh_terms": ["加大码"], "attribute_terms": ["x-large"]}],
    }
    dimension_aliases = {
        "color": ["color", "颜色"],
        "size": ["size", "尺码"],
    }
    query_config = QueryConfig(
        style_intent_terms=intent_terms,
        style_intent_dimension_aliases=dimension_aliases,
    )
    selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    # Intents as they would look after detection on the query "藏青 xl".
    color_intent = DetectedStyleIntent(
        intent_type="color",
        canonical_value="navy",
        matched_term="藏青",
        matched_query_text="藏青",
        attribute_terms=("navy",),
        dimension_aliases=("color", "颜色"),
    )
    size_intent = DetectedStyleIntent(
        intent_type="size",
        canonical_value="x-large",
        matched_term="xl",
        matched_query_text="xl",
        attribute_terms=("x-large",),
        dimension_aliases=("size", "尺码"),
    )
    profile = StyleIntentProfile(intents=(color_intent, size_intent))
    parsed_query = SimpleNamespace(style_intent_profile=profile)

    black_medium = {"sku_id": "1", "option1_value": "Black", "option2_value": "M"}
    navy_blue_x_large = {
        "sku_id": "2",
        "option1_value": "Navy Blue",
        "option2_value": "X-Large",
        "image_src": "matched.jpg",
    }
    navy_xl = {"sku_id": "3", "option1_value": "Navy", "option2_value": "XL"}
    source = {
        "option1_name": "Color",
        "option2_name": "Size",
        "skus": [black_medium, navy_blue_x_large, navy_xl],
    }
    hits = [{"_id": "spu-1", "_source": source}]

    decisions = selector.prepare_hits(hits, parsed_query)
    spu_decision = decisions["spu-1"]

    assert spu_decision.selected_sku_id == "2"
    assert spu_decision.selected_text == "Navy Blue X-Large"
    assert spu_decision.matched_stage == "text"

    selector.apply_precomputed_decisions(hits, decisions)

    # Applying the decision mutates the hit's ``_source`` in place.
    leading_sku = source["skus"][0]
    assert leading_sku["sku_id"] == "2"
    assert source["image_url"] == "matched.jpg"


def test_style_sku_selector_returns_no_match_without_attribute_contains():
    """Check that no SKU is selected when no option value contains the term.

    The query carries a "beige" color intent while the product only offers
    "Khaki" and "Light Brown". The decision for the hit is expected to have
    ``selected_sku_id`` of ``None`` and ``matched_stage`` of ``"no_match"``.
    """
    query_config = QueryConfig(
        style_intent_terms={
            "color": [{"en_terms": ["beige"], "zh_terms": ["米色"], "attribute_terms": ["beige"]}],
        },
        style_intent_dimension_aliases={"color": ["color", "颜色"]},
    )
    selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    beige_intent = DetectedStyleIntent(
        intent_type="color",
        canonical_value="beige",
        matched_term="米色",
        matched_query_text="米色",
        attribute_terms=("beige",),
        dimension_aliases=("color", "颜色"),
    )
    parsed_query = SimpleNamespace(
        style_intent_profile=StyleIntentProfile(intents=(beige_intent,)),
    )

    product_source = {
        "option1_name": "Color",
        "skus": [
            {"sku_id": "1", "option1_value": "Khaki"},
            {"sku_id": "2", "option1_value": "Light Brown"},
        ],
    }
    hits = [{"_id": "spu-1", "_source": product_source}]

    decisions = selector.prepare_hits(hits, parsed_query)
    spu_decision = decisions["spu-1"]

    assert spu_decision.selected_sku_id is None
    assert spu_decision.matched_stage == "no_match"


def test_is_text_match_uses_token_boundaries_for_sizes():
    """Check that the size term "l" does not match inside "xl" or "xxl".

    With a single size intent whose attribute term is ``"l"``,
    ``_is_text_match`` is expected to accept the value ``"l"`` and reject
    the values ``"xl"`` and ``"xxl"``.
    """
    query_config = QueryConfig(
        style_intent_terms={
            "size": [{"en_terms": ["l"], "zh_terms": ["大码"], "attribute_terms": ["l"]}],
        },
        style_intent_dimension_aliases={"size": ["size", "尺码"]},
    )
    selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    large_intent = DetectedStyleIntent(
        intent_type="size",
        canonical_value="l",
        matched_term="l",
        matched_query_text="l",
        attribute_terms=("l",),
        dimension_aliases=("size", "尺码"),
    )
    style_profile = StyleIntentProfile(intents=(large_intent,))
    selection_context = selector._build_selection_context(style_profile)

    assert selector._is_text_match("size", selection_context, normalized_value="l")
    # Values that merely embed the letter "l" must be rejected.
    for embedding_value in ("xl", "xxl"):
        assert not selector._is_text_match(
            "size", selection_context, normalized_value=embedding_value
        )


def test_is_text_match_handles_punctuation_and_descriptive_attribute_values():
    """Check matching when the term sits among other words or punctuation.

    Four intents (color, style, accessory, size) are registered, and
    ``_is_text_match`` is expected to accept each of these attribute values:
    ``"gray blue"``, ``"off-white/lined"``, ``"army green + headscarf"`` and
    ``"2xl recommended 65-70kg"``.
    """
    # (intent type, English/attribute term, Chinese term, Chinese dimension alias)
    intent_specs = (
        ("color", "blue", "蓝色", "颜色"),
        ("style", "off-white", "米白", "风格"),
        ("accessory", "headscarf", "头巾", "配饰"),
        ("size", "2xl", "2xl", "尺码"),
    )
    query_config = QueryConfig(
        style_intent_terms={
            kind: [{"en_terms": [term], "zh_terms": [zh_term], "attribute_terms": [term]}]
            for kind, term, zh_term, _ in intent_specs
        },
        style_intent_dimension_aliases={
            kind: [kind, zh_alias] for kind, _, _, zh_alias in intent_specs
        },
    )
    selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    # Every detected intent uses its English term as canonical value,
    # matched term, matched query text and sole attribute term.
    detected_intents = tuple(
        DetectedStyleIntent(
            intent_type=kind,
            canonical_value=term,
            matched_term=term,
            matched_query_text=term,
            attribute_terms=(term,),
            dimension_aliases=(kind, zh_alias),
        )
        for kind, term, _, zh_alias in intent_specs
    )
    style_profile = StyleIntentProfile(intents=detected_intents)
    selection_context = selector._build_selection_context(style_profile)

    # (intent type, normalized attribute value expected to match)
    expected_matches = (
        ("color", "gray blue"),
        ("style", "off-white/lined"),
        ("accessory", "army green + headscarf"),
        ("size", "2xl recommended 65-70kg"),
    )
    for kind, attribute_value in expected_matches:
        assert selector._is_text_match(kind, selection_context, normalized_value=attribute_value)