# test_style_intent.py 3.15 KB
from types import SimpleNamespace

from config import QueryConfig
from query.style_intent import StyleIntentDetector, StyleIntentRegistry


def test_style_intent_detector_matches_original_and_translated_queries():
    """Check detection across a Chinese query and its English translation.

    The original query only contains the color term ("黑色"); the size term
    ("xl") appears solely in the English translation. The resulting profile is
    expected to be active, to report the configured attribute terms as the
    canonical values for both dimensions, and to expose two query variants.
    """
    color_entry = {
        "en_terms": ["black"],
        "zh_terms": ["黑色"],
        "attribute_terms": ["black"],
    }
    size_entry = {
        "en_terms": ["xl", "x-large"],
        "zh_terms": ["加大码"],
        "attribute_terms": ["x-large"],
    }
    term_table = {"color": [color_entry], "size": [size_entry]}
    alias_table = {"color": ["color", "颜色"], "size": ["size", "尺码"]}

    config = QueryConfig(
        style_intent_terms=term_table,
        style_intent_dimension_aliases=alias_table,
    )
    registry = StyleIntentRegistry.from_query_config(config)

    def split_on_whitespace(text):
        # Whitespace tokenization is enough for these space-separated fixtures.
        return text.split()

    detector = StyleIntentDetector(registry, tokenizer=split_on_whitespace)

    zh_query = "黑色 连衣裙"
    parsed = SimpleNamespace(
        original_query=zh_query,
        query_normalized=zh_query,
        rewritten_query=zh_query,
        translations={"en": "black dress xl"},
    )

    result = detector.detect(parsed)

    assert result.is_active is True
    assert result.get_canonical_values("color") == {"black"}
    assert result.get_canonical_values("size") == {"x-large"}
    assert len(result.query_variants) == 2


def test_style_intent_detector_uses_original_query_when_language_translation_missing():
    """Check that an English original query is matched without an "en" translation.

    The only translation supplied is Chinese and carries no color term, so the
    color intent asserted below can only come from the original query text
    ("black dress").
    """
    term_table = {
        "color": [
            {
                "en_terms": ["black"],
                "zh_terms": ["黑色"],
                "attribute_terms": ["black"],
            },
        ],
    }
    alias_table = {"color": ["color", "颜色"]}

    config = QueryConfig(
        style_intent_terms=term_table,
        style_intent_dimension_aliases=alias_table,
    )
    registry = StyleIntentRegistry.from_query_config(config)

    def split_on_whitespace(text):
        # Whitespace tokenization is enough for these space-separated fixtures.
        return text.split()

    detector = StyleIntentDetector(registry, tokenizer=split_on_whitespace)

    en_query = "black dress"
    parsed = SimpleNamespace(
        original_query=en_query,
        query_normalized=en_query,
        rewritten_query=en_query,
        translations={"zh": "连衣裙"},
    )

    result = detector.detect(parsed)

    assert result.get_canonical_values("color") == {"black"}
    # The first detected intent exposes its attribute terms as a tuple.
    first_intent = result.intents[0]
    assert first_intent.attribute_terms == ("black",)


def test_style_intent_detector_tokenizes_each_language_once():
    """Check how often the injected tokenizer is invoked during detection.

    A recording tokenizer captures every text it receives. After one
    ``detect`` call the recording must contain exactly one entry: the Chinese
    query text, which is shared by the original, normalized and rewritten
    query fields.
    """
    term_table = {
        "color": [
            {
                "en_terms": ["black"],
                "zh_terms": ["黑色"],
                "attribute_terms": ["black"],
            },
        ],
        "size": [
            {
                "en_terms": ["xl"],
                "zh_terms": ["加大码"],
                "attribute_terms": ["xl"],
            },
        ],
    }
    alias_table = {"color": ["color", "颜色"], "size": ["size", "尺码"]}

    config = QueryConfig(
        style_intent_terms=term_table,
        style_intent_dimension_aliases=alias_table,
    )
    registry = StyleIntentRegistry.from_query_config(config)

    seen_texts = []

    def recording_tokenizer(text):
        # Record the raw argument first, then tokenize on whitespace.
        seen_texts.append(text)
        return str(text).split()

    detector = StyleIntentDetector(registry, tokenizer=recording_tokenizer)

    zh_query = "黑色 连衣裙"
    parsed = SimpleNamespace(
        original_query=zh_query,
        query_normalized=zh_query,
        rewritten_query=zh_query,
        translations={"en": "black dress xl"},
    )

    result = detector.detect(parsed)

    assert result.is_active is True
    # NOTE(review): the English translation is not expected to reach the
    # injected tokenizer — presumably it is handled by another path inside
    # StyleIntentDetector; confirm against the detector implementation.
    assert seen_texts == ["黑色 连衣裙"]