test_style_intent.py
3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from types import SimpleNamespace
from config import QueryConfig
from query.style_intent import StyleIntentDetector, StyleIntentRegistry
def test_style_intent_detector_matches_original_and_translated_queries():
query_config = QueryConfig(
style_intent_terms={
"color": [{"en_terms": ["black"], "zh_terms": ["黑色"], "attribute_terms": ["black"]}],
"size": [{"en_terms": ["xl", "x-large"], "zh_terms": ["加大码"], "attribute_terms": ["x-large"]}],
},
style_intent_dimension_aliases={
"color": ["color", "颜色"],
"size": ["size", "尺码"],
},
)
detector = StyleIntentDetector(
StyleIntentRegistry.from_query_config(query_config),
tokenizer=lambda text: text.split(),
)
parsed_query = SimpleNamespace(
original_query="黑色 连衣裙",
query_normalized="黑色 连衣裙",
rewritten_query="黑色 连衣裙",
translations={"en": "black dress xl"},
)
profile = detector.detect(parsed_query)
assert profile.is_active is True
assert profile.get_canonical_values("color") == {"black"}
assert profile.get_canonical_values("size") == {"x-large"}
assert len(profile.query_variants) == 2
def test_style_intent_detector_uses_original_query_when_language_translation_missing():
query_config = QueryConfig(
style_intent_terms={
"color": [{"en_terms": ["black"], "zh_terms": ["黑色"], "attribute_terms": ["black"]}],
},
style_intent_dimension_aliases={"color": ["color", "颜色"]},
)
detector = StyleIntentDetector(
StyleIntentRegistry.from_query_config(query_config),
tokenizer=lambda text: text.split(),
)
parsed_query = SimpleNamespace(
original_query="black dress",
query_normalized="black dress",
rewritten_query="black dress",
translations={"zh": "连衣裙"},
)
profile = detector.detect(parsed_query)
assert profile.get_canonical_values("color") == {"black"}
assert profile.intents[0].attribute_terms == ("black",)
def test_style_intent_detector_tokenizes_each_language_once():
query_config = QueryConfig(
style_intent_terms={
"color": [{"en_terms": ["black"], "zh_terms": ["黑色"], "attribute_terms": ["black"]}],
"size": [{"en_terms": ["xl"], "zh_terms": ["加大码"], "attribute_terms": ["xl"]}],
},
style_intent_dimension_aliases={
"color": ["color", "颜色"],
"size": ["size", "尺码"],
},
)
tokenize_calls = []
def counting_tokenizer(text):
tokenize_calls.append(text)
return str(text).split()
detector = StyleIntentDetector(
StyleIntentRegistry.from_query_config(query_config),
tokenizer=counting_tokenizer,
)
parsed_query = SimpleNamespace(
original_query="黑色 连衣裙",
query_normalized="黑色 连衣裙",
rewritten_query="黑色 连衣裙",
translations={"en": "black dress xl"},
)
profile = detector.detect(parsed_query)
assert profile.is_active is True
assert tokenize_calls == ["黑色 连衣裙"]