b712a831
tangwang
意图识别策略和性能优化
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
from types import SimpleNamespace
from config import QueryConfig
from query.style_intent import DetectedStyleIntent, StyleIntentProfile, StyleIntentRegistry
from search.sku_intent_selector import StyleSkuSelector
def test_style_sku_selector_matches_first_sku_by_attribute_terms():
    """Select the SKU whose option values satisfy both the color and size intents.

    The query carries a "navy" color intent and an "x-large" size intent.
    The test asserts that SKU "2" ("Navy Blue" / "X-Large") is selected at
    the "text" stage, and that applying the decisions moves that SKU to the
    front of ``skus`` and sets ``image_url`` to its ``image_src``.
    """
    term_config = {
        "color": [{"en_terms": ["navy"], "zh_terms": ["藏青"], "attribute_terms": ["navy"]}],
        "size": [{"en_terms": ["xl"], "zh_terms": ["加大码"], "attribute_terms": ["x-large"]}],
    }
    alias_config = {
        "color": ["color", "颜色"],
        "size": ["size", "尺码"],
    }
    query_config = QueryConfig(
        style_intent_terms=term_config,
        style_intent_dimension_aliases=alias_config,
    )
    sku_selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    # Hand-built intents stand in for the output of query parsing.
    color_intent = DetectedStyleIntent(
        intent_type="color",
        canonical_value="navy",
        matched_term="藏青",
        matched_query_text="藏青",
        attribute_terms=("navy",),
        dimension_aliases=("color", "颜色"),
    )
    size_intent = DetectedStyleIntent(
        intent_type="size",
        canonical_value="x-large",
        matched_term="xl",
        matched_query_text="xl",
        attribute_terms=("x-large",),
        dimension_aliases=("size", "尺码"),
    )
    query = SimpleNamespace(
        style_intent_profile=StyleIntentProfile(intents=(color_intent, size_intent)),
    )

    spu_source = {
        "option1_name": "Color",
        "option2_name": "Size",
        "skus": [
            {"sku_id": "1", "option1_value": "Black", "option2_value": "M"},
            {"sku_id": "2", "option1_value": "Navy Blue", "option2_value": "X-Large", "image_src": "matched.jpg"},
            {"sku_id": "3", "option1_value": "Navy", "option2_value": "XL"},
        ],
    }
    search_hits = [{"_id": "spu-1", "_source": spu_source}]

    sku_decisions = sku_selector.prepare_hits(search_hits, query)
    chosen = sku_decisions["spu-1"]

    assert chosen.selected_sku_id == "2"
    assert chosen.selected_text == "Navy Blue X-Large"
    assert chosen.matched_stage == "text"

    # Applying the decisions mutates the hit's _source in place.
    sku_selector.apply_precomputed_decisions(search_hits, sku_decisions)

    assert spu_source["skus"][0]["sku_id"] == "2"
    assert spu_source["image_url"] == "matched.jpg"
def test_style_sku_selector_returns_no_match_without_attribute_contains():
    """Report "no_match" when no SKU option value contains the attribute term.

    The only intent is color "beige", while the SKUs offer "Khaki" and
    "Light Brown". The test asserts that no SKU id is selected and that the
    decision's stage is "no_match".
    """
    query_config = QueryConfig(
        style_intent_terms={
            "color": [{"en_terms": ["beige"], "zh_terms": ["米色"], "attribute_terms": ["beige"]}],
        },
        style_intent_dimension_aliases={"color": ["color", "颜色"]},
    )
    sku_selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    beige_intent = DetectedStyleIntent(
        intent_type="color",
        canonical_value="beige",
        matched_term="米色",
        matched_query_text="米色",
        attribute_terms=("beige",),
        dimension_aliases=("color", "颜色"),
    )
    query = SimpleNamespace(
        style_intent_profile=StyleIntentProfile(intents=(beige_intent,)),
    )

    spu_source = {
        "option1_name": "Color",
        "skus": [
            {"sku_id": "1", "option1_value": "Khaki"},
            {"sku_id": "2", "option1_value": "Light Brown"},
        ],
    }
    search_hits = [{"_id": "spu-1", "_source": spu_source}]

    outcome = sku_selector.prepare_hits(search_hits, query)["spu-1"]

    assert outcome.selected_sku_id is None
    assert outcome.matched_stage == "no_match"
|
837d5d76
tangwang
sku筛选匹配规则优化,按 tok...
|
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
def test_is_text_match_uses_token_boundaries_for_sizes():
    """Ensure the size term "l" matches "l" but not "xl" or "xxl".

    A plain substring check would accept "xl" and "xxl" because both contain
    "l"; the test asserts that ``_is_text_match`` rejects them.
    """
    query_config = QueryConfig(
        style_intent_terms={
            "size": [{"en_terms": ["l"], "zh_terms": ["大码"], "attribute_terms": ["l"]}],
        },
        style_intent_dimension_aliases={"size": ["size", "尺码"]},
    )
    sku_selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    size_intent = DetectedStyleIntent(
        intent_type="size",
        canonical_value="l",
        matched_term="l",
        matched_query_text="l",
        attribute_terms=("l",),
        dimension_aliases=("size", "尺码"),
    )
    context = sku_selector._build_selection_context(
        StyleIntentProfile(intents=(size_intent,)),
    )

    assert sku_selector._is_text_match("size", context, normalized_value="l")
    # Larger sizes merely contain the letter "l" and must not count as a hit.
    for larger_size in ("xl", "xxl"):
        assert not sku_selector._is_text_match("size", context, normalized_value=larger_size)
def test_is_text_match_handles_punctuation_and_descriptive_attribute_values():
    """Ensure attribute terms are found inside longer, punctuated option values.

    Four intents (color, style, accessory, size) are registered, and the test
    asserts that each term matches a value where it appears next to other
    words or punctuation, e.g. "off-white" in "off-white/lined".
    """
    query_config = QueryConfig(
        style_intent_terms={
            "color": [{"en_terms": ["blue"], "zh_terms": ["蓝色"], "attribute_terms": ["blue"]}],
            "style": [{"en_terms": ["off-white"], "zh_terms": ["米白"], "attribute_terms": ["off-white"]}],
            "accessory": [{"en_terms": ["headscarf"], "zh_terms": ["头巾"], "attribute_terms": ["headscarf"]}],
            "size": [{"en_terms": ["2xl"], "zh_terms": ["2xl"], "attribute_terms": ["2xl"]}],
        },
        style_intent_dimension_aliases={
            "color": ["color", "颜色"],
            "style": ["style", "风格"],
            "accessory": ["accessory", "配饰"],
            "size": ["size", "尺码"],
        },
    )
    sku_selector = StyleSkuSelector(StyleIntentRegistry.from_query_config(query_config))

    # (intent type, term, dimension aliases); the term doubles as canonical
    # value, matched term, matched query text and sole attribute term.
    intent_specs = (
        ("color", "blue", ("color", "颜色")),
        ("style", "off-white", ("style", "风格")),
        ("accessory", "headscarf", ("accessory", "配饰")),
        ("size", "2xl", ("size", "尺码")),
    )
    profile = StyleIntentProfile(
        intents=tuple(
            DetectedStyleIntent(
                intent_type=kind,
                canonical_value=term,
                matched_term=term,
                matched_query_text=term,
                attribute_terms=(term,),
                dimension_aliases=aliases,
            )
            for kind, term, aliases in intent_specs
        ),
    )
    context = sku_selector._build_selection_context(profile)

    # (intent type, normalized option value expected to match)
    expected_matches = (
        ("color", "gray blue"),
        ("style", "off-white/lined"),
        ("accessory", "army green + headscarf"),
        ("size", "2xl recommended 65-70kg"),
    )
    for kind, option_value in expected_matches:
        assert sku_selector._is_text_match(kind, context, normalized_value=option_value)
|