cda1cd62
tangwang
意图分析&应用 baseline
|
1
2
3
4
5
6
7
|
"""
SKU selection for style-intent-aware search results.
"""
from __future__ import annotations
from dataclasses import dataclass, field
|
b712a831
tangwang
意图识别策略和性能优化
|
8
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
cda1cd62
tangwang
意图分析&应用 baseline
|
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
from query.style_intent import StyleIntentProfile, StyleIntentRegistry
from query.tokenization import normalize_query_text
@dataclass(frozen=True)
class SkuSelectionDecision:
    """Immutable outcome of SKU selection for a single search hit.

    Attributes:
        selected_sku_id: Identifier of the chosen SKU, or ``None`` when no
            SKU was selected.
        rerank_suffix: Text that callers attach to the hit for reranking;
            empty when nothing was selected.
        selected_text: Display text of the option values that matched.
        matched_stage: Label of the stage that produced this decision
            (the selector in this file emits ``"text"``, ``"no_match"``
            and ``"unresolved"``).
        similarity_score: Optional score associated with the match.
        resolved_dimensions: Maps an intent type to the SKU option field
            it was resolved to, or ``None`` when it could not be resolved.
    """

    selected_sku_id: Optional[str]
    rerank_suffix: str
    selected_text: str
    matched_stage: str
    similarity_score: Optional[float] = None
    resolved_dimensions: Dict[str, Optional[str]] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain ``dict`` snapshot of this decision.

        ``resolved_dimensions`` is shallow-copied so that mutating the
        returned mapping does not affect this instance.
        """
        scalar_fields = (
            "selected_sku_id",
            "rerank_suffix",
            "selected_text",
            "matched_stage",
            "similarity_score",
        )
        payload: Dict[str, Any] = {name: getattr(self, name) for name in scalar_fields}
        payload["resolved_dimensions"] = dict(self.resolved_dimensions)
        return payload
@dataclass
class _SelectionContext:
    """Mutable scratch state shared while selecting SKUs for one batch of hits.

    It carries the normalized attribute terms of the active style profile
    together with two memoization tables, so that repeated option values
    across SKUs are normalized and matched only once.
    """

    # intent type -> de-duplicated, normalized attribute terms for that intent
    attribute_terms_by_intent: Dict[str, Tuple[str, ...]]
    # stripped raw text -> its normalized form
    normalized_text_cache: Dict[str, str] = field(default_factory=dict)
    # (intent type, normalized option value) -> whether any term matched
    text_match_cache: Dict[Tuple[str, str], bool] = field(default_factory=dict)
|
cda1cd62
tangwang
意图分析&应用 baseline
|
39
40
41
42
43
44
45
46
47
48
|
class StyleSkuSelector:
    """Selects the best SKU for an SPU based on detected style intent.

    Typical flow: ``prepare_hits`` computes one decision per hit and tags
    the hit with a rerank suffix; ``apply_precomputed_decisions`` later
    reorders each hit's SKU list so the selected SKU comes first.
    """

    def __init__(
        self,
        registry: StyleIntentRegistry,
        *,
        text_encoder_getter: Optional[Callable[[], Any]] = None,
    ) -> None:
        """Store the intent registry and an optional text-encoder factory.

        Args:
            registry: Source of fallback dimension aliases per intent type.
            text_encoder_getter: Optional zero-argument callable. It is only
                stored here; no method of this class currently calls it.
        """
        self.registry = registry
        self._text_encoder_getter = text_encoder_getter

    def prepare_hits(
        self,
        es_hits: List[Dict[str, Any]],
        parsed_query: Any,
    ) -> Dict[str, SkuSelectionDecision]:
        """Compute a SKU decision for every hit and tag hits for reranking.

        Args:
            es_hits: Search hits; each is expected to carry ``_source`` and
                usually ``_id``. Hits are mutated in place: the
                ``_style_rerank_suffix`` key is set or removed.
            parsed_query: Object that may expose ``style_intent_profile``.

        Returns:
            Decisions keyed by ``str(hit["_id"])``. Empty when the query has
            no active ``StyleIntentProfile``.
        """
        decisions: Dict[str, SkuSelectionDecision] = {}
        style_profile = getattr(parsed_query, "style_intent_profile", None)
        if not isinstance(style_profile, StyleIntentProfile) or not style_profile.is_active:
            return decisions

        # Built once per call so normalization/match caches span all hits.
        selection_context = self._build_selection_context(style_profile)

        for hit in es_hits:
            source = hit.get("_source")
            if not isinstance(source, dict):
                continue
            decision = self._select_for_source(
                source,
                style_profile=style_profile,
                selection_context=selection_context,
            )
            if decision is None:
                continue

            if decision.rerank_suffix:
                hit["_style_rerank_suffix"] = decision.rerank_suffix
            else:
                # Drop any stale suffix left on the hit by an earlier pass.
                hit.pop("_style_rerank_suffix", None)

            doc_id = hit.get("_id")
            if doc_id is not None:
                decisions[str(doc_id)] = decision
        return decisions

    def apply_precomputed_decisions(
        self,
        es_hits: List[Dict[str, Any]],
        decisions: Dict[str, SkuSelectionDecision],
    ) -> None:
        """Apply previously computed decisions to hits, in place.

        For every hit whose ``_id`` has a decision, the selected SKU is moved
        to the front of ``_source["skus"]`` and ``_style_rerank_suffix`` is
        set or removed to mirror the decision.
        """
        if not es_hits or not decisions:
            return
        for hit in es_hits:
            doc_id = hit.get("_id")
            if doc_id is None:
                continue
            decision = decisions.get(str(doc_id))
            if decision is None:
                continue
            source = hit.get("_source")
            if not isinstance(source, dict):
                continue
            self._apply_decision_to_source(source, decision)
            if decision.rerank_suffix:
                hit["_style_rerank_suffix"] = decision.rerank_suffix
            else:
                hit.pop("_style_rerank_suffix", None)

    def _build_selection_context(
        self,
        style_profile: StyleIntentProfile,
    ) -> _SelectionContext:
        """Collect normalized, de-duplicated attribute terms per intent type.

        Terms from several intents of the same type are merged, preserving
        first-seen order; terms that normalize to an empty string are dropped.
        """
        attribute_terms_by_intent: Dict[str, List[str]] = {}
        for intent in style_profile.intents:
            terms = attribute_terms_by_intent.setdefault(intent.intent_type, [])
            for raw_term in intent.attribute_terms:
                normalized_term = normalize_query_text(raw_term)
                if not normalized_term or normalized_term in terms:
                    continue
                terms.append(normalized_term)
        return _SelectionContext(
            attribute_terms_by_intent={
                intent_type: tuple(terms)
                for intent_type, terms in attribute_terms_by_intent.items()
            },
        )

    @staticmethod
    def _normalize_cached(selection_context: _SelectionContext, value: Any) -> str:
        """Normalize ``value`` via ``normalize_query_text`` with memoization.

        Falsy or whitespace-only values yield ``""`` without touching the
        cache. The cache key is the stripped string form of ``value``.
        """
        raw = str(value or "").strip()
        if not raw:
            return ""
        cached = selection_context.normalized_text_cache.get(raw)
        if cached is not None:
            return cached
        normalized = normalize_query_text(raw)
        selection_context.normalized_text_cache[raw] = normalized
        return normalized

    def _resolve_dimensions(
        self,
        source: Dict[str, Any],
        style_profile: StyleIntentProfile,
    ) -> Dict[str, Optional[str]]:
        """Map each intent type to the SKU option field that carries it.

        The product's ``option{1,2,3}_name`` values are normalized and
        compared with the intent's dimension aliases (falling back to the
        registry's aliases when the intent has none).

        Returns:
            ``{intent_type: "optionN_value" or None}``. Only the first intent
            of each type is considered.
        """
        # Keyed by the SKU *value* field, holding the normalized option *name*.
        option_names = {
            "option1_value": normalize_query_text(source.get("option1_name")),
            "option2_value": normalize_query_text(source.get("option2_name")),
            "option3_value": normalize_query_text(source.get("option3_name")),
        }
        resolved: Dict[str, Optional[str]] = {}
        for intent in style_profile.intents:
            if intent.intent_type in resolved:
                continue
            # NOTE(review): aliases are presumably stored in normalized form
            # so they compare equal to normalized option names - confirm.
            aliases = set(intent.dimension_aliases or self.registry.get_dimension_aliases(intent.intent_type))
            matched_field = None
            for field_name, option_name in option_names.items():
                if option_name and option_name in aliases:
                    matched_field = field_name
                    break
            resolved[intent.intent_type] = matched_field
        return resolved

    @staticmethod
    def _empty_decision(
        resolved_dimensions: Dict[str, Optional[str]],
        matched_stage: str,
    ) -> SkuSelectionDecision:
        """Build a decision that selects nothing, tagged with ``matched_stage``."""
        return SkuSelectionDecision(
            selected_sku_id=None,
            rerank_suffix="",
            selected_text="",
            matched_stage=matched_stage,
            resolved_dimensions=dict(resolved_dimensions),
        )

    def _is_text_match(
        self,
        intent_type: str,
        selection_context: _SelectionContext,
        *,
        normalized_value: str,
    ) -> bool:
        """Return whether any attribute term of the intent occurs in the value.

        Matching is substring containment on already-normalized text. Results
        are memoized per ``(intent_type, normalized_value)``; an empty value
        never matches and is not cached.
        """
        if not normalized_value:
            return False
        cache_key = (intent_type, normalized_value)
        cached = selection_context.text_match_cache.get(cache_key)
        if cached is not None:
            return cached
        attribute_terms = selection_context.attribute_terms_by_intent.get(intent_type, ())
        matched = any(term in normalized_value for term in attribute_terms if term)
        selection_context.text_match_cache[cache_key] = matched
        return matched

    def _find_first_text_match(
        self,
        skus: List[Dict[str, Any]],
        resolved_dimensions: Dict[str, Optional[str]],
        selection_context: _SelectionContext,
    ) -> Optional[Tuple[str, str]]:
        """Find the first SKU whose option values satisfy every intent.

        Args:
            skus: SKU entries of one product. Entries that are not dicts are
                skipped instead of raising.
            resolved_dimensions: Intent type -> SKU option field name.
            selection_context: Shared caches and attribute terms.

        Returns:
            ``(sku_id, selection_text)`` for the first matching SKU, where
            ``selection_text`` joins the distinct matched raw option values
            with spaces; ``None`` when no SKU matches.
        """
        for sku in skus:
            if not isinstance(sku, dict):
                # Malformed entry (e.g. null): it cannot match, and must not
                # abort the scan of the remaining SKUs.
                continue

            selection_parts: List[str] = []
            seen_parts: set[str] = set()
            matched = True

            for intent_type, field_name in resolved_dimensions.items():
                if not field_name:
                    matched = False
                    break

                raw_value = str(sku.get(field_name) or "").strip()
                normalized_value = self._normalize_cached(selection_context, raw_value)
                if not self._is_text_match(
                    intent_type,
                    selection_context,
                    normalized_value=normalized_value,
                ):
                    matched = False
                    break

                # Two intents may resolve to the same value; keep it once.
                if raw_value and normalized_value not in seen_parts:
                    seen_parts.add(normalized_value)
                    selection_parts.append(raw_value)

            if matched:
                return str(sku.get("sku_id") or ""), " ".join(selection_parts).strip()
        return None

    def _select_for_source(
        self,
        source: Dict[str, Any],
        *,
        style_profile: StyleIntentProfile,
        selection_context: _SelectionContext,
    ) -> Optional[SkuSelectionDecision]:
        """Produce the SKU decision for one product ``_source``.

        Returns:
            ``None`` when the product has no SKU list; an empty decision with
            stage ``"unresolved"`` when any intent has no option field; an
            empty decision with stage ``"no_match"`` when no SKU matches;
            otherwise a ``"text"`` decision for the first matching SKU.
        """
        skus = source.get("skus")
        if not isinstance(skus, list) or not skus:
            return None
        resolved_dimensions = self._resolve_dimensions(source, style_profile)
        if not resolved_dimensions or any(not field_name for field_name in resolved_dimensions.values()):
            return self._empty_decision(resolved_dimensions, matched_stage="unresolved")
        text_match = self._find_first_text_match(skus, resolved_dimensions, selection_context)
        if text_match is None:
            return self._empty_decision(resolved_dimensions, matched_stage="no_match")
        return self._build_decision(
            selected_sku_id=text_match[0],
            selected_text=text_match[1],
            resolved_dimensions=resolved_dimensions,
            matched_stage="text",
        )

    @staticmethod
    def _build_decision(
        selected_sku_id: str,
        selected_text: str,
        resolved_dimensions: Dict[str, Optional[str]],
        *,
        matched_stage: str,
        similarity_score: Optional[float] = None,
    ) -> SkuSelectionDecision:
        """Build a positive decision; the selected text doubles as rerank suffix.

        An empty ``selected_sku_id`` is stored as ``None``.
        """
        return SkuSelectionDecision(
            selected_sku_id=selected_sku_id or None,
            rerank_suffix=str(selected_text or "").strip(),
            selected_text=str(selected_text or "").strip(),
            matched_stage=matched_stage,
            similarity_score=similarity_score,
            resolved_dimensions=dict(resolved_dimensions),
        )

    @staticmethod
    def _apply_decision_to_source(source: Dict[str, Any], decision: SkuSelectionDecision) -> None:
        """Move the selected SKU to the front of ``source["skus"]`` in place.

        When the selected SKU carries ``image_src`` (or ``imageSrc``), that
        value also replaces ``source["image_url"]``. Nothing happens when the
        decision selects no SKU or the SKU is not present. Entries of the SKU
        list that are not dicts are ignored during the lookup.
        """
        skus = source.get("skus")
        if not isinstance(skus, list) or not skus or not decision.selected_sku_id:
            return
        selected_index = None
        for index, sku in enumerate(skus):
            if not isinstance(sku, dict):
                # Malformed entry (e.g. null) can never be the selected SKU.
                continue
            if str(sku.get("sku_id") or "") == decision.selected_sku_id:
                selected_index = index
                break
        if selected_index is None:
            return
        selected_sku = skus.pop(selected_index)
        skus.insert(0, selected_sku)
        image_src = selected_sku.get("image_src") or selected_sku.get("imageSrc")
        if image_src:
            source["image_url"] = image_src
|