Commit 6adbf18af72d462a53cd40715fb67cbce924b0d0

Authored by tangwang
1 parent 2efad04b

reranker提示词优化

config/config.yaml
@@ -397,9 +397,15 @@ services: @@ -397,9 +397,15 @@ services:
397 enforce_eager: false 397 enforce_eager: false
398 infer_batch_size: 100 398 infer_batch_size: 100
399 sort_by_doc_length: true 399 sort_by_doc_length: true
400 - # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点  
401 - instruction: "rank products by given query"  
402 # instruction: "Given a query, score the product for relevance" 400 # instruction: "Given a query, score the product for relevance"
  401 + # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点
  402 + # instruction: "rank products by given query, category match first"
  403 + # instruction: "Rank products by query relevance, prioritizing category match"
  404 + # instruction: "Rank products by query relevance, prioritizing category and style match"
  405 + # instruction: "Rank by query relevance, prioritize category & style"
  406 + # instruction: "Relevance ranking: category & style match first"
  407 + # instruction: "Score product relevance by query with category & style match prioritized"
  408 + instruction: "Rank products by query with category & style match prioritized"
403 qwen3_transformers: 409 qwen3_transformers:
404 model_name: "Qwen/Qwen3-Reranker-0.6B" 410 model_name: "Qwen/Qwen3-Reranker-0.6B"
405 instruction: "rank products by given query" 411 instruction: "rank products by given query"
frontend/static/js/tenant_facets_config.js
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 // 根据不同的 tenant_id 配置不同的分面字段名、显示标签和容器ID 2 // 根据不同的 tenant_id 配置不同的分面字段名、显示标签和容器ID
3 const TENANT_FACETS_CONFIG = { 3 const TENANT_FACETS_CONFIG = {
4 // tenant_id=162: 使用小写的规格名称 4 // tenant_id=162: 使用小写的规格名称
5 - "162": { 5 + "163": {
6 specificationFields: [ 6 specificationFields: [
7 { 7 {
8 field: "specifications.color", 8 field: "specifications.color",
search/sku_intent_selector.py
@@ -41,6 +41,7 @@ class _SkuCandidate: @@ -41,6 +41,7 @@ class _SkuCandidate:
41 selection_text: str 41 selection_text: str
42 normalized_selection_text: str 42 normalized_selection_text: str
43 intent_values: Dict[str, str] 43 intent_values: Dict[str, str]
  44 + normalized_intent_values: Dict[str, str]
44 45
45 46
46 @dataclass 47 @dataclass
@@ -235,29 +236,36 @@ class StyleSkuSelector: @@ -235,29 +236,36 @@ class StyleSkuSelector:
235 candidates: List[_SkuCandidate] = [] 236 candidates: List[_SkuCandidate] = []
236 for index, sku in enumerate(skus): 237 for index, sku in enumerate(skus):
237 intent_values: Dict[str, str] = {} 238 intent_values: Dict[str, str] = {}
  239 + normalized_intent_values: Dict[str, str] = {}
238 for intent_type, field_name in resolved_dimensions.items(): 240 for intent_type, field_name in resolved_dimensions.items():
239 if not field_name: 241 if not field_name:
240 continue 242 continue
241 - intent_values[intent_type] = str(sku.get(field_name) or "").strip() 243 + raw = str(sku.get(field_name) or "").strip()
  244 + intent_values[intent_type] = raw
  245 + normalized_intent_values[intent_type] = normalize_query_text(raw)
242 246
243 selection_parts: List[str] = [] 247 selection_parts: List[str] = []
244 - seen = set()  
245 - for value in intent_values.values():  
246 - normalized = normalize_query_text(value)  
247 - if not normalized or normalized in seen: 248 + norm_parts: List[str] = []
  249 + seen: set[str] = set()
  250 + for intent_type, raw in intent_values.items():
  251 + nv = normalized_intent_values[intent_type]
  252 + if not nv or nv in seen:
248 continue 253 continue
249 - seen.add(normalized)  
250 - selection_parts.append(value) 254 + seen.add(nv)
  255 + selection_parts.append(raw)
  256 + norm_parts.append(nv)
251 257
252 selection_text = " ".join(selection_parts).strip() 258 selection_text = " ".join(selection_parts).strip()
  259 + normalized_selection_text = " ".join(norm_parts).strip()
253 candidates.append( 260 candidates.append(
254 _SkuCandidate( 261 _SkuCandidate(
255 index=index, 262 index=index,
256 sku_id=str(sku.get("sku_id") or ""), 263 sku_id=str(sku.get("sku_id") or ""),
257 sku=sku, 264 sku=sku,
258 selection_text=selection_text, 265 selection_text=selection_text,
259 - normalized_selection_text=normalize_query_text(selection_text), 266 + normalized_selection_text=normalized_selection_text,
260 intent_values=intent_values, 267 intent_values=intent_values,
  268 + normalized_intent_values=normalized_intent_values,
261 ) 269 )
262 ) 270 )
263 return candidates 271 return candidates
@@ -280,8 +288,11 @@ class StyleSkuSelector: @@ -280,8 +288,11 @@ class StyleSkuSelector:
280 intent_type: str, 288 intent_type: str,
281 value: str, 289 value: str,
282 selection_context: _SelectionContext, 290 selection_context: _SelectionContext,
  291 + *,
  292 + normalized_value: Optional[str] = None,
283 ) -> bool: 293 ) -> bool:
284 - normalized_value = normalize_query_text(value) 294 + if normalized_value is None:
  295 + normalized_value = normalize_query_text(value)
285 if not normalized_value: 296 if not normalized_value:
286 return False 297 return False
287 298
@@ -307,7 +318,12 @@ class StyleSkuSelector: @@ -307,7 +318,12 @@ class StyleSkuSelector:
307 ) -> Optional[_SkuCandidate]: 318 ) -> Optional[_SkuCandidate]:
308 for candidate in candidates: 319 for candidate in candidates:
309 if candidate.intent_values and all( 320 if candidate.intent_values and all(
310 - self._is_text_match(intent_type, value, selection_context) 321 + self._is_text_match(
  322 + intent_type,
  323 + value,
  324 + selection_context,
  325 + normalized_value=candidate.normalized_intent_values[intent_type],
  326 + )
311 for intent_type, value in candidate.intent_values.items() 327 for intent_type, value in candidate.intent_values.items()
312 ): 328 ):
313 return candidate 329 return candidate