Commit 4202440950af6b1f1c53c7e165d6f12d2dfeec7e

Authored by tangwang
1 parent 286e9b4f

评估框架-批量打标

docs/常用查询 - ES.md
@@ -659,7 +659,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ @@ -659,7 +659,7 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \
659 "query": { 659 "query": {
660 "bool": { 660 "bool": {
661 "filter": [ 661 "filter": [
662 - { "exists": { "field": "image_embedding" } } 662 + { "exists": { "field": "title_embedding" } }
663 ] 663 ]
664 } 664 }
665 } 665 }
query/query_parser.py
@@ -27,7 +27,7 @@ from .product_title_exclusion import ( @@ -27,7 +27,7 @@ from .product_title_exclusion import (
27 ) 27 )
28 from .query_rewriter import QueryRewriter, QueryNormalizer 28 from .query_rewriter import QueryRewriter, QueryNormalizer
29 from .style_intent import StyleIntentDetector, StyleIntentProfile, StyleIntentRegistry 29 from .style_intent import StyleIntentDetector, StyleIntentProfile, StyleIntentRegistry
30 -from .tokenization import QueryTextAnalysisCache, contains_han_text, extract_token_strings 30 +from .tokenization import QueryTextAnalysisCache, extract_token_strings
31 from .keyword_extractor import KeywordExtractor, collect_keywords_queries 31 from .keyword_extractor import KeywordExtractor, collect_keywords_queries
32 32
33 logger = logging.getLogger(__name__) 33 logger = logging.getLogger(__name__)
@@ -289,31 +289,12 @@ class QueryParser: @@ -289,31 +289,12 @@ class QueryParser:
289 def _get_query_tokens(self, query: str) -> List[str]: 289 def _get_query_tokens(self, query: str) -> List[str]:
290 return self._extract_tokens(self._tokenizer(query)) 290 return self._extract_tokens(self._tokenizer(query))
291 291
292 - @staticmethod  
293 - def _is_ascii_latin_query(text: str) -> bool:  
294 - candidate = str(text or "").strip()  
295 - if not candidate or contains_han_text(candidate):  
296 - return False  
297 - try:  
298 - candidate.encode("ascii")  
299 - except UnicodeEncodeError:  
300 - return False  
301 - return any(ch.isalpha() for ch in candidate)  
302 -  
303 def _detect_query_language( 292 def _detect_query_language(
304 self, 293 self,
305 query_text: str, 294 query_text: str,
306 *, 295 *,
307 target_languages: Optional[List[str]] = None, 296 target_languages: Optional[List[str]] = None,
308 ) -> str: 297 ) -> str:
309 - normalized_targets = self._normalize_language_codes(target_languages)  
310 - supported_languages = self._normalize_language_codes(  
311 - getattr(self.config.query_config, "supported_languages", None)  
312 - )  
313 - active_languages = normalized_targets or supported_languages  
314 - if active_languages and set(active_languages).issubset({"en", "zh"}):  
315 - if self._is_ascii_latin_query(query_text):  
316 - return "en"  
317 return self.language_detector.detect(query_text) 298 return self.language_detector.detect(query_text)
318 299
319 def parse( 300 def parse(
scripts/evaluation/eval_framework/clients.py
@@ -82,6 +82,18 @@ def _canonicalize_judge_label(raw: str) -> str | None: @@ -82,6 +82,18 @@ def _canonicalize_judge_label(raw: str) -> str | None:
82 return None 82 return None
83 83
84 84
  85 +def _describe_request_exception(exc: requests.exceptions.RequestException) -> str:
  86 + if isinstance(exc, requests.exceptions.HTTPError):
  87 + response = getattr(exc, "response", None)
  88 + if response is None:
  89 + return str(exc)
  90 + body = str(getattr(response, "text", "") or "").strip()
  91 + if len(body) > 600:
  92 + body = body[:600].rstrip() + "...[truncated]"
  93 + return f"status={response.status_code} body={body or '<empty>'}"
  94 + return str(exc)
  95 +
  96 +
85 class SearchServiceClient: 97 class SearchServiceClient:
86 def __init__(self, base_url: str, tenant_id: str): 98 def __init__(self, base_url: str, tenant_id: str):
87 self.base_url = base_url.rstrip("/") 99 self.base_url = base_url.rstrip("/")
@@ -341,7 +353,8 @@ class DashScopeLabelClient: @@ -341,7 +353,8 @@ class DashScopeLabelClient:
341 except Exception as exc: 353 except Exception as exc:
342 last_exc = exc 354 last_exc = exc
343 is_request_error = isinstance(exc, requests.exceptions.RequestException) 355 is_request_error = isinstance(exc, requests.exceptions.RequestException)
344 - if not is_request_error or attempt >= self.retry_attempts: 356 + is_transient = is_request_error and self._is_transient_request_error(exc)
  357 + if not is_transient or attempt >= self.retry_attempts:
345 raise 358 raise
346 _client_log.warning( 359 _client_log.warning(
347 "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s", 360 "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s",
@@ -350,7 +363,7 @@ class DashScopeLabelClient: @@ -350,7 +363,7 @@ class DashScopeLabelClient:
350 phase, 363 phase,
351 self.model, 364 self.model,
352 self.use_batch, 365 self.use_batch,
353 - exc, 366 + _describe_request_exception(exc),
354 ) 367 )
355 time.sleep(self.retry_delay_sec) 368 time.sleep(self.retry_delay_sec)
356 else: 369 else:
@@ -366,6 +379,17 @@ class DashScopeLabelClient: @@ -366,6 +379,17 @@ class DashScopeLabelClient:
366 ) 379 )
367 return content, raw 380 return content, raw
368 381
  382 + @staticmethod
  383 + def _is_transient_request_error(exc: requests.exceptions.RequestException) -> bool:
  384 + if isinstance(exc, (requests.exceptions.ConnectionError, requests.exceptions.Timeout)):
  385 + return True
  386 + if isinstance(exc, requests.exceptions.HTTPError):
  387 + response = getattr(exc, "response", None)
  388 + if response is None:
  389 + return True
  390 + return int(response.status_code) in _TRANSIENT_HTTP_STATUS_CODES
  391 + return False
  392 +
369 def _find_batch_line_for_custom_id( 393 def _find_batch_line_for_custom_id(
370 self, 394 self,
371 file_id: Optional[str], 395 file_id: Optional[str],
scripts/evaluation/eval_framework/utils.py
@@ -11,6 +11,20 @@ from typing import Any, Dict, List, Sequence, Tuple @@ -11,6 +11,20 @@ from typing import Any, Dict, List, Sequence, Tuple
11 11
12 from .constants import PROJECT_ROOT 12 from .constants import PROJECT_ROOT
13 13
  14 +_LABEL_OPTION_MAX_CHARS = 40
  15 +_LABEL_DOC_LINE_MAX_CHARS = 260
  16 +
  17 +
  18 +def _truncate_text(value: Any, max_chars: int) -> str:
  19 + text = str(value or "").strip()
  20 + if max_chars <= 0:
  21 + return ""
  22 + if len(text) <= max_chars:
  23 + return text
  24 + if max_chars <= 3:
  25 + return text[:max_chars]
  26 + return text[: max_chars - 3].rstrip() + "..."
  27 +
14 28
15 def utc_now_iso() -> str: 29 def utc_now_iso() -> str:
16 return datetime.now(timezone.utc).isoformat() 30 return datetime.now(timezone.utc).isoformat()
@@ -67,11 +81,7 @@ def compact_option_values(skus: Sequence[Dict[str, Any]]) -> Tuple[str, str, str @@ -67,11 +81,7 @@ def compact_option_values(skus: Sequence[Dict[str, Any]]) -> Tuple[str, str, str
67 81
68 def build_display_title(doc: Dict[str, Any]) -> str: 82 def build_display_title(doc: Dict[str, Any]) -> str:
69 title = doc.get("title") 83 title = doc.get("title")
70 - en = pick_text(title, "en")  
71 - zh = pick_text(title, "zh")  
72 - if en and zh and en != zh:  
73 - return f"{en} / {zh}"  
74 - return en or zh 84 + return pick_text(title, "en") or pick_text(title, "zh")
75 85
76 86
77 def build_rerank_doc(doc: Dict[str, Any]) -> str: 87 def build_rerank_doc(doc: Dict[str, Any]) -> str:
@@ -82,14 +92,15 @@ def build_rerank_doc(doc: Dict[str, Any]) -> str: @@ -82,14 +92,15 @@ def build_rerank_doc(doc: Dict[str, Any]) -> str:
82 def build_label_doc_line(idx: int, doc: Dict[str, Any]) -> str: 92 def build_label_doc_line(idx: int, doc: Dict[str, Any]) -> str:
83 title = build_display_title(doc) 93 title = build_display_title(doc)
84 option1, option2, option3 = compact_option_values(doc.get("skus") or []) 94 option1, option2, option3 = compact_option_values(doc.get("skus") or [])
85 - vendor = pick_text(doc.get("vendor"), "en")  
86 - category = pick_text(doc.get("category_path"), "en") or pick_text(doc.get("category_name"), "en")  
87 parts = [title] 95 parts = [title]
88 if option1: 96 if option1:
89 - parts.append(f"{option1}") 97 + parts.append(_truncate_text(option1, _LABEL_OPTION_MAX_CHARS))
90 if option2: 98 if option2:
91 - parts.append(f"{option2}")  
92 - return f"{idx}. " + " ".join(part for part in parts if part) 99 + parts.append(_truncate_text(option2, _LABEL_OPTION_MAX_CHARS))
  100 + if option3:
  101 + parts.append(_truncate_text(option3, _LABEL_OPTION_MAX_CHARS))
  102 + line = " ".join(part for part in parts if part)
  103 + return _truncate_text(f"{idx}. {line}", _LABEL_DOC_LINE_MAX_CHARS)
93 104
94 105
95 def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]: 106 def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]:
tests/test_eval_framework_clients.py 0 → 100644
@@ -0,0 +1,71 @@ @@ -0,0 +1,71 @@
  1 +import pytest
  2 +import requests
  3 +
  4 +from scripts.evaluation.eval_framework.clients import DashScopeLabelClient
  5 +from scripts.evaluation.eval_framework.utils import build_label_doc_line
  6 +
  7 +
  8 +def _http_error(status_code: int, body: str) -> requests.exceptions.HTTPError:
  9 + response = requests.Response()
  10 + response.status_code = status_code
  11 + response._content = body.encode("utf-8")
  12 + response.url = "https://dashscope-us.aliyuncs.com/compatible-mode/v1/chat/completions"
  13 + return requests.exceptions.HTTPError(f"{status_code} error", response=response)
  14 +
  15 +
  16 +def test_build_label_doc_line_truncates_long_fields():
  17 + doc = {
  18 + "title": {"en": "Minimalist Top " * 40},
  19 + "skus": [
  20 + {
  21 + "option1_value": "Very Long Color Name " * 10,
  22 + "option2_value": "Very Long Size Name " * 10,
  23 + "option3_value": "Very Long Material Name " * 10,
  24 + }
  25 + ],
  26 + }
  27 +
  28 + line = build_label_doc_line(1, doc)
  29 +
  30 + assert line.startswith("1. ")
  31 + assert len(line) <= 260
  32 + assert line.endswith("...")
  33 +
  34 +
  35 +def test_dashscope_chat_does_not_retry_non_transient_400(monkeypatch):
  36 + client = DashScopeLabelClient(model="qwen3.5-plus", base_url="https://example.com", api_key="test")
  37 + client.retry_attempts = 4
  38 + client.retry_delay_sec = 0
  39 + calls = {"count": 0}
  40 +
  41 + def fake_chat_sync(prompt: str):
  42 + calls["count"] += 1
  43 + raise _http_error(400, '{"code":"InvalidParameter"}')
  44 +
  45 + monkeypatch.setattr(client, "_chat_sync", fake_chat_sync)
  46 +
  47 + with pytest.raises(requests.exceptions.HTTPError):
  48 + client._chat("prompt", phase="relevance_classify")
  49 +
  50 + assert calls["count"] == 1
  51 +
  52 +
  53 +def test_dashscope_chat_retries_transient_500(monkeypatch):
  54 + client = DashScopeLabelClient(model="qwen3.5-plus", base_url="https://example.com", api_key="test")
  55 + client.retry_attempts = 4
  56 + client.retry_delay_sec = 0
  57 + calls = {"count": 0}
  58 +
  59 + def fake_chat_sync(prompt: str):
  60 + calls["count"] += 1
  61 + if calls["count"] < 3:
  62 + raise _http_error(500, '{"code":"InternalError"}')
  63 + return "Exact Match", '{"choices":[{"message":{"content":"Exact Match"}}]}'
  64 +
  65 + monkeypatch.setattr(client, "_chat_sync", fake_chat_sync)
  66 +
  67 + content, raw = client._chat("prompt", phase="relevance_classify")
  68 +
  69 + assert content == "Exact Match"
  70 + assert "Exact Match" in raw
  71 + assert calls["count"] == 3
tests/test_query_parser_mixed_language.py
@@ -136,20 +136,24 @@ def test_parse_reuses_tokenization_across_tail_stages(monkeypatch): @@ -136,20 +136,24 @@ def test_parse_reuses_tokenization_across_tail_stages(monkeypatch):
136 assert tokenize_calls == [] 136 assert tokenize_calls == []
137 137
138 138
139 -def test_parse_fast_path_detects_ascii_query_as_english_without_lingua(monkeypatch): 139 +def test_parse_ascii_latin_query_uses_language_detector(monkeypatch):
140 parser = QueryParser(_build_config(), translator=_DummyTranslator(), tokenizer=_tokenizer) 140 parser = QueryParser(_build_config(), translator=_DummyTranslator(), tokenizer=_tokenizer)
  141 + detector_calls = []
  142 +
141 monkeypatch.setattr( 143 monkeypatch.setattr(
142 parser.language_detector, 144 parser.language_detector,
143 "detect", 145 "detect",
144 - lambda text: (_ for _ in ()).throw(AssertionError("Lingua path should not be used")), 146 + lambda text: detector_calls.append(text) or "es",
145 ) 147 )
146 148
147 result = parser.parse( 149 result = parser.parse(
148 - "street t-shirt women", 150 + "falda negra oficina",
149 tenant_id="0", 151 tenant_id="0",
150 generate_vector=False, 152 generate_vector=False,
151 target_languages=["en", "zh"], 153 target_languages=["en", "zh"],
152 ) 154 )
153 155
154 - assert result.detected_language == "en"  
155 - assert result.query_tokens == ["street", "t-shirt", "women"] 156 + assert detector_calls == ["falda negra oficina"]
  157 + assert result.detected_language == "es"
  158 + assert result.translations == {"en": "falda negra oficina-en", "zh": "falda negra oficina-zh"}
  159 + assert result.query_tokens == ["falda", "negra", "oficina"]