Commit 4202440950af6b1f1c53c7e165d6f12d2dfeec7e
1 parent 286e9b4f
评估框架-批量打标
Showing 6 changed files with 129 additions and 38 deletions
Show diff stats
docs/常用查询 - ES.md
query/query_parser.py
| ... | ... | @@ -27,7 +27,7 @@ from .product_title_exclusion import ( |
| 27 | 27 | ) |
| 28 | 28 | from .query_rewriter import QueryRewriter, QueryNormalizer |
| 29 | 29 | from .style_intent import StyleIntentDetector, StyleIntentProfile, StyleIntentRegistry |
| 30 | -from .tokenization import QueryTextAnalysisCache, contains_han_text, extract_token_strings | |
| 30 | +from .tokenization import QueryTextAnalysisCache, extract_token_strings | |
| 31 | 31 | from .keyword_extractor import KeywordExtractor, collect_keywords_queries |
| 32 | 32 | |
| 33 | 33 | logger = logging.getLogger(__name__) |
| ... | ... | @@ -289,31 +289,12 @@ class QueryParser: |
| 289 | 289 | def _get_query_tokens(self, query: str) -> List[str]: |
| 290 | 290 | return self._extract_tokens(self._tokenizer(query)) |
| 291 | 291 | |
| 292 | - @staticmethod | |
| 293 | - def _is_ascii_latin_query(text: str) -> bool: | |
| 294 | - candidate = str(text or "").strip() | |
| 295 | - if not candidate or contains_han_text(candidate): | |
| 296 | - return False | |
| 297 | - try: | |
| 298 | - candidate.encode("ascii") | |
| 299 | - except UnicodeEncodeError: | |
| 300 | - return False | |
| 301 | - return any(ch.isalpha() for ch in candidate) | |
| 302 | - | |
| 303 | 292 | def _detect_query_language( |
| 304 | 293 | self, |
| 305 | 294 | query_text: str, |
| 306 | 295 | *, |
| 307 | 296 | target_languages: Optional[List[str]] = None, |
| 308 | 297 | ) -> str: |
| 309 | - normalized_targets = self._normalize_language_codes(target_languages) | |
| 310 | - supported_languages = self._normalize_language_codes( | |
| 311 | - getattr(self.config.query_config, "supported_languages", None) | |
| 312 | - ) | |
| 313 | - active_languages = normalized_targets or supported_languages | |
| 314 | - if active_languages and set(active_languages).issubset({"en", "zh"}): | |
| 315 | - if self._is_ascii_latin_query(query_text): | |
| 316 | - return "en" | |
| 317 | 298 | return self.language_detector.detect(query_text) |
| 318 | 299 | |
| 319 | 300 | def parse( | ... | ... |
scripts/evaluation/eval_framework/clients.py
| ... | ... | @@ -82,6 +82,18 @@ def _canonicalize_judge_label(raw: str) -> str | None: |
| 82 | 82 | return None |
| 83 | 83 | |
| 84 | 84 | |
| 85 | +def _describe_request_exception(exc: requests.exceptions.RequestException) -> str: | |
| 86 | + if isinstance(exc, requests.exceptions.HTTPError): | |
| 87 | + response = getattr(exc, "response", None) | |
| 88 | + if response is None: | |
| 89 | + return str(exc) | |
| 90 | + body = str(getattr(response, "text", "") or "").strip() | |
| 91 | + if len(body) > 600: | |
| 92 | + body = body[:600].rstrip() + "...[truncated]" | |
| 93 | + return f"status={response.status_code} body={body or '<empty>'}" | |
| 94 | + return str(exc) | |
| 95 | + | |
| 96 | + | |
| 85 | 97 | class SearchServiceClient: |
| 86 | 98 | def __init__(self, base_url: str, tenant_id: str): |
| 87 | 99 | self.base_url = base_url.rstrip("/") |
| ... | ... | @@ -341,7 +353,8 @@ class DashScopeLabelClient: |
| 341 | 353 | except Exception as exc: |
| 342 | 354 | last_exc = exc |
| 343 | 355 | is_request_error = isinstance(exc, requests.exceptions.RequestException) |
| 344 | - if not is_request_error or attempt >= self.retry_attempts: | |
| 356 | + is_transient = is_request_error and self._is_transient_request_error(exc) | |
| 357 | + if not is_transient or attempt >= self.retry_attempts: | |
| 345 | 358 | raise |
| 346 | 359 | _client_log.warning( |
| 347 | 360 | "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s", |
| ... | ... | @@ -350,7 +363,7 @@ class DashScopeLabelClient: |
| 350 | 363 | phase, |
| 351 | 364 | self.model, |
| 352 | 365 | self.use_batch, |
| 353 | - exc, | |
| 366 | + _describe_request_exception(exc), | |
| 354 | 367 | ) |
| 355 | 368 | time.sleep(self.retry_delay_sec) |
| 356 | 369 | else: |
| ... | ... | @@ -366,6 +379,17 @@ class DashScopeLabelClient: |
| 366 | 379 | ) |
| 367 | 380 | return content, raw |
| 368 | 381 | |
| 382 | + @staticmethod | |
| 383 | + def _is_transient_request_error(exc: requests.exceptions.RequestException) -> bool: | |
| 384 | + if isinstance(exc, (requests.exceptions.ConnectionError, requests.exceptions.Timeout)): | |
| 385 | + return True | |
| 386 | + if isinstance(exc, requests.exceptions.HTTPError): | |
| 387 | + response = getattr(exc, "response", None) | |
| 388 | + if response is None: | |
| 389 | + return True | |
| 390 | + return int(response.status_code) in _TRANSIENT_HTTP_STATUS_CODES | |
| 391 | + return False | |
| 392 | + | |
| 369 | 393 | def _find_batch_line_for_custom_id( |
| 370 | 394 | self, |
| 371 | 395 | file_id: Optional[str], | ... | ... |
scripts/evaluation/eval_framework/utils.py
| ... | ... | @@ -11,6 +11,20 @@ from typing import Any, Dict, List, Sequence, Tuple |
| 11 | 11 | |
| 12 | 12 | from .constants import PROJECT_ROOT |
| 13 | 13 | |
| 14 | +_LABEL_OPTION_MAX_CHARS = 40 | |
| 15 | +_LABEL_DOC_LINE_MAX_CHARS = 260 | |
| 16 | + | |
| 17 | + | |
| 18 | +def _truncate_text(value: Any, max_chars: int) -> str: | |
| 19 | + text = str(value or "").strip() | |
| 20 | + if max_chars <= 0: | |
| 21 | + return "" | |
| 22 | + if len(text) <= max_chars: | |
| 23 | + return text | |
| 24 | + if max_chars <= 3: | |
| 25 | + return text[:max_chars] | |
| 26 | + return text[: max_chars - 3].rstrip() + "..." | |
| 27 | + | |
| 14 | 28 | |
| 15 | 29 | def utc_now_iso() -> str: |
| 16 | 30 | return datetime.now(timezone.utc).isoformat() |
| ... | ... | @@ -67,11 +81,7 @@ def compact_option_values(skus: Sequence[Dict[str, Any]]) -> Tuple[str, str, str |
| 67 | 81 | |
| 68 | 82 | def build_display_title(doc: Dict[str, Any]) -> str: |
| 69 | 83 | title = doc.get("title") |
| 70 | - en = pick_text(title, "en") | |
| 71 | - zh = pick_text(title, "zh") | |
| 72 | - if en and zh and en != zh: | |
| 73 | - return f"{en} / {zh}" | |
| 74 | - return en or zh | |
| 84 | + return pick_text(title, "en") or pick_text(title, "zh") | |
| 75 | 85 | |
| 76 | 86 | |
| 77 | 87 | def build_rerank_doc(doc: Dict[str, Any]) -> str: |
| ... | ... | @@ -82,14 +92,15 @@ def build_rerank_doc(doc: Dict[str, Any]) -> str: |
| 82 | 92 | def build_label_doc_line(idx: int, doc: Dict[str, Any]) -> str: |
| 83 | 93 | title = build_display_title(doc) |
| 84 | 94 | option1, option2, option3 = compact_option_values(doc.get("skus") or []) |
| 85 | - vendor = pick_text(doc.get("vendor"), "en") | |
| 86 | - category = pick_text(doc.get("category_path"), "en") or pick_text(doc.get("category_name"), "en") | |
| 87 | 95 | parts = [title] |
| 88 | 96 | if option1: |
| 89 | - parts.append(f"{option1}") | |
| 97 | + parts.append(_truncate_text(option1, _LABEL_OPTION_MAX_CHARS)) | |
| 90 | 98 | if option2: |
| 91 | - parts.append(f"{option2}") | |
| 92 | - return f"{idx}. " + " ".join(part for part in parts if part) | |
| 99 | + parts.append(_truncate_text(option2, _LABEL_OPTION_MAX_CHARS)) | |
| 100 | + if option3: | |
| 101 | + parts.append(_truncate_text(option3, _LABEL_OPTION_MAX_CHARS)) | |
| 102 | + line = " ".join(part for part in parts if part) | |
| 103 | + return _truncate_text(f"{idx}. {line}", _LABEL_DOC_LINE_MAX_CHARS) | |
| 93 | 104 | |
| 94 | 105 | |
| 95 | 106 | def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]: | ... | ... |
| ... | ... | @@ -0,0 +1,71 @@ |
| 1 | +import pytest | |
| 2 | +import requests | |
| 3 | + | |
| 4 | +from scripts.evaluation.eval_framework.clients import DashScopeLabelClient | |
| 5 | +from scripts.evaluation.eval_framework.utils import build_label_doc_line | |
| 6 | + | |
| 7 | + | |
| 8 | +def _http_error(status_code: int, body: str) -> requests.exceptions.HTTPError: | |
| 9 | + response = requests.Response() | |
| 10 | + response.status_code = status_code | |
| 11 | + response._content = body.encode("utf-8") | |
| 12 | + response.url = "https://dashscope-us.aliyuncs.com/compatible-mode/v1/chat/completions" | |
| 13 | + return requests.exceptions.HTTPError(f"{status_code} error", response=response) | |
| 14 | + | |
| 15 | + | |
| 16 | +def test_build_label_doc_line_truncates_long_fields(): | |
| 17 | + doc = { | |
| 18 | + "title": {"en": "Minimalist Top " * 40}, | |
| 19 | + "skus": [ | |
| 20 | + { | |
| 21 | + "option1_value": "Very Long Color Name " * 10, | |
| 22 | + "option2_value": "Very Long Size Name " * 10, | |
| 23 | + "option3_value": "Very Long Material Name " * 10, | |
| 24 | + } | |
| 25 | + ], | |
| 26 | + } | |
| 27 | + | |
| 28 | + line = build_label_doc_line(1, doc) | |
| 29 | + | |
| 30 | + assert line.startswith("1. ") | |
| 31 | + assert len(line) <= 260 | |
| 32 | + assert line.endswith("...") | |
| 33 | + | |
| 34 | + | |
| 35 | +def test_dashscope_chat_does_not_retry_non_transient_400(monkeypatch): | |
| 36 | + client = DashScopeLabelClient(model="qwen3.5-plus", base_url="https://example.com", api_key="test") | |
| 37 | + client.retry_attempts = 4 | |
| 38 | + client.retry_delay_sec = 0 | |
| 39 | + calls = {"count": 0} | |
| 40 | + | |
| 41 | + def fake_chat_sync(prompt: str): | |
| 42 | + calls["count"] += 1 | |
| 43 | + raise _http_error(400, '{"code":"InvalidParameter"}') | |
| 44 | + | |
| 45 | + monkeypatch.setattr(client, "_chat_sync", fake_chat_sync) | |
| 46 | + | |
| 47 | + with pytest.raises(requests.exceptions.HTTPError): | |
| 48 | + client._chat("prompt", phase="relevance_classify") | |
| 49 | + | |
| 50 | + assert calls["count"] == 1 | |
| 51 | + | |
| 52 | + | |
| 53 | +def test_dashscope_chat_retries_transient_500(monkeypatch): | |
| 54 | + client = DashScopeLabelClient(model="qwen3.5-plus", base_url="https://example.com", api_key="test") | |
| 55 | + client.retry_attempts = 4 | |
| 56 | + client.retry_delay_sec = 0 | |
| 57 | + calls = {"count": 0} | |
| 58 | + | |
| 59 | + def fake_chat_sync(prompt: str): | |
| 60 | + calls["count"] += 1 | |
| 61 | + if calls["count"] < 3: | |
| 62 | + raise _http_error(500, '{"code":"InternalError"}') | |
| 63 | + return "Exact Match", '{"choices":[{"message":{"content":"Exact Match"}}]}' | |
| 64 | + | |
| 65 | + monkeypatch.setattr(client, "_chat_sync", fake_chat_sync) | |
| 66 | + | |
| 67 | + content, raw = client._chat("prompt", phase="relevance_classify") | |
| 68 | + | |
| 69 | + assert content == "Exact Match" | |
| 70 | + assert "Exact Match" in raw | |
| 71 | + assert calls["count"] == 3 | ... | ... |
tests/test_query_parser_mixed_language.py
| ... | ... | @@ -136,20 +136,24 @@ def test_parse_reuses_tokenization_across_tail_stages(monkeypatch): |
| 136 | 136 | assert tokenize_calls == [] |
| 137 | 137 | |
| 138 | 138 | |
| 139 | -def test_parse_fast_path_detects_ascii_query_as_english_without_lingua(monkeypatch): | |
| 139 | +def test_parse_ascii_latin_query_uses_language_detector(monkeypatch): | |
| 140 | 140 | parser = QueryParser(_build_config(), translator=_DummyTranslator(), tokenizer=_tokenizer) |
| 141 | + detector_calls = [] | |
| 142 | + | |
| 141 | 143 | monkeypatch.setattr( |
| 142 | 144 | parser.language_detector, |
| 143 | 145 | "detect", |
| 144 | - lambda text: (_ for _ in ()).throw(AssertionError("Lingua path should not be used")), | |
| 146 | + lambda text: detector_calls.append(text) or "es", | |
| 145 | 147 | ) |
| 146 | 148 | |
| 147 | 149 | result = parser.parse( |
| 148 | - "street t-shirt women", | |
| 150 | + "falda negra oficina", | |
| 149 | 151 | tenant_id="0", |
| 150 | 152 | generate_vector=False, |
| 151 | 153 | target_languages=["en", "zh"], |
| 152 | 154 | ) |
| 153 | 155 | |
| 154 | - assert result.detected_language == "en" | |
| 155 | - assert result.query_tokens == ["street", "t-shirt", "women"] | |
| 156 | + assert detector_calls == ["falda negra oficina"] | |
| 157 | + assert result.detected_language == "es" | |
| 158 | + assert result.translations == {"en": "falda negra oficina-en", "zh": "falda negra oficina-zh"} | |
| 159 | + assert result.query_tokens == ["falda", "negra", "oficina"] | ... | ... |