Commit 822ab0fdd6778ce78b5d727e6753f953c2c59999

Authored by tangwang
1 parent 2059d959

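1. product_enrich: prompt optimization for qanchor generation (new system message and anchor-text rules); removed the "Generated Content" log lines from call_llm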

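2. eval framework: record request_id and response body when LLM calls fail — clients.py: added a request ID extraction helper, included the request ID in the existing error description, and log the described error when a request fails without retry; framework.py: include the LLM request ID in the classify-failure log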
indexer/product_enrich.py
... ... @@ -1003,10 +1003,8 @@ def call_llm(
1003 1003 usage.get("prompt_tokens"),
1004 1004 usage.get("total_tokens"),
1005 1005 )
1006   - logger.info("\nGenerated Content:\n%s", generated_content)
1007 1006 logger.info("\nMerged Markdown:\n%s", full_markdown)
1008 1007  
1009   - verbose_logger.info(f"\nGenerated Content:\n{generated_content}")
1010 1008 verbose_logger.info(f"\nMerged Markdown:\n{full_markdown}")
1011 1009  
1012 1010 return full_markdown, json.dumps(result, ensure_ascii=False)
... ...
indexer/product_enrich_prompts.py
... ... @@ -3,9 +3,7 @@
3 3 from typing import Any, Dict, Tuple
4 4  
5 5 SYSTEM_MESSAGE = (
6   - "You are an e-commerce product annotator. "
7   - "Continue the provided assistant Markdown table prefix. "
8   - "Do not repeat or modify the prefix, and do not add explanations outside the table."
  6 + "You are an e-commerce product data annotation assistant. Extract and normalize structured product information from each input product text, and present the results as Markdown table in the specified language."
9 7 )
10 8  
11 9 SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these columns:
... ... @@ -19,13 +17,14 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these
19 17 7. Key attributes: core product attributes and specifications. Depending on the item type, this may include fit, silhouette, length, sleeve type, neckline, waistline, closure, pattern, design details, structure, or other relevant attribute dimensions
20 18 8. Material description: material, fabric, texture, or construction description
21 19 9. Functional features: practical or performance-related functions such as stretch, breathability, warmth, support, storage, protection, or ease of wear
22   -10. Anchor text: a search-oriented keyword string covering product type, category intent, attributes, design cues, usage scenarios, and strong shopping phrases
  20 +10. Anchor text: multiple search-oriented qanchor phrases that capture key user intent and demand points for the product.
23 21  
24 22 Rules:
25 23 - Keep the input order and row count exactly the same.
26 24 - Infer only from the provided input product text; if uncertain, prefer concise and broadly correct ecommerce wording.
27 25 - Keep category paths concise and use ">" as the separator.
28 26 - For columns with multiple values, the localized output requirement will define the delimiter.
  27 +- For each product, the last column (anchor text) should consist of multiple search-oriented qanchor phrases, separated by commas. Each qanchor should be a concise, natural, and user-like search phrase. Collectively, they should comprehensively and precisely cover key aspects such as product type, category intent, core attributes, style features, and usage scenarios. Generate around 3–6 qanchors per product, favoring diverse, natural, search-like phrasing that captures distinct user intents and demand points.
29 28  
30 29 Input product list:
31 30 """
... ...
scripts/evaluation/eval_framework/clients.py
... ... @@ -82,15 +82,44 @@ def _canonicalize_judge_label(raw: str) -> str | None:
82 82 return None
83 83  
84 84  
  85 +def _extract_request_id(resp: requests.Response | None) -> str:
  86 + """Extract provider request id from headers / JSON (minimal common fields)."""
  87 + if resp is None:
  88 + return ""
  89 + for k in ("x-request-id", "x-dashscope-request-id"):
  90 + v = resp.headers.get(k) or resp.headers.get(k.upper())
  91 + if v:
  92 + return str(v).strip()
  93 + try:
  94 + data = resp.json() or {}
  95 + except Exception:
  96 + return ""
  97 + if isinstance(data, dict):
  98 + # OpenAI-like success has `id`; some errors include `request_id`.
  99 + for k in ("request_id", "requestId", "id"):
  100 + v = data.get(k)
  101 + if v:
  102 + return str(v).strip()
  103 + err = data.get("error")
  104 + if isinstance(err, dict):
  105 + for k in ("request_id", "requestId", "id"):
  106 + v = err.get(k)
  107 + if v:
  108 + return str(v).strip()
  109 + return ""
  110 +
  111 +
85 112 def _describe_request_exception(exc: requests.exceptions.RequestException) -> str:
86 113 if isinstance(exc, requests.exceptions.HTTPError):
87 114 response = getattr(exc, "response", None)
88 115 if response is None:
89 116 return str(exc)
  117 + rid = _extract_request_id(response)
90 118 body = str(getattr(response, "text", "") or "").strip()
91 119 if len(body) > 600:
92 120 body = body[:600].rstrip() + "...[truncated]"
93   - return f"status={response.status_code} body={body or '<empty>'}"
  121 + rid_part = f" request_id={rid}" if rid else ""
  122 + return f"status={response.status_code}{rid_part} body={body or '<empty>'}"
94 123 return str(exc)
95 124  
96 125  
... ... @@ -362,6 +391,14 @@ class DashScopeLabelClient:
362 391 is_request_error = isinstance(exc, requests.exceptions.RequestException)
363 392 is_transient = is_request_error and self._is_transient_request_error(exc)
364 393 if not is_transient or attempt >= self.retry_attempts:
  394 + if is_request_error:
  395 + _client_log.error(
  396 + "DashScope request failed: phase=%s model=%s use_batch=%s error=%s",
  397 + phase,
  398 + self.model,
  399 + self.use_batch,
  400 + _describe_request_exception(exc),
  401 + )
365 402 raise
366 403 _client_log.warning(
367 404 "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s",
... ...
scripts/evaluation/eval_framework/framework.py
... ... @@ -408,11 +408,21 @@ class SearchEvaluationFramework:
408 408 query, docs, query_intent_block=intent_block
409 409 )
410 410 return [(labels, raw_response, docs)]
411   - except Exception:
  411 + except Exception as exc:
  412 + rid = ""
  413 + if isinstance(exc, requests.exceptions.RequestException):
  414 + resp = getattr(exc, "response", None)
  415 + if resp is not None:
  416 + try:
  417 + rid = resp.headers.get("x-request-id") or resp.headers.get("X-Request-Id") or ""
  418 + except Exception:
  419 + rid = ""
  420 + rid_part = f" llm_request_id={rid}" if rid else ""
412 421 _log.exception(
413   - "[eval-rebuild] classify failed query=%r docs=%s; %s",
  422 + "[eval-rebuild] classify failed query=%r docs=%s;%s %s",
414 423 query,
415 424 len(docs),
  425 + rid_part,
416 426 "splitting batch" if len(docs) > 1 else "single-doc failure",
417 427 )
418 428 if len(docs) == 1:
... ...