Commit 822ab0fdd6778ce78b5d727e6753f953c2c59999

Authored by tangwang
1 parent 2059d959

1. product_enrich: prompt optimization for qanchor generation

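2. eval framework: record request_id and response body when LLM calls fail — clients.py: added a request ID extraction helper, included the request ID in the HTTP error description, and log request failures that will not be retried; framework.py: include the LLM request ID in the classify-failure log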
indexer/product_enrich.py
@@ -1003,10 +1003,8 @@ def call_llm( @@ -1003,10 +1003,8 @@ def call_llm(
1003 usage.get("prompt_tokens"), 1003 usage.get("prompt_tokens"),
1004 usage.get("total_tokens"), 1004 usage.get("total_tokens"),
1005 ) 1005 )
1006 - logger.info("\nGenerated Content:\n%s", generated_content)  
1007 logger.info("\nMerged Markdown:\n%s", full_markdown) 1006 logger.info("\nMerged Markdown:\n%s", full_markdown)
1008 1007
1009 - verbose_logger.info(f"\nGenerated Content:\n{generated_content}")  
1010 verbose_logger.info(f"\nMerged Markdown:\n{full_markdown}") 1008 verbose_logger.info(f"\nMerged Markdown:\n{full_markdown}")
1011 1009
1012 return full_markdown, json.dumps(result, ensure_ascii=False) 1010 return full_markdown, json.dumps(result, ensure_ascii=False)
indexer/product_enrich_prompts.py
@@ -3,9 +3,7 @@ @@ -3,9 +3,7 @@
3 from typing import Any, Dict, Tuple 3 from typing import Any, Dict, Tuple
4 4
5 SYSTEM_MESSAGE = ( 5 SYSTEM_MESSAGE = (
6 - "You are an e-commerce product annotator. "  
7 - "Continue the provided assistant Markdown table prefix. "  
8 - "Do not repeat or modify the prefix, and do not add explanations outside the table." 6 + "You are an e-commerce product data annotation assistant. Extract and normalize structured product information from each input product text, and present the results as Markdown table in the specified language."
9 ) 7 )
10 8
11 SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these columns: 9 SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these columns:
@@ -19,13 +17,14 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these @@ -19,13 +17,14 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these
19 7. Key attributes: core product attributes and specifications. Depending on the item type, this may include fit, silhouette, length, sleeve type, neckline, waistline, closure, pattern, design details, structure, or other relevant attribute dimensions 17 7. Key attributes: core product attributes and specifications. Depending on the item type, this may include fit, silhouette, length, sleeve type, neckline, waistline, closure, pattern, design details, structure, or other relevant attribute dimensions
20 8. Material description: material, fabric, texture, or construction description 18 8. Material description: material, fabric, texture, or construction description
21 9. Functional features: practical or performance-related functions such as stretch, breathability, warmth, support, storage, protection, or ease of wear 19 9. Functional features: practical or performance-related functions such as stretch, breathability, warmth, support, storage, protection, or ease of wear
22 -10. Anchor text: a search-oriented keyword string covering product type, category intent, attributes, design cues, usage scenarios, and strong shopping phrases 20 +10. Anchor text: multiple search-oriented qanchor phrases that capture key user intent and demand points for the product.
23 21
24 Rules: 22 Rules:
25 - Keep the input order and row count exactly the same. 23 - Keep the input order and row count exactly the same.
26 - Infer only from the provided input product text; if uncertain, prefer concise and broadly correct ecommerce wording. 24 - Infer only from the provided input product text; if uncertain, prefer concise and broadly correct ecommerce wording.
27 - Keep category paths concise and use ">" as the separator. 25 - Keep category paths concise and use ">" as the separator.
28 - For columns with multiple values, the localized output requirement will define the delimiter. 26 - For columns with multiple values, the localized output requirement will define the delimiter.
  27 +- For each product, the last column (anchor text) should consist of multiple search-oriented qanchor phrases, separated by commas. Each qanchor should be a concise, natural, and user-like search phrase. Collectively, they should comprehensively and precisely cover key aspects such as product type, category intent, core attributes, style features, and usage scenarios. Generate around 3–6 qanchors per product, favoring diverse, natural, search-like phrasing that captures distinct user intents and demand points.
29 28
30 Input product list: 29 Input product list:
31 """ 30 """
scripts/evaluation/eval_framework/clients.py
@@ -82,15 +82,44 @@ def _canonicalize_judge_label(raw: str) -> str | None: @@ -82,15 +82,44 @@ def _canonicalize_judge_label(raw: str) -> str | None:
82 return None 82 return None
83 83
84 84
  85 +def _extract_request_id(resp: requests.Response | None) -> str:
  86 + """Extract provider request id from headers / JSON (minimal common fields)."""
  87 + if resp is None:
  88 + return ""
  89 + for k in ("x-request-id", "x-dashscope-request-id"):
  90 + v = resp.headers.get(k) or resp.headers.get(k.upper())
  91 + if v:
  92 + return str(v).strip()
  93 + try:
  94 + data = resp.json() or {}
  95 + except Exception:
  96 + return ""
  97 + if isinstance(data, dict):
  98 + # OpenAI-like success has `id`; some errors include `request_id`.
  99 + for k in ("request_id", "requestId", "id"):
  100 + v = data.get(k)
  101 + if v:
  102 + return str(v).strip()
  103 + err = data.get("error")
  104 + if isinstance(err, dict):
  105 + for k in ("request_id", "requestId", "id"):
  106 + v = err.get(k)
  107 + if v:
  108 + return str(v).strip()
  109 + return ""
  110 +
  111 +
85 def _describe_request_exception(exc: requests.exceptions.RequestException) -> str: 112 def _describe_request_exception(exc: requests.exceptions.RequestException) -> str:
86 if isinstance(exc, requests.exceptions.HTTPError): 113 if isinstance(exc, requests.exceptions.HTTPError):
87 response = getattr(exc, "response", None) 114 response = getattr(exc, "response", None)
88 if response is None: 115 if response is None:
89 return str(exc) 116 return str(exc)
  117 + rid = _extract_request_id(response)
90 body = str(getattr(response, "text", "") or "").strip() 118 body = str(getattr(response, "text", "") or "").strip()
91 if len(body) > 600: 119 if len(body) > 600:
92 body = body[:600].rstrip() + "...[truncated]" 120 body = body[:600].rstrip() + "...[truncated]"
93 - return f"status={response.status_code} body={body or '<empty>'}" 121 + rid_part = f" request_id={rid}" if rid else ""
  122 + return f"status={response.status_code}{rid_part} body={body or '<empty>'}"
94 return str(exc) 123 return str(exc)
95 124
96 125
@@ -362,6 +391,14 @@ class DashScopeLabelClient: @@ -362,6 +391,14 @@ class DashScopeLabelClient:
362 is_request_error = isinstance(exc, requests.exceptions.RequestException) 391 is_request_error = isinstance(exc, requests.exceptions.RequestException)
363 is_transient = is_request_error and self._is_transient_request_error(exc) 392 is_transient = is_request_error and self._is_transient_request_error(exc)
364 if not is_transient or attempt >= self.retry_attempts: 393 if not is_transient or attempt >= self.retry_attempts:
  394 + if is_request_error:
  395 + _client_log.error(
  396 + "DashScope request failed: phase=%s model=%s use_batch=%s error=%s",
  397 + phase,
  398 + self.model,
  399 + self.use_batch,
  400 + _describe_request_exception(exc),
  401 + )
365 raise 402 raise
366 _client_log.warning( 403 _client_log.warning(
367 "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s", 404 "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s",
scripts/evaluation/eval_framework/framework.py
@@ -408,11 +408,21 @@ class SearchEvaluationFramework: @@ -408,11 +408,21 @@ class SearchEvaluationFramework:
408 query, docs, query_intent_block=intent_block 408 query, docs, query_intent_block=intent_block
409 ) 409 )
410 return [(labels, raw_response, docs)] 410 return [(labels, raw_response, docs)]
411 - except Exception: 411 + except Exception as exc:
  412 + rid = ""
  413 + if isinstance(exc, requests.exceptions.RequestException):
  414 + resp = getattr(exc, "response", None)
  415 + if resp is not None:
  416 + try:
  417 + rid = resp.headers.get("x-request-id") or resp.headers.get("X-Request-Id") or ""
  418 + except Exception:
  419 + rid = ""
  420 + rid_part = f" llm_request_id={rid}" if rid else ""
412 _log.exception( 421 _log.exception(
413 - "[eval-rebuild] classify failed query=%r docs=%s; %s", 422 + "[eval-rebuild] classify failed query=%r docs=%s;%s %s",
414 query, 423 query,
415 len(docs), 424 len(docs),
  425 + rid_part,
416 "splitting batch" if len(docs) > 1 else "single-doc failure", 426 "splitting batch" if len(docs) > 1 else "single-doc failure",
417 ) 427 )
418 if len(docs) == 1: 428 if len(docs) == 1: