1. product_enrich: prompt optimization for qanchor generation

2. eval framework: record request_id and response body when LLM calls fail — clients.py: added request ID extraction and error description functions

1. product_enrich: prompt optimization for qanchor generation
2. eval framework: record request_id and response body when LLM calls fail — clients.py: added request ID extraction and error description functions
tangwang
1 parent 2059d959
Showing 4 changed files with 53 additions and 9 deletions Show diff stats
indexer/product_enrich.py
indexer/product_enrich_prompts.py
scripts/evaluation/eval_framework/clients.py
scripts/evaluation/eval_framework/framework.py
@@ -1003,10 +1003,8 @@ def call_llm(
                     usage.get("prompt_tokens"),
                     usage.get("total_tokens"),
                 )
-                logger.info("\nGenerated Content:\n%s", generated_content)
                 logger.info("\nMerged Markdown:\n%s", full_markdown)
-                verbose_logger.info(f"\nGenerated Content:\n{generated_content}")
                 verbose_logger.info(f"\nMerged Markdown:\n{full_markdown}")
                 return full_markdown, json.dumps(result, ensure_ascii=False)
@@ -3,9 +3,7 @@
 from typing import Any, Dict, Tuple
 SYSTEM_MESSAGE = (
-    "You are an e-commerce product annotator. "
-    "Continue the provided assistant Markdown table prefix. "
-    "Do not repeat or modify the prefix, and do not add explanations outside the table."
+    "You are an e-commerce product data annotation assistant. Extract and normalize structured product information from each input product text, and present the results as Markdown table in the specified language."
 )
 SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these columns:
@@ -19,13 +17,14 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these
 7. Key attributes: core product attributes and specifications. Depending on the item type, this may include fit, silhouette, length, sleeve type, neckline, waistline, closure, pattern, design details, structure, or other relevant attribute dimensions
 8. Material description: material, fabric, texture, or construction description
 9. Functional features: practical or performance-related functions such as stretch, breathability, warmth, support, storage, protection, or ease of wear
-10. Anchor text: a search-oriented keyword string covering product type, category intent, attributes, design cues, usage scenarios, and strong shopping phrases
+10. Anchor text: multiple search-oriented qanchor phrases that capture key user intent and demand points for the product.
 Rules:
 - Keep the input order and row count exactly the same.
 - Infer only from the provided input product text; if uncertain, prefer concise and broadly correct ecommerce wording.
 - Keep category paths concise and use ">" as the separator.
 - For columns with multiple values, the localized output requirement will define the delimiter.
+- For each product, the last column (anchor text) should consist of multiple search-oriented qanchor phrases, separated by commas. Each qanchor should be a concise, natural, and user-like search phrase. Collectively, they should comprehensively and precisely cover key aspects such as product type, category intent, core attributes, style features, and usage scenarios. Generate around 3–6 qanchors per product, favoring diverse, natural, search-like phrasing that captures distinct user intents and demand points.
 Input product list:
 """
@@ -82,15 +82,44 @@ def _canonicalize_judge_label(raw: str) -> str | None:
     return None
+def _extract_request_id(resp: requests.Response | None) -> str:
+    """Extract provider request id from headers / JSON (minimal common fields)."""
+    if resp is None:
+        return ""
+    for k in ("x-request-id", "x-dashscope-request-id"):
+        v = resp.headers.get(k) or resp.headers.get(k.upper())
+        if v:
+            return str(v).strip()
+    try:
+        data = resp.json() or {}
+    except Exception:
+        return ""
+    if isinstance(data, dict):
+        # OpenAI-like success has `id`; some errors include `request_id`.
+        for k in ("request_id", "requestId", "id"):
+            v = data.get(k)
+            if v:
+                return str(v).strip()
+        err = data.get("error")
+        if isinstance(err, dict):
+            for k in ("request_id", "requestId", "id"):
+                v = err.get(k)
+                if v:
+                    return str(v).strip()
+    return ""
+
+
 def _describe_request_exception(exc: requests.exceptions.RequestException) -> str:
     if isinstance(exc, requests.exceptions.HTTPError):
         response = getattr(exc, "response", None)
         if response is None:
             return str(exc)
+        rid = _extract_request_id(response)
         body = str(getattr(response, "text", "") or "").strip()
         if len(body) > 600:
             body = body[:600].rstrip() + "...[truncated]"
-        return f"status={response.status_code} body={body or '<empty>'}"
+        rid_part = f" request_id={rid}" if rid else ""
+        return f"status={response.status_code}{rid_part} body={body or '<empty>'}"
     return str(exc)
@@ -362,6 +391,14 @@ class DashScopeLabelClient:
                 is_request_error = isinstance(exc, requests.exceptions.RequestException)
                 is_transient = is_request_error and self._is_transient_request_error(exc)
                 if not is_transient or attempt >= self.retry_attempts:
+                    if is_request_error:
+                        _client_log.error(
+                            "DashScope request failed: phase=%s model=%s use_batch=%s error=%s",
+                            phase,
+                            self.model,
+                            self.use_batch,
+                            _describe_request_exception(exc),
+                        )
                     raise
                 _client_log.warning(
                     "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s",
@@ -408,11 +408,21 @@ class SearchEvaluationFramework:
                 query, docs, query_intent_block=intent_block
             )
             return [(labels, raw_response, docs)]
-        except Exception:
+        except Exception as exc:
+            rid = ""
+            if isinstance(exc, requests.exceptions.RequestException):
+                resp = getattr(exc, "response", None)
+                if resp is not None:
+                    try:
+                        rid = resp.headers.get("x-request-id") or resp.headers.get("X-Request-Id") or ""
+                    except Exception:
+                        rid = ""
+            rid_part = f" llm_request_id={rid}" if rid else ""
             _log.exception(
-                "[eval-rebuild] classify failed query=%r docs=%s; %s",
+                "[eval-rebuild] classify failed query=%r docs=%s;%s %s",
                 query,
                 len(docs),
+                rid_part,
                 "splitting batch" if len(docs) > 1 else "single-doc failure",
             )
             if len(docs) == 1: