From 822ab0fdd6778ce78b5d727e6753f953c2c59999 Mon Sep 17 00:00:00 2001 From: tangwang Date: Fri, 17 Apr 2026 20:36:10 +0800 Subject: [PATCH] 1. product_enrich: prompt optimization for qanchor generation 2. eval framework: record request_id and response body when LLM calls fail — clients.py: added request ID extraction and error description functions --- indexer/product_enrich.py | 2 -- indexer/product_enrich_prompts.py | 7 +++---- scripts/evaluation/eval_framework/clients.py | 39 ++++++++++++++++++++++++++++++++++++++- scripts/evaluation/eval_framework/framework.py | 14 ++++++++++++-- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/indexer/product_enrich.py b/indexer/product_enrich.py index 7dca838..92e42e7 100644 --- a/indexer/product_enrich.py +++ b/indexer/product_enrich.py @@ -1003,10 +1003,8 @@ def call_llm( usage.get("prompt_tokens"), usage.get("total_tokens"), ) - logger.info("\nGenerated Content:\n%s", generated_content) logger.info("\nMerged Markdown:\n%s", full_markdown) - verbose_logger.info(f"\nGenerated Content:\n{generated_content}") verbose_logger.info(f"\nMerged Markdown:\n{full_markdown}") return full_markdown, json.dumps(result, ensure_ascii=False) diff --git a/indexer/product_enrich_prompts.py b/indexer/product_enrich_prompts.py index 6c2d05a..c53ab5b 100644 --- a/indexer/product_enrich_prompts.py +++ b/indexer/product_enrich_prompts.py @@ -3,9 +3,7 @@ from typing import Any, Dict, Tuple SYSTEM_MESSAGE = ( - "You are an e-commerce product annotator. " - "Continue the provided assistant Markdown table prefix. " - "Do not repeat or modify the prefix, and do not add explanations outside the table." + "You are an e-commerce product data annotation assistant. Extract and normalize structured product information from each input product text, and present the results as a Markdown table in the specified language." 
) SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these columns: @@ -19,13 +17,14 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product text and fill these 7. Key attributes: core product attributes and specifications. Depending on the item type, this may include fit, silhouette, length, sleeve type, neckline, waistline, closure, pattern, design details, structure, or other relevant attribute dimensions 8. Material description: material, fabric, texture, or construction description 9. Functional features: practical or performance-related functions such as stretch, breathability, warmth, support, storage, protection, or ease of wear -10. Anchor text: a search-oriented keyword string covering product type, category intent, attributes, design cues, usage scenarios, and strong shopping phrases +10. Anchor text: multiple search-oriented qanchor phrases that capture key user intent and demand points for the product. Rules: - Keep the input order and row count exactly the same. - Infer only from the provided input product text; if uncertain, prefer concise and broadly correct ecommerce wording. - Keep category paths concise and use ">" as the separator. - For columns with multiple values, the localized output requirement will define the delimiter. +- For each product, the last column (anchor text) should consist of multiple search-oriented qanchor phrases, separated by commas. Each qanchor should be a concise, natural, and user-like search phrase. Collectively, they should comprehensively and precisely cover key aspects such as product type, category intent, core attributes, style features, and usage scenarios. Generate around 3–6 qanchors per product, favoring diverse, natural, search-like phrasing that captures distinct user intents and demand points. 
Input product list: """ diff --git a/scripts/evaluation/eval_framework/clients.py b/scripts/evaluation/eval_framework/clients.py index 3ec4056..81331e6 100644 --- a/scripts/evaluation/eval_framework/clients.py +++ b/scripts/evaluation/eval_framework/clients.py @@ -82,15 +82,44 @@ def _canonicalize_judge_label(raw: str) -> str | None: return None +def _extract_request_id(resp: requests.Response | None) -> str: + """Extract provider request id from headers / JSON (minimal common fields).""" + if resp is None: + return "" + for k in ("x-request-id", "x-dashscope-request-id"): + v = resp.headers.get(k) or resp.headers.get(k.upper()) + if v: + return str(v).strip() + try: + data = resp.json() or {} + except Exception: + return "" + if isinstance(data, dict): + # OpenAI-like success has `id`; some errors include `request_id`. + for k in ("request_id", "requestId", "id"): + v = data.get(k) + if v: + return str(v).strip() + err = data.get("error") + if isinstance(err, dict): + for k in ("request_id", "requestId", "id"): + v = err.get(k) + if v: + return str(v).strip() + return "" + + def _describe_request_exception(exc: requests.exceptions.RequestException) -> str: if isinstance(exc, requests.exceptions.HTTPError): response = getattr(exc, "response", None) if response is None: return str(exc) + rid = _extract_request_id(response) body = str(getattr(response, "text", "") or "").strip() if len(body) > 600: body = body[:600].rstrip() + "...[truncated]" - return f"status={response.status_code} body={body or ''}" + rid_part = f" request_id={rid}" if rid else "" + return f"status={response.status_code}{rid_part} body={body or ''}" return str(exc) @@ -362,6 +391,14 @@ class DashScopeLabelClient: is_request_error = isinstance(exc, requests.exceptions.RequestException) is_transient = is_request_error and self._is_transient_request_error(exc) if not is_transient or attempt >= self.retry_attempts: + if is_request_error: + _client_log.error( + "DashScope request failed: phase=%s 
model=%s use_batch=%s error=%s", + phase, + self.model, + self.use_batch, + _describe_request_exception(exc), + ) raise _client_log.warning( "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s", diff --git a/scripts/evaluation/eval_framework/framework.py b/scripts/evaluation/eval_framework/framework.py index 359efa4..9ca44b6 100644 --- a/scripts/evaluation/eval_framework/framework.py +++ b/scripts/evaluation/eval_framework/framework.py @@ -408,11 +408,21 @@ class SearchEvaluationFramework: query, docs, query_intent_block=intent_block ) return [(labels, raw_response, docs)] - except Exception: + except Exception as exc: + rid = "" + if isinstance(exc, requests.exceptions.RequestException): + resp = getattr(exc, "response", None) + if resp is not None: + try: + rid = resp.headers.get("x-request-id") or resp.headers.get("X-Request-Id") or "" + except Exception: + rid = "" + rid_part = f" llm_request_id={rid}" if rid else "" _log.exception( - "[eval-rebuild] classify failed query=%r docs=%s; %s", + "[eval-rebuild] classify failed query=%r docs=%s;%s %s", query, len(docs), + rid_part, "splitting batch" if len(docs) > 1 else "single-doc failure", ) if len(docs) == 1: -- libgit2 0.21.2