Commit 5e3d6d3a614a63e5b56a840fa1655a1e91164ce9

Authored by tangwang
1 parent 825828c4

refactor(search): 简化质量评估、英文标签、quality_summary 由 LLM 产出

## 搜索工具与质量评估
- _assess_search_quality 仅返回 (labels, quality_summary):去掉 verdict(优质/一般/较差)及依赖逻辑;prompt 要求 LLM 输出 labels + quality_summary(1–2 句:结果主要包含什么、是否基本满足意图、匹配度)。
- 工具返回格式统一为:【搜索完成】query='...' + 结果引用 [SEARCH_REF:ref_id] + 搜索结果质量情况(评估总条数、Highly Relevant / Partially Relevant 条数)+ results list(top10 标题)。
- 精简 prompt 与日志:评估输入仅保留序号+标题;删除 verdict_hint、逐条 SEARCH_RESULT_ITEM/SEARCH_RESULT_PRODUCT 日志,保留单行注册日志。

## 三级标签改为英文
- 完美匹配 → Highly Relevant;部分匹配 → Partially Relevant;不相关 → Not Relevant。
- 全量替换:search_tools(prompt、valid、统计与过滤)、search_registry(ProductItem.match_label 默认及注释、SearchResult 注释)、app.py(卡片 label_style、结果块头部与筛选逻辑)。

## Registry 与 UI
- SearchResult 移除 quality_verdict 字段;quality_summary 由 _assess_search_quality 的 LLM 返回写入。
- 结果块头部不再展示 verdict 图标/文案,改为展示 query + Highly/Partially Relevant 件数 + quality_summary(若有)。

## Agent
- 系统提示词调整:角色与原则、价值提供与信息收集、search_products 与 [SEARCH_REF:xxx] 使用说明。

Co-authored-by: Cursor <cursoragent@cursor.com>
@@ -321,7 +321,7 @@ def display_product_card_from_item(product: ProductItem) -> None: @@ -321,7 +321,7 @@ def display_product_card_from_item(product: ProductItem) -> None:
321 if product.price is not None: 321 if product.price is not None:
322 st.caption(f"¥{product.price:.2f}") 322 st.caption(f"¥{product.price:.2f}")
323 323
324 - label_style = "⭐" if product.match_label == "完美匹配" else "✦" 324 + label_style = "⭐" if product.match_label == "Highly Relevant" else "✦"
325 st.caption(f"{label_style} {product.match_label}") 325 st.caption(f"{label_style} {product.match_label}")
326 326
327 327
@@ -330,25 +330,25 @@ def render_search_result_block(result: SearchResult) -> None: @@ -330,25 +330,25 @@ def render_search_result_block(result: SearchResult) -> None:
330 Render a full search result block in place of a [SEARCH_REF:xxx] token. 330 Render a full search result block in place of a [SEARCH_REF:xxx] token.
331 331
332 Shows: 332 Shows:
333 - - A styled header with query text + quality verdict + match counts  
334 - - A grid of product cards (perfect matches first, then partial; max 6) 333 + - A styled header with query + match counts + quality_summary (if any)
  334 + - A grid of product cards (Highly Relevant first, then Partially Relevant; max 6)
335 """ 335 """
336 - verdict_icon = {"优质": "✅", "一般": "〰️", "较差": "⚠️"}.get(result.quality_verdict, "🔍") 336 + summary_line = f' &nbsp;·&nbsp;{result.quality_summary}' if result.quality_summary else ''
337 header_html = ( 337 header_html = (
338 f'<div style="border:1px solid #e0e0e0;border-radius:8px;padding:10px 14px;' 338 f'<div style="border:1px solid #e0e0e0;border-radius:8px;padding:10px 14px;'
339 f'margin:8px 0 4px 0;background:#fafafa;">' 339 f'margin:8px 0 4px 0;background:#fafafa;">'
340 f'<span style="font-size:0.8rem;color:#555;">' 340 f'<span style="font-size:0.8rem;color:#555;">'
341 f'🔍 <b>{result.query}</b>' 341 f'🔍 <b>{result.query}</b>'
342 - f'&nbsp;&nbsp;{verdict_icon} {result.quality_verdict}'  
343 - f'&nbsp;·&nbsp;完美匹配&nbsp;{result.perfect_count}&nbsp;件'  
344 - f'&nbsp;·&nbsp;相关&nbsp;{result.partial_count}&nbsp;件' 342 + f'&nbsp;·&nbsp;Highly Relevant&nbsp;{result.perfect_count}&nbsp;件'
  343 + f'&nbsp;·&nbsp;Partially Relevant&nbsp;{result.partial_count}&nbsp;件'
  344 + f'{summary_line}'
345 f'</span></div>' 345 f'</span></div>'
346 ) 346 )
347 st.markdown(header_html, unsafe_allow_html=True) 347 st.markdown(header_html, unsafe_allow_html=True)
348 348
349 # Perfect matches first, fall back to partials if none 349 # Perfect matches first, fall back to partials if none
350 - perfect = [p for p in result.products if p.match_label == "完美匹配"]  
351 - partial = [p for p in result.products if p.match_label == "部分匹配"] 350 + perfect = [p for p in result.products if p.match_label == "Highly Relevant"]
  351 + partial = [p for p in result.products if p.match_label == "Partially Relevant"]
352 to_show = (perfect + partial)[:6] if perfect else partial[:6] 352 to_show = (perfect + partial)[:6] if perfect else partial[:6]
353 353
354 if not to_show: 354 if not to_show:
app/agents/shopping_agent.py
@@ -33,26 +33,21 @@ logger = logging.getLogger(__name__) @@ -33,26 +33,21 @@ logger = logging.getLogger(__name__)
33 # 1. Guides multi-query search planning with explicit evaluate-and-decide loop 33 # 1. Guides multi-query search planning with explicit evaluate-and-decide loop
34 # 2. Forbids re-listing product details in the final response 34 # 2. Forbids re-listing product details in the final response
35 # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism 35 # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism
36 -SYSTEM_PROMPT = """角色定义  
37 -你是一名专业的服装电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。  
38 -  
39 -一些原则:  
40 -1. 你是一个真人导购,是一个贴心、专业的销售,保持灵活,根据上下文,基于常识灵活的切换策略,在合适的上下文询问合适的问题、给出有价值的方案和搜索结果的呈现。  
41 -2. 商品搜索结果推荐与信息收集:  
42 - 1. 根据上下文、用户诉求,灵活的切换侧重点,何时需要进行搜索、何时要引导客户完善需求,你需要站在用户角度进行思考。比如已经有较为清晰的意图,则以搜索、方案推荐为主,有必要的时候,思考该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。如果存在重大的需求方向缺口,主动通过1-2个关键问题进行引导,并提供初步方向。  
43 - 2. 适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的帮助用户更清晰的呈现需求、提高商品发现的效率,形成“提供-反馈”的良性循环。  
44 - 3. 对于复杂需求时,要能基于上下文,将导购任务进行合理拆解。  
45 -3. 引导或者收集需求时,需要站在用户立场,比如询问用户期待的效果或感觉、使用的场合、偏好的风格等用户立场需,而不是询问具体的款式或参数,你需要将用户立场的需求理解/翻译/转化为具体的搜索计划,最后筛选产品、结合需求+结果特性组织推荐理由、呈现方案。  
46 -4. 如何使用search_products:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。每次调用 search_products 后,工具会返回以下内容,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。可以进行多轮搜索,但是要适时的总结和反馈信息避免用户等待过长时间:  
47 - - 各层级数量:完美匹配 / 部分匹配 / 不相关 的条数  
48 - - 整体质量判断:优质 / 一般 / 较差  
49 - - 简短质量说明  
50 - - 结果引用标识:[SEARCH_REF:xxx]  
51 -5. 撰写最终回复的时候,使用 [SEARCH_REF:xxx] 内联引用  
52 - 1. 用自然流畅的语言组织回复,将 [SEARCH_REF:xxx] 嵌入叙述中  
53 - 2. 系统会自动在 [SEARCH_REF:xxx] 位置渲染对应的商品卡片列表  
54 - 3. 禁止在回复文本中列出商品名称、ID、价格、分类、规格等字段  
55 - 4. 禁止用编号列表逐条复述搜索结果中的商品 36 +SYSTEM_PROMPT = """ 角色定义
  37 + 你是我们店铺的一名专业的电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。
  38 + 作为我们店铺的一名专业的销售,除了本店铺的商品的推荐,你可以给用户提供有帮助的信息,但是不要虚构商品、提供本商店搜索结果以外的商品。
  39 +
  40 + 一些原则:
  41 + 1. 价值提供与信息收集的原则:
  42 + 1. 优先价值提供:适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的澄清需求、提高商品发现的效率,形成“提供-反馈”的良性循环。
  43 + 2. 缺口大(比如品类或者使用人群都不能确定)→ 给出方案推荐 + 1-2个关键问题让用户选择;缺口小→直接检索+方案呈现,根据情况,可以考虑该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。
  44 + 3. 选项驱动式澄清:推荐几个清晰的方向,呈现方案或商品搜索结果,再做澄清
  45 + 4. 单轮对话最好只提一个问题,最多两个,禁止多问题堆叠。
  46 + 5. 站在用户立场思考:比如询问用户期待的效果或感觉、使用的场合、想解决的问题,而不是询问具体的款式、参数,你需要将用户表达的需求翻译为具体可检索的商品特征(版型、材质、设计元素、风格标签等),并据此筛选商品、组织推荐逻辑。
  47 + 2. 如何使用make_search_products_tool:
  48 + 1. 可以生成多个query进行搜索:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。
  49 + 2. 可以根据搜索结果调整搜索策略:每次调用 search_products 后,工具会返回搜索结果的相关性的判断、以及搜索结果的topN的title,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。
  50 + 3. 使用 [SEARCH_REF:xxx] 内联引用搜索结果:搜索工具会返回一个结果引用标识[SEARCH_REF:xxx],撰写最终答复的时候可以直接引用将 [SEARCH_REF:xxx] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。
56 """ 51 """
57 52
58 53
app/search_registry.py
@@ -27,8 +27,8 @@ class ProductItem: @@ -27,8 +27,8 @@ class ProductItem:
27 vendor: Optional[str] = None 27 vendor: Optional[str] = None
28 image_url: Optional[str] = None 28 image_url: Optional[str] = None
29 relevance_score: Optional[float] = None 29 relevance_score: Optional[float] = None
30 - # LLM-assigned label: "完美匹配" | "部分匹配" | "不相关"  
31 - match_label: str = "部分匹配" 30 + # LLM-assigned label: "Highly Relevant" | "Partially Relevant" | "Not Relevant"
  31 + match_label: str = "Partially Relevant"
32 tags: list = field(default_factory=list) 32 tags: list = field(default_factory=list)
33 specifications: list = field(default_factory=list) 33 specifications: list = field(default_factory=list)
34 34
@@ -40,7 +40,7 @@ class SearchResult: @@ -40,7 +40,7 @@ class SearchResult:
40 40
41 Identified by ref_id (e.g. 'sr_3f9a1b2c'). 41 Identified by ref_id (e.g. 'sr_3f9a1b2c').
42 Stores the query, LLM quality assessment, and the curated product list 42 Stores the query, LLM quality assessment, and the curated product list
43 - (only "完美匹配" and "部分匹配" items — "不相关" are discarded). 43 + (only "Highly Relevant" and "Partially Relevant" items — "Not Relevant" are discarded).
44 """ 44 """
45 45
46 ref_id: str 46 ref_id: str
@@ -55,9 +55,8 @@ class SearchResult: @@ -55,9 +55,8 @@ class SearchResult:
55 partial_count: int 55 partial_count: int
56 irrelevant_count: int 56 irrelevant_count: int
57 57
58 - # LLM overall quality verdict  
59 - quality_verdict: str # "优质" | "一般" | "较差"  
60 - quality_summary: str # one-sentence LLM explanation 58 + # LLM-written short summary: what the results mainly contain, whether they meet intent, match degree
  59 + quality_summary: str
61 60
62 # Curated product list (perfect + partial only) 61 # Curated product list (perfect + partial only)
63 products: list # list[ProductItem] 62 products: list # list[ProductItem]
app/tools/search_tools.py
1 """ 1 """
2 Search Tools for Product Discovery 2 Search Tools for Product Discovery
3 3
4 -Key design:  
5 -- search_products is created via a factory (make_search_products_tool) that  
6 - closes over (session_id, registry), so each agent session has its own tool  
7 - instance pointing to the shared registry.  
8 -- After calling the search API, an LLM quality-assessment step labels every  
9 - result as 完美匹配 / 部分匹配 / 不相关 and produces an overall verdict.  
10 -- The curated product list is stored in the registry under a unique ref_id.  
11 -- The tool returns ONLY the quality summary + [SEARCH_REF:ref_id], never the  
12 - raw product list. The LLM references the result in its final response via  
13 - the [SEARCH_REF:...] token; the UI renders the product cards from the registry. 4 +- search_products is created via make_search_products_tool(session_id, registry).
  5 +- After search API, an LLM labels each result as Highly Relevant / Partially Relevant / Not Relevant; we count and
  6 + store the curated list in the registry, return [SEARCH_REF:ref_id] + quality counts + top10 titles.
14 """ 7 """
15 8
16 import base64 9 import base64
@@ -65,94 +58,61 @@ def get_openai_client() -> OpenAI: @@ -65,94 +58,61 @@ def get_openai_client() -> OpenAI:
65 58
66 # ── LLM quality assessment ───────────────────────────────────────────────────── 59 # ── LLM quality assessment ─────────────────────────────────────────────────────
67 60
68 -def _assess_search_quality(  
69 - query: str,  
70 - raw_products: list,  
71 -) -> tuple[list[str], str, str]: 61 +def _assess_search_quality(query: str, raw_products: list) -> tuple[list[str], str]:
72 """ 62 """
73 - Ask the LLM to evaluate how well each search result matches the query.  
74 -  
75 - Returns:  
76 - labels – list[str], one per product: "完美匹配" | "部分匹配" | "不相关"  
77 - verdict – str: "优质" | "一般" | "较差"  
78 - summary – str: one-sentence explanation 63 + Use LLM to label each search result and write a short quality_summary.
  64 + Returns (labels, quality_summary). labels: one per product; quality_summary: 1–2 sentences.
79 """ 65 """
80 n = len(raw_products) 66 n = len(raw_products)
81 if n == 0: 67 if n == 0:
82 - return [], "较差", "搜索未返回任何商品。" 68 + return [], ""
83 69
84 - # Build a compact product list — only title/category/tags/score to save tokens  
85 - lines: list[str] = [] 70 + lines = []
86 for i, p in enumerate(raw_products, 1): 71 for i, p in enumerate(raw_products, 1):
87 title = (p.get("title") or "")[:60] 72 title = (p.get("title") or "")[:60]
88 - cat = p.get("category_path") or p.get("category_name") or ""  
89 - tags_raw = p.get("tags") or []  
90 - tags = ", ".join(str(t) for t in tags_raw[:5])  
91 - score = p.get("relevance_score") or 0  
92 - row = f"{i}. [{score:.1f}] {title} | {cat}"  
93 - if tags:  
94 - row += f" | 标签:{tags}"  
95 - lines.append(row)  
96 - 73 + lines.append(f"{i}. {title}")
97 product_text = "\n".join(lines) 74 product_text = "\n".join(lines)
98 75
99 - prompt = f"""你是商品搜索质量评估专家。请评估以下搜索结果与用户查询的匹配程度。 76 + prompt = f"""评估以下搜索结果与用户查询的匹配程度,完成两件事:
  77 +1. 为每条结果打一个等级:Highly Relevant / Partially Relevant / Not Relevant。
  78 +2. 写一段 quality_summary(1–2 句话):简要说明搜索结果主要包含哪些商品、是否基本满足搜索意图、整体匹配度如何。
100 79
101 用户查询:{query} 80 用户查询:{query}
102 81
103 -搜索结果(共 {n} 条,格式:序号. [相关性分数] 标题 | 分类 | 标签): 82 +搜索结果(共 {n} 条):
104 {product_text} 83 {product_text}
105 84
106 -评估说明:  
107 -- 完美匹配:完全符合用户查询意图,用户必然感兴趣  
108 -- 部分匹配:与查询有关联,但不完全满足意图(如品类对但风格偏差、相关配件等)  
109 -- 不相关:与查询无关,不应展示给用户  
110 -  
111 -整体 verdict 判断标准:  
112 -- 优质:完美匹配 ≥ 5 条  
113 -- 一般:完美匹配 2-4 条  
114 -- 较差:完美匹配 < 2 条  
115 -  
116 -请严格按以下 JSON 格式输出,不得有任何额外文字或代码块标记:  
117 -{{"labels": ["完美匹配", "部分匹配", "不相关", ...], "verdict": "优质", "summary": "一句话评价搜索质量"}} 85 +等级说明:Highly Relevant=完全符合查询意图;Partially Relevant=基本相关(如品类等主需求匹配但部分属性不完全符合);Not Relevant=不相关。
118 86
119 -labels 数组长度必须恰好等于 {n}。""" 87 +请严格按以下 JSON 输出,仅输出 JSON,无其他内容:
  88 +{{"labels": ["Highly Relevant", "Partially Relevant", "Not Relevant", ...], "quality_summary": "你的1-2句总结"}}
  89 +labels 数组长度必须等于 {n}。"""
120 90
121 try: 91 try:
122 client = get_openai_client() 92 client = get_openai_client()
123 resp = client.chat.completions.create( 93 resp = client.chat.completions.create(
124 model=settings.openai_model, 94 model=settings.openai_model,
125 messages=[{"role": "user", "content": prompt}], 95 messages=[{"role": "user", "content": prompt}],
126 - max_tokens=800, 96 + max_tokens=700,
127 temperature=0.1, 97 temperature=0.1,
128 ) 98 )
129 raw = resp.choices[0].message.content.strip() 99 raw = resp.choices[0].message.content.strip()
130 - # Strip markdown code fences if the model adds them  
131 if raw.startswith("```"): 100 if raw.startswith("```"):
132 raw = raw.split("```")[1] 101 raw = raw.split("```")[1]
133 if raw.startswith("json"): 102 if raw.startswith("json"):
134 raw = raw[4:] 103 raw = raw[4:]
135 raw = raw.strip() 104 raw = raw.strip()
136 -  
137 data = json.loads(raw) 105 data = json.loads(raw)
138 - labels: list[str] = data.get("labels", [])  
139 -  
140 - # Normalize and pad / trim to match n  
141 - valid = {"完美匹配", "部分匹配", "不相关"}  
142 - labels = [l if l in valid else "部分匹配" for l in labels] 106 + labels = data.get("labels", [])
  107 + valid = {"Highly Relevant", "Partially Relevant", "Not Relevant"}
  108 + labels = [l if l in valid else "Partially Relevant" for l in labels]
143 while len(labels) < n: 109 while len(labels) < n:
144 - labels.append("部分匹配")  
145 - labels = labels[:n]  
146 -  
147 - verdict: str = data.get("verdict", "一般")  
148 - if verdict not in ("优质", "一般", "较差"):  
149 - verdict = "一般"  
150 - summary: str = str(data.get("summary", ""))  
151 - return labels, verdict, summary  
152 - 110 + labels.append("Partially Relevant")
  111 + quality_summary = (data.get("quality_summary") or "").strip() or ""
  112 + return labels[:n], quality_summary
153 except Exception as e: 113 except Exception as e:
154 - logger.warning(f"Quality assessment LLM call failed: {e}; using fallback labels.")  
155 - return ["部分匹配"] * n, "一般", "质量评估步骤失败,结果仅供参考。" 114 + logger.warning(f"Quality assessment failed: {e}; using fallback.")
  115 + return ["Partially Relevant"] * n, ""
156 116
157 117
158 # ── Tool factory ─────────────────────────────────────────────────────────────── 118 # ── Tool factory ───────────────────────────────────────────────────────────────
@@ -169,22 +129,18 @@ def make_search_products_tool( @@ -169,22 +129,18 @@ def make_search_products_tool(
169 2. Runs LLM quality assessment on up to 20 results. 129 2. Runs LLM quality assessment on up to 20 results.
170 3. Stores a SearchResult in the registry. 130 3. Stores a SearchResult in the registry.
171 4. Returns a concise quality summary + [SEARCH_REF:ref_id]. 131 4. Returns a concise quality summary + [SEARCH_REF:ref_id].
172 - The product list is NEVER returned in the tool output text.  
173 """ 132 """
174 133
175 @tool 134 @tool
176 def search_products(query: str, limit: int = 20) -> str: 135 def search_products(query: str, limit: int = 20) -> str:
177 - """搜索商品库,根据自然语言描述找到匹配商品,并进行质量评估。  
178 -  
179 - 每次调用专注于单一搜索角度。复杂需求请拆分为多次调用,每次换一个 query。  
180 - 工具会自动评估结果质量(完美匹配 / 部分匹配 / 不相关),并给出整体判断。 136 + """搜索商品库并做质量评估:LLM 为每条结果打等级(Highly Relevant / Partially Relevant / Not Relevant),返回引用与 top10 标题。
181 137
182 Args: 138 Args:
183 - query: 自然语言商品描述,例如"男士休闲亚麻短裤夏季"  
184 - limit: 最多返回条数(建议 10-20,越多评估越全面) 139 + query: 自然语言商品描述
  140 + limit: 最多返回条数(1-20)
185 141
186 Returns: 142 Returns:
187 - 质量评估摘要 + [SEARCH_REF:ref_id],供最终回复引用。 143 + 【搜索完成】+ 结果引用 [SEARCH_REF:ref_id] + 质量情况(评估条数、Highly/Partially Relevant 数)+ results list(top10 标题)
188 """ 144 """
189 try: 145 try:
190 logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}") 146 logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}")
@@ -199,6 +155,9 @@ def make_search_products_tool( @@ -199,6 +155,9 @@ def make_search_products_tool(
199 "size": min(max(limit, 1), 20), 155 "size": min(max(limit, 1), 20),
200 "from": 0, 156 "from": 0,
201 "language": "zh", 157 "language": "zh",
  158 + "enable_rerank": True,
  159 + "rerank_query_template": query,
  160 + "rerank_doc_template": "{title}",
202 } 161 }
203 162
204 resp = requests.post(url, json=payload, headers=headers, timeout=60) 163 resp = requests.post(url, json=payload, headers=headers, timeout=60)
@@ -216,40 +175,32 @@ def make_search_products_tool( @@ -216,40 +175,32 @@ def make_search_products_tool(
216 "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。" 175 "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。"
217 ) 176 )
218 177
219 - # ── LLM quality assessment ──────────────────────────────────────  
220 - labels, verdict, quality_summary = _assess_search_quality(query, raw_results) 178 + labels, quality_summary = _assess_search_quality(query, raw_results)
  179 + perfect_count = sum(1 for l in labels if l == "Highly Relevant")
  180 + partial_count = sum(1 for l in labels if l == "Partially Relevant")
  181 + irrelevant_count = len(labels) - perfect_count - partial_count
221 182
222 - # ── Build ProductItem list (keep perfect + partial, discard irrelevant) ──  
223 products: list[ProductItem] = [] 183 products: list[ProductItem] = []
224 - perfect_count = partial_count = irrelevant_count = 0  
225 -  
226 for raw, label in zip(raw_results, labels): 184 for raw, label in zip(raw_results, labels):
227 - if label == "完美匹配":  
228 - perfect_count += 1  
229 - elif label == "部分匹配":  
230 - partial_count += 1  
231 - else:  
232 - irrelevant_count += 1  
233 -  
234 - if label in ("完美匹配", "部分匹配"):  
235 - products.append(  
236 - ProductItem(  
237 - spu_id=str(raw.get("spu_id", "")),  
238 - title=raw.get("title") or "",  
239 - price=raw.get("price"),  
240 - category_path=(  
241 - raw.get("category_path") or raw.get("category_name")  
242 - ),  
243 - vendor=raw.get("vendor"),  
244 - image_url=_normalize_image_url(raw.get("image_url")),  
245 - relevance_score=raw.get("relevance_score"),  
246 - match_label=label,  
247 - tags=raw.get("tags") or [],  
248 - specifications=raw.get("specifications") or [],  
249 - ) 185 + if label not in ("Highly Relevant", "Partially Relevant"):
  186 + continue
  187 + products.append(
  188 + ProductItem(
  189 + spu_id=str(raw.get("spu_id", "")),
  190 + title=raw.get("title") or "",
  191 + price=raw.get("price"),
  192 + category_path=(
  193 + raw.get("category_path") or raw.get("category_name")
  194 + ),
  195 + vendor=raw.get("vendor"),
  196 + image_url=_normalize_image_url(raw.get("image_url")),
  197 + relevance_score=raw.get("relevance_score"),
  198 + match_label=label,
  199 + tags=raw.get("tags") or [],
  200 + specifications=raw.get("specifications") or [],
250 ) 201 )
  202 + )
251 203
252 - # ── Register ────────────────────────────────────────────────────  
253 ref_id = new_ref_id() 204 ref_id = new_ref_id()
254 result = SearchResult( 205 result = SearchResult(
255 ref_id=ref_id, 206 ref_id=ref_id,
@@ -259,65 +210,27 @@ def make_search_products_tool( @@ -259,65 +210,27 @@ def make_search_products_tool(
259 perfect_count=perfect_count, 210 perfect_count=perfect_count,
260 partial_count=partial_count, 211 partial_count=partial_count,
261 irrelevant_count=irrelevant_count, 212 irrelevant_count=irrelevant_count,
262 - quality_verdict=verdict,  
263 quality_summary=quality_summary, 213 quality_summary=quality_summary,
264 products=products, 214 products=products,
265 ) 215 )
266 registry.register(session_id, result) 216 registry.register(session_id, result)
267 -  
268 - # ── Search result detailed log (ref_id, summary, per-item id + image_url raw/normalized) ──  
269 - logger.info(  
270 - "[%s] SEARCH_RESULT ref_id=%s query=%s total_api_hits=%s returned_count=%s "  
271 - "verdict=%s quality_summary=%s perfect=%s partial=%s irrelevant=%s",  
272 - session_id,  
273 - ref_id,  
274 - query,  
275 - total_hits,  
276 - len(raw_results),  
277 - verdict,  
278 - quality_summary,  
279 - perfect_count,  
280 - partial_count,  
281 - irrelevant_count,  
282 - )  
283 - for idx, raw in enumerate(raw_results):  
284 - raw_img = raw.get("image_url") or ""  
285 - logger.info(  
286 - "[%s] SEARCH_RESULT_ITEM raw idx=%s spu_id=%s title=%s image_url_raw=%s",  
287 - session_id,  
288 - idx,  
289 - raw.get("spu_id", ""),  
290 - (raw.get("title") or "")[:60],  
291 - raw_img,  
292 - )  
293 - for p in products:  
294 - logger.info(  
295 - "[%s] SEARCH_RESULT_PRODUCT spu_id=%s match_label=%s image_url_normalized=%s",  
296 - session_id,  
297 - p.spu_id,  
298 - p.match_label,  
299 - p.image_url or "",  
300 - )  
301 - 217 + assessed_n = len(raw_results)
302 logger.info( 218 logger.info(
303 - f"[{session_id}] Registered {ref_id}: verdict={verdict}, "  
304 - f"perfect={perfect_count}, partial={partial_count}, irrel={irrelevant_count}" 219 + "[%s] Registered %s: query=%s assessed=%s perfect=%s partial=%s",
  220 + session_id, ref_id, query, assessed_n, perfect_count, partial_count,
305 ) 221 )
306 222
307 - # ── Return summary to agent (NOT the product list) ──────────────  
308 - verdict_hint = {  
309 - "优质": "结果质量优质,可直接引用。",  
310 - "一般": "结果质量一般,可酌情引用,也可补充更精准的 query。",  
311 - "较差": "结果质量较差,建议重新规划 query 后再次搜索。",  
312 - }.get(verdict, "") 223 + top10_titles = [
  224 + (raw.get("title") or "未知")[:80]
  225 + for raw in raw_results[:10]
  226 + ]
  227 + results_list = "\n".join(f"{i}. {t}" for i, t in enumerate(top10_titles, 1))
313 228
314 return ( 229 return (
315 f"【搜索完成】query='{query}'\n" 230 f"【搜索完成】query='{query}'\n"
316 - f"API 总命中:{total_hits} 条 | 本次评估:{len(raw_results)} 条\n"  
317 - f"质量评估:完美匹配 {perfect_count} 条 | 部分匹配 {partial_count} 条 | 不相关 {irrelevant_count} 条\n"  
318 - f"整体判断:{verdict} — {quality_summary}\n"  
319 - f"{verdict_hint}\n"  
320 - f"结果引用:[SEARCH_REF:{ref_id}]" 231 + f"结果引用:[SEARCH_REF:{ref_id}]\n"
  232 + f"搜索结果质量情况:评估总条数{assessed_n}条,Highly Relevant {perfect_count} 条,Partially Relevant {partial_count} 条。\n"
  233 + f"results list:\n{results_list}"
321 ) 234 )
322 235
323 except requests.exceptions.RequestException as e: 236 except requests.exceptions.RequestException as e: