Commit 5e3d6d3a614a63e5b56a840fa1655a1e91164ce9
1 parent
825828c4
refactor(search): 简化质量评估、英文标签、quality_summary 由 LLM 产出

## 搜索工具与质量评估
- _assess_search_quality 仅返回 (labels, quality_summary):去掉 verdict(优质/一般/较差)及依赖逻辑;prompt 要求 LLM 输出 labels + quality_summary(1–2 句:结果主要包含什么、是否基本满足意图、匹配度)。
- 工具返回格式统一为:【搜索完成】query='...' + 结果引用 [SEARCH_REF:ref_id] + 搜索结果质量情况(评估总条数、Highly Relevant / Partially Relevant 条数)+ results list(top10 标题)。
- 精简 prompt 与日志:评估输入仅保留序号+标题;删除 verdict_hint、逐条 SEARCH_RESULT_ITEM/SEARCH_RESULT_PRODUCT 日志,保留单行注册日志。

## 三级标签改为英文
- 完美匹配 → Highly Relevant;部分匹配 → Partially Relevant;不相关 → Not Relevant。
- 全量替换:search_tools(prompt、valid、统计与过滤)、search_registry(ProductItem.match_label 默认及注释、SearchResult 注释)、app.py(卡片 label_style、结果块头部与筛选逻辑)。

## Registry 与 UI
- SearchResult 移除 quality_verdict 字段;quality_summary 由 _assess_search_quality 的 LLM 返回写入。
- 结果块头部不再展示 verdict 图标/文案,改为展示 query + Highly/Partially Relevant 件数 + quality_summary(若有)。

## Agent
- 系统提示词调整:角色与原则、价值提供与信息收集、search_products 与 [SEARCH_REF:xxx] 使用说明。

Co-authored-by: Cursor <cursoragent@cursor.com>
Showing 4 changed files with 94 additions and 187 deletions
Show diff stats
| @@ -321,7 +321,7 @@ def display_product_card_from_item(product: ProductItem) -> None: | @@ -321,7 +321,7 @@ def display_product_card_from_item(product: ProductItem) -> None: | ||
| 321 | if product.price is not None: | 321 | if product.price is not None: |
| 322 | st.caption(f"¥{product.price:.2f}") | 322 | st.caption(f"¥{product.price:.2f}") |
| 323 | 323 | ||
| 324 | - label_style = "⭐" if product.match_label == "完美匹配" else "✦" | 324 | + label_style = "⭐" if product.match_label == "Highly Relevant" else "✦" |
| 325 | st.caption(f"{label_style} {product.match_label}") | 325 | st.caption(f"{label_style} {product.match_label}") |
| 326 | 326 | ||
| 327 | 327 | ||
| @@ -330,25 +330,25 @@ def render_search_result_block(result: SearchResult) -> None: | @@ -330,25 +330,25 @@ def render_search_result_block(result: SearchResult) -> None: | ||
| 330 | Render a full search result block in place of a [SEARCH_REF:xxx] token. | 330 | Render a full search result block in place of a [SEARCH_REF:xxx] token. |
| 331 | 331 | ||
| 332 | Shows: | 332 | Shows: |
| 333 | - - A styled header with query text + quality verdict + match counts | ||
| 334 | - - A grid of product cards (perfect matches first, then partial; max 6) | 333 | + - A styled header with query + match counts + quality_summary (if any) |
| 334 | + - A grid of product cards (Highly Relevant first, then Partially Relevant; max 6) | ||
| 335 | """ | 335 | """ |
| 336 | - verdict_icon = {"优质": "✅", "一般": "〰️", "较差": "⚠️"}.get(result.quality_verdict, "🔍") | 336 | + summary_line = f' · {result.quality_summary}' if result.quality_summary else '' |
| 337 | header_html = ( | 337 | header_html = ( |
| 338 | f'<div style="border:1px solid #e0e0e0;border-radius:8px;padding:10px 14px;' | 338 | f'<div style="border:1px solid #e0e0e0;border-radius:8px;padding:10px 14px;' |
| 339 | f'margin:8px 0 4px 0;background:#fafafa;">' | 339 | f'margin:8px 0 4px 0;background:#fafafa;">' |
| 340 | f'<span style="font-size:0.8rem;color:#555;">' | 340 | f'<span style="font-size:0.8rem;color:#555;">' |
| 341 | f'🔍 <b>{result.query}</b>' | 341 | f'🔍 <b>{result.query}</b>' |
| 342 | - f' {verdict_icon} {result.quality_verdict}' | ||
| 343 | - f' · 完美匹配 {result.perfect_count} 件' | ||
| 344 | - f' · 相关 {result.partial_count} 件' | 342 | + f' · Highly Relevant {result.perfect_count} 件' |
| 343 | + f' · Partially Relevant {result.partial_count} 件' | ||
| 344 | + f'{summary_line}' | ||
| 345 | f'</span></div>' | 345 | f'</span></div>' |
| 346 | ) | 346 | ) |
| 347 | st.markdown(header_html, unsafe_allow_html=True) | 347 | st.markdown(header_html, unsafe_allow_html=True) |
| 348 | 348 | ||
| 349 | # Perfect matches first, fall back to partials if none | 349 | # Perfect matches first, fall back to partials if none |
| 350 | - perfect = [p for p in result.products if p.match_label == "完美匹配"] | ||
| 351 | - partial = [p for p in result.products if p.match_label == "部分匹配"] | 350 | + perfect = [p for p in result.products if p.match_label == "Highly Relevant"] |
| 351 | + partial = [p for p in result.products if p.match_label == "Partially Relevant"] | ||
| 352 | to_show = (perfect + partial)[:6] if perfect else partial[:6] | 352 | to_show = (perfect + partial)[:6] if perfect else partial[:6] |
| 353 | 353 | ||
| 354 | if not to_show: | 354 | if not to_show: |
app/agents/shopping_agent.py
| @@ -33,26 +33,21 @@ logger = logging.getLogger(__name__) | @@ -33,26 +33,21 @@ logger = logging.getLogger(__name__) | ||
| 33 | # 1. Guides multi-query search planning with explicit evaluate-and-decide loop | 33 | # 1. Guides multi-query search planning with explicit evaluate-and-decide loop |
| 34 | # 2. Forbids re-listing product details in the final response | 34 | # 2. Forbids re-listing product details in the final response |
| 35 | # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism | 35 | # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism |
| 36 | -SYSTEM_PROMPT = """角色定义 | ||
| 37 | -你是一名专业的服装电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。 | ||
| 38 | - | ||
| 39 | -一些原则: | ||
| 40 | -1. 你是一个真人导购,是一个贴心、专业的销售,保持灵活,根据上下文,基于常识灵活的切换策略,在合适的上下文询问合适的问题、给出有价值的方案和搜索结果的呈现。 | ||
| 41 | -2. 商品搜索结果推荐与信息收集: | ||
| 42 | - 1. 根据上下文、用户诉求,灵活的切换侧重点,何时需要进行搜索、何时要引导客户完善需求,你需要站在用户角度进行思考。比如已经有较为清晰的意图,则以搜索、方案推荐为主,有必要的时候,思考该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。如果存在重大的需求方向缺口,主动通过1-2个关键问题进行引导,并提供初步方向。 | ||
| 43 | - 2. 适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的帮助用户更清晰的呈现需求、提高商品发现的效率,形成“提供-反馈”的良性循环。 | ||
| 44 | - 3. 对于复杂需求时,要能基于上下文,将导购任务进行合理拆解。 | ||
| 45 | -3. 引导或者收集需求时,需要站在用户立场,比如询问用户期待的效果或感觉、使用的场合、偏好的风格等用户立场需,而不是询问具体的款式或参数,你需要将用户立场的需求理解/翻译/转化为具体的搜索计划,最后筛选产品、结合需求+结果特性组织推荐理由、呈现方案。 | ||
| 46 | -4. 如何使用search_products:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。每次调用 search_products 后,工具会返回以下内容,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。可以进行多轮搜索,但是要适时的总结和反馈信息避免用户等待过长时间: | ||
| 47 | - - 各层级数量:完美匹配 / 部分匹配 / 不相关 的条数 | ||
| 48 | - - 整体质量判断:优质 / 一般 / 较差 | ||
| 49 | - - 简短质量说明 | ||
| 50 | - - 结果引用标识:[SEARCH_REF:xxx] | ||
| 51 | -5. 撰写最终回复的时候,使用 [SEARCH_REF:xxx] 内联引用 | ||
| 52 | - 1. 用自然流畅的语言组织回复,将 [SEARCH_REF:xxx] 嵌入叙述中 | ||
| 53 | - 2. 系统会自动在 [SEARCH_REF:xxx] 位置渲染对应的商品卡片列表 | ||
| 54 | - 3. 禁止在回复文本中列出商品名称、ID、价格、分类、规格等字段 | ||
| 55 | - 4. 禁止用编号列表逐条复述搜索结果中的商品 | 36 | +SYSTEM_PROMPT = """ 角色定义 |
| 37 | + 你是我们店铺的一名专业的电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。 | ||
| 38 | + 作为我们店铺的一名专业的销售,除了本店铺的商品的推荐,你可以给用户提供有帮助的信息,但是不要虚构商品、提供本商店搜索结果以外的商品。 | ||
| 39 | + | ||
| 40 | + 一些原则: | ||
| 41 | + 1. 价值提供与信息收集的原则: | ||
| 42 | + 1. 优先价值提供:适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的澄清需求、提高商品发现的效率,形成“提供-反馈”的良性循环。 | ||
| 43 | + 2. 缺口大(比如品类或者使用人群都不能确定)→ 给出方案推荐 + 1-2个关键问题让用户选择;缺口小→直接检索+方案呈现,根据情况,可以考虑该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。 | ||
| 44 | + 3. 选项驱动式澄清:推荐几个清晰的方向,呈现方案或商品搜索结果,再做澄清 | ||
| 45 | + 4. 单轮对话最好只提一个问题,最多两个,禁止多问题堆叠。 | ||
| 46 | + 5. 站在用户立场思考:比如询问用户期待的效果或感觉、使用的场合、想解决的问题,而不是询问具体的款式、参数,你需要将用户表达的需求翻译为具体可检索的商品特征(版型、材质、设计元素、风格标签等),并据此筛选商品、组织推荐逻辑。 | ||
| 47 | + 2. 如何使用make_search_products_tool: | ||
| 48 | + 1. 可以生成多个query进行搜索:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。 | ||
| 49 | + 2. 可以根据搜索结果调整搜索策略:每次调用 search_products 后,工具会返回搜索结果的相关性的判断、以及搜索结果的topN的title,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。 | ||
| 50 | + 3. 使用 [SEARCH_REF:xxx] 内联引用搜索结果:搜索工具会返回一个结果引用标识[SEARCH_REF:xxx],撰写最终答复的时候可以直接引用将 [SEARCH_REF:xxx] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。 | ||
| 56 | """ | 51 | """ |
| 57 | 52 | ||
| 58 | 53 |
app/search_registry.py
| @@ -27,8 +27,8 @@ class ProductItem: | @@ -27,8 +27,8 @@ class ProductItem: | ||
| 27 | vendor: Optional[str] = None | 27 | vendor: Optional[str] = None |
| 28 | image_url: Optional[str] = None | 28 | image_url: Optional[str] = None |
| 29 | relevance_score: Optional[float] = None | 29 | relevance_score: Optional[float] = None |
| 30 | - # LLM-assigned label: "完美匹配" | "部分匹配" | "不相关" | ||
| 31 | - match_label: str = "部分匹配" | 30 | + # LLM-assigned label: "Highly Relevant" | "Partially Relevant" | "Not Relevant" |
| 31 | + match_label: str = "Partially Relevant" | ||
| 32 | tags: list = field(default_factory=list) | 32 | tags: list = field(default_factory=list) |
| 33 | specifications: list = field(default_factory=list) | 33 | specifications: list = field(default_factory=list) |
| 34 | 34 | ||
| @@ -40,7 +40,7 @@ class SearchResult: | @@ -40,7 +40,7 @@ class SearchResult: | ||
| 40 | 40 | ||
| 41 | Identified by ref_id (e.g. 'sr_3f9a1b2c'). | 41 | Identified by ref_id (e.g. 'sr_3f9a1b2c'). |
| 42 | Stores the query, LLM quality assessment, and the curated product list | 42 | Stores the query, LLM quality assessment, and the curated product list |
| 43 | - (only "完美匹配" and "部分匹配" items — "不相关" are discarded). | 43 | + (only "Highly Relevant" and "Partially Relevant" items — "Not Relevant" are discarded). |
| 44 | """ | 44 | """ |
| 45 | 45 | ||
| 46 | ref_id: str | 46 | ref_id: str |
| @@ -55,9 +55,8 @@ class SearchResult: | @@ -55,9 +55,8 @@ class SearchResult: | ||
| 55 | partial_count: int | 55 | partial_count: int |
| 56 | irrelevant_count: int | 56 | irrelevant_count: int |
| 57 | 57 | ||
| 58 | - # LLM overall quality verdict | ||
| 59 | - quality_verdict: str # "优质" | "一般" | "较差" | ||
| 60 | - quality_summary: str # one-sentence LLM explanation | 58 | + # LLM-written short summary: what the results mainly contain, whether they meet intent, match degree |
| 59 | + quality_summary: str | ||
| 61 | 60 | ||
| 62 | # Curated product list (perfect + partial only) | 61 | # Curated product list (perfect + partial only) |
| 63 | products: list # list[ProductItem] | 62 | products: list # list[ProductItem] |
app/tools/search_tools.py
| 1 | """ | 1 | """ |
| 2 | Search Tools for Product Discovery | 2 | Search Tools for Product Discovery |
| 3 | 3 | ||
| 4 | -Key design: | ||
| 5 | -- search_products is created via a factory (make_search_products_tool) that | ||
| 6 | - closes over (session_id, registry), so each agent session has its own tool | ||
| 7 | - instance pointing to the shared registry. | ||
| 8 | -- After calling the search API, an LLM quality-assessment step labels every | ||
| 9 | - result as 完美匹配 / 部分匹配 / 不相关 and produces an overall verdict. | ||
| 10 | -- The curated product list is stored in the registry under a unique ref_id. | ||
| 11 | -- The tool returns ONLY the quality summary + [SEARCH_REF:ref_id], never the | ||
| 12 | - raw product list. The LLM references the result in its final response via | ||
| 13 | - the [SEARCH_REF:...] token; the UI renders the product cards from the registry. | 4 | +- search_products is created via make_search_products_tool(session_id, registry). |
| 5 | +- After search API, an LLM labels each result as Highly Relevant / Partially Relevant / Not Relevant; we count and | ||
| 6 | + store the curated list in the registry, return [SEARCH_REF:ref_id] + quality counts + top10 titles. | ||
| 14 | """ | 7 | """ |
| 15 | 8 | ||
| 16 | import base64 | 9 | import base64 |
| @@ -65,94 +58,61 @@ def get_openai_client() -> OpenAI: | @@ -65,94 +58,61 @@ def get_openai_client() -> OpenAI: | ||
| 65 | 58 | ||
| 66 | # ── LLM quality assessment ───────────────────────────────────────────────────── | 59 | # ── LLM quality assessment ───────────────────────────────────────────────────── |
| 67 | 60 | ||
| 68 | -def _assess_search_quality( | ||
| 69 | - query: str, | ||
| 70 | - raw_products: list, | ||
| 71 | -) -> tuple[list[str], str, str]: | 61 | +def _assess_search_quality(query: str, raw_products: list) -> tuple[list[str], str]: |
| 72 | """ | 62 | """ |
| 73 | - Ask the LLM to evaluate how well each search result matches the query. | ||
| 74 | - | ||
| 75 | - Returns: | ||
| 76 | - labels – list[str], one per product: "完美匹配" | "部分匹配" | "不相关" | ||
| 77 | - verdict – str: "优质" | "一般" | "较差" | ||
| 78 | - summary – str: one-sentence explanation | 63 | + Use LLM to label each search result and write a short quality_summary. |
| 64 | + Returns (labels, quality_summary). labels: one per product; quality_summary: 1–2 sentences. | ||
| 79 | """ | 65 | """ |
| 80 | n = len(raw_products) | 66 | n = len(raw_products) |
| 81 | if n == 0: | 67 | if n == 0: |
| 82 | - return [], "较差", "搜索未返回任何商品。" | 68 | + return [], "" |
| 83 | 69 | ||
| 84 | - # Build a compact product list — only title/category/tags/score to save tokens | ||
| 85 | - lines: list[str] = [] | 70 | + lines = [] |
| 86 | for i, p in enumerate(raw_products, 1): | 71 | for i, p in enumerate(raw_products, 1): |
| 87 | title = (p.get("title") or "")[:60] | 72 | title = (p.get("title") or "")[:60] |
| 88 | - cat = p.get("category_path") or p.get("category_name") or "" | ||
| 89 | - tags_raw = p.get("tags") or [] | ||
| 90 | - tags = ", ".join(str(t) for t in tags_raw[:5]) | ||
| 91 | - score = p.get("relevance_score") or 0 | ||
| 92 | - row = f"{i}. [{score:.1f}] {title} | {cat}" | ||
| 93 | - if tags: | ||
| 94 | - row += f" | 标签:{tags}" | ||
| 95 | - lines.append(row) | ||
| 96 | - | 73 | + lines.append(f"{i}. {title}") |
| 97 | product_text = "\n".join(lines) | 74 | product_text = "\n".join(lines) |
| 98 | 75 | ||
| 99 | - prompt = f"""你是商品搜索质量评估专家。请评估以下搜索结果与用户查询的匹配程度。 | 76 | + prompt = f"""评估以下搜索结果与用户查询的匹配程度,完成两件事: |
| 77 | +1. 为每条结果打一个等级:Highly Relevant / Partially Relevant / Not Relevant。 | ||
| 78 | +2. 写一段 quality_summary(1–2 句话):简要说明搜索结果主要包含哪些商品、是否基本满足搜索意图、整体匹配度如何。 | ||
| 100 | 79 | ||
| 101 | 用户查询:{query} | 80 | 用户查询:{query} |
| 102 | 81 | ||
| 103 | -搜索结果(共 {n} 条,格式:序号. [相关性分数] 标题 | 分类 | 标签): | 82 | +搜索结果(共 {n} 条): |
| 104 | {product_text} | 83 | {product_text} |
| 105 | 84 | ||
| 106 | -评估说明: | ||
| 107 | -- 完美匹配:完全符合用户查询意图,用户必然感兴趣 | ||
| 108 | -- 部分匹配:与查询有关联,但不完全满足意图(如品类对但风格偏差、相关配件等) | ||
| 109 | -- 不相关:与查询无关,不应展示给用户 | ||
| 110 | - | ||
| 111 | -整体 verdict 判断标准: | ||
| 112 | -- 优质:完美匹配 ≥ 5 条 | ||
| 113 | -- 一般:完美匹配 2-4 条 | ||
| 114 | -- 较差:完美匹配 < 2 条 | ||
| 115 | - | ||
| 116 | -请严格按以下 JSON 格式输出,不得有任何额外文字或代码块标记: | ||
| 117 | -{{"labels": ["完美匹配", "部分匹配", "不相关", ...], "verdict": "优质", "summary": "一句话评价搜索质量"}} | 85 | +等级说明:Highly Relevant=完全符合查询意图;Partially Relevant=基本相关(如品类等主需求匹配但部分属性不完全符合);Not Relevant=不相关。 |
| 118 | 86 | ||
| 119 | -labels 数组长度必须恰好等于 {n}。""" | 87 | +请严格按以下 JSON 输出,仅输出 JSON,无其他内容: |
| 88 | +{{"labels": ["Highly Relevant", "Partially Relevant", "Not Relevant", ...], "quality_summary": "你的1-2句总结"}} | ||
| 89 | +labels 数组长度必须等于 {n}。""" | ||
| 120 | 90 | ||
| 121 | try: | 91 | try: |
| 122 | client = get_openai_client() | 92 | client = get_openai_client() |
| 123 | resp = client.chat.completions.create( | 93 | resp = client.chat.completions.create( |
| 124 | model=settings.openai_model, | 94 | model=settings.openai_model, |
| 125 | messages=[{"role": "user", "content": prompt}], | 95 | messages=[{"role": "user", "content": prompt}], |
| 126 | - max_tokens=800, | 96 | + max_tokens=700, |
| 127 | temperature=0.1, | 97 | temperature=0.1, |
| 128 | ) | 98 | ) |
| 129 | raw = resp.choices[0].message.content.strip() | 99 | raw = resp.choices[0].message.content.strip() |
| 130 | - # Strip markdown code fences if the model adds them | ||
| 131 | if raw.startswith("```"): | 100 | if raw.startswith("```"): |
| 132 | raw = raw.split("```")[1] | 101 | raw = raw.split("```")[1] |
| 133 | if raw.startswith("json"): | 102 | if raw.startswith("json"): |
| 134 | raw = raw[4:] | 103 | raw = raw[4:] |
| 135 | raw = raw.strip() | 104 | raw = raw.strip() |
| 136 | - | ||
| 137 | data = json.loads(raw) | 105 | data = json.loads(raw) |
| 138 | - labels: list[str] = data.get("labels", []) | ||
| 139 | - | ||
| 140 | - # Normalize and pad / trim to match n | ||
| 141 | - valid = {"完美匹配", "部分匹配", "不相关"} | ||
| 142 | - labels = [l if l in valid else "部分匹配" for l in labels] | 106 | + labels = data.get("labels", []) |
| 107 | + valid = {"Highly Relevant", "Partially Relevant", "Not Relevant"} | ||
| 108 | + labels = [l if l in valid else "Partially Relevant" for l in labels] | ||
| 143 | while len(labels) < n: | 109 | while len(labels) < n: |
| 144 | - labels.append("部分匹配") | ||
| 145 | - labels = labels[:n] | ||
| 146 | - | ||
| 147 | - verdict: str = data.get("verdict", "一般") | ||
| 148 | - if verdict not in ("优质", "一般", "较差"): | ||
| 149 | - verdict = "一般" | ||
| 150 | - summary: str = str(data.get("summary", "")) | ||
| 151 | - return labels, verdict, summary | ||
| 152 | - | 110 | + labels.append("Partially Relevant") |
| 111 | + quality_summary = (data.get("quality_summary") or "").strip() or "" | ||
| 112 | + return labels[:n], quality_summary | ||
| 153 | except Exception as e: | 113 | except Exception as e: |
| 154 | - logger.warning(f"Quality assessment LLM call failed: {e}; using fallback labels.") | ||
| 155 | - return ["部分匹配"] * n, "一般", "质量评估步骤失败,结果仅供参考。" | 114 | + logger.warning(f"Quality assessment failed: {e}; using fallback.") |
| 115 | + return ["Partially Relevant"] * n, "" | ||
| 156 | 116 | ||
| 157 | 117 | ||
| 158 | # ── Tool factory ─────────────────────────────────────────────────────────────── | 118 | # ── Tool factory ─────────────────────────────────────────────────────────────── |
| @@ -169,22 +129,18 @@ def make_search_products_tool( | @@ -169,22 +129,18 @@ def make_search_products_tool( | ||
| 169 | 2. Runs LLM quality assessment on up to 20 results. | 129 | 2. Runs LLM quality assessment on up to 20 results. |
| 170 | 3. Stores a SearchResult in the registry. | 130 | 3. Stores a SearchResult in the registry. |
| 171 | 4. Returns a concise quality summary + [SEARCH_REF:ref_id]. | 131 | 4. Returns a concise quality summary + [SEARCH_REF:ref_id]. |
| 172 | - The product list is NEVER returned in the tool output text. | ||
| 173 | """ | 132 | """ |
| 174 | 133 | ||
| 175 | @tool | 134 | @tool |
| 176 | def search_products(query: str, limit: int = 20) -> str: | 135 | def search_products(query: str, limit: int = 20) -> str: |
| 177 | - """搜索商品库,根据自然语言描述找到匹配商品,并进行质量评估。 | ||
| 178 | - | ||
| 179 | - 每次调用专注于单一搜索角度。复杂需求请拆分为多次调用,每次换一个 query。 | ||
| 180 | - 工具会自动评估结果质量(完美匹配 / 部分匹配 / 不相关),并给出整体判断。 | 136 | + """搜索商品库并做质量评估:LLM 为每条结果打等级(Highly Relevant / Partially Relevant / Not Relevant),返回引用与 top10 标题。 |
| 181 | 137 | ||
| 182 | Args: | 138 | Args: |
| 183 | - query: 自然语言商品描述,例如"男士休闲亚麻短裤夏季" | ||
| 184 | - limit: 最多返回条数(建议 10-20,越多评估越全面) | 139 | + query: 自然语言商品描述 |
| 140 | + limit: 最多返回条数(1-20) | ||
| 185 | 141 | ||
| 186 | Returns: | 142 | Returns: |
| 187 | - 质量评估摘要 + [SEARCH_REF:ref_id],供最终回复引用。 | 143 | + 【搜索完成】+ 结果引用 [SEARCH_REF:ref_id] + 质量情况(评估条数、Highly/Partially Relevant 数)+ results list(top10 标题) |
| 188 | """ | 144 | """ |
| 189 | try: | 145 | try: |
| 190 | logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}") | 146 | logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}") |
| @@ -199,6 +155,9 @@ def make_search_products_tool( | @@ -199,6 +155,9 @@ def make_search_products_tool( | ||
| 199 | "size": min(max(limit, 1), 20), | 155 | "size": min(max(limit, 1), 20), |
| 200 | "from": 0, | 156 | "from": 0, |
| 201 | "language": "zh", | 157 | "language": "zh", |
| 158 | + "enable_rerank": True, | ||
| 159 | + "rerank_query_template": query, | ||
| 160 | + "rerank_doc_template": "{title}", | ||
| 202 | } | 161 | } |
| 203 | 162 | ||
| 204 | resp = requests.post(url, json=payload, headers=headers, timeout=60) | 163 | resp = requests.post(url, json=payload, headers=headers, timeout=60) |
| @@ -216,40 +175,32 @@ def make_search_products_tool( | @@ -216,40 +175,32 @@ def make_search_products_tool( | ||
| 216 | "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。" | 175 | "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。" |
| 217 | ) | 176 | ) |
| 218 | 177 | ||
| 219 | - # ── LLM quality assessment ────────────────────────────────────── | ||
| 220 | - labels, verdict, quality_summary = _assess_search_quality(query, raw_results) | 178 | + labels, quality_summary = _assess_search_quality(query, raw_results) |
| 179 | + perfect_count = sum(1 for l in labels if l == "Highly Relevant") | ||
| 180 | + partial_count = sum(1 for l in labels if l == "Partially Relevant") | ||
| 181 | + irrelevant_count = len(labels) - perfect_count - partial_count | ||
| 221 | 182 | ||
| 222 | - # ── Build ProductItem list (keep perfect + partial, discard irrelevant) ── | ||
| 223 | products: list[ProductItem] = [] | 183 | products: list[ProductItem] = [] |
| 224 | - perfect_count = partial_count = irrelevant_count = 0 | ||
| 225 | - | ||
| 226 | for raw, label in zip(raw_results, labels): | 184 | for raw, label in zip(raw_results, labels): |
| 227 | - if label == "完美匹配": | ||
| 228 | - perfect_count += 1 | ||
| 229 | - elif label == "部分匹配": | ||
| 230 | - partial_count += 1 | ||
| 231 | - else: | ||
| 232 | - irrelevant_count += 1 | ||
| 233 | - | ||
| 234 | - if label in ("完美匹配", "部分匹配"): | ||
| 235 | - products.append( | ||
| 236 | - ProductItem( | ||
| 237 | - spu_id=str(raw.get("spu_id", "")), | ||
| 238 | - title=raw.get("title") or "", | ||
| 239 | - price=raw.get("price"), | ||
| 240 | - category_path=( | ||
| 241 | - raw.get("category_path") or raw.get("category_name") | ||
| 242 | - ), | ||
| 243 | - vendor=raw.get("vendor"), | ||
| 244 | - image_url=_normalize_image_url(raw.get("image_url")), | ||
| 245 | - relevance_score=raw.get("relevance_score"), | ||
| 246 | - match_label=label, | ||
| 247 | - tags=raw.get("tags") or [], | ||
| 248 | - specifications=raw.get("specifications") or [], | ||
| 249 | - ) | 185 | + if label not in ("Highly Relevant", "Partially Relevant"): |
| 186 | + continue | ||
| 187 | + products.append( | ||
| 188 | + ProductItem( | ||
| 189 | + spu_id=str(raw.get("spu_id", "")), | ||
| 190 | + title=raw.get("title") or "", | ||
| 191 | + price=raw.get("price"), | ||
| 192 | + category_path=( | ||
| 193 | + raw.get("category_path") or raw.get("category_name") | ||
| 194 | + ), | ||
| 195 | + vendor=raw.get("vendor"), | ||
| 196 | + image_url=_normalize_image_url(raw.get("image_url")), | ||
| 197 | + relevance_score=raw.get("relevance_score"), | ||
| 198 | + match_label=label, | ||
| 199 | + tags=raw.get("tags") or [], | ||
| 200 | + specifications=raw.get("specifications") or [], | ||
| 250 | ) | 201 | ) |
| 202 | + ) | ||
| 251 | 203 | ||
| 252 | - # ── Register ──────────────────────────────────────────────────── | ||
| 253 | ref_id = new_ref_id() | 204 | ref_id = new_ref_id() |
| 254 | result = SearchResult( | 205 | result = SearchResult( |
| 255 | ref_id=ref_id, | 206 | ref_id=ref_id, |
| @@ -259,65 +210,27 @@ def make_search_products_tool( | @@ -259,65 +210,27 @@ def make_search_products_tool( | ||
| 259 | perfect_count=perfect_count, | 210 | perfect_count=perfect_count, |
| 260 | partial_count=partial_count, | 211 | partial_count=partial_count, |
| 261 | irrelevant_count=irrelevant_count, | 212 | irrelevant_count=irrelevant_count, |
| 262 | - quality_verdict=verdict, | ||
| 263 | quality_summary=quality_summary, | 213 | quality_summary=quality_summary, |
| 264 | products=products, | 214 | products=products, |
| 265 | ) | 215 | ) |
| 266 | registry.register(session_id, result) | 216 | registry.register(session_id, result) |
| 267 | - | ||
| 268 | - # ── Search result detailed log (ref_id, summary, per-item id + image_url raw/normalized) ── | ||
| 269 | - logger.info( | ||
| 270 | - "[%s] SEARCH_RESULT ref_id=%s query=%s total_api_hits=%s returned_count=%s " | ||
| 271 | - "verdict=%s quality_summary=%s perfect=%s partial=%s irrelevant=%s", | ||
| 272 | - session_id, | ||
| 273 | - ref_id, | ||
| 274 | - query, | ||
| 275 | - total_hits, | ||
| 276 | - len(raw_results), | ||
| 277 | - verdict, | ||
| 278 | - quality_summary, | ||
| 279 | - perfect_count, | ||
| 280 | - partial_count, | ||
| 281 | - irrelevant_count, | ||
| 282 | - ) | ||
| 283 | - for idx, raw in enumerate(raw_results): | ||
| 284 | - raw_img = raw.get("image_url") or "" | ||
| 285 | - logger.info( | ||
| 286 | - "[%s] SEARCH_RESULT_ITEM raw idx=%s spu_id=%s title=%s image_url_raw=%s", | ||
| 287 | - session_id, | ||
| 288 | - idx, | ||
| 289 | - raw.get("spu_id", ""), | ||
| 290 | - (raw.get("title") or "")[:60], | ||
| 291 | - raw_img, | ||
| 292 | - ) | ||
| 293 | - for p in products: | ||
| 294 | - logger.info( | ||
| 295 | - "[%s] SEARCH_RESULT_PRODUCT spu_id=%s match_label=%s image_url_normalized=%s", | ||
| 296 | - session_id, | ||
| 297 | - p.spu_id, | ||
| 298 | - p.match_label, | ||
| 299 | - p.image_url or "", | ||
| 300 | - ) | ||
| 301 | - | 217 | + assessed_n = len(raw_results) |
| 302 | logger.info( | 218 | logger.info( |
| 303 | - f"[{session_id}] Registered {ref_id}: verdict={verdict}, " | ||
| 304 | - f"perfect={perfect_count}, partial={partial_count}, irrel={irrelevant_count}" | 219 | + "[%s] Registered %s: query=%s assessed=%s perfect=%s partial=%s", |
| 220 | + session_id, ref_id, query, assessed_n, perfect_count, partial_count, | ||
| 305 | ) | 221 | ) |
| 306 | 222 | ||
| 307 | - # ── Return summary to agent (NOT the product list) ────────────── | ||
| 308 | - verdict_hint = { | ||
| 309 | - "优质": "结果质量优质,可直接引用。", | ||
| 310 | - "一般": "结果质量一般,可酌情引用,也可补充更精准的 query。", | ||
| 311 | - "较差": "结果质量较差,建议重新规划 query 后再次搜索。", | ||
| 312 | - }.get(verdict, "") | 223 | + top10_titles = [ |
| 224 | + (raw.get("title") or "未知")[:80] | ||
| 225 | + for raw in raw_results[:10] | ||
| 226 | + ] | ||
| 227 | + results_list = "\n".join(f"{i}. {t}" for i, t in enumerate(top10_titles, 1)) | ||
| 313 | 228 | ||
| 314 | return ( | 229 | return ( |
| 315 | f"【搜索完成】query='{query}'\n" | 230 | f"【搜索完成】query='{query}'\n" |
| 316 | - f"API 总命中:{total_hits} 条 | 本次评估:{len(raw_results)} 条\n" | ||
| 317 | - f"质量评估:完美匹配 {perfect_count} 条 | 部分匹配 {partial_count} 条 | 不相关 {irrelevant_count} 条\n" | ||
| 318 | - f"整体判断:{verdict} — {quality_summary}\n" | ||
| 319 | - f"{verdict_hint}\n" | ||
| 320 | - f"结果引用:[SEARCH_REF:{ref_id}]" | 231 | + f"结果引用:[SEARCH_REF:{ref_id}]\n" |
| 232 | + f"搜索结果质量情况:评估总条数{assessed_n}条,Highly Relevant {perfect_count} 条,Partially Relevant {partial_count} 条。\n" | ||
| 233 | + f"results list:\n{results_list}" | ||
| 321 | ) | 234 | ) |
| 322 | 235 | ||
| 323 | except requests.exceptions.RequestException as e: | 236 | except requests.exceptions.RequestException as e: |