Commit 621b69257170a9f674445b29e36fce8b0f798ed4

Authored by tangwang
1 parent 5e3d6d3a

up

... ... @@ -321,7 +321,7 @@ def display_product_card_from_item(product: ProductItem) -> None:
321 321 if product.price is not None:
322 322 st.caption(f"¥{product.price:.2f}")
323 323  
324   - label_style = "⭐" if product.match_label == "Highly Relevant" else "✦"
  324 + label_style = "⭐" if product.match_label == "Relevant" else "✦"
325 325 st.caption(f"{label_style} {product.match_label}")
326 326  
327 327  
... ... @@ -331,7 +331,7 @@ def render_search_result_block(result: SearchResult) -> None:
331 331  
332 332 Shows:
333 333 - A styled header with query + match counts + quality_summary (if any)
334   - - A grid of product cards (Highly Relevant first, then Partially Relevant; max 6)
  334 + - A grid of product cards (Relevant first, then Partially Relevant; max 6)
335 335 """
336 336 summary_line = f'  · {result.quality_summary}' if result.quality_summary else ''
337 337 header_html = (
... ... @@ -339,7 +339,7 @@ def render_search_result_block(result: SearchResult) -> None:
339 339 f'margin:8px 0 4px 0;background:#fafafa;">'
340 340 f'<span style="font-size:0.8rem;color:#555;">'
341 341 f'🔍 <b>{result.query}</b>'
342   - f'&nbsp;·&nbsp;Highly Relevant&nbsp;{result.perfect_count}&nbsp;件'
  342 + f'&nbsp;·&nbsp;Relevant&nbsp;{result.perfect_count}&nbsp;件'
343 343 f'&nbsp;·&nbsp;Partially Relevant&nbsp;{result.partial_count}&nbsp;件'
344 344 f'{summary_line}'
345 345 f'</span></div>'
... ... @@ -347,7 +347,7 @@ def render_search_result_block(result: SearchResult) -> None:
347 347 st.markdown(header_html, unsafe_allow_html=True)
348 348  
349 349 # Perfect matches first, fall back to partials if none
350   - perfect = [p for p in result.products if p.match_label == "Highly Relevant"]
  350 + perfect = [p for p in result.products if p.match_label == "Relevant"]
351 351 partial = [p for p in result.products if p.match_label == "Partially Relevant"]
352 352 to_show = (perfect + partial)[:6] if perfect else partial[:6]
353 353  
... ... @@ -361,14 +361,20 @@ def render_search_result_block(result: SearchResult) -> None:
361 361 display_product_card_from_item(product)
362 362  
363 363  
364   -def render_message_with_refs(content: str, session_id: str) -> None:
  364 +def render_message_with_refs(
  365 + content: str,
  366 + session_id: str,
  367 + fallback_refs: Optional[dict] = None,
  368 +) -> None:
365 369 """
366 370 Render an assistant message that may contain [SEARCH_REF:xxx] tokens.
367 371  
368 372 Text segments are rendered as markdown.
369 373 [SEARCH_REF:xxx] tokens are replaced with full product card blocks
370   - loaded from the global registry.
  374 + loaded from the global registry, or from fallback_refs (e.g. refs stored
  375 + with the message so they survive reruns / different workers).
371 376 """
  377 + fallback_refs = fallback_refs or {}
372 378 # re.split with a capture group alternates: [text, ref_id, text, ref_id, ...]
373 379 parts = SEARCH_REF_PATTERN.split(content)
374 380  
... ... @@ -381,7 +387,7 @@ def render_message_with_refs(content: str, session_id: str) -> None:
381 387 else:
382 388 # ref_id segment
383 389 ref_id = segment.strip()
384   - result = global_registry.get(session_id, ref_id)
  390 + result = global_registry.get(session_id, ref_id) or fallback_refs.get(ref_id)
385 391 if result:
386 392 render_search_result_block(result)
387 393 else:
... ... @@ -450,7 +456,9 @@ def display_message(message: dict):
450 456  
451 457 # Render message: expand [SEARCH_REF:xxx] tokens into product card blocks
452 458 session_id = st.session_state.get("session_id", "")
453   - render_message_with_refs(content, session_id)
  459 + render_message_with_refs(
  460 + content, session_id, fallback_refs=message.get("search_refs")
  461 + )
454 462  
455 463 st.markdown("</div>", unsafe_allow_html=True)
456 464  
... ... @@ -671,13 +679,14 @@ def main():
671 679 tool_calls = result.get("tool_calls", [])
672 680 debug_steps = result.get("debug_steps", [])
673 681  
674   - # Add assistant message
  682 + # Add assistant message (store search_refs so refs resolve after rerun)
675 683 st.session_state.messages.append(
676 684 {
677 685 "role": "assistant",
678 686 "content": response,
679 687 "tool_calls": tool_calls,
680 688 "debug_steps": debug_steps,
  689 + "search_refs": result.get("search_refs", {}),
681 690 }
682 691 )
683 692  
... ...
app/agents/shopping_agent.py
... ... @@ -10,6 +10,8 @@ Architecture:
10 10  
11 11 import json
12 12 import logging
  13 +import re
  14 +from datetime import datetime
13 15 from pathlib import Path
14 16 from typing import Any, Optional, Sequence
15 17  
... ... @@ -33,7 +35,30 @@ logger = logging.getLogger(__name__)
33 35 # 1. Guides multi-query search planning with explicit evaluate-and-decide loop
34 36 # 2. Forbids re-listing product details in the final response
35 37 # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism
36   -SYSTEM_PROMPT = """ 角色定义
  38 +SYSTEM_PROMPT = f"""角色定义
  39 +你是我们店铺的一名专业的电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。
  40 +作为我们店铺的一名专业的销售,除了本店铺的商品的推荐,你可以给用户提供有帮助的信息,但是不要虚构商品、提供本商店搜索结果以外的商品。
  41 +
  42 +一些原则:
  43 +1. 价值提供与信息收集的原则:
  44 + 1. 优先价值提供:适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的澄清需求、提高商品发现的效率,形成“提供-反馈”的良性循环。
  45 + 2. 意图判断-缺口大(比如品类或者使用人群都不能确定):从“品类”、“场景”、“效果”等宽泛的意图切入,给出方案推荐 + 1-2个关键问题让用户选择;示例:
  46 + 1. 你想穿出哪种感觉?职场干练 松弛自在 活力元气 温柔知性
  47 + 2. 平时通勤场合多吗?还是更喜欢生活化穿搭?
  48 + 3. 意图判断-缺口小:直接检索+方案呈现,根据情况,可以考虑该方向下重要的决策因素(思考哪些维度最可能影响推荐结果),进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。
  49 + 4. 选项驱动式澄清:推荐几个清晰的方向,呈现方案或商品搜索结果,再做澄清
  50 + 5. 单轮对话最好只提一个问题,最多两个,禁止多问题堆叠。
  51 + 6. 站在用户立场思考:比如询问用户期待的效果或感觉、使用的场合、想解决的问题,而不是询问具体的款式、参数,你需要将用户表达的需求翻译为具体可检索的商品特征(版型、材质、设计元素、风格标签等),并据此筛选商品、组织推荐逻辑。
  52 +2. 如何使用make_search_products_tool:
  53 + 1. 可以生成多个query进行搜索:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。
  54 + 2. 可以根据搜索结果调整搜索策略:每次调用 search_products 后,工具会返回搜索结果的相关性的判断、以及搜索结果的topN的title,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。结果太差的原因有可能是你生成的query不合理、请根据你看到的商品名称的构成组织搜索关键词。
  55 +3. 在最终回复中使用 [SEARCH_REF:xxx] 内联引用搜索结果:
  56 + 1. 搜索工具会返回一个结果引用标识[SEARCH_REF:xxx],撰写最终答复的时候请直接引用 [SEARCH_REF:xxx] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。
  57 + 2. 因为系统会自动将[SEARCH_REF:xxx]渲染为搜索结果,所以[SEARCH_REF:xxx]必须独占一行,且只在需要渲染该query完整的搜索结果时才进行引用,同一个结果不要重复引用。
  58 +4. 今天是{datetime.now().strftime("%Y-%m-%d")},所有与当前时间(比如天气、最新或即将发生的事件)相关的问题,都要使用web_search工具)。
  59 +"""
  60 +
  61 +SYSTEM_PROMPT___2 = """ 角色定义
37 62 你是我们店铺的一名专业的电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。
38 63 作为我们店铺的一名专业的销售,除了本店铺的商品的推荐,你可以给用户提供有帮助的信息,但是不要虚构商品、提供本商店搜索结果以外的商品。
39 64  
... ... @@ -48,6 +73,7 @@ SYSTEM_PROMPT = """ 角色定义
48 73 1. 可以生成多个query进行搜索:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。
49 74 2. 可以根据搜索结果调整搜索策略:每次调用 search_products 后,工具会返回搜索结果的相关性的判断、以及搜索结果的topN的title,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。
50 75 3. 使用 [SEARCH_REF:xxx] 内联引用搜索结果:搜索工具会返回一个结果引用标识[SEARCH_REF:xxx],撰写最终答复的时候可以直接引用将 [SEARCH_REF:xxx] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。
  76 + 4. 因为系统会自动将[SEARCH_REF:xxx]渲染为搜索结果,所以只在需要渲染该query完整的搜索结果时才进行引用,同一个结果不要重复引用。
51 77 """
52 78  
53 79  
... ... @@ -81,9 +107,40 @@ def _extract_message_text(msg) -> str:
81 107 return str(content) if content else ""
82 108  
83 109  
  110 +# 部分 API(如 DeepSeek)在 content 中返回 think 标签块,需去掉后只保留正式回复
  111 +_RE_THINK_TAGS = re.compile(r"<think>.*?<\/think>", re.DOTALL | re.IGNORECASE)
  112 +
  113 +
  114 +def _extract_formal_reply(msg) -> str:
  115 + """
  116 + 只截取大模型回复中的「正式结果」,去掉 thinking/reasoning 内容。
  117 + - 若 content 为 list(如 Responses API):只取 type 为 output_text/text 的块,跳过 reasoning。
  118 + - 若 content 为 str:去掉 think 标签及其内容。
  119 + """
  120 + content = getattr(msg, "content", "")
  121 + if isinstance(content, list):
  122 + parts = []
  123 + for block in content:
  124 + if not isinstance(block, dict):
  125 + continue
  126 + block_type = (block.get("type") or "").lower()
  127 + if block_type in ("reasoning",):
  128 + continue
  129 + text = block.get("text") or block.get("content") or ""
  130 + if text:
  131 + parts.append(text)
  132 + return "".join(str(p) for p in parts).strip()
  133 + if isinstance(content, str):
  134 + return _RE_THINK_TAGS.sub("", content).strip()
  135 + return str(content).strip() if content else ""
  136 +
  137 +
84 138 def _message_for_log(msg: BaseMessage) -> dict:
85 139 """Serialize a message for structured logging (content truncated)."""
86   - text = _extract_message_text(msg)
  140 + if getattr(msg, "additional_kwargs", None) and "reasoning" in (msg.additional_kwargs or {}):
  141 + text = _extract_formal_reply(msg) or _extract_message_text(msg)
  142 + else:
  143 + text = _extract_message_text(msg)
87 144 if len(text) > _LOG_CONTENT_MAX:
88 145 text = text[:_LOG_CONTENT_MAX] + f"... [truncated, total {len(text)} chars]"
89 146 out: dict[str, Any] = {
... ... @@ -106,13 +163,17 @@ class ShoppingAgent:
106 163 def __init__(self, session_id: Optional[str] = None):
107 164 self.session_id = session_id or "default"
108 165  
109   - llm_kwargs = dict(
  166 + llm_kwargs: dict[str, Any] = dict(
110 167 model=settings.openai_model,
111 168 temperature=settings.openai_temperature,
112 169 api_key=settings.openai_api_key,
113 170 )
114 171 if settings.openai_api_base_url:
115 172 llm_kwargs["base_url"] = settings.openai_api_base_url
  173 + if getattr(settings, "openai_use_reasoning", False):
  174 + llm_kwargs["use_responses_api"] = True
  175 + effort = getattr(settings, "openai_reasoning_effort", "medium") or "medium"
  176 + llm_kwargs["model_kwargs"] = {"reasoning": {"effort": effort, "summary": "none"}}
116 177  
117 178 self.llm = ChatOpenAI(**llm_kwargs)
118 179  
... ... @@ -246,7 +307,7 @@ class ShoppingAgent:
246 307  
247 308 final_state = self.graph.get_state(config)
248 309 final_msg = final_state.values["messages"][-1]
249   - response_text = _extract_message_text(final_msg)
  310 + response_text = _extract_formal_reply(final_msg) or _extract_message_text(final_msg)
250 311  
251 312 # Collect new SearchResults added during this turn
252 313 registry_after = global_registry.get_all(self.session_id)
... ... @@ -292,7 +353,8 @@ class ShoppingAgent:
292 353 if getattr(msg, "type", None) in ("system", "tool"):
293 354 continue
294 355 role = "user" if msg.type == "human" else "assistant"
295   - result.append({"role": role, "content": _extract_message_text(msg)})
  356 + content = _extract_formal_reply(msg) or _extract_message_text(msg) if role == "assistant" else _extract_message_text(msg)
  357 + result.append({"role": role, "content": content})
296 358 return result
297 359 except Exception as e:
298 360 logger.error(f"get_conversation_history error: {e}")
... ...
app/config.py
... ... @@ -33,6 +33,9 @@ class Settings(BaseSettings):
33 33 openai_vision_model: str = "qwen3-omni-flash"
34 34 openai_temperature: float = 0.7
35 35 openai_max_tokens: int = 1000
  36 + # 对话调用大模型时是否开启 thinking(需兼容 Responses API / reasoning 的模型,如 o1/o3/o4-mini)
  37 + openai_use_reasoning: bool = False
  38 + openai_reasoning_effort: str = "medium" # low | medium | high
36 39 # Base URL for OpenAI-compatible APIs (e.g. Qwen/DashScope)
37 40 # Qwen 北京: https://dashscope.aliyuncs.com/compatible-mode/v1
38 41 openai_api_base_url: Optional[str] = None
... ...
app/search_registry.py
... ... @@ -27,7 +27,7 @@ class ProductItem:
27 27 vendor: Optional[str] = None
28 28 image_url: Optional[str] = None
29 29 relevance_score: Optional[float] = None
30   - # LLM-assigned label: "Highly Relevant" | "Partially Relevant" | "Not Relevant"
  30 + # LLM-assigned label: "Relevant" | "Partially Relevant" | "Irrelevant"
31 31 match_label: str = "Partially Relevant"
32 32 tags: list = field(default_factory=list)
33 33 specifications: list = field(default_factory=list)
... ... @@ -40,7 +40,7 @@ class SearchResult:
40 40  
41 41 Identified by ref_id (e.g. 'sr_3f9a1b2c').
42 42 Stores the query, LLM quality assessment, and the curated product list
43   - (only "Highly Relevant" and "Partially Relevant" items — "Not Relevant" are discarded).
  43 + (only "Relevant" and "Partially Relevant" items — "Irrelevant" are discarded).
44 44 """
45 45  
46 46 ref_id: str
... ...
app/tools/search_tools.py
... ... @@ -2,7 +2,7 @@
2 2 Search Tools for Product Discovery
3 3  
4 4 - search_products is created via make_search_products_tool(session_id, registry).
5   -- After search API, an LLM labels each result as Highly Relevant / Partially Relevant / Not Relevant; we count and
  5 +- After search API, an LLM labels each result as Relevant / Partially Relevant / Irrelevant; we count and
6 6 store the curated list in the registry, return [SEARCH_REF:ref_id] + quality counts + top10 titles.
7 7 """
8 8  
... ... @@ -74,7 +74,7 @@ def _assess_search_quality(query: str, raw_products: list) -> tuple[list[str], s
74 74 product_text = "\n".join(lines)
75 75  
76 76 prompt = f"""评估以下搜索结果与用户查询的匹配程度,完成两件事:
77   -1. 为每条结果打一个等级:Highly Relevant / Partially Relevant / Not Relevant。
  77 +1. 为每条结果打一个等级:Relevant / Partially Relevant / Irrelevant。
78 78 2. 写一段 quality_summary(1–2 句话):简要说明搜索结果主要包含哪些商品、是否基本满足搜索意图、整体匹配度如何。
79 79  
80 80 用户查询:{query}
... ... @@ -82,10 +82,10 @@ def _assess_search_quality(query: str, raw_products: list) -> tuple[list[str], s
82 82 搜索结果(共 {n} 条):
83 83 {product_text}
84 84  
85   -等级说明:Highly Relevant=完全符合查询意图;Partially Relevant=基本相关(如品类等主需求匹配但部分属性不完全符合);Not Relevant=不相关。
  85 +等级说明:Relevant=完全符合查询意图;Partially Relevant=基本相关(如品类等主需求匹配但部分属性不完全符合);Irrelevant=不相关。
86 86  
87 87 请严格按以下 JSON 输出,仅输出 JSON,无其他内容:
88   -{{"labels": ["Highly Relevant", "Partially Relevant", "Not Relevant", ...], "quality_summary": "你的1-2句总结"}}
  88 +{{"labels": ["Relevant", "Partially Relevant", "Irrelevant", ...], "quality_summary": "你的1-2句总结"}}
89 89 labels 数组长度必须等于 {n}。"""
90 90  
91 91 try:
... ... @@ -93,7 +93,7 @@ labels 数组长度必须等于 {n}。"""
93 93 resp = client.chat.completions.create(
94 94 model=settings.openai_model,
95 95 messages=[{"role": "user", "content": prompt}],
96   - max_tokens=700,
  96 + max_tokens=1200,
97 97 temperature=0.1,
98 98 )
99 99 raw = resp.choices[0].message.content.strip()
... ... @@ -104,7 +104,7 @@ labels 数组长度必须等于 {n}。&quot;&quot;&quot;
104 104 raw = raw.strip()
105 105 data = json.loads(raw)
106 106 labels = data.get("labels", [])
107   - valid = {"Highly Relevant", "Partially Relevant", "Not Relevant"}
  107 + valid = {"Relevant", "Partially Relevant", "Irrelevant"}
108 108 labels = [l if l in valid else "Partially Relevant" for l in labels]
109 109 while len(labels) < n:
110 110 labels.append("Partially Relevant")
... ... @@ -133,14 +133,14 @@ def make_search_products_tool(
133 133  
134 134 @tool
135 135 def search_products(query: str, limit: int = 20) -> str:
136   - """搜索商品库并做质量评估:LLM 为每条结果打等级(Highly Relevant / Partially Relevant / Not Relevant),返回引用与 top10 标题。
  136 + """搜索商品库并做质量评估:LLM 为每条结果打等级(Relevant / Partially Relevant / Irrelevant),返回引用与 top10 标题。
137 137  
138 138 Args:
139 139 query: 自然语言商品描述
140 140 limit: 最多返回条数(1-20)
141 141  
142 142 Returns:
143   - 【搜索完成】+ 结果引用 [SEARCH_REF:ref_id] + 质量情况(评估条数、Highly/Partially Relevant 数)+ results list(top10 标题)
  143 + 【搜索完成】+ 结果引用 [SEARCH_REF:ref_id] + 质量情况(评估条数、Relevant/Partially Relevant 数)+ results list(top10 标题)
144 144 """
145 145 try:
146 146 logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}")
... ... @@ -176,13 +176,13 @@ def make_search_products_tool(
176 176 )
177 177  
178 178 labels, quality_summary = _assess_search_quality(query, raw_results)
179   - perfect_count = sum(1 for l in labels if l == "Highly Relevant")
  179 + perfect_count = sum(1 for l in labels if l == "Relevant")
180 180 partial_count = sum(1 for l in labels if l == "Partially Relevant")
181 181 irrelevant_count = len(labels) - perfect_count - partial_count
182 182  
183 183 products: list[ProductItem] = []
184 184 for raw, label in zip(raw_results, labels):
185   - if label not in ("Highly Relevant", "Partially Relevant"):
  185 + if label not in ("Relevant", "Partially Relevant"):
186 186 continue
187 187 products.append(
188 188 ProductItem(
... ... @@ -229,7 +229,7 @@ def make_search_products_tool(
229 229 return (
230 230 f"【搜索完成】query='{query}'\n"
231 231 f"结果引用:[SEARCH_REF:{ref_id}]\n"
232   - f"搜索结果质量情况:评估总条数{assessed_n}条,Highly Relevant {perfect_count} 条,Partially Relevant {partial_count} 条。\n"
  232 + f"搜索结果质量情况:评估总条数{assessed_n}条,Relevant {perfect_count} 条,Partially Relevant {partial_count} 条。\n"
233 233 f"results list:\n{results_list}"
234 234 )
235 235  
... ... @@ -251,7 +251,7 @@ def web_search(query: str) -> str:
251 251  
252 252 触发场景:
253 253 - 需要**外部知识**:流行趋势、品牌、搭配文化、节日习俗等
254   - - 需要**实时/及时信息**:当季流行元素、某地未来的天气
  254 + - 需要**实时/及时信息**:所有与天气相关的问题、当季流行元素、某地近期或者未来的事件、所有依赖当前时间相关的信息
255 255 - 需要**宏观参考**:不同场合/国家的穿着建议、选购攻略
256 256  
257 257 Args:
... ... @@ -369,7 +369,7 @@ def analyze_image_style(image_path: str) -> str:
369 369 ],
370 370 }
371 371 ],
372   - max_tokens=500,
  372 + max_tokens=800,
373 373 temperature=0.3,
374 374 )
375 375  
... ...