Commit 825828c426ebdfd199003fb17356e3c0485ded90

Authored by tangwang
1 parent 66442668

fix: search image_url normalization, logging, Streamlit width API

- Search image_url: parse results[].image_url, add _normalize_image_url() to
  convert protocol-less URLs (////host/path) to https://host/path; fix double
  slash (use https:// + url.lstrip("/") so normalized URL has single //).
- Logging: log full LLM request/response (LLM_REQUEST, LLM_RESPONSE), full
  tool call results (TOOL_CALL_RESULT); for search tool log SEARCH_RESULT
  summary and per-item SEARCH_RESULT_ITEM (image_url_raw) and
  SEARCH_RESULT_PRODUCT (image_url_normalized).
- Streamlit: replace deprecated use_container_width=True with width="stretch"
  for st.image and st.button.

Co-authored-by: Cursor <cursoragent@cursor.com>
... ... @@ -248,9 +248,9 @@ def initialize_session():
248 248 if "show_image_upload" not in st.session_state:
249 249 st.session_state.show_image_upload = False
250 250  
251   - # Debug panel toggle
  251 + # Debug panel toggle (enabled by default so the "显示调试过程" / "show debug process" checkbox starts checked)
252 252 if "show_debug" not in st.session_state:
253   - st.session_state.show_debug = False
  253 + st.session_state.show_debug = True
254 254  
255 255  
256 256 def save_uploaded_image(uploaded_file) -> Optional[str]:
... ... @@ -276,7 +276,7 @@ def save_uploaded_image(uploaded_file) -&gt; Optional[str]:
276 276  
277 277  
278 278 def _load_product_image(product: ProductItem) -> Optional[Image.Image]:
279   - """Try to load a product image: image_url from API → local data/images → None."""
  279 + """Try to load a product image: image_url from API (normalized when stored) → local data/images → None."""
280 280 if product.image_url:
281 281 try:
282 282 import requests
... ... @@ -306,7 +306,7 @@ def display_product_card_from_item(product: ProductItem) -&gt; None:
306 306 img = ImageOps.fit(img, target, method=Image.Resampling.LANCZOS)
307 307 except AttributeError:
308 308 img = ImageOps.fit(img, target, method=Image.LANCZOS)
309   - st.image(img, use_container_width=True)
  309 + st.image(img, width="stretch")
310 310 else:
311 311 st.markdown(
312 312 '<div style="height:120px;background:#f5f5f5;border-radius:6px;'
... ... @@ -530,7 +530,7 @@ def main():
530 530 with st.sidebar:
531 531 st.markdown("### ⚙️ Settings")
532 532  
533   - if st.button("🗑️ Clear Chat", use_container_width=True):
  533 + if st.button("🗑️ Clear Chat", width="stretch"):
534 534 if "shopping_agent" in st.session_state:
535 535 st.session_state.shopping_agent.clear_history()
536 536 # Clear search result registry for this session
... ... @@ -595,7 +595,7 @@ def main():
595 595  
596 596 with col1:
597 597 # Image upload toggle button
598   - if st.button("➕", help="Add image", use_container_width=True):
  598 + if st.button("➕", help="Add image", width="stretch"):
599 599 st.session_state.show_image_upload = (
600 600 not st.session_state.show_image_upload
601 601 )
... ...
app/agents/shopping_agent.py
... ... @@ -8,9 +8,10 @@ Architecture:
8 8 re-listing product details; the UI renders product cards from the registry
9 9 """
10 10  
  11 +import json
11 12 import logging
12 13 from pathlib import Path
13   -from typing import Optional, Sequence
  14 +from typing import Any, Optional, Sequence
14 15  
15 16 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
16 17 from langchain_openai import ChatOpenAI
... ... @@ -32,15 +33,14 @@ logger = logging.getLogger(__name__)
32 33 # 1. Guides multi-query search planning with explicit evaluate-and-decide loop
33 34 # 2. Forbids re-listing product details in the final response
34 35 # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism
35   -SYSTEM_PROMPT = """
36   -角色定义
  36 +SYSTEM_PROMPT = """角色定义
37 37 你是一名专业的服装电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。
38 38  
39 39 一些原则:
40 40 1. 你是一个真人导购,是一个贴心、专业的销售,保持灵活,根据上下文,基于常识灵活的切换策略,在合适的上下文询问合适的问题、给出有价值的方案和搜索结果的呈现。
41   -2. 兼顾推荐与信息收集:适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的帮助用户更清晰的呈现需求、提高商品发现的效率,形成“提供-反馈”的良性循环。
42   - 1. 在意图不明时,主动通过1-2个关键问题(如品类、场景、风格、预算)进行引导,并提供初步方向。
43   - 2. 在了解到初步意向后,要进行相关商品的搜索、进行搜索结果的呈现,同时思考该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。
  41 +2. 商品搜索结果推荐与信息收集:
  42 + 1. 根据上下文、用户诉求,灵活的切换侧重点,何时需要进行搜索、何时要引导客户完善需求,你需要站在用户角度进行思考。比如已经有较为清晰的意图,则以搜索、方案推荐为主,有必要的时候,思考该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。如果存在重大的需求方向缺口,主动通过1-2个关键问题进行引导,并提供初步方向。
  43 + 2. 适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的帮助用户更清晰的呈现需求、提高商品发现的效率,形成“提供-反馈”的良性循环。
44 44 3. 对于复杂需求时,要能基于上下文,将导购任务进行合理拆解。
45 45 3. 引导或者收集需求时,需要站在用户立场,比如询问用户期待的效果或感觉、使用的场合、偏好的风格等用户立场需,而不是询问具体的款式或参数,你需要将用户立场的需求理解/翻译/转化为具体的搜索计划,最后筛选产品、结合需求+结果特性组织推荐理由、呈现方案。
46 46 4. 如何使用search_products:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。每次调用 search_products 后,工具会返回以下内容,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。可以进行多轮搜索,但是要适时的总结和反馈信息避免用户等待过长时间:
... ... @@ -65,6 +65,11 @@ class AgentState(TypedDict):
65 65  
66 66 # ── Helper ─────────────────────────────────────────────────────────────────────
67 67  
  68 +# Max characters written to the log for one message's content / one tool result (keeps log lines bounded)
  69 +_LOG_CONTENT_MAX = 8000
  70 +_LOG_TOOL_RESULT_MAX = 4000
  71 +
  72 +
68 73 def _extract_message_text(msg) -> str:
69 74 """Extract plain text from a LangChain message (handles str or content_blocks)."""
70 75 content = getattr(msg, "content", "")
... ... @@ -81,6 +86,23 @@ def _extract_message_text(msg) -&gt; str:
81 86 return str(content) if content else ""
82 87  
83 88  
def _message_for_log(msg: BaseMessage) -> dict:
    """Build a JSON-friendly dict describing *msg* for structured logging.

    The result always has a ``type`` key (the message's ``type`` attribute,
    or ``"unknown"`` when absent) and a ``content`` key holding the plain
    text extracted by ``_extract_message_text``. Text longer than
    ``_LOG_CONTENT_MAX`` characters is cut at that length and suffixed with
    a marker stating the original length. When the message has a non-empty
    ``tool_calls`` attribute, a ``tool_calls`` list with each call's name and
    args is included as well.
    """
    body = _extract_message_text(msg)
    full_length = len(body)
    if full_length > _LOG_CONTENT_MAX:
        # Keep the head of the text and record how much was dropped.
        body = f"{body[:_LOG_CONTENT_MAX]}... [truncated, total {full_length} chars]"

    record: dict[str, Any] = {
        "type": getattr(msg, "type", "unknown"),
        "content": body,
    }

    # Only some message objects expose tool calls; skip the key when there are none.
    calls = getattr(msg, "tool_calls", None)
    if calls:
        summarized = []
        for call in calls:
            summarized.append({"name": call.get("name"), "args": call.get("args", {})})
        record["tool_calls"] = summarized
    return record
  104 +
  105 +
84 106 # ── Agent class ────────────────────────────────────────────────────────────────
85 107  
86 108 class ShoppingAgent:
... ... @@ -111,7 +133,18 @@ class ShoppingAgent:
111 133 messages = state["messages"]
112 134 if not any(isinstance(m, SystemMessage) for m in messages):
113 135 messages = [SystemMessage(content=SYSTEM_PROMPT)] + list(messages)
  136 + request_log = [_message_for_log(m) for m in messages]
  137 + req_json = json.dumps(request_log, ensure_ascii=False)
  138 + if len(req_json) > _LOG_CONTENT_MAX:
  139 + req_json = req_json[:_LOG_CONTENT_MAX] + f"... [truncated total {len(req_json)}]"
  140 + logger.info("[%s] LLM_REQUEST messages=%s", self.session_id, req_json)
114 141 response = self.llm_with_tools.invoke(messages)
  142 + response_log = _message_for_log(response)
  143 + logger.info(
  144 + "[%s] LLM_RESPONSE %s",
  145 + self.session_id,
  146 + json.dumps(response_log, ensure_ascii=False),
  147 + )
115 148 return {"messages": [response]}
116 149  
117 150 def should_continue(state: AgentState):
... ... @@ -202,6 +235,16 @@ class ShoppingAgent:
202 235 preview = text[:600] + ("…" if len(text) > 600 else "")
203 236 if i < len(unresolved):
204 237 unresolved[i]["result"] = preview
  238 + tc_name = unresolved[i].get("name", "")
  239 + tc_args = unresolved[i].get("args", {})
  240 + result_log = text if len(text) <= _LOG_TOOL_RESULT_MAX else text[:_LOG_TOOL_RESULT_MAX] + f"... [truncated total {len(text)}]"
  241 + logger.info(
  242 + "[%s] TOOL_CALL_RESULT name=%s args=%s result=%s",
  243 + self.session_id,
  244 + tc_name,
  245 + json.dumps(tc_args, ensure_ascii=False),
  246 + result_log,
  247 + )
205 248 step_results.append({"content": preview})
206 249  
207 250 debug_steps.append({"node": "tools", "results": step_results})
... ...
app/tools/search_tools.py
... ... @@ -38,6 +38,21 @@ logger = logging.getLogger(__name__)
38 38 _openai_client: Optional[OpenAI] = None
39 39  
40 40  
  41 +def _normalize_image_url(url: Optional[str]) -> Optional[str]:
  42 + """Normalize image_url from API (e.g. ////cnres.appracle.com/... → https://cnres.appracle.com/...)."""
  43 + if not url or not isinstance(url, str):
  44 + return None
  45 + url = url.strip()
  46 + if not url:
  47 + return None
  48 + if url.startswith("https://") or url.startswith("http://"):
  49 + return url
  50 + # // or ////host/path → https://host/path (exactly one "//" after scheme)
  51 + if url.startswith("/"):
  52 + return "https://" + url.lstrip("/")
  53 + return "https://" + url
  54 +
  55 +
41 56 def get_openai_client() -> OpenAI:
42 57 global _openai_client
43 58 if _openai_client is None:
... ... @@ -226,7 +241,7 @@ def make_search_products_tool(
226 241 raw.get("category_path") or raw.get("category_name")
227 242 ),
228 243 vendor=raw.get("vendor"),
229   - image_url=raw.get("image_url"),
  244 + image_url=_normalize_image_url(raw.get("image_url")),
230 245 relevance_score=raw.get("relevance_score"),
231 246 match_label=label,
232 247 tags=raw.get("tags") or [],
... ... @@ -249,6 +264,41 @@ def make_search_products_tool(
249 264 products=products,
250 265 )
251 266 registry.register(session_id, result)
  267 +
  268 + # ── Search result detailed log (ref_id, summary, per-item id + image_url raw/normalized) ──
  269 + logger.info(
  270 + "[%s] SEARCH_RESULT ref_id=%s query=%s total_api_hits=%s returned_count=%s "
  271 + "verdict=%s quality_summary=%s perfect=%s partial=%s irrelevant=%s",
  272 + session_id,
  273 + ref_id,
  274 + query,
  275 + total_hits,
  276 + len(raw_results),
  277 + verdict,
  278 + quality_summary,
  279 + perfect_count,
  280 + partial_count,
  281 + irrelevant_count,
  282 + )
  283 + for idx, raw in enumerate(raw_results):
  284 + raw_img = raw.get("image_url") or ""
  285 + logger.info(
  286 + "[%s] SEARCH_RESULT_ITEM raw idx=%s spu_id=%s title=%s image_url_raw=%s",
  287 + session_id,
  288 + idx,
  289 + raw.get("spu_id", ""),
  290 + (raw.get("title") or "")[:60],
  291 + raw_img,
  292 + )
  293 + for p in products:
  294 + logger.info(
  295 + "[%s] SEARCH_RESULT_PRODUCT spu_id=%s match_label=%s image_url_normalized=%s",
  296 + session_id,
  297 + p.spu_id,
  298 + p.match_label,
  299 + p.image_url or "",
  300 + )
  301 +
252 302 logger.info(
253 303 f"[{session_id}] Registered {ref_id}: verdict={verdict}, "
254 304 f"perfect={perfect_count}, partial={partial_count}, irrel={irrelevant_count}"
... ...