Commit 825828c426ebdfd199003fb17356e3c0485ded90 (1 parent: 66442668)
fix: search image_url normalization, logging, Streamlit width API
- Search image_url: parse results[].image_url, add _normalize_image_url() to
convert protocol-less URLs (////host/path) to https://host/path; fix double
slash (use https:// + url.lstrip("/") so normalized URL has single //).
- Logging: log full LLM request/response (LLM_REQUEST, LLM_RESPONSE), full
tool call results (TOOL_CALL_RESULT); for search tool log SEARCH_RESULT
summary and per-item SEARCH_RESULT_ITEM (image_url_raw) and
SEARCH_RESULT_PRODUCT (image_url_normalized).
- Streamlit: replace deprecated use_container_width=True with width="stretch"
for st.image and st.button.
Co-authored-by: Cursor <cursoragent@cursor.com>
Showing 3 changed files with 106 additions and 13 deletions.
| ... | ... | @@ -248,9 +248,9 @@ def initialize_session(): |
| 248 | 248 | if "show_image_upload" not in st.session_state: |
| 249 | 249 | st.session_state.show_image_upload = False |
| 250 | 250 | |
| 251 | - # Debug panel toggle | |
| 251 | + # Debug panel toggle (default True so 显示调试过程 is checked by default) | |
| 252 | 252 | if "show_debug" not in st.session_state: |
| 253 | - st.session_state.show_debug = False | |
| 253 | + st.session_state.show_debug = True | |
| 254 | 254 | |
| 255 | 255 | |
| 256 | 256 | def save_uploaded_image(uploaded_file) -> Optional[str]: |
| ... | ... | @@ -276,7 +276,7 @@ def save_uploaded_image(uploaded_file) -> Optional[str]: |
| 276 | 276 | |
| 277 | 277 | |
| 278 | 278 | def _load_product_image(product: ProductItem) -> Optional[Image.Image]: |
| 279 | - """Try to load a product image: image_url from API → local data/images → None.""" | |
| 279 | + """Try to load a product image: image_url from API (normalized when stored) → local data/images → None.""" | |
| 280 | 280 | if product.image_url: |
| 281 | 281 | try: |
| 282 | 282 | import requests |
| ... | ... | @@ -306,7 +306,7 @@ def display_product_card_from_item(product: ProductItem) -> None: |
| 306 | 306 | img = ImageOps.fit(img, target, method=Image.Resampling.LANCZOS) |
| 307 | 307 | except AttributeError: |
| 308 | 308 | img = ImageOps.fit(img, target, method=Image.LANCZOS) |
| 309 | - st.image(img, use_container_width=True) | |
| 309 | + st.image(img, width="stretch") | |
| 310 | 310 | else: |
| 311 | 311 | st.markdown( |
| 312 | 312 | '<div style="height:120px;background:#f5f5f5;border-radius:6px;' |
| ... | ... | @@ -530,7 +530,7 @@ def main(): |
| 530 | 530 | with st.sidebar: |
| 531 | 531 | st.markdown("### ⚙️ Settings") |
| 532 | 532 | |
| 533 | - if st.button("🗑️ Clear Chat", use_container_width=True): | |
| 533 | + if st.button("🗑️ Clear Chat", width="stretch"): | |
| 534 | 534 | if "shopping_agent" in st.session_state: |
| 535 | 535 | st.session_state.shopping_agent.clear_history() |
| 536 | 536 | # Clear search result registry for this session |
| ... | ... | @@ -595,7 +595,7 @@ def main(): |
| 595 | 595 | |
| 596 | 596 | with col1: |
| 597 | 597 | # Image upload toggle button |
| 598 | - if st.button("➕", help="Add image", use_container_width=True): | |
| 598 | + if st.button("➕", help="Add image", width="stretch"): | |
| 599 | 599 | st.session_state.show_image_upload = ( |
| 600 | 600 | not st.session_state.show_image_upload |
| 601 | 601 | ) | ... | ... |
app/agents/shopping_agent.py
| ... | ... | @@ -8,9 +8,10 @@ Architecture: |
| 8 | 8 | re-listing product details; the UI renders product cards from the registry |
| 9 | 9 | """ |
| 10 | 10 | |
| 11 | +import json | |
| 11 | 12 | import logging |
| 12 | 13 | from pathlib import Path |
| 13 | -from typing import Optional, Sequence | |
| 14 | +from typing import Any, Optional, Sequence | |
| 14 | 15 | |
| 15 | 16 | from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage |
| 16 | 17 | from langchain_openai import ChatOpenAI |
| ... | ... | @@ -32,15 +33,14 @@ logger = logging.getLogger(__name__) |
| 32 | 33 | # 1. Guides multi-query search planning with explicit evaluate-and-decide loop |
| 33 | 34 | # 2. Forbids re-listing product details in the final response |
| 34 | 35 | # 3. Mandates [SEARCH_REF:xxx] inline citation as the only product presentation mechanism |
| 35 | -SYSTEM_PROMPT = """ | |
| 36 | -角色定义 | |
| 36 | +SYSTEM_PROMPT = """角色定义 | |
| 37 | 37 | 你是一名专业的服装电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。 |
| 38 | 38 | |
| 39 | 39 | 一些原则: |
| 40 | 40 | 1. 你是一个真人导购,是一个贴心、专业的销售,保持灵活,根据上下文,基于常识灵活的切换策略,在合适的上下文询问合适的问题、给出有价值的方案和搜索结果的呈现。 |
| 41 | -2. 兼顾推荐与信息收集:适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的帮助用户更清晰的呈现需求、提高商品发现的效率,形成“提供-反馈”的良性循环。 | |
| 42 | - 1. 在意图不明时,主动通过1-2个关键问题(如品类、场景、风格、预算)进行引导,并提供初步方向。 | |
| 43 | - 2. 在了解到初步意向后,要进行相关商品的搜索、进行搜索结果的呈现,同时思考该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。 | |
| 41 | +2. 商品搜索结果推荐与信息收集: | |
| 42 | + 1. 根据上下文、用户诉求,灵活的切换侧重点,何时需要进行搜索、何时要引导客户完善需求,你需要站在用户角度进行思考。比如已经有较为清晰的意图,则以搜索、方案推荐为主,有必要的时候,思考该方向下重要的决策因素,进行提议和问题收集,让用户既得到相关信息、又得到下一步的方向引导、同时也有机会修正或者细化诉求。如果存在重大的需求方向缺口,主动通过1-2个关键问题进行引导,并提供初步方向。 | |
| 43 | + 2. 适时的提供有价值的信息,如商品推荐、穿搭建议、趋势信息,在推荐方向上有需求缺口、需要明确的重要信息时,要适时的做“信息收集”,引导式的帮助用户更清晰的呈现需求、提高商品发现的效率,形成“提供-反馈”的良性循环。 | |
| 44 | 44 | 3. 对于复杂需求时,要能基于上下文,将导购任务进行合理拆解。 |
| 45 | 45 | 3. 引导或者收集需求时,需要站在用户立场,比如询问用户期待的效果或感觉、使用的场合、偏好的风格等用户立场需,而不是询问具体的款式或参数,你需要将用户立场的需求理解/翻译/转化为具体的搜索计划,最后筛选产品、结合需求+结果特性组织推荐理由、呈现方案。 |
| 46 | 46 | 4. 如何使用search_products:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。每次调用 search_products 后,工具会返回以下内容,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。可以进行多轮搜索,但是要适时的总结和反馈信息避免用户等待过长时间: |
| ... | ... | @@ -65,6 +65,11 @@ class AgentState(TypedDict): |
| 65 | 65 | |
| 66 | 66 | # ── Helper ───────────────────────────────────────────────────────────────────── |
| 67 | 67 | |
| 68 | +# Max length for logging single content field (avoid huge logs) | |
| 69 | +_LOG_CONTENT_MAX = 8000 | |
| 70 | +_LOG_TOOL_RESULT_MAX = 4000 | |
| 71 | + | |
| 72 | + | |
| 68 | 73 | def _extract_message_text(msg) -> str: |
| 69 | 74 | """Extract plain text from a LangChain message (handles str or content_blocks).""" |
| 70 | 75 | content = getattr(msg, "content", "") |
| ... | ... | @@ -81,6 +86,23 @@ def _extract_message_text(msg) -> str: |
| 81 | 86 | return str(content) if content else "" |
| 82 | 87 | |
| 83 | 88 | |
| 89 | +def _message_for_log(msg: BaseMessage) -> dict: | |
| 90 | + """Serialize a message for structured logging (content truncated).""" | |
| 91 | + text = _extract_message_text(msg) | |
| 92 | + if len(text) > _LOG_CONTENT_MAX: | |
| 93 | + text = text[:_LOG_CONTENT_MAX] + f"... [truncated, total {len(text)} chars]" | |
| 94 | + out: dict[str, Any] = { | |
| 95 | + "type": getattr(msg, "type", "unknown"), | |
| 96 | + "content": text, | |
| 97 | + } | |
| 98 | + if hasattr(msg, "tool_calls") and msg.tool_calls: | |
| 99 | + out["tool_calls"] = [ | |
| 100 | + {"name": tc.get("name"), "args": tc.get("args", {})} | |
| 101 | + for tc in msg.tool_calls | |
| 102 | + ] | |
| 103 | + return out | |
| 104 | + | |
| 105 | + | |
| 84 | 106 | # ── Agent class ──────────────────────────────────────────────────────────────── |
| 85 | 107 | |
| 86 | 108 | class ShoppingAgent: |
| ... | ... | @@ -111,7 +133,18 @@ class ShoppingAgent: |
| 111 | 133 | messages = state["messages"] |
| 112 | 134 | if not any(isinstance(m, SystemMessage) for m in messages): |
| 113 | 135 | messages = [SystemMessage(content=SYSTEM_PROMPT)] + list(messages) |
| 136 | + request_log = [_message_for_log(m) for m in messages] | |
| 137 | + req_json = json.dumps(request_log, ensure_ascii=False) | |
| 138 | + if len(req_json) > _LOG_CONTENT_MAX: | |
| 139 | + req_json = req_json[:_LOG_CONTENT_MAX] + f"... [truncated total {len(req_json)}]" | |
| 140 | + logger.info("[%s] LLM_REQUEST messages=%s", self.session_id, req_json) | |
| 114 | 141 | response = self.llm_with_tools.invoke(messages) |
| 142 | + response_log = _message_for_log(response) | |
| 143 | + logger.info( | |
| 144 | + "[%s] LLM_RESPONSE %s", | |
| 145 | + self.session_id, | |
| 146 | + json.dumps(response_log, ensure_ascii=False), | |
| 147 | + ) | |
| 115 | 148 | return {"messages": [response]} |
| 116 | 149 | |
| 117 | 150 | def should_continue(state: AgentState): |
| ... | ... | @@ -202,6 +235,16 @@ class ShoppingAgent: |
| 202 | 235 | preview = text[:600] + ("…" if len(text) > 600 else "") |
| 203 | 236 | if i < len(unresolved): |
| 204 | 237 | unresolved[i]["result"] = preview |
| 238 | + tc_name = unresolved[i].get("name", "") | |
| 239 | + tc_args = unresolved[i].get("args", {}) | |
| 240 | + result_log = text if len(text) <= _LOG_TOOL_RESULT_MAX else text[:_LOG_TOOL_RESULT_MAX] + f"... [truncated total {len(text)}]" | |
| 241 | + logger.info( | |
| 242 | + "[%s] TOOL_CALL_RESULT name=%s args=%s result=%s", | |
| 243 | + self.session_id, | |
| 244 | + tc_name, | |
| 245 | + json.dumps(tc_args, ensure_ascii=False), | |
| 246 | + result_log, | |
| 247 | + ) | |
| 205 | 248 | step_results.append({"content": preview}) |
| 206 | 249 | |
| 207 | 250 | debug_steps.append({"node": "tools", "results": step_results}) | ... | ... |
app/tools/search_tools.py
| ... | ... | @@ -38,6 +38,21 @@ logger = logging.getLogger(__name__) |
| 38 | 38 | _openai_client: Optional[OpenAI] = None |
| 39 | 39 | |
| 40 | 40 | |
| 41 | +def _normalize_image_url(url: Optional[str]) -> Optional[str]: | |
| 42 | + """Normalize image_url from API (e.g. ////cnres.appracle.com/... → https://cnres.appracle.com/...).""" | |
| 43 | + if not url or not isinstance(url, str): | |
| 44 | + return None | |
| 45 | + url = url.strip() | |
| 46 | + if not url: | |
| 47 | + return None | |
| 48 | + if url.startswith("https://") or url.startswith("http://"): | |
| 49 | + return url | |
| 50 | + # // or ////host/path → https://host/path (exactly one "//" after scheme) | |
| 51 | + if url.startswith("/"): | |
| 52 | + return "https://" + url.lstrip("/") | |
| 53 | + return "https://" + url | |
| 54 | + | |
| 55 | + | |
| 41 | 56 | def get_openai_client() -> OpenAI: |
| 42 | 57 | global _openai_client |
| 43 | 58 | if _openai_client is None: |
| ... | ... | @@ -226,7 +241,7 @@ def make_search_products_tool( |
| 226 | 241 | raw.get("category_path") or raw.get("category_name") |
| 227 | 242 | ), |
| 228 | 243 | vendor=raw.get("vendor"), |
| 229 | - image_url=raw.get("image_url"), | |
| 244 | + image_url=_normalize_image_url(raw.get("image_url")), | |
| 230 | 245 | relevance_score=raw.get("relevance_score"), |
| 231 | 246 | match_label=label, |
| 232 | 247 | tags=raw.get("tags") or [], |
| ... | ... | @@ -249,6 +264,41 @@ def make_search_products_tool( |
| 249 | 264 | products=products, |
| 250 | 265 | ) |
| 251 | 266 | registry.register(session_id, result) |
| 267 | + | |
| 268 | + # ── Search result detailed log (ref_id, summary, per-item id + image_url raw/normalized) ── | |
| 269 | + logger.info( | |
| 270 | + "[%s] SEARCH_RESULT ref_id=%s query=%s total_api_hits=%s returned_count=%s " | |
| 271 | + "verdict=%s quality_summary=%s perfect=%s partial=%s irrelevant=%s", | |
| 272 | + session_id, | |
| 273 | + ref_id, | |
| 274 | + query, | |
| 275 | + total_hits, | |
| 276 | + len(raw_results), | |
| 277 | + verdict, | |
| 278 | + quality_summary, | |
| 279 | + perfect_count, | |
| 280 | + partial_count, | |
| 281 | + irrelevant_count, | |
| 282 | + ) | |
| 283 | + for idx, raw in enumerate(raw_results): | |
| 284 | + raw_img = raw.get("image_url") or "" | |
| 285 | + logger.info( | |
| 286 | + "[%s] SEARCH_RESULT_ITEM raw idx=%s spu_id=%s title=%s image_url_raw=%s", | |
| 287 | + session_id, | |
| 288 | + idx, | |
| 289 | + raw.get("spu_id", ""), | |
| 290 | + (raw.get("title") or "")[:60], | |
| 291 | + raw_img, | |
| 292 | + ) | |
| 293 | + for p in products: | |
| 294 | + logger.info( | |
| 295 | + "[%s] SEARCH_RESULT_PRODUCT spu_id=%s match_label=%s image_url_normalized=%s", | |
| 296 | + session_id, | |
| 297 | + p.spu_id, | |
| 298 | + p.match_label, | |
| 299 | + p.image_url or "", | |
| 300 | + ) | |
| 301 | + | |
| 252 | 302 | logger.info( |
| 253 | 303 | f"[{session_id}] Registered {ref_id}: verdict={verdict}, " |
| 254 | 304 | f"perfect={perfect_count}, partial={partial_count}, irrel={irrelevant_count}" | ... | ... |