From 897b5ca9eb04b92d884a8c02861513580673c875 Mon Sep 17 00:00:00 2001 From: tangwang Date: Sun, 22 Feb 2026 09:35:26 +0800 Subject: [PATCH] perf: 前端性能优化 + 搜索统一实现 + ref_id 与命名统一 --- README_prompts.md | 4 ++-- app.py | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------ app/agents/shopping_agent.py | 18 +++++++++--------- app/search_registry.py | 21 ++++++++++++--------- app/tools/search_tools.py | 235 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------- 5 files changed, 299 insertions(+), 207 deletions(-) diff --git a/README_prompts.md b/README_prompts.md index e42516e..38e2a21 100644 --- a/README_prompts.md +++ b/README_prompts.md @@ -43,7 +43,7 @@ graphRAG在商品搜索中如何使用?我想将他用于,对商品的模糊 -请帮我补充一个功能:前端,对于渲染的每一个商品([SEARCH_REF:xxx]渲染的搜索结果),鼠标hover上去的时候,悬浮可供点击的两个东西: +请帮我补充一个功能:前端,对于渲染的每一个商品([SEARCH_RESULTS_REF:xxx]渲染的搜索结果),鼠标hover上去的时候,悬浮可供点击的两个东西: 1. Similar products: 点击后,从右侧拉出、覆盖大部分页面、保留部分背景(我不知道这种技术叫什么,实现类似效果即可)展现的内容是,以该商品未标题,发起商品搜索,页面展示搜索结果。 2. 一个勾选框,点击后为勾选状态。可以勾选多个商品。 下方也悬浮两个菜单,一个ask,一个compare。 @@ -66,7 +66,7 @@ graphRAG在商品搜索中如何使用?我想将他用于,对商品的模糊 -请帮我补充一个功能:前端,对于渲染的每一个商品([SEARCH_REF:xxx]渲染的搜索结果),鼠标hover上去的时候,悬浮可供点击的两个东西: +请帮我补充一个功能:前端,对于渲染的每一个商品([SEARCH_RESULTS_REF:xxx]渲染的搜索结果),鼠标hover上去的时候,悬浮可供点击的两个东西: 1. Similar products: 点击后,从右侧拉出、覆盖大部分页面、保留部分背景(我不知道这种技术叫什么,实现类似效果即可)展现的内容是,以该商品未标题,发起商品搜索,页面展示搜索结果。 2. 一个勾选框,点击后为勾选状态。可以勾选多个商品。 下方也悬浮两个菜单,一个ask,一个compare。 diff --git a/app.py b/app.py index cc3c09a..95f4c8c 100644 --- a/app.py +++ b/app.py @@ -7,6 +7,7 @@ import html import logging import re import uuid +from collections import OrderedDict from pathlib import Path from typing import Any, Optional @@ -17,9 +18,9 @@ from PIL import Image, ImageOps from app.agents.shopping_agent import ShoppingAgent from app.search_registry import ProductItem, SearchResult, global_registry -# Matches [SEARCH_REF:sr_xxxxxxxx] tokens embedded in AI responses. +# Matches [SEARCH_RESULTS_REF:sr_xxxxxxxx] tokens embedded in AI responses. # Case-insensitive, optional spaces around the id. -SEARCH_REF_PATTERN = re.compile(r"\[SEARCH_REF:\s*([a-zA-Z0-9_]+)\s*\]", re.IGNORECASE) +SEARCH_RESULTS_REF_PATTERN = re.compile(r"\[SEARCH_RESULTS_REF:\s*([a-zA-Z0-9_]+)\s*\]", re.IGNORECASE) # Configure logging logging.basicConfig( @@ -28,6 +29,10 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) +# In-memory image cache (url or "local:path" -> PIL Image), max 100 entries +_IMAGE_CACHE: OrderedDict = OrderedDict() +_IMAGE_CACHE_MAX = 100 + # Page config st.set_page_config( page_title="ShopAgent", @@ -455,6 +460,7 @@ def _build_reference_prefix(products: list[dict]) -> str: return "\n".join(lines) +@st.fragment def render_referenced_products_in_input() -> None: """Render referenced products above chat input, each with remove button.""" refs = st.session_state.get("referenced_products", []) @@ -485,46 +491,45 @@ def render_referenced_products_in_input() -> None: def _load_product_image(product: ProductItem) -> Optional[Image.Image]: - """Try to load a product image: image_url from API (normalized when stored) → local data/images → None.""" + """Load product image with cache: image_url or local data/images. Cache key = url or 'local:path'.""" + cache_key: Optional[str] = None if product.image_url: + cache_key = product.image_url + if cache_key in _IMAGE_CACHE: + _IMAGE_CACHE.move_to_end(cache_key) + return _IMAGE_CACHE[cache_key] try: + import io import requests resp = requests.get(product.image_url, timeout=10) if resp.status_code == 200: - import io - return Image.open(io.BytesIO(resp.content)) + img = Image.open(io.BytesIO(resp.content)) + _IMAGE_CACHE[cache_key] = img + _IMAGE_CACHE.move_to_end(cache_key) + if len(_IMAGE_CACHE) > _IMAGE_CACHE_MAX: + _IMAGE_CACHE.popitem(last=False) + return img except Exception as e: logger.debug(f"Remote image fetch failed for {product.spu_id}: {e}") local = Path(f"data/images/{product.spu_id}.jpg") if local.exists(): + cache_key = f"local:{local}" + if cache_key in _IMAGE_CACHE: + _IMAGE_CACHE.move_to_end(cache_key) + return _IMAGE_CACHE[cache_key] try: - return Image.open(local) + img = Image.open(local) + _IMAGE_CACHE[cache_key] = img + _IMAGE_CACHE.move_to_end(cache_key) + if len(_IMAGE_CACHE) > _IMAGE_CACHE_MAX: + _IMAGE_CACHE.popitem(last=False) + return img except Exception as e: logger.debug(f"Local image load failed {local}: {e}") return None -def _run_similar_search(query: str) -> Optional[str]: - """Run product search with query, register result, return new ref_id or None.""" - if not query or not query.strip(): - return None - from app.tools.search_tools import make_search_products_tool - - session_id = st.session_state.get("session_id", "") - if not session_id: - return None - tool = make_search_products_tool(session_id, global_registry) - try: - out = tool.invoke({"query": query.strip()}) - match = SEARCH_REF_PATTERN.search(out) - if match: - return match.group(1).strip() - except Exception as e: - logger.warning(f"Similar search failed: {e}") - return None - - def display_product_card_from_item( product: ProductItem, ref_id: str, @@ -584,19 +589,11 @@ def display_product_card_from_item( if similar_clicked: search_query = (product.title or "").strip() or "商品" - new_ref = _run_similar_search(search_query) - if new_ref: - st.session_state.side_panel = { - "visible": True, - "mode": "similar", - "payload": {"ref_id": new_ref, "query": search_query}, - } - else: - st.session_state.side_panel = { - "visible": True, - "mode": "similar", - "payload": {"ref_id": None, "query": search_query, "error": True}, - } + st.session_state.side_panel = { + "visible": True, + "mode": "similar", + "payload": {"query": search_query, "loading": True}, + } st.rerun() if is_checked: @@ -608,7 +605,7 @@ def display_product_card_from_item( def render_search_result_block(result: SearchResult, widget_prefix: str = "") -> None: """ - Render a full search result block in place of a [SEARCH_REF:ref_id] token. + Render a full search result block in place of a [SEARCH_RESULTS_REF:ref_id] token. widget_prefix: unique per (message, ref block) so Streamlit widget keys stay unique. """ @@ -649,12 +646,12 @@ def render_message_with_refs( msg_index: int = 0, ) -> None: """ - Render an assistant message that may contain [SEARCH_REF:ref_id] tokens. + Render an assistant message that may contain [SEARCH_RESULTS_REF:ref_id] tokens. msg_index: message index in chat, used to keep widget keys unique across messages. """ fallback_refs = fallback_refs or {} - parts = SEARCH_REF_PATTERN.split(content) + parts = SEARCH_RESULTS_REF_PATTERN.split(content) for i, segment in enumerate(parts): if i % 2 == 0: @@ -730,7 +727,7 @@ def display_message(message: dict, msg_index: int = 0): st.markdown("---") - # Render message: expand [SEARCH_REF:ref_id] tokens into product card blocks + # Render message: expand [SEARCH_RESULTS_REF:ref_id] tokens into product card blocks session_id = st.session_state.get("session_id", "") render_message_with_refs( content, session_id, fallback_refs=message.get("search_refs"), msg_index=msg_index @@ -739,6 +736,7 @@ def display_message(message: dict, msg_index: int = 0): st.markdown("", unsafe_allow_html=True) +@st.fragment def render_bottom_actions_bar() -> None: """Show Ask and Compare when there are selected products. Disabled when none selected.""" selected = st.session_state.selected_products @@ -784,53 +782,72 @@ def render_side_drawer() -> None: body_html = "" if mode == "similar": - ref_id = payload.get("ref_id") - query = html.escape(payload.get("query", "")) - if payload.get("error") or not ref_id: - body_html = '

搜索失败或暂无结果。

' + query = html.escape((payload.get("query") or "")) + if payload.get("loading"): + body_html = '

加载中…

' + elif payload.get("products") is not None: + to_show = payload["products"][:12] + cards = [] + for product in to_show: + p_title = html.escape((product.title or "未知商品")[:80]) + price = ( + f"¥{product.price:.2f}" + if product.price is not None + else "价格待更新" + ) + image_html = ( + f'{p_title}' + if product.image_url + else '
🛍️
' + ) + cards.append( + '
' + f"{image_html}" + '
' + f'
{p_title}
' + f'
{price}
' + "
" + ) + cards_html = "".join(cards) if cards else '

(未找到可展示的商品)

' + body_html = ( + f'
' + f'基于「{query}」的搜索结果:
' + '
' + cards_html + "
" + ) else: - result = global_registry.get(session_id, ref_id) - if not result: - body_html = f'

[搜索结果 {html.escape(ref_id)} 不可用]

' - else: - perfect = [p for p in result.products if p.match_label == "Relevant"] - partial = [p for p in result.products if p.match_label == "Partially Relevant"] - to_show = (perfect + partial)[:12] if perfect else partial[:12] - cards = [] - for product in to_show: - p_title = html.escape((product.title or "未知商品")[:80]) - p_label = html.escape(product.match_label or "Partially Relevant") - price = ( - f"¥{product.price:.2f}" - if product.price is not None - else "价格待更新" - ) - image_html = ( - f'{p_title}' - if product.image_url - else '
🛍️
' + # Legacy: ref_id from registry (e.g. from chat) + ref_id = payload.get("ref_id") + if ref_id: + result = global_registry.get(session_id, ref_id) + if result: + to_show = (result.products or [])[:12] + cards = [] + for product in to_show: + p_title = html.escape((product.title or "未知商品")[:80]) + price = f"¥{product.price:.2f}" if product.price is not None else "价格待更新" + image_html = ( + f'{p_title}' + if product.image_url + else '
🛍️
' + ) + cards.append( + '
' + f"{image_html}" + f'
{p_title}
' + f'
{price}
' + ) + body_html = ( + f'
基于「{query}」的搜索结果:
' + '
' + "".join(cards) + "
" ) - cards.append( - '
' - f"{image_html}" - '
' - f'
{p_title}
' - f'
{price}
' - f'
{p_label}
' - "
" - ) - - cards_html = "".join(cards) if cards else '

(未找到可展示的商品)

' - body_html = ( - f'
' - f'基于「{query}」的搜索结果:
' - '
' - f"{cards_html}" - "
" - ) + else: + body_html = f'

[搜索结果 {html.escape(ref_id)} 不可用]

' + else: + body_html = '

搜索失败或暂无结果。

' else: items = payload if isinstance(payload, list) else [] if items: @@ -953,6 +970,20 @@ def main(): st.session_state.side_panel = {"visible": False, "mode": None, "payload": None} st.query_params.clear() + # "Similar" panel: if loading, run API-only search and rerun + panel = st.session_state.side_panel + if panel.get("visible") and panel.get("mode") == "similar": + payload = panel.get("payload") or {} + if payload.get("loading") and payload.get("query"): + from app.tools.search_tools import search_products_api_only + products = search_products_api_only(payload["query"], limit=12) + st.session_state.side_panel["payload"] = { + "query": payload["query"], + "products": products, + "loading": False, + } + st.rerun() + # Drawer rendered early so fixed positioning works from top of DOM render_side_drawer() @@ -968,13 +999,12 @@ def main(): ) # Sidebar (collapsed by default, but accessible) - with st.sidebar: + @st.fragment + def _sidebar_fragment(): st.markdown("### ⚙️ Settings") - if st.button("🗑️ Clear Chat", width="stretch"): if "shopping_agent" in st.session_state: st.session_state.shopping_agent.clear_history() - # Clear search result registry for this session session_id = st.session_state.get("session_id", "") if session_id: global_registry.clear_session(session_id) @@ -984,8 +1014,6 @@ def main(): st.session_state.referenced_products = [] st.session_state.side_panel = {"visible": False, "mode": None, "payload": None} st.rerun() - - # Debug toggle st.markdown("---") st.checkbox( "显示调试过程 (debug)", @@ -993,17 +1021,25 @@ def main(): value=True, help="展开后可查看中间思考过程及工具调用详情", ) - st.markdown("---") st.caption(f"Session: `{st.session_state.session_id[:8]}...`") + with st.sidebar: + _sidebar_fragment() + + MAX_MESSAGES = 50 messages_container = st.container() with messages_container: if not st.session_state.messages: display_welcome() else: - for msg_idx, message in enumerate(st.session_state.messages): - display_message(message, msg_index=msg_idx) + messages = st.session_state.messages + start_idx = max(0, len(messages) - MAX_MESSAGES) + to_show = messages[start_idx:] + if len(messages) > MAX_MESSAGES: + st.caption(f"(仅显示最近 {MAX_MESSAGES} 条,共 {len(messages)} 条消息)") + for i, message in enumerate(to_show): + display_message(message, msg_index=start_idx + i) render_bottom_actions_bar() # Fixed input area at bottom (using container to simulate fixed position) diff --git a/app/agents/shopping_agent.py b/app/agents/shopping_agent.py index 2adec0a..3eba8e4 100644 --- a/app/agents/shopping_agent.py +++ b/app/agents/shopping_agent.py @@ -4,7 +4,7 @@ Conversational Shopping Agent with LangGraph Architecture: - ReAct-style agent: plan → search → evaluate → re-plan or respond - search_products is session-bound, writing curated results to SearchResultRegistry -- Final AI message references results via [SEARCH_REF:ref_id] tokens instead of +- Final AI message references results via [SEARCH_RESULTS_REF:ref_id] tokens instead of re-listing product details; the UI renders product cards from the registry """ @@ -34,7 +34,7 @@ logger = logging.getLogger(__name__) # Key design decisions: # 1. Guides multi-query search planning with explicit evaluate-and-decide loop # 2. Forbids re-listing product details in the final response -# 3. Mandates [SEARCH_REF:ref_id] inline citation as the only product presentation mechanism +# 3. Mandates [SEARCH_RESULTS_REF:ref_id] inline citation as the only product presentation mechanism SYSTEM_PROMPT = f"""角色定义 你是我们店铺的一名专业的电商导购,是一个善于倾听、主动引导、懂得搭配的“时尚顾问”,通过有温度的对话,给用户提供有价值的信息,包括需求引导、方案推荐、搜索结果推荐,最终促成满意的购物决策或转化行为。 作为我们店铺的一名专业的销售,除了本店铺的商品的推荐,你可以给用户提供有帮助的信息,但是不要虚构商品、提供本商店搜索结果以外的商品。 @@ -49,12 +49,12 @@ SYSTEM_PROMPT = f"""角色定义 4. 选项驱动式澄清:推荐几个清晰的方向,呈现方案或商品搜索结果,再做澄清 5. 单轮对话最好只提一个问题,最多两个,禁止多问题堆叠。 6. 站在用户立场思考:比如询问用户期待的效果或感觉、使用的场合、想解决的问题,而不是询问具体的款式、参数,你需要将用户表达的需求翻译为具体可检索的商品特征(版型、材质、设计元素、风格标签等),并据此筛选商品、组织推荐逻辑。 -2. 如何使用make_search_products_tool: +2. 如何使用search_products: 1. 可以生成多个query进行搜索:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。 2. 可以根据搜索结果调整搜索策略:每次调用 search_products 后,工具会返回搜索结果的相关性的判断、以及搜索结果的topN的title,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。结果太差的原因有可能是你生成的query不合理、请根据你看到的商品名称的构成组织搜索关键词。 -3. 在最终回复中使用 [SEARCH_REF:ref_id] 内联引用搜索结果: - 1. 搜索工具会返回一个结果引用标识[SEARCH_REF:ref_id],撰写最终答复的时候请直接引用 [SEARCH_REF:ref_id] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。 - 2. 因为系统会自动将[SEARCH_REF:ref_id]渲染为搜索结果,所以[SEARCH_REF:ref_id]必须独占一行,且只在需要渲染该query完整的搜索结果时才进行引用,同一个结果不要重复引用。 +3. 在最终回复中使用 [SEARCH_RESULTS_REF:ref_id] 内联引用搜索结果: + 1. 搜索工具会返回一个结果引用标识[SEARCH_RESULTS_REF:ref_id],撰写最终答复的时候请直接引用 [SEARCH_RESULTS_REF:ref_id] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。 + 2. 因为系统会自动将[SEARCH_RESULTS_REF:ref_id]渲染为搜索结果,所以[SEARCH_RESULTS_REF:ref_id]必须独占一行,且只在需要渲染该query完整的搜索结果时才进行引用,同一个结果不要重复引用。 4. 今天是{datetime.now().strftime("%Y-%m-%d")},所有与当前时间(比如天气、最新或即将发生的事件)相关的问题,都要使用web_search工具)。 """ @@ -72,8 +72,8 @@ SYSTEM_PROMPT___2 = """ 角色定义 2. 如何使用make_search_products_tool: 1. 可以生成多个query进行搜索:在需要搜索商品的时候,可以将需求分解为 2-4 个搜索查询,每个 query 聚焦一个明确的商品子类或搜索角度。 2. 可以根据搜索结果调整搜索策略:每次调用 search_products 后,工具会返回搜索结果的相关性的判断、以及搜索结果的topN的title,你需要决策是否要调整搜索策略,比如结果质量太差,可能需要调整搜索词、或者加大试探的query数量(不要超过3-5个)。 - 3. 使用 [SEARCH_REF:ref_id] 内联引用搜索结果:搜索工具会返回一个结果引用标识[SEARCH_REF:ref_id],撰写最终答复的时候可以直接引用将 [SEARCH_REF:ref_id] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。 - 4. 因为系统会自动将[SEARCH_REF:ref_id]渲染为搜索结果,所以只在需要渲染该query完整的搜索结果时才进行引用,同一个结果不要重复引用。 + 3. 使用 [SEARCH_RESULTS_REF:ref_id] 内联引用搜索结果:搜索工具会返回一个结果引用标识[SEARCH_RESULTS_REF:ref_id],撰写最终答复的时候可以直接引用将 [SEARCH_RESULTS_REF:ref_id] ,系统会自动在该位置渲染对应的商品卡片列表,无需复述搜索结果。 + 4. 因为系统会自动将[SEARCH_RESULTS_REF:ref_id]渲染为搜索结果,所以只在需要渲染该query完整的搜索结果时才进行引用,同一个结果不要重复引用。 """ @@ -226,7 +226,7 @@ class ShoppingAgent: Returns: dict with keys: - response – final AI message text (may contain [SEARCH_REF:ref_id] tokens) + response – final AI message text (may contain [SEARCH_RESULTS_REF:ref_id] tokens) tool_calls – list of {name, args, result_preview} debug_steps – detailed per-node step log search_refs – dict[ref_id → SearchResult] for all searches this turn diff --git a/app/search_registry.py b/app/search_registry.py index 1def466..4ed5623 100644 --- a/app/search_registry.py +++ b/app/search_registry.py @@ -2,20 +2,16 @@ Search Result Registry Stores structured search results keyed by session and ref_id. -Each [SEARCH_REF:ref_id] in an AI response maps to a SearchResult stored here, +Each [SEARCH_RESULTS_REF:ref_id] in an AI response maps to a SearchResult stored here, allowing the UI to render product cards without the LLM ever re-listing them. + +ref_id uses session-scoped auto-increment (sr_1, sr_2, ...). """ -import uuid from dataclasses import dataclass, field from typing import Optional -def new_ref_id() -> str: - """Generate a short unique search reference ID, e.g. 'sr_3f9a1b2c'.""" - return "sr_" + uuid.uuid4().hex[:8] - - @dataclass class ProductItem: """A single product extracted from a search result, enriched with a match label.""" @@ -38,7 +34,7 @@ class SearchResult: """ A complete, self-contained search result block. - Identified by ref_id (e.g. 'sr_3f9a1b2c'). + Identified by ref_id (e.g. 'sr_1', 'sr_2' — session-scoped auto-increment). Stores the query, LLM quality assessment, and the curated product list (only "Relevant" and "Partially Relevant" items — "Irrelevant" are discarded). """ @@ -68,11 +64,17 @@ class SearchResultRegistry: Lives as a global singleton in the process; Streamlit reruns preserve it as long as the worker process is alive. Session isolation is maintained - by keying on session_id. + by keying on session_id. ref_id is per-session auto-increment (sr_1, sr_2, ...). """ def __init__(self) -> None: self._store: dict[str, dict[str, SearchResult]] = {} + self._session_counter: dict[str, int] = {} + + def next_ref_id(self, session_id: str) -> str: + """Return next ref_id for this session (sr_1, sr_2, ...).""" + self._session_counter[session_id] = self._session_counter.get(session_id, 0) + 1 + return f"sr_{self._session_counter[session_id]}" def register(self, session_id: str, result: SearchResult) -> str: """Store a SearchResult and return its ref_id.""" @@ -92,6 +94,7 @@ class SearchResultRegistry: def clear_session(self, session_id: str) -> None: """Remove all search results for a session (e.g. on chat clear).""" self._store.pop(session_id, None) + self._session_counter.pop(session_id, None) # ── Global singleton ────────────────────────────────────────────────────────── diff --git a/app/tools/search_tools.py b/app/tools/search_tools.py index 47ca257..3fd0fad 100644 --- a/app/tools/search_tools.py +++ b/app/tools/search_tools.py @@ -3,7 +3,7 @@ Search Tools for Product Discovery - search_products is created via make_search_products_tool(session_id, registry). - After search API, an LLM labels each result as Relevant / Partially Relevant / Irrelevant; we count and - store the curated list in the registry, return [SEARCH_REF:ref_id] + quality counts + top10 titles. + store the curated list in the registry, return [SEARCH_RESULTS_REF:ref_id] + quality counts + top10 titles. """ import base64 @@ -23,7 +23,6 @@ from app.search_registry import ( SearchResult, SearchResultRegistry, global_registry, - new_ref_id, ) logger = logging.getLogger(__name__) @@ -123,6 +122,125 @@ labels 数组长度必须等于 {n}。""" return ["Partially Relevant"] * n, "" +# ── Shared search implementation ────────────────────────────────────────────── + +def _call_search_api(query: str, size: int) -> Optional[tuple[list, int]]: + """Call product search API. Returns (raw_results, total_hits) or None on failure.""" + if not query or not query.strip(): + return None + try: + url = f"{settings.search_api_base_url.rstrip('/')}/search/" + headers = { + "Content-Type": "application/json", + "X-Tenant-ID": settings.search_api_tenant_id, + } + payload = { + "query": query.strip(), + "size": min(max(size, 1), 20), + "from": 0, + "language": "zh", + "enable_rerank": True, + "rerank_query_template": query.strip(), + "rerank_doc_template": "{title}", + } + resp = requests.post(url, json=payload, headers=headers, timeout=60) + if resp.status_code != 200: + logger.warning(f"Search API {resp.status_code}: {resp.text[:200]}") + return None + data = resp.json() + raw_results: list = data.get("results", []) + total_hits: int = data.get("total", 0) + return (raw_results, total_hits) + except Exception as e: + logger.warning(f"Search API error: {e}") + return None + + +def _raw_to_product_items( + raw_results: list, labels: Optional[list[str]] = None +) -> list[ProductItem]: + """Build ProductItem list from API raw results. If labels given, filter by Relevant/Partially Relevant.""" + if labels is not None: + valid = {"Relevant", "Partially Relevant"} + return [ + ProductItem( + spu_id=str(r.get("spu_id", "")), + title=r.get("title") or "", + price=r.get("price"), + category_path=r.get("category_path") or r.get("category_name"), + vendor=r.get("vendor"), + image_url=_normalize_image_url(r.get("image_url")), + relevance_score=r.get("relevance_score"), + match_label=label, + tags=r.get("tags") or [], + specifications=r.get("specifications") or [], + ) + for r, label in zip(raw_results, labels) + if label in valid + ] + return [ + ProductItem( + spu_id=str(r.get("spu_id", "")), + title=r.get("title") or "", + price=r.get("price"), + category_path=r.get("category_path") or r.get("category_name"), + vendor=r.get("vendor"), + image_url=_normalize_image_url(r.get("image_url")), + relevance_score=r.get("relevance_score"), + match_label="Partially Relevant", + tags=r.get("tags") or [], + specifications=r.get("specifications") or [], + ) + for r in raw_results + ] + + +def search_products_impl( + query: str, + limit: int = 20, + *, + assess_quality: bool = True, + session_id: Optional[str] = None, + registry: Optional[SearchResultRegistry] = None, +) -> tuple[Optional[str], list[ProductItem], int]: + """ + Single implementation: call API, optionally run LLM assessment, optionally register. + Returns (ref_id_or_none, products, assessed_count). assessed_count = len(raw_results) when assess_quality else 0. + """ + out = _call_search_api(query, limit) + if not out: + return (None, [], 0) + raw_results, total_hits = out + if not raw_results: + return (None, [], 0) + + if assess_quality and session_id and registry: + labels, quality_summary = _assess_search_quality(query, raw_results) + products = _raw_to_product_items(raw_results, labels) + perfect_count = sum(1 for l in labels if l == "Relevant") + partial_count = sum(1 for l in labels if l == "Partially Relevant") + ref_id = registry.next_ref_id(session_id) + result = SearchResult( + ref_id=ref_id, + query=query, + total_api_hits=total_hits, + returned_count=len(raw_results), + perfect_count=perfect_count, + partial_count=partial_count, + irrelevant_count=len(labels) - perfect_count - partial_count, + quality_summary=quality_summary, + products=products, + ) + registry.register(session_id, result) + logger.info( + "[%s] Registered %s: query=%s perfect=%s partial=%s", + session_id, ref_id, query, perfect_count, partial_count, + ) + return (ref_id, products, len(raw_results)) + products = _raw_to_product_items(raw_results) + return (None, products, 0) + + # ── Tool factory ─────────────────────────────────────────────────────────────── def make_search_products_tool( @@ -131,12 +249,7 @@ def make_search_products_tool( ): """ Return a search_products tool bound to a specific session and registry. - - The tool: - 1. Calls the product search API. - 2. Runs LLM quality assessment on up to 20 results. - 3. Stores a SearchResult in the registry. - 4. Returns a concise quality summary + [SEARCH_REF:ref_id]. + Uses LLM assessment and registers result; returns [SEARCH_RESULTS_REF:ref_id] string. """ @tool @@ -147,100 +260,34 @@ def make_search_products_tool( query: 自然语言商品描述 Returns: - 【搜索完成】+ 结果引用 [SEARCH_REF:ref_id] + 质量情况(评估条数、Relevant/Partially Relevant 数)+ results list(top10 标题) + 【搜索完成】+ 结果引用 [SEARCH_RESULTS_REF:ref_id] + 质量情况 + results list(top10 标题) """ try: limit = min(max(settings.search_products_limit, 1), 20) logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}") - - url = f"{settings.search_api_base_url.rstrip('/')}/search/" - headers = { - "Content-Type": "application/json", - "X-Tenant-ID": settings.search_api_tenant_id, - } - payload = { - "query": query, - "size": limit, - "from": 0, - "language": "zh", - "enable_rerank": True, - "rerank_query_template": query, - "rerank_doc_template": "{title}", - } - - resp = requests.post(url, json=payload, headers=headers, timeout=60) - if resp.status_code != 200: - logger.error(f"Search API error {resp.status_code}: {resp.text[:300]}") - return f"搜索失败:API 返回状态码 {resp.status_code},请稍后重试。" - - data = resp.json() - raw_results: list = data.get("results", []) - total_hits: int = data.get("total", 0) - - if not raw_results: - return ( - f"【搜索完成】query='{query}'\n" - "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。" - ) - - labels, quality_summary = _assess_search_quality(query, raw_results) - perfect_count = sum(1 for l in labels if l == "Relevant") - partial_count = sum(1 for l in labels if l == "Partially Relevant") - irrelevant_count = len(labels) - perfect_count - partial_count - - products: list[ProductItem] = [] - for raw, label in zip(raw_results, labels): - if label not in ("Relevant", "Partially Relevant"): - continue - products.append( - ProductItem( - spu_id=str(raw.get("spu_id", "")), - title=raw.get("title") or "", - price=raw.get("price"), - category_path=( - raw.get("category_path") or raw.get("category_name") - ), - vendor=raw.get("vendor"), - image_url=_normalize_image_url(raw.get("image_url")), - relevance_score=raw.get("relevance_score"), - match_label=label, - tags=raw.get("tags") or [], - specifications=raw.get("specifications") or [], - ) - ) - - ref_id = new_ref_id() - result = SearchResult( - ref_id=ref_id, - query=query, - total_api_hits=total_hits, - returned_count=len(raw_results), - perfect_count=perfect_count, - partial_count=partial_count, - irrelevant_count=irrelevant_count, - quality_summary=quality_summary, - products=products, - ) - registry.register(session_id, result) - assessed_n = len(raw_results) - logger.info( - "[%s] Registered %s: query=%s assessed=%s perfect=%s partial=%s", - session_id, ref_id, query, assessed_n, perfect_count, partial_count, + ref_id, products, assessed_n = search_products_impl( + query, limit, + assess_quality=True, + session_id=session_id, + registry=registry, ) - - top10_titles = [ - (raw.get("title") or "未知")[:80] - for raw in raw_results[:10] - ] + if ref_id is None: + if not products: + return ( + f"【搜索完成】query='{query}'\n" + "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。" + ) + return f"搜索失败:API 返回异常,请稍后重试。" + perfect_count = sum(1 for p in products if p.match_label == "Relevant") + partial_count = sum(1 for p in products if p.match_label == "Partially Relevant") + top10_titles = [(p.title or "未知")[:80] for p in products[:10]] results_list = "\n".join(f"{i}. {t}" for i, t in enumerate(top10_titles, 1)) - return ( f"【搜索完成】query='{query}'\n" - f"结果引用:[SEARCH_REF:{ref_id}]\n" + f"结果引用:[SEARCH_RESULTS_REF:{ref_id}]\n" f"搜索结果质量情况:评估总条数{assessed_n}条,Relevant {perfect_count} 条,Partially Relevant {partial_count} 条。\n" f"results list:\n{results_list}" ) - except requests.exceptions.RequestException as e: logger.error(f"[{session_id}] Search network error: {e}", exc_info=True) return f"搜索失败(网络错误):{e}" @@ -251,6 +298,12 @@ def make_search_products_tool( return search_products +def search_products_api_only(query: str, limit: int = 12) -> list[ProductItem]: + """API-only search (no LLM assessment). For 'Similar products' side panel.""" + _, products, _ = search_products_impl(query, limit, assess_quality=False) + return products + + # ── Standalone tools (no session binding needed) ─────────────────────────────── @tool -- libgit2 0.21.2