e7f2b240
tangwang
first commit
|
1
2
|
"""
Search Tools for Product Discovery

- search_products is created via make_search_products_tool(session_id, registry).
- After the search API call, an LLM labels each result as Relevant / Partially Relevant / Irrelevant;
  the labels are counted, the curated list is stored in the registry, and the tool returns
  [SEARCH_REF:ref_id] + quality counts + top-10 titles.
"""
import base64
|
66442668
tangwang
feat: 搜索结果引用与并行搜索...
|
10
|
import json
|
e7f2b240
tangwang
first commit
|
11
|
import logging
|
46f8dd12
tangwang
1. add prod under...
|
12
|
import os
|
e7f2b240
tangwang
first commit
|
13
14
15
|
from pathlib import Path
from typing import Optional
|
8810a6fa
tangwang
重构
|
16
|
import requests
|
e7f2b240
tangwang
first commit
|
17
18
19
20
|
from langchain_core.tools import tool
from openai import OpenAI
from app.config import settings
|
66442668
tangwang
feat: 搜索结果引用与并行搜索...
|
21
22
23
24
25
26
27
|
from app.search_registry import (
ProductItem,
SearchResult,
SearchResultRegistry,
global_registry,
new_ref_id,
)
|
e7f2b240
tangwang
first commit
|
28
29
30
|
logger = logging.getLogger(__name__)
|
e7f2b240
tangwang
first commit
|
31
32
33
|
_openai_client: Optional[OpenAI] = None
|
825828c4
tangwang
fix: search image...
|
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
def _normalize_image_url(url: Optional[str]) -> Optional[str]:
    """Normalize an image URL returned by the search API into an absolute URL.

    Examples: ``////cnres.appracle.com/a.jpg`` -> ``https://cnres.appracle.com/a.jpg``,
    ``host/a.jpg`` -> ``https://host/a.jpg``.

    Args:
        url: Raw ``image_url`` value; may be ``None``, a non-string, or blank.

    Returns:
        The whitespace-stripped URL if it already carries an http/https scheme
        (matched case-insensitively); otherwise ``https://`` followed by the
        value with all leading slashes removed. ``None`` when the input is not
        a string, is blank, or consists only of slashes.
    """
    if not isinstance(url, str):
        return None
    url = url.strip()
    if not url:
        return None
    # Compare the scheme case-insensitively so "HTTP://host" is not turned
    # into "https://HTTP://host".
    if url.lower().startswith(("https://", "http://")):
        return url
    # "//host/path", "////host/path" and bare "host/path" all collapse to a
    # single "https://host/path".
    remainder = url.lstrip("/")
    if not remainder:
        # Nothing but slashes: there is no host to point at.
        return None
    return "https://" + remainder
|
e7f2b240
tangwang
first commit
|
49
50
51
|
def get_openai_client() -> OpenAI:
    """Return the module-wide OpenAI client, constructing it on first use.

    The client is built from ``settings.openai_api_key`` and, when set,
    ``settings.openai_api_base_url``; later calls reuse the cached instance.
    """
    global _openai_client
    if _openai_client is not None:
        return _openai_client

    client_options = {"api_key": settings.openai_api_key}
    # Only pass base_url when one is configured.
    if settings.openai_api_base_url:
        client_options["base_url"] = settings.openai_api_base_url
    _openai_client = OpenAI(**client_options)
    return _openai_client
|
66442668
tangwang
feat: 搜索结果引用与并行搜索...
|
59
60
|
# ── LLM quality assessment ─────────────────────────────────────────────────────
|
5e3d6d3a
tangwang
refactor(search):...
|
61
|
def _parse_quality_response(raw: Optional[str], n: int) -> tuple[list[str], str]:
    """Parse the model's JSON reply into exactly ``n`` labels plus a summary.

    Raises ``ValueError`` (including ``json.JSONDecodeError``) when the reply
    is not a JSON object.
    """
    text = (raw or "").strip()
    # Models frequently wrap the JSON in a Markdown code fence; unwrap it.
    if text.startswith("```"):
        text = text.split("```")[1]
        if text.startswith("json"):
            text = text[4:]
        text = text.strip()

    data = json.loads(text)
    if not isinstance(data, dict):
        raise ValueError("quality assessment reply is not a JSON object")

    raw_labels = data.get("labels")
    if not isinstance(raw_labels, list):
        # A null / malformed "labels" should not discard a usable summary.
        raw_labels = []

    valid = {"Relevant", "Partially Relevant", "Irrelevant"}
    # Unknown or non-string entries degrade to the neutral middle label.
    labels = [
        label if isinstance(label, str) and label in valid else "Partially Relevant"
        for label in raw_labels[:n]
    ]
    # Pad when the model returned fewer labels than products.
    labels.extend(["Partially Relevant"] * (n - len(labels)))

    summary = data.get("quality_summary")
    quality_summary = summary.strip() if isinstance(summary, str) else ""
    return labels, quality_summary


def _assess_search_quality(query: str, raw_products: list) -> tuple[list[str], str]:
    """
    Use the LLM to label each search result and write a short quality summary.

    Args:
        query: The user's search query.
        raw_products: Raw result dicts from the search API; only ``title``
            (truncated to 60 characters) is sent to the model.

    Returns:
        ``(labels, quality_summary)``. ``labels`` has exactly one entry per
        product, each one of ``Relevant`` / ``Partially Relevant`` /
        ``Irrelevant``; ``quality_summary`` is the model's 1-2 sentence
        summary, or ``""``. On any LLM or parsing failure every product is
        labelled ``Partially Relevant`` and the summary is empty.
    """
    n = len(raw_products)
    if n == 0:
        return [], ""

    # str() guards against a non-string title breaking the slice before the
    # try block below is even entered.
    product_text = "\n".join(
        f"{i}. {str(p.get('title') or '')[:60]}"
        for i, p in enumerate(raw_products, 1)
    )
    prompt = f"""评估以下搜索结果与用户查询的匹配程度,完成两件事:
1. 为每条结果打一个等级:Relevant / Partially Relevant / Irrelevant。
2. 写一段 quality_summary(1–2 句话):简要说明搜索结果主要包含哪些商品、是否基本满足搜索意图、整体匹配度如何。

用户查询:{query}

搜索结果(共 {n} 条):
{product_text}

等级说明:Relevant=完全符合查询意图;Partially Relevant=基本相关(如品类等主需求匹配但部分属性不完全符合);Irrelevant=不相关。

请严格按以下 JSON 输出,仅输出 JSON,无其他内容:
{{"labels": ["Relevant", "Partially Relevant", "Irrelevant", ...], "quality_summary": "你的1-2句总结"}}
labels 数组长度必须等于 {n}。"""

    try:
        client = get_openai_client()
        resp = client.chat.completions.create(
            model=settings.openai_model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1200,
            temperature=0.1,
        )
        return _parse_quality_response(resp.choices[0].message.content, n)
    except Exception as e:
        # Best-effort step: a failed assessment must not fail the search, so
        # fall back to the neutral label for every product.
        logger.warning("Quality assessment failed: %s; using fallback.", e)
        return ["Partially Relevant"] * n, ""
|
66442668
tangwang
feat: 搜索结果引用与并行搜索...
|
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# ── Tool factory ───────────────────────────────────────────────────────────────
def make_search_products_tool(
    session_id: str,
    registry: SearchResultRegistry,
):
    """
    Build the ``search_products`` tool for one session.

    The returned tool closes over ``session_id`` and ``registry``. Each call:
      1. POSTs the query to the product search API (size clamped to 1-20).
      2. Has the LLM label every returned hit via ``_assess_search_quality``.
      3. Registers a ``SearchResult`` whose ``products`` are the hits labelled
         Relevant or Partially Relevant.
      4. Returns a text summary with ``[SEARCH_REF:ref_id]``, the label counts
         and the first ten titles.
    """

    @tool
    def search_products(query: str, limit: int = 20) -> str:
        """搜索商品库并做质量评估:LLM 为每条结果打等级(Relevant / Partially Relevant / Irrelevant),返回引用与 top10 标题。

        Args:
            query: 自然语言商品描述
            limit: 最多返回条数(1-20)

        Returns:
            【搜索完成】+ 结果引用 [SEARCH_REF:ref_id] + 质量情况(评估条数、Relevant/Partially Relevant 数)+ results list(top10 标题)
        """
        # The docstring above is kept verbatim: the @tool decorator exposes it
        # as the tool description, so it is runtime text, not just a comment.
        try:
            logger.info(f"[{session_id}] search_products: query={query!r} limit={limit}")

            endpoint = f"{settings.search_api_base_url.rstrip('/')}/search/"
            request_headers = {
                "Content-Type": "application/json",
                "X-Tenant-ID": settings.search_api_tenant_id,
            }
            request_body = {
                "query": query,
                "size": min(max(limit, 1), 20),  # clamp to the 1-20 range
                "from": 0,
                "language": "zh",
                "enable_rerank": True,
                "rerank_query_template": query,
                "rerank_doc_template": "{title}",
            }
            resp = requests.post(
                endpoint, json=request_body, headers=request_headers, timeout=60
            )
            if resp.status_code != 200:
                logger.error(f"Search API error {resp.status_code}: {resp.text[:300]}")
                return f"搜索失败:API 返回状态码 {resp.status_code},请稍后重试。"

            data = resp.json()
            hits: list = data.get("results", [])
            total_hits: int = data.get("total", 0)
            if not hits:
                return (
                    f"【搜索完成】query='{query}'\n"
                    "未找到匹配商品,建议换用更宽泛或不同角度的关键词重新搜索。"
                )

            labels, quality_summary = _assess_search_quality(query, hits)
            perfect_count = labels.count("Relevant")
            partial_count = labels.count("Partially Relevant")
            irrelevant_count = len(labels) - perfect_count - partial_count

            # Only hits the LLM considered at least partially relevant are
            # stored in the registry.
            kept_labels = ("Relevant", "Partially Relevant")
            products: list[ProductItem] = [
                ProductItem(
                    spu_id=str(hit.get("spu_id", "")),
                    title=hit.get("title") or "",
                    price=hit.get("price"),
                    category_path=(
                        hit.get("category_path") or hit.get("category_name")
                    ),
                    vendor=hit.get("vendor"),
                    image_url=_normalize_image_url(hit.get("image_url")),
                    relevance_score=hit.get("relevance_score"),
                    match_label=label,
                    tags=hit.get("tags") or [],
                    specifications=hit.get("specifications") or [],
                )
                for hit, label in zip(hits, labels)
                if label in kept_labels
            ]

            ref_id = new_ref_id()
            registry.register(
                session_id,
                SearchResult(
                    ref_id=ref_id,
                    query=query,
                    total_api_hits=total_hits,
                    returned_count=len(hits),
                    perfect_count=perfect_count,
                    partial_count=partial_count,
                    irrelevant_count=irrelevant_count,
                    quality_summary=quality_summary,
                    products=products,
                ),
            )

            assessed_n = len(hits)
            logger.info(
                "[%s] Registered %s: query=%s assessed=%s perfect=%s partial=%s",
                session_id, ref_id, query, assessed_n, perfect_count, partial_count,
            )

            # The summary lists the first ten raw hits regardless of label.
            results_list = "\n".join(
                f"{rank}. {(hit.get('title') or '未知')[:80]}"
                for rank, hit in enumerate(hits[:10], 1)
            )
            return (
                f"【搜索完成】query='{query}'\n"
                f"结果引用:[SEARCH_REF:{ref_id}]\n"
                f"搜索结果质量情况:评估总条数{assessed_n}条,Relevant {perfect_count} 条,Partially Relevant {partial_count} 条。\n"
                f"results list:\n{results_list}"
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"[{session_id}] Search network error: {e}", exc_info=True)
            return f"搜索失败(网络错误):{e}"
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            logger.error(f"[{session_id}] Search error: {e}", exc_info=True)
            return f"搜索失败:{e}"

    return search_products
# ── Standalone tools (no session binding needed) ───────────────────────────────
|
e7f2b240
tangwang
first commit
|
248
|
@tool
def web_search(query: str) -> str:
    """使用 Tavily 进行通用 Web 搜索,补充外部/实时知识。

    触发场景:
    - 需要**外部知识**:流行趋势、品牌、搭配文化、节日习俗等
    - 需要**实时/及时信息**:所有与天气相关的问题、当季流行元素、某地近期或者未来的事件、所有依赖当前时间相关的信息
    - 需要**宏观参考**:不同场合/国家的穿着建议、选购攻略

    Args:
        query: 要搜索的问题,自然语言描述

    Returns:
        总结后的回答 + 若干参考来源链接
    """
    # The docstring above is kept verbatim: the @tool decorator exposes it as
    # the tool description, so it is runtime text, not just a comment.
    try:
        tavily_key = os.getenv("TAVILY_API_KEY")
        if not tavily_key:
            return (
                "无法调用外部 Web 搜索:未检测到 TAVILY_API_KEY 环境变量。\n"
                "请在运行环境中配置 TAVILY_API_KEY 后再重试。"
            )

        logger.info(f"web_search: {query!r}")

        request_headers = {
            "Authorization": f"Bearer {tavily_key}",
            "Content-Type": "application/json",
        }
        request_body = {
            "query": query,
            "search_depth": "advanced",
            "include_answer": True,
        }
        response = requests.post(
            "https://api.tavily.com/search",
            json=request_body,
            headers=request_headers,
            timeout=60,
        )
        if response.status_code != 200:
            return f"调用外部 Web 搜索失败:Tavily 返回状态码 {response.status_code}"

        body = response.json()
        summary = body.get("answer") or "(Tavily 未返回直接回答,仅返回了搜索结果。)"
        sources = body.get("results") or []

        report = [
            "【外部 Web 搜索结果(Tavily)】",
            "",
            "回答摘要:",
            summary.strip(),
        ]
        if sources:
            report += ["", "参考来源(部分):"]
            # Cite at most the first five sources.
            for rank, source in enumerate(sources[:5], 1):
                heading = source.get("title") or "无标题"
                href = source.get("url") or ""
                report.append(f"{rank}. {heading}")
                if href:
                    report.append(f" 链接: {href}")
        return "\n".join(report).strip()
    except requests.exceptions.RequestException as e:
        logger.error("web_search network error: %s", e, exc_info=True)
        return f"调用外部 Web 搜索失败(网络错误):{e}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error("web_search error: %s", e, exc_info=True)
        return f"调用外部 Web 搜索失败:{e}"
@tool
def analyze_image_style(image_path: str) -> str:
    """分析用户上传的商品图片,提取视觉风格属性,用于后续商品搜索。

    适用场景:
    - 用户上传图片,想找相似商品
    - 需要理解图片中商品的风格、颜色、材质等属性

    Args:
        image_path: 图片文件路径

    Returns:
        商品视觉属性的详细文字描述,可直接作为 search_products 的 query
    """
    # The docstring above is kept verbatim: the @tool decorator exposes it as
    # the tool description, so it is runtime text, not just a comment.
    #
    # Behaviour: reads the file at image_path, sends it base64-encoded as a
    # data URL to settings.openai_vision_model, and returns the model's text.
    # Any failure is logged and returned as a "图片分析失败" message.
    import mimetypes  # function-scope import: only needed to label the data URL

    try:
        logger.info(f"analyze_image_style: {image_path!r}")

        img_path = Path(image_path)
        # is_file() also rejects directories, which exists() would let through
        # to fail later with a less helpful error.
        if not img_path.is_file():
            return f"错误:图片文件不存在:{image_path}"

        image_data = base64.b64encode(img_path.read_bytes()).decode("utf-8")

        # Declare the real media type (png, webp, ...) instead of always
        # claiming JPEG; fall back to JPEG when the extension is unknown.
        guessed_type, _ = mimetypes.guess_type(img_path.name)
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = "image/jpeg"

        prompt = """请分析这张商品图片,提供详细的视觉属性描述,用于商品搜索。

请包含:
- 商品类型(如:连衣裙、运动鞋、双肩包、西装等)
- 主要颜色
- 风格定位(如:休闲、正式、运动、复古、现代简约等)
- 图案/纹理(如:纯色、条纹、格纹、碎花、几何图案等)
- 关键设计特征(如:领型、袖长、版型、材质外观等)
- 适用场合(如:办公、户外、度假、聚会、运动等)

输出格式:3-4句自然语言描述,可直接用作搜索关键词。"""

        client = get_openai_client()
        response = client.chat.completions.create(
            model=settings.openai_vision_model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_data}",
                                "detail": "high",
                            },
                        },
                    ],
                }
            ],
            max_tokens=800,
            temperature=0.3,
        )
        analysis = response.choices[0].message.content.strip()
        logger.info("Image analysis completed.")
        return analysis
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"analyze_image_style error: {e}", exc_info=True)
        return f"图片分析失败:{e}"
|
e7f2b240
tangwang
first commit
|
383
384
|
|
66442668
tangwang
feat: 搜索结果引用与并行搜索...
|
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
|
# ── Tool list factory ──────────────────────────────────────────────────────────
def get_all_tools(
    session_id: str = "default",
    registry: Optional[SearchResultRegistry] = None,
) -> list:
    """
    Assemble the full list of agent tools.

    ``search_products`` is created per call by ``make_search_products_tool``
    and bound to ``session_id`` and the registry (``global_registry`` when
    none is given); ``analyze_image_style`` and ``web_search`` are the shared
    module-level tools.
    """
    active_registry = global_registry if registry is None else registry
    search_tool = make_search_products_tool(session_id, active_registry)
    return [search_tool, analyze_image_style, web_search]
|