66442668
tangwang
feat: 搜索结果引用与并行搜索...
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
"""
Search Result Registry
Stores structured search results keyed by session and ref_id.
Each [SEARCH_REF:xxx] in an AI response maps to a SearchResult stored here,
allowing the UI to render product cards without the LLM ever re-listing them.
"""
import uuid
from dataclasses import dataclass, field
from typing import Optional
def new_ref_id() -> str:
    """Return a fresh search reference ID of the form ``sr_<8 hex chars>``.

    The suffix is the first eight hexadecimal digits of a random UUID4,
    so an ID looks like ``'sr_3f9a1b2c'``.
    """
    suffix = uuid.uuid4().hex[:8]
    return f"sr_{suffix}"
@dataclass
class ProductItem:
    """One product taken from a search result, carrying a match label.

    Only ``spu_id`` and ``title`` are required; every other field has a
    default so partially populated items can still be constructed.

    Attributes:
        spu_id: Product identifier.
        title: Product title.
        price: Price, or ``None`` when not provided.
        category_path: Category path string, or ``None``.
        vendor: Vendor name, or ``None``.
        image_url: Image URL, or ``None``.
        relevance_score: Relevance score, or ``None``.
        match_label: LLM-assigned label, one of "完美匹配" (perfect match),
            "部分匹配" (partial match) or "不相关" (irrelevant).
            Defaults to "部分匹配".
        tags: List of tags; a fresh empty list per instance by default.
        specifications: List of specifications; a fresh empty list per
            instance by default.
    """

    # Required identity fields.
    spu_id: str
    title: str

    # Optional descriptive fields; None means "not provided".
    price: Optional[float] = None
    category_path: Optional[str] = None
    vendor: Optional[str] = None
    image_url: Optional[str] = None
    relevance_score: Optional[float] = None

    # Label assigned by the LLM; see the class docstring for allowed values.
    match_label: str = "部分匹配"

    # default_factory gives each instance its own list (no shared mutable default).
    tags: list = field(default_factory=list)
    specifications: list = field(default_factory=list)
@dataclass
class SearchResult:
    """A self-contained record of one search, addressed by ``ref_id``.

    Bundles the query text, raw hit statistics, the LLM's per-label counts
    and overall verdict, and the curated product list. ``ref_id`` looks
    like ``'sr_3f9a1b2c'``.

    ``products`` is meant to hold only items labelled "完美匹配" (perfect
    match) or "部分匹配" (partial match); "不相关" (irrelevant) items are
    expected to be dropped before construction. This class does not
    enforce that itself.

    All fields are required and have no defaults.
    """

    # Identity of this result block and the query that produced it.
    ref_id: str
    query: str

    # Raw API statistics.
    total_api_hits: int   # total documents matched by the search engine
    returned_count: int   # number of results that were actually assessed

    # Breakdown of the LLM's quality labels.
    perfect_count: int
    partial_count: int
    irrelevant_count: int

    # Overall LLM quality verdict.
    quality_verdict: str  # "优质" (good) | "一般" (average) | "较差" (poor)
    quality_summary: str  # one-sentence explanation from the LLM

    # Curated products (perfect + partial only); elements are ProductItem.
    products: list
class SearchResultRegistry:
    """In-memory, session-scoped store of search results.

    Layout: ``session_id -> {ref_id -> SearchResult}``. Sessions are kept
    apart purely by keying on ``session_id``.

    The module exposes one shared instance (``global_registry``); per the
    original design notes, Streamlit reruns keep seeing the same data for
    as long as the worker process is alive. Entries are only ever removed
    by ``clear_session``.
    """

    def __init__(self) -> None:
        """Start with an empty store."""
        self._store: dict[str, dict[str, SearchResult]] = {}

    def register(self, session_id: str, result: SearchResult) -> str:
        """Save ``result`` under ``session_id`` and return ``result.ref_id``.

        The session's bucket is created on first use. A result with an
        already-registered ``ref_id`` replaces the previous entry.
        """
        session_results = self._store.setdefault(session_id, {})
        session_results[result.ref_id] = result
        return result.ref_id

    def get(self, session_id: str, ref_id: str) -> Optional[SearchResult]:
        """Return the result stored for ``(session_id, ref_id)``.

        Returns ``None`` when either the session or the ref_id is unknown.
        """
        session_results = self._store.get(session_id)
        if session_results is None:
            return None
        return session_results.get(ref_id)

    def get_all(self, session_id: str) -> dict:
        """Return a shallow copy of the session's ``ref_id -> SearchResult`` map.

        Unknown sessions yield an empty dict. Mutating the returned dict
        does not affect the registry.
        """
        return self._store.get(session_id, {}).copy()

    def clear_session(self, session_id: str) -> None:
        """Drop every result stored for ``session_id`` (e.g. on chat clear).

        Clearing an unknown session is a no-op.
        """
        self._store.pop(session_id, None)
# ── Global singleton ──────────────────────────────────────────────────────────
# Module-level SearchResultRegistry instance, created once at import time.
# Imported by search_tools and app.py; both sides share the same object.
global_registry = SearchResultRegistry()
|