Blame view

app/search_registry.py 3.74 KB
66442668   tangwang   feat: 搜索结果引用与并行搜索...
1
2
3
4
  """
  Search Result Registry
  
  Stores structured search results keyed by session and ref_id.
897b5ca9   tangwang   perf: 前端性能优化 + 搜索...
5
  Each [SEARCH_RESULTS_REF:ref_id] in an AI response maps to a SearchResult stored here,
66442668   tangwang   feat: 搜索结果引用与并行搜索...
6
  allowing the UI to render product cards without the LLM ever re-listing them.
897b5ca9   tangwang   perf: 前端性能优化 + 搜索...
7
8
  
  ref_id uses session-scoped auto-increment (sr_1, sr_2, ...).
66442668   tangwang   feat: 搜索结果引用与并行搜索...
9
10
  """
  
66442668   tangwang   feat: 搜索结果引用与并行搜索...
11
12
13
14
  import threading
  from dataclasses import dataclass, field
  from typing import Optional
  
  
66442668   tangwang   feat: 搜索结果引用与并行搜索...
15
16
17
18
19
20
21
22
23
24
25
  @dataclass
  class ProductItem:
      """One product extracted from a search result, together with its match label.

      Only ``spu_id`` and ``title`` are required; every other field has a default.
      """

      # Required identity fields
      spu_id: str
      title: str

      # Optional fields; each defaults to None when not supplied
      price: Optional[float] = None
      category_path: Optional[str] = None
      vendor: Optional[str] = None
      image_url: Optional[str] = None
      relevance_score: Optional[float] = None

      # Label assigned by the LLM: "Relevant" | "Partially Relevant" | "Irrelevant"
      match_label: str = "Partially Relevant"

      # default_factory gives every instance its own list instead of a shared one
      tags: list = field(default_factory=list)
      specifications: list = field(default_factory=list)
  
  
  @dataclass
  class SearchResult:
      """
      One complete, self-contained block of search results.

      The block is identified by ``ref_id`` (e.g. 'sr_1', 'sr_2' — a session-scoped
      auto-increment).  It holds the query, the LLM quality assessment, and the
      curated product list: only "Relevant" and "Partially Relevant" items are
      meant to be stored, "Irrelevant" ones being discarded beforehand (this
      class does no filtering itself).

      Every field is required; none has a default.
      """

      # Identity of the block and the query that produced it
      ref_id: str
      query: str

      # Raw API stats
      total_api_hits: int       # total documents matched by the search engine
      returned_count: int       # number of results that were actually assessed

      # Breakdown of the LLM quality labels
      perfect_count: int
      partial_count: int
      irrelevant_count: int

      # Short LLM-written summary: what the results mainly contain, whether
      # they meet the intent, and how well they match
      quality_summary: str

      # Curated product list (perfect + partial only)
      products: list            # list[ProductItem]
  
  
  class SearchResultRegistry:
      """
      Session-scoped store: session_id → { ref_id → SearchResult }.

      Lives as a global singleton in the process; Streamlit reruns preserve it
      as long as the worker process is alive.  Session isolation is maintained
      by keying on session_id.  ref_id is per-session auto-increment (sr_1, sr_2, ...).

      All methods serialize on one internal lock, so concurrent callers within
      the same session never receive a duplicate ref_id and never lose a
      registration to a check-then-create race.
      """

      def __init__(self) -> None:
          """Create an empty registry with no sessions."""
          self._store: dict[str, dict[str, "SearchResult"]] = {}
          self._session_counter: dict[str, int] = {}
          # One lock guards both dicts; no method calls another while holding
          # it, so a plain (non-reentrant) Lock is sufficient.
          self._lock = threading.Lock()

      def next_ref_id(self, session_id: str) -> str:
          """Return the next ref_id for this session (sr_1, sr_2, ...).

          The counter starts at 1 for a session that has not been seen (or
          has been cleared) and increases by one on every call.
          """
          with self._lock:
              # Compute once and format from the local value, so the id
              # returned is always the one this call allocated.
              seq = self._session_counter.get(session_id, 0) + 1
              self._session_counter[session_id] = seq
          return f"sr_{seq}"

      def register(self, session_id: str, result: "SearchResult") -> str:
          """Store a SearchResult under its own ref_id and return that ref_id.

          An existing entry with the same ref_id in the session is replaced.
          """
          with self._lock:
              self._store.setdefault(session_id, {})[result.ref_id] = result
          return result.ref_id

      def get(self, session_id: str, ref_id: str) -> Optional["SearchResult"]:
          """Look up a single SearchResult; None if session or ref_id is unknown."""
          with self._lock:
              return self._store.get(session_id, {}).get(ref_id)

      def get_all(self, session_id: str) -> dict[str, "SearchResult"]:
          """Return all SearchResults for a session (ref_id → SearchResult).

          The returned dict is a shallow copy: mutating it does not affect the
          registry.  An unknown session yields an empty dict.
          """
          with self._lock:
              return dict(self._store.get(session_id, {}))

      def clear_session(self, session_id: str) -> None:
          """Remove all search results for a session (e.g. on chat clear).

          Also resets the session's ref_id counter, so numbering restarts at
          sr_1.  Clearing an unknown session is a no-op.
          """
          with self._lock:
              self._store.pop(session_id, None)
              self._session_counter.pop(session_id, None)
66442668   tangwang   feat: 搜索结果引用与并行搜索...
98
99
100
101
102
  
  
  # ── Global singleton ──────────────────────────────────────────────────────────
  # Created once, when this module is first imported.
  # Imported by search_tools and app.py; both sides share the same object.
  global_registry: SearchResultRegistry = SearchResultRegistry()