Blame view

app/search_registry.py 3.38 KB
66442668   tangwang   feat: 搜索结果引用与并行搜索...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
  """
  Search Result Registry
  
  Stores structured search results keyed by session and ref_id.
  Each [SEARCH_REF:xxx] in an AI response maps to a SearchResult stored here,
  allowing the UI to render product cards without the LLM ever re-listing them.
  """
  
  import uuid
  from dataclasses import dataclass, field
  from typing import Optional
  
  
  def new_ref_id() -> str:
      """Return a new short search reference ID such as 'sr_3f9a1b2c'.

      The ID is the literal prefix 'sr_' followed by the first eight hex
      characters of a random UUID4.
      """
      short_hex = uuid.uuid4().hex[:8]
      return f"sr_{short_hex}"
  
  
  @dataclass
  class ProductItem:
      """A single product extracted from a search result, enriched with a match label.

      Only ``spu_id`` and ``title`` are required; every other attribute is
      optional and falls back to ``None``, a default label, or an empty list.
      """

      # Required identity fields.
      spu_id: str
      title: str

      # Optional descriptive attributes; None when not supplied.
      price: Optional[float] = None
      category_path: Optional[str] = None
      vendor: Optional[str] = None
      image_url: Optional[str] = None
      relevance_score: Optional[float] = None

      # LLM-assigned label: "Highly Relevant" | "Partially Relevant" | "Not Relevant"
      match_label: str = "Partially Relevant"

      # default_factory gives each instance its own list rather than a shared one.
      tags: list = field(default_factory=list)
      specifications: list = field(default_factory=list)
  
  
  @dataclass
  class SearchResult:
      """
      A complete, self-contained search result block.

      Identified by ref_id (e.g. 'sr_3f9a1b2c').
      Stores the query, LLM quality assessment, and the curated product list
      (only "Highly Relevant" and "Partially Relevant" items; "Not Relevant"
      ones are discarded).

      All fields are required and have no defaults, so they must be supplied
      in declaration order or by keyword.
      """

      ref_id: str
      query: str

      # Raw API stats
      total_api_hits: int       # total documents matched by the search engine
      returned_count: int       # number of results we actually assessed

      # LLM quality labels breakdown
      # NOTE(review): presumably perfect/partial/irrelevant map to the
      # "Highly Relevant" / "Partially Relevant" / "Not Relevant" labels — confirm.
      perfect_count: int
      partial_count: int
      irrelevant_count: int

      # LLM-written short summary: what the results mainly contain, whether they meet intent, match degree
      quality_summary: str

      # Curated product list (perfect + partial only)
      products: list            # list[ProductItem]
  
  
  class SearchResultRegistry:
      """
      Session-scoped store: session_id → { ref_id → SearchResult }.

      Lives as a global singleton in the process; Streamlit reruns preserve it
      as long as the worker process is alive.  Session isolation is maintained
      by keying on session_id.
      """

      def __init__(self) -> None:
          # Outer key is the session_id, inner key is the ref_id.
          self._store: dict[str, dict[str, SearchResult]] = {}

      def register(self, session_id: str, result: SearchResult) -> str:
          """Save ``result`` under ``session_id`` and return ``result.ref_id``.

          An existing entry with the same ref_id in that session is replaced.
          """
          session_results = self._store.setdefault(session_id, {})
          session_results[result.ref_id] = result
          return result.ref_id

      def get(self, session_id: str, ref_id: str) -> Optional[SearchResult]:
          """Return the SearchResult for (session_id, ref_id), or None if absent."""
          session_results = self._store.get(session_id)
          if session_results is None:
              return None
          return session_results.get(ref_id)

      def get_all(self, session_id: str) -> dict:
          """Return a shallow copy of the session's ref_id → SearchResult mapping.

          An unknown session yields an empty dict. The returned dict is a new
          object, so adding or removing keys on it does not touch the registry.
          """
          if session_id not in self._store:
              return {}
          return self._store[session_id].copy()

      def clear_session(self, session_id: str) -> None:
          """Drop every stored result for a session (e.g. on chat clear).

          Does nothing when the session has no stored results.
          """
          if session_id in self._store:
              del self._store[session_id]
  
  
  # ── Global singleton ──────────────────────────────────────────────────────────
  # Created once, when this module is first imported.
  # Imported by search_tools and app.py; both sides share the same object.
  global_registry = SearchResultRegistry()