# search_registry.py
"""
Search Result Registry
Stores structured search results keyed by session and ref_id.
Each [SEARCH_RESULTS_REF:ref_id] in an AI response maps to a SearchResult stored here,
allowing the UI to render product cards without the LLM ever re-listing them.
ref_id uses session-scoped auto-increment (sr_1, sr_2, ...).
"""
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class ProductItem:
    """One product taken from a search result, carrying a match label.

    Only ``spu_id`` and ``title`` are required; every other attribute
    falls back to the default declared below.
    """

    # Required fields.
    spu_id: str
    title: str

    # Optional descriptive attributes; None when not supplied.
    price: Optional[float] = None
    category_path: Optional[str] = None
    vendor: Optional[str] = None
    image_url: Optional[str] = None
    relevance_score: Optional[float] = None

    # Label assigned by the LLM, one of:
    # "Relevant" | "Partially Relevant" | "Irrelevant".
    match_label: str = "Partially Relevant"

    # default_factory gives every instance its own fresh empty list.
    tags: list = field(default_factory=list)
    specifications: list = field(default_factory=list)
@dataclass
class SearchResult:
    """A self-contained block describing one search and its assessment.

    Each block is addressed by ``ref_id`` (e.g. 'sr_1', 'sr_2'), a
    session-scoped auto-incrementing identifier. It holds the query, the
    LLM quality assessment, and the curated product list. Per the original
    design notes, only "Relevant" and "Partially Relevant" items are kept
    in the list and "Irrelevant" ones are discarded.
    """

    # Identity of the block and the query that produced it.
    ref_id: str
    query: str

    # Raw API statistics.
    total_api_hits: int  # total documents matched by the search engine
    returned_count: int  # number of results that were actually assessed

    # Breakdown of the LLM quality labels.
    perfect_count: int
    partial_count: int
    irrelevant_count: int

    # Short LLM-written summary: what the results mainly contain, whether
    # they meet the intent, and the degree of match.
    quality_summary: str

    # Curated products (perfect + partial only).
    products: list  # list[ProductItem]
class SearchResultRegistry:
    """
    Per-session store mapping session_id → { ref_id → SearchResult }.

    A single module-level instance is shared across the process; per the
    original design notes, Streamlit reruns keep it for as long as the
    worker process is alive. Sessions are isolated from each other because
    every lookup is keyed on session_id, and ref_ids are numbered
    independently per session (sr_1, sr_2, ...).
    """

    def __init__(self) -> None:
        # Last sequence number handed out for each session.
        self._session_counter: dict[str, int] = {}
        # Registered results, grouped by session and then by ref_id.
        self._store: dict[str, dict[str, SearchResult]] = {}

    def next_ref_id(self, session_id: str) -> str:
        """Advance this session's counter and return the new ref_id (sr_1, sr_2, ...)."""
        sequence = self._session_counter.get(session_id, 0) + 1
        self._session_counter[session_id] = sequence
        return f"sr_{sequence}"

    def register(self, session_id: str, result: SearchResult) -> str:
        """Save ``result`` under its own ref_id for the session; return that ref_id."""
        # The session bucket is created on first use; an existing entry
        # with the same ref_id is overwritten.
        session_results = self._store.setdefault(session_id, {})
        session_results[result.ref_id] = result
        return result.ref_id

    def get(self, session_id: str, ref_id: str) -> Optional[SearchResult]:
        """Fetch one SearchResult, or None if the session or ref_id is unknown."""
        session_results = self._store.get(session_id)
        if session_results is None:
            return None
        return session_results.get(ref_id)

    def get_all(self, session_id: str) -> dict:
        """Return a shallow copy of the session's ref_id → SearchResult mapping."""
        session_results = self._store.get(session_id)
        if session_results is None:
            return {}
        # A copy is returned so callers cannot alter the stored mapping.
        return session_results.copy()

    def clear_session(self, session_id: str) -> None:
        """Drop the session's results and its counter (e.g. on chat clear)."""
        for table in (self._store, self._session_counter):
            table.pop(session_id, None)
# ── Global singleton ──────────────────────────────────────────────────────────
# Module-level registry instance, created once when this module is imported.
# Imported by search_tools and app.py; both sides share the same object.
global_registry = SearchResultRegistry()