# search_registry.py
"""
Search Result Registry

Stores structured search results keyed by session and ref_id.
Each [SEARCH_RESULTS_REF:ref_id] in an AI response maps to a SearchResult stored here,
allowing the UI to render product cards without the LLM ever re-listing them.

ref_id uses session-scoped auto-increment (sr_1, sr_2, ...).
"""

from dataclasses import dataclass, field
from typing import Optional


@dataclass
class ProductItem:
    """A single product extracted from a search result, enriched with a match label.

    Only ``spu_id`` and ``title`` are required; every other field is optional
    metadata that may be absent from the upstream search payload.
    """

    spu_id: str                              # product identifier (SPU = standard product unit)
    title: str                               # display title of the product
    price: Optional[float] = None            # unit price, if known
    category_path: Optional[str] = None      # presumably a delimited path string — TODO confirm against producer
    vendor: Optional[str] = None             # seller / brand name, if provided
    image_url: Optional[str] = None          # thumbnail URL for the product card
    relevance_score: Optional[float] = None  # search-engine relevance score, if provided
    # LLM-assigned label: "Relevant" | "Partially Relevant" | "Irrelevant"
    match_label: str = "Partially Relevant"
    tags: list = field(default_factory=list)            # assumed list[str] — TODO confirm
    specifications: list = field(default_factory=list)  # element schema not visible here — TODO confirm


@dataclass
class SearchResult:
    """
    A complete, self-contained search result block.

    Identified by ref_id (e.g. 'sr_1', 'sr_2' — session-scoped auto-increment).
    Stores the query, LLM quality assessment, and the curated product list
    (only "Relevant" and "Partially Relevant" items — "Irrelevant" are discarded).
    """

    ref_id: str   # session-scoped identifier issued by SearchResultRegistry.next_ref_id
    query: str    # the search query this result block answers

    # Raw API stats
    total_api_hits: int       # total documents matched by the search engine
    returned_count: int       # number of results we actually assessed

    # LLM quality labels breakdown; the three counts partition the assessed results
    perfect_count: int        # labeled "Relevant"
    partial_count: int        # labeled "Partially Relevant"
    irrelevant_count: int     # labeled "Irrelevant" (these items are NOT stored in `products`)

    # LLM-written short summary: what the results mainly contain, whether they meet intent, match degree
    quality_summary: str

    # Curated product list (perfect + partial only).
    # Quoted forward reference: keeps the precise type for checkers without
    # evaluating ProductItem at class-creation time.
    products: "list[ProductItem]"


class SearchResultRegistry:
    """
    In-process store mapping session_id → { ref_id → SearchResult }.

    A single module-level instance serves the whole process, so Streamlit
    reruns keep seeing the same data while the worker stays alive.  Sessions
    never see each other's results because every operation is keyed by
    session_id, and each session numbers its refs independently
    (sr_1, sr_2, ...).
    """

    def __init__(self) -> None:
        # session_id → (ref_id → SearchResult)
        self._store: dict[str, dict[str, SearchResult]] = {}
        # session_id → last counter value handed out by next_ref_id
        self._session_counter: dict[str, int] = {}

    def next_ref_id(self, session_id: str) -> str:
        """Allocate and return the next ref_id for this session (sr_1, sr_2, ...)."""
        count = self._session_counter.get(session_id, 0) + 1
        self._session_counter[session_id] = count
        return f"sr_{count}"

    def register(self, session_id: str, result: SearchResult) -> str:
        """Store *result* under its own ref_id and hand that ref_id back."""
        bucket = self._store.setdefault(session_id, {})
        bucket[result.ref_id] = result
        return result.ref_id

    def get(self, session_id: str, ref_id: str) -> Optional[SearchResult]:
        """Fetch one SearchResult; None when the session or ref_id is unknown."""
        session_results = self._store.get(session_id)
        if session_results is None:
            return None
        return session_results.get(ref_id)

    def get_all(self, session_id: str) -> dict:
        """Return a shallow copy of every ref_id → SearchResult for the session."""
        return {ref: res for ref, res in self._store.get(session_id, {}).items()}

    def clear_session(self, session_id: str) -> None:
        """Drop all stored results and the ref counter for a session (e.g. on chat clear)."""
        for mapping in (self._store, self._session_counter):
            mapping.pop(session_id, None)


# ── Global singleton ──────────────────────────────────────────────────────────
# Imported by search_tools and app.py; both sides share the same object.
# Process-wide instance: all sessions' results live here, isolated by session_id.
global_registry = SearchResultRegistry()