# search_registry.py (3.37 KB)
"""
Search Result Registry

Stores structured search results keyed by session and ref_id.
Each [SEARCH_REF:xxx] in an AI response maps to a SearchResult stored here,
allowing the UI to render product cards without the LLM ever re-listing them.
"""

import uuid
from dataclasses import dataclass, field
from typing import Optional


def new_ref_id() -> str:
    """
    Return a fresh search reference ID.

    The ID is the literal prefix ``sr_`` followed by the first eight hex
    characters of a random UUID4, e.g. ``'sr_3f9a1b2c'``.
    """
    # Only 8 of the UUID's 32 hex digits are kept to keep the ID short.
    short_hex = uuid.uuid4().hex[:8]
    return f"sr_{short_hex}"


@dataclass
class ProductItem:
    """
    One product entry belonging to a search result, carrying a match label.

    Only ``spu_id`` and ``title`` are required; every other field falls back
    to a default when the caller does not supply it.
    """

    # Required identification fields.
    spu_id: str
    title: str

    # Optional descriptive attributes; each is None when not supplied.
    price: Optional[float] = None
    category_path: Optional[str] = None
    vendor: Optional[str] = None
    image_url: Optional[str] = None
    relevance_score: Optional[float] = None

    # Label assigned by the LLM, one of:
    #   "Relevant" | "Partially Relevant" | "Irrelevant"
    # When no label is given the middle value is used.
    match_label: str = "Partially Relevant"

    # List-valued fields; default_factory gives every instance its own
    # empty list instead of sharing one across instances.
    tags: list = field(default_factory=list)
    specifications: list = field(default_factory=list)


@dataclass
class SearchResult:
    """
    One stored search outcome, addressed by its ref_id (e.g. 'sr_3f9a1b2c').

    Bundles the query text, hit statistics, the LLM's relevance breakdown
    and summary, and the product list kept for display.  By design the list
    holds only "Relevant" and "Partially Relevant" items, with "Irrelevant"
    ones discarded; this class itself does not enforce that filtering.

    Every field is required; none has a default.
    """

    # Identity of this result block and the query that produced it.
    ref_id: str
    query: str

    # Statistics from the search API.
    total_api_hits: int       # total documents the search engine matched
    returned_count: int       # how many of those results were assessed

    # Counts per LLM quality label.
    # NOTE(review): "perfect" / "partial" presumably correspond to the
    # "Relevant" / "Partially Relevant" labels — confirm with the producer.
    perfect_count: int
    partial_count: int
    irrelevant_count: int

    # Short LLM-written summary: what the results mostly contain, whether
    # they satisfy the intent, and how well they match.
    quality_summary: str

    # Curated products (perfect + partial only).
    products: list            # list[ProductItem]


class SearchResultRegistry:
    """
    In-memory store of search results, partitioned by session.

    Layout: session_id → { ref_id → SearchResult }.

    Intended to be used as one process-wide instance, so that Streamlit
    reruns keep seeing the same data while the worker process is alive.
    Sessions are kept apart purely by using session_id as the outer key.
    Entries are removed only through clear_session(); no locking is
    performed here.
    """

    def __init__(self) -> None:
        # Outer key is the session_id, inner key is the ref_id.
        self._store: dict[str, dict[str, SearchResult]] = {}

    def register(self, session_id: str, result: SearchResult) -> str:
        """
        Save *result* under *session_id* and return the result's ref_id.

        The session's bucket is created on first use; an existing entry
        with the same ref_id is overwritten.
        """
        session_bucket = self._store.setdefault(session_id, {})
        ref_id = result.ref_id
        session_bucket[ref_id] = result
        return ref_id

    def get(self, session_id: str, ref_id: str) -> Optional[SearchResult]:
        """Fetch one SearchResult; None if the session or ref_id is unknown."""
        session_bucket = self._store.get(session_id, {})
        return session_bucket.get(ref_id)

    def get_all(self, session_id: str) -> dict:
        """
        Return a ref_id → SearchResult mapping for the given session.

        The mapping is a shallow copy, so adding or removing keys in it does
        not affect the registry.  An unknown session yields an empty dict.
        """
        session_bucket = self._store.get(session_id, {})
        return session_bucket.copy()

    def clear_session(self, session_id: str) -> None:
        """Drop everything stored for a session (e.g. when the chat is cleared)."""
        # A default is passed to pop() so an unknown session is a no-op.
        self._store.pop(session_id, None)


# ── Global singleton ──────────────────────────────────────────────────────────
# Created once, when this module is first imported.
# Imported by search_tools and app.py; both sides share the same object.
global_registry = SearchResultRegistry()