# test_llm_enrichment_batch_fill.py (2.53 KB)
from __future__ import annotations

from typing import Any, Dict, List

import pandas as pd

from indexer.document_transformer import SPUDocumentTransformer


def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
    """Check that ``fill_llm_attributes_batch`` delegates to the enrich helper.

    ``build_index_content_fields`` in ``indexer.document_transformer`` is
    swapped for a recording stub. The test then expects:

    * exactly one helper call covering all 45 rows, carrying the docs'
      tenant id and the ``"apparel"`` taxonomy profile (where that profile
      value originates is not visible in this test -- presumably a default
      on the transformer side; verify there);
    * the stub's enrichment fields to appear on the first document.
    """
    import indexer.document_transformer as transformer_module

    batch_size = 45
    recorded_calls: List[Dict[str, Any]] = []

    def _enrichment_for(item) -> Dict[str, Any]:
        # Build fresh containers for every item so no structure is shared
        # between the returned payloads.
        item_id = item["id"]
        return {
            "id": item_id,
            "qanchors": {
                "zh": [f"zh-anchor-{item_id}"],
                "en": [f"en-anchor-{item_id}"],
            },
            "enriched_tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
            "enriched_attributes": [
                {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
            ],
            "enriched_taxonomy_attributes": [
                {"name": "Product Type", "value": {"zh": ["连衣裙"], "en": ["dress"]}},
            ],
        }

    def _stub_build_index_content_fields(items, tenant_id=None, category_taxonomy_profile=None):
        # Record the call shape first, then answer with one payload per item.
        call_summary = {
            "n": len(items),
            "tenant_id": tenant_id,
            "category_taxonomy_profile": category_taxonomy_profile,
        }
        recorded_calls.append(call_summary)
        return [_enrichment_for(item) for item in items]

    monkeypatch.setattr(
        transformer_module,
        "build_index_content_fields",
        _stub_build_index_content_fields,
    )

    # Translation, text embedding and image embedding are all switched off.
    transformer = SPUDocumentTransformer(
        category_id_to_name={},
        searchable_option_dimensions=[],
        tenant_config={"index_languages": ["zh", "en"], "primary_language": "zh"},
        translator=None,
        encoder=None,
        enable_title_embedding=False,
        image_encoder=None,
        enable_image_embedding=False,
    )

    docs: List[Dict[str, Any]] = [
        {"tenant_id": "162", "spu_id": str(idx)} for idx in range(batch_size)
    ]
    rows: List[pd.Series] = [
        pd.Series({"id": idx, "title": f"title-{idx}"}) for idx in range(batch_size)
    ]

    transformer.fill_llm_attributes_batch(docs, rows)

    # A single helper invocation must cover the whole batch.
    expected_calls = [
        {"n": 45, "tenant_id": "162", "category_taxonomy_profile": "apparel"},
    ]
    assert recorded_calls == expected_calls

    # Spot-check the first document for every enrichment field.
    first_doc = docs[0]
    assert first_doc["qanchors"]["zh"] == ["zh-anchor-0"]
    assert first_doc["qanchors"]["en"] == ["en-anchor-0"]
    assert first_doc["enriched_tags"]["zh"] == ["t1", "t2"]
    assert first_doc["enriched_tags"]["en"] == ["t1", "t2"]

    expected_attribute = {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}}
    assert expected_attribute in first_doc["enriched_attributes"]

    expected_taxonomy_attribute = {
        "name": "Product Type",
        "value": {"zh": ["连衣裙"], "en": ["dress"]},
    }
    assert expected_taxonomy_attribute in first_doc["enriched_taxonomy_attributes"]