from __future__ import annotations

from typing import Any, Dict, List

import pandas as pd

from indexer.document_transformer import SPUDocumentTransformer


def test_fill_llm_attributes_batch_calls_analyze_in_batches(monkeypatch):
    seen_calls: List[Dict[str, Any]] = []

    def _fake_analyze_products(
        products, target_lang="zh", batch_size=None, tenant_id=None
    ):
        # should always request batch_size=20 and pass full list; internal splitter handles >20
        seen_calls.append(
            {
                "n": len(products),
                "target_lang": target_lang,
                "batch_size": batch_size,
                "tenant_id": tenant_id,
            }
        )
        return [
            {
                "id": p["id"],
                "lang": target_lang,
                "title_input": p["title"],
                "tags": "t1,t2",
                "anchor_text": f"{target_lang}-anchor-{p['id']}",
            }
            for p in products
        ]

    import indexer.document_transformer as doc_tr

    monkeypatch.setattr(doc_tr, "analyze_products", _fake_analyze_products)

    transformer = SPUDocumentTransformer(
        category_id_to_name={},
        searchable_option_dimensions=[],
        tenant_config={"index_languages": ["zh", "en"], "primary_language": "zh"},
        translator=None,
        encoder=None,
        enable_title_embedding=False,
        image_encoder=None,
        enable_image_embedding=False,
    )

    docs: List[Dict[str, Any]] = []
    rows: List[pd.Series] = []
    for i in range(45):
        docs.append({"tenant_id": "162", "spu_id": str(i)})
        rows.append(pd.Series({"id": i, "title": f"title-{i}"}))

    transformer.fill_llm_attributes_batch(docs, rows)

    # called once per language, with full list; analyze_products handles splitting
    assert seen_calls == [
        {"n": 45, "target_lang": "zh", "batch_size": 20, "tenant_id": "162"},
        {"n": 45, "target_lang": "en", "batch_size": 20, "tenant_id": "162"},
    ]
    assert docs[0]["qanchors"]["zh"] == "zh-anchor-0"
    assert docs[0]["qanchors"]["en"] == "en-anchor-0"
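

# ---------------------------------------------------------------------------
# For reference only: a minimal sketch of the contract this test exercises.
# It assumes (this is NOT the real SPUDocumentTransformer implementation) that
# fill_llm_attributes_batch fans out one analyze_products call per configured
# index language, passes the full product list with batch_size=20, and writes
# each result's anchor_text back under doc["qanchors"][lang]. The helper name
# _sketch_fill_llm_attributes_batch is hypothetical and exists only to
# illustrate the expected call pattern asserted above.
# ---------------------------------------------------------------------------
def _sketch_fill_llm_attributes_batch(docs, rows, tenant_config, analyze_products):
    products = [{"id": row["id"], "title": row["title"]} for row in rows]
    tenant_id = docs[0]["tenant_id"] if docs else None
    for lang in tenant_config["index_languages"]:
        # one call per language with the full list; splitting into chunks of 20
        # is delegated to analyze_products via batch_size=20
        results = analyze_products(
            products, target_lang=lang, batch_size=20, tenant_id=tenant_id
        )
        for doc, result in zip(docs, results):
            doc.setdefault("qanchors", {})[lang] = result["anchor_text"]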