diff --git a/indexer/product_enrich.py b/indexer/product_enrich.py
index 4611005..c3397fb 100644
--- a/indexer/product_enrich.py
+++ b/indexer/product_enrich.py
@@ -203,21 +203,44 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value:
     target[lang] = merged
 
 
-def _append_enriched_attribute(
+def _get_or_create_named_value_entry(
+    target: List[Dict[str, Any]],
+    name: str,
+    *,
+    default_value: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Return the entry in ``target`` named ``name`` whose value is a dict.
+
+    When no such entry exists, a new ``{"name": name, "value": ...}`` entry
+    is appended to ``target`` and returned. Its value starts as a shallow
+    copy of ``default_value`` (an empty dict when omitted), so later
+    in-place updates never leak into the caller's default.
+    """
+    for item in target:
+        # Same-name entries whose value is not a dict cannot be merged into;
+        # skip them and keep scanning instead of stopping at the first one,
+        # otherwise every call would append yet another entry for ``name``.
+        if item.get("name") == name and isinstance(item.get("value"), dict):
+            return item
+
+    entry = {"name": name, "value": dict(default_value or {})}
+    target.append(entry)
+    return entry
+
+
+def _append_named_lang_phrase_map(
     target: List[Dict[str, Any]],
     name: str,
     lang: str,
     raw_value: Any,
 ) -> None:
-    for value in split_multi_value_field(raw_value):
-        if any(
-            item.get("name") == name
-            and isinstance(item.get("value"), dict)
-            and item["value"].get(lang) == value
-            for item in target
-        ):
-            continue
-        target.append({"name": name, "value": {lang: value}})
+    """Merge ``raw_value`` into the ``lang`` phrase list of the ``name`` entry.
+
+    One entry per ``name`` is kept in ``target``; its value maps each
+    language code to the phrases collected for it.
+    """
+    entry = _get_or_create_named_value_entry(target, name=name, default_value={})
+    _append_lang_phrase_map(entry["value"], lang=lang, raw_value=raw_value)
 
 
 def _get_product_id(product: Dict[str, Any]) -> str:
@@ -306,7 +329,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang:
         raw = _get_analysis_field_value(row, source_name)
         if not raw:
             continue
-        _append_enriched_attribute(
+        _append_named_lang_phrase_map(
             result["enriched_attributes"],
             name=output_name,
             lang=lang,
diff --git a/tests/ci/test_service_api_contracts.py b/tests/ci/test_service_api_contracts.py
index 13ee04a..259b0af 100644
--- a/tests/ci/test_service_api_contracts.py
+++ b/tests/ci/test_service_api_contracts.py
@@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
                 },
                 "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]},
                 "enriched_attributes": [
-                    {"name": "enriched_tags", "value": {"zh": "tag1"}},
-                    {"name": "enriched_tags", "value": {"en": "tag1"}},
+                    {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}},
                 ],
             }
             for p in items
@@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
     assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"]
     assert data["results"][0]["enriched_attributes"][0] == {
         "name": "enriched_tags",
-        "value": {"zh": "tag1"},
+        "value": {"zh": ["tag1"], "en": ["tag1"]},
     }
 
 
diff --git a/tests/test_llm_enrichment_batch_fill.py b/tests/test_llm_enrichment_batch_fill.py
index 3e05e82..8cb006e 100644
--- a/tests/test_llm_enrichment_batch_fill.py
+++ b/tests/test_llm_enrichment_batch_fill.py
@@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
                 },
                 "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
                 "enriched_attributes": [
-                    {"name": "tags", "value": {"zh": "t1"}},
-                    {"name": "tags", "value": {"en": "t1"}},
+                    {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
                 ],
             }
             for item in items
@@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
     assert docs[0]["qanchors"]["en"] == ["en-anchor-0"]
     assert docs[0]["tags"]["zh"] == ["t1", "t2"]
     assert docs[0]["tags"]["en"] == ["t1", "t2"]
-    assert {"name": "tags", "value": {"zh": "t1"}} in docs[0]["enriched_attributes"]
-    assert {"name": "tags", "value": {"en": "t1"}} in docs[0]["enriched_attributes"]
+    assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"]
diff --git a/tests/test_product_enrich_partial_mode.py b/tests/test_product_enrich_partial_mode.py
index cb00eec..756cfbb 100644
--- a/tests/test_product_enrich_partial_mode.py
+++ b/tests/test_product_enrich_partial_mode.py
@@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output()
             "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]},
             "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]},
             "enriched_attributes": [
-                {"name": "enriched_tags", "value": {"zh": "zh-tag1"}},
-                {"name": "enriched_tags", "value": {"zh": "zh-tag2"}},
-                {"name": "target_audience", "value": {"zh": "zh-audience"}},
-                {"name": "enriched_tags", "value": {"en": "en-tag1"}},
-                {"name": "enriched_tags", "value": {"en": "en-tag2"}},
-                {"name": "target_audience", "value": {"en": "en-audience"}},
+                {
+                    "name": "enriched_tags",
+                    "value": {
+                        "zh": ["zh-tag1", "zh-tag2"],
+                        "en": ["en-tag1", "en-tag2"],
+                    },
+                },
+                {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}},
             ],
         }
     ]
-- 
libgit2 0.21.2