Commit 80f1e0367222d368749ac83c730c55467bbae9ea

Authored by tangwang
1 parent 42024409

enriched_attributes 现在按 name 聚合:同名项的 value.zh / value.en 会合并成数组,与 qanchors / enriched_tags 的处理方式保持一致,也更符合当前 ES mapping 的灌入方式。

ES 的 text 字段本身支持数组,因此 value.zh: ["舒适", "无鞋带设计"] 这样的写法可以正常入库;nested 只约束外层的对象数组,不影响内部语言字段存数组。
indexer/product_enrich.py
... ... @@ -203,21 +203,32 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value:
203 203 target[lang] = merged
204 204  
205 205  
206   -def _append_enriched_attribute(
  206 +def _get_or_create_named_value_entry(
  207 + target: List[Dict[str, Any]],
  208 + name: str,
  209 + *,
  210 + default_value: Optional[Dict[str, Any]] = None,
  211 +) -> Dict[str, Any]:
  212 + for item in target:
  213 + if item.get("name") == name:
  214 + value = item.get("value")
  215 + if isinstance(value, dict):
  216 + return item
  217 + break
  218 +
  219 + entry = {"name": name, "value": default_value or {}}
  220 + target.append(entry)
  221 + return entry
  222 +
  223 +
  224 +def _append_named_lang_phrase_map(
207 225 target: List[Dict[str, Any]],
208 226 name: str,
209 227 lang: str,
210 228 raw_value: Any,
211 229 ) -> None:
212   - for value in split_multi_value_field(raw_value):
213   - if any(
214   - item.get("name") == name
215   - and isinstance(item.get("value"), dict)
216   - and item["value"].get(lang) == value
217   - for item in target
218   - ):
219   - continue
220   - target.append({"name": name, "value": {lang: value}})
  230 + entry = _get_or_create_named_value_entry(target, name=name, default_value={})
  231 + _append_lang_phrase_map(entry["value"], lang=lang, raw_value=raw_value)
221 232  
222 233  
223 234 def _get_product_id(product: Dict[str, Any]) -> str:
... ... @@ -306,7 +317,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang:
306 317 raw = _get_analysis_field_value(row, source_name)
307 318 if not raw:
308 319 continue
309   - _append_enriched_attribute(
  320 + _append_named_lang_phrase_map(
310 321 result["enriched_attributes"],
311 322 name=output_name,
312 323 lang=lang,
... ...
tests/ci/test_service_api_contracts.py
... ... @@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
356 356 },
357 357 "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]},
358 358 "enriched_attributes": [
359   - {"name": "enriched_tags", "value": {"zh": "tag1"}},
360   - {"name": "enriched_tags", "value": {"en": "tag1"}},
  359 + {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}},
361 360 ],
362 361 }
363 362 for p in items
... ... @@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
387 386 assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"]
388 387 assert data["results"][0]["enriched_attributes"][0] == {
389 388 "name": "enriched_tags",
390   - "value": {"zh": "tag1"},
  389 + "value": {"zh": ["tag1"], "en": ["tag1"]},
391 390 }
392 391  
393 392  
... ...
tests/test_llm_enrichment_batch_fill.py
... ... @@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
21 21 },
22 22 "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
23 23 "enriched_attributes": [
24   - {"name": "tags", "value": {"zh": "t1"}},
25   - {"name": "tags", "value": {"en": "t1"}},
  24 + {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
26 25 ],
27 26 }
28 27 for item in items
... ... @@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
57 56 assert docs[0]["qanchors"]["en"] == ["en-anchor-0"]
58 57 assert docs[0]["tags"]["zh"] == ["t1", "t2"]
59 58 assert docs[0]["tags"]["en"] == ["t1", "t2"]
60   - assert {"name": "tags", "value": {"zh": "t1"}} in docs[0]["enriched_attributes"]
61   - assert {"name": "tags", "value": {"en": "t1"}} in docs[0]["enriched_attributes"]
  59 + assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"]
... ...
tests/test_product_enrich_partial_mode.py
... ... @@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output()
414 414 "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]},
415 415 "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]},
416 416 "enriched_attributes": [
417   - {"name": "enriched_tags", "value": {"zh": "zh-tag1"}},
418   - {"name": "enriched_tags", "value": {"zh": "zh-tag2"}},
419   - {"name": "target_audience", "value": {"zh": "zh-audience"}},
420   - {"name": "enriched_tags", "value": {"en": "en-tag1"}},
421   - {"name": "enriched_tags", "value": {"en": "en-tag2"}},
422   - {"name": "target_audience", "value": {"en": "en-audience"}},
  417 + {
  418 + "name": "enriched_tags",
  419 + "value": {
  420 + "zh": ["zh-tag1", "zh-tag2"],
  421 + "en": ["en-tag1", "en-tag2"],
  422 + },
  423 + },
  424 + {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}},
423 425 ],
424 426 }
425 427 ]
... ...