Commit 80f1e0367222d368749ac83c730c55467bbae9ea

Authored by tangwang
1 parent 42024409

enriched_attributes 现在按 name 聚合:同名项下的 value.zh / value.en 会合并成数组,
与 qanchors / enriched_tags 的处理方式保持一致,也更符合当前 ES mapping 的写入方式。

ES 的 text 字段本身支持数组,因此像 value.zh: ["舒适", "无鞋带设计"] 这种写法可以正常入库;
nested 只是把外层声明为对象数组,不影响内部语言字段存放数组。
indexer/product_enrich.py
@@ -203,21 +203,32 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value: @@ -203,21 +203,32 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value:
203 target[lang] = merged 203 target[lang] = merged
204 204
205 205
206 -def _append_enriched_attribute( 206 +def _get_or_create_named_value_entry(
  207 + target: List[Dict[str, Any]],
  208 + name: str,
  209 + *,
  210 + default_value: Optional[Dict[str, Any]] = None,
  211 +) -> Dict[str, Any]:
  212 + for item in target:
  213 + if item.get("name") == name:
  214 + value = item.get("value")
  215 + if isinstance(value, dict):
  216 + return item
  217 + break
  218 +
  219 + entry = {"name": name, "value": default_value or {}}
  220 + target.append(entry)
  221 + return entry
  222 +
  223 +
def _append_named_lang_phrase_map(
    target: List[Dict[str, Any]],
    name: str,
    lang: str,
    raw_value: Any,
) -> None:
    """Merge ``raw_value`` into the ``lang`` phrases of the entry named ``name``.

    The entry is looked up (or appended) in ``target`` through
    ``_get_or_create_named_value_entry``; its ``value`` mapping is then handed
    to ``_append_lang_phrase_map`` together with ``lang`` and ``raw_value``.
    """
    named_entry = _get_or_create_named_value_entry(
        target,
        name=name,
        default_value={},
    )
    phrases_by_lang = named_entry["value"]
    _append_lang_phrase_map(phrases_by_lang, lang=lang, raw_value=raw_value)
221 232
222 233
223 def _get_product_id(product: Dict[str, Any]) -> str: 234 def _get_product_id(product: Dict[str, Any]) -> str:
@@ -306,7 +317,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang: @@ -306,7 +317,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang:
306 raw = _get_analysis_field_value(row, source_name) 317 raw = _get_analysis_field_value(row, source_name)
307 if not raw: 318 if not raw:
308 continue 319 continue
309 - _append_enriched_attribute( 320 + _append_named_lang_phrase_map(
310 result["enriched_attributes"], 321 result["enriched_attributes"],
311 name=output_name, 322 name=output_name,
312 lang=lang, 323 lang=lang,
tests/ci/test_service_api_contracts.py
@@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch @@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
356 }, 356 },
357 "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]}, 357 "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]},
358 "enriched_attributes": [ 358 "enriched_attributes": [
359 - {"name": "enriched_tags", "value": {"zh": "tag1"}},  
360 - {"name": "enriched_tags", "value": {"en": "tag1"}}, 359 + {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}},
361 ], 360 ],
362 } 361 }
363 for p in items 362 for p in items
@@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch @@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
387 assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"] 386 assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"]
388 assert data["results"][0]["enriched_attributes"][0] == { 387 assert data["results"][0]["enriched_attributes"][0] == {
389 "name": "enriched_tags", 388 "name": "enriched_tags",
390 - "value": {"zh": "tag1"}, 389 + "value": {"zh": ["tag1"], "en": ["tag1"]},
391 } 390 }
392 391
393 392
tests/test_llm_enrichment_batch_fill.py
@@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): @@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
21 }, 21 },
22 "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]}, 22 "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
23 "enriched_attributes": [ 23 "enriched_attributes": [
24 - {"name": "tags", "value": {"zh": "t1"}},  
25 - {"name": "tags", "value": {"en": "t1"}}, 24 + {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
26 ], 25 ],
27 } 26 }
28 for item in items 27 for item in items
@@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): @@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
57 assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] 56 assert docs[0]["qanchors"]["en"] == ["en-anchor-0"]
58 assert docs[0]["tags"]["zh"] == ["t1", "t2"] 57 assert docs[0]["tags"]["zh"] == ["t1", "t2"]
59 assert docs[0]["tags"]["en"] == ["t1", "t2"] 58 assert docs[0]["tags"]["en"] == ["t1", "t2"]
60 - assert {"name": "tags", "value": {"zh": "t1"}} in docs[0]["enriched_attributes"]  
61 - assert {"name": "tags", "value": {"en": "t1"}} in docs[0]["enriched_attributes"] 59 + assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"]
tests/test_product_enrich_partial_mode.py
@@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() @@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output()
414 "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]}, 414 "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]},
415 "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]}, 415 "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]},
416 "enriched_attributes": [ 416 "enriched_attributes": [
417 - {"name": "enriched_tags", "value": {"zh": "zh-tag1"}},  
418 - {"name": "enriched_tags", "value": {"zh": "zh-tag2"}},  
419 - {"name": "target_audience", "value": {"zh": "zh-audience"}},  
420 - {"name": "enriched_tags", "value": {"en": "en-tag1"}},  
421 - {"name": "enriched_tags", "value": {"en": "en-tag2"}},  
422 - {"name": "target_audience", "value": {"en": "en-audience"}}, 417 + {
  418 + "name": "enriched_tags",
  419 + "value": {
  420 + "zh": ["zh-tag1", "zh-tag2"],
  421 + "en": ["en-tag1", "en-tag2"],
  422 + },
  423 + },
  424 + {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}},
423 ], 425 ],
424 } 426 }
425 ] 427 ]