Commit 80f1e0367222d368749ac83c730c55467bbae9ea
1 parent: 42024409
enriched_attributes 现在会按 name 聚合:同名项下的 value.zh / value.en 都会合并成数组,与 qanchors / enriched_tags 的处理方式保持一致,也更符合当前这套 ES mapping 的灌入方式。
ES 的 text 字段本身支持数组,所以像 value.zh: ["舒适", "无鞋带设计"] 这种写法可以正常入库;nested 只约束外层的对象数组,不影响内部语言字段存数组。
Showing 4 changed files with 34 additions and 24 deletions
Show diff stats
indexer/product_enrich.py
| ... | ... | @@ -203,21 +203,32 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value: |
| 203 | 203 | target[lang] = merged |
| 204 | 204 | |
| 205 | 205 | |
| 206 | -def _append_enriched_attribute( | |
| 206 | +def _get_or_create_named_value_entry( | |
| 207 | + target: List[Dict[str, Any]], | |
| 208 | + name: str, | |
| 209 | + *, | |
| 210 | + default_value: Optional[Dict[str, Any]] = None, | |
| 211 | +) -> Dict[str, Any]: | |
| 212 | + for item in target: | |
| 213 | + if item.get("name") == name: | |
| 214 | + value = item.get("value") | |
| 215 | + if isinstance(value, dict): | |
| 216 | + return item | |
| 217 | + break | |
| 218 | + | |
| 219 | + entry = {"name": name, "value": default_value or {}} | |
| 220 | + target.append(entry) | |
| 221 | + return entry | |
| 222 | + | |
| 223 | + | |
| 224 | +def _append_named_lang_phrase_map( | |
| 207 | 225 | target: List[Dict[str, Any]], |
| 208 | 226 | name: str, |
| 209 | 227 | lang: str, |
| 210 | 228 | raw_value: Any, |
| 211 | 229 | ) -> None: |
| 212 | - for value in split_multi_value_field(raw_value): | |
| 213 | - if any( | |
| 214 | - item.get("name") == name | |
| 215 | - and isinstance(item.get("value"), dict) | |
| 216 | - and item["value"].get(lang) == value | |
| 217 | - for item in target | |
| 218 | - ): | |
| 219 | - continue | |
| 220 | - target.append({"name": name, "value": {lang: value}}) | |
| 230 | + entry = _get_or_create_named_value_entry(target, name=name, default_value={}) | |
| 231 | + _append_lang_phrase_map(entry["value"], lang=lang, raw_value=raw_value) | |
| 221 | 232 | |
| 222 | 233 | |
| 223 | 234 | def _get_product_id(product: Dict[str, Any]) -> str: |
| ... | ... | @@ -306,7 +317,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang: |
| 306 | 317 | raw = _get_analysis_field_value(row, source_name) |
| 307 | 318 | if not raw: |
| 308 | 319 | continue |
| 309 | - _append_enriched_attribute( | |
| 320 | + _append_named_lang_phrase_map( | |
| 310 | 321 | result["enriched_attributes"], |
| 311 | 322 | name=output_name, |
| 312 | 323 | lang=lang, | ... | ... |
tests/ci/test_service_api_contracts.py
| ... | ... | @@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch |
| 356 | 356 | }, |
| 357 | 357 | "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]}, |
| 358 | 358 | "enriched_attributes": [ |
| 359 | - {"name": "enriched_tags", "value": {"zh": "tag1"}}, | |
| 360 | - {"name": "enriched_tags", "value": {"en": "tag1"}}, | |
| 359 | + {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}}, | |
| 361 | 360 | ], |
| 362 | 361 | } |
| 363 | 362 | for p in items |
| ... | ... | @@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch |
| 387 | 386 | assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"] |
| 388 | 387 | assert data["results"][0]["enriched_attributes"][0] == { |
| 389 | 388 | "name": "enriched_tags", |
| 390 | - "value": {"zh": "tag1"}, | |
| 389 | + "value": {"zh": ["tag1"], "en": ["tag1"]}, | |
| 391 | 390 | } |
| 392 | 391 | |
| 393 | 392 | ... | ... |
tests/test_llm_enrichment_batch_fill.py
| ... | ... | @@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): |
| 21 | 21 | }, |
| 22 | 22 | "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]}, |
| 23 | 23 | "enriched_attributes": [ |
| 24 | - {"name": "tags", "value": {"zh": "t1"}}, | |
| 25 | - {"name": "tags", "value": {"en": "t1"}}, | |
| 24 | + {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}}, | |
| 26 | 25 | ], |
| 27 | 26 | } |
| 28 | 27 | for item in items |
| ... | ... | @@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): |
| 57 | 56 | assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] |
| 58 | 57 | assert docs[0]["tags"]["zh"] == ["t1", "t2"] |
| 59 | 58 | assert docs[0]["tags"]["en"] == ["t1", "t2"] |
| 60 | - assert {"name": "tags", "value": {"zh": "t1"}} in docs[0]["enriched_attributes"] | |
| 61 | - assert {"name": "tags", "value": {"en": "t1"}} in docs[0]["enriched_attributes"] | |
| 59 | + assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"] | ... | ... |
tests/test_product_enrich_partial_mode.py
| ... | ... | @@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() |
| 414 | 414 | "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]}, |
| 415 | 415 | "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]}, |
| 416 | 416 | "enriched_attributes": [ |
| 417 | - {"name": "enriched_tags", "value": {"zh": "zh-tag1"}}, | |
| 418 | - {"name": "enriched_tags", "value": {"zh": "zh-tag2"}}, | |
| 419 | - {"name": "target_audience", "value": {"zh": "zh-audience"}}, | |
| 420 | - {"name": "enriched_tags", "value": {"en": "en-tag1"}}, | |
| 421 | - {"name": "enriched_tags", "value": {"en": "en-tag2"}}, | |
| 422 | - {"name": "target_audience", "value": {"en": "en-audience"}}, | |
| 417 | + { | |
| 418 | + "name": "enriched_tags", | |
| 419 | + "value": { | |
| 420 | + "zh": ["zh-tag1", "zh-tag2"], | |
| 421 | + "en": ["en-tag1", "en-tag2"], | |
| 422 | + }, | |
| 423 | + }, | |
| 424 | + {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}}, | |
| 423 | 425 | ], |
| 424 | 426 | } |
| 425 | 427 | ] | ... | ... |