Commit 80f1e0367222d368749ac83c730c55467bbae9ea
1 parent
42024409
enriched_attributes 现在会按 name 聚合:同名项下的 value.zh / value.en 都会合并成数组,和 qanchors / enriched_tags 的处理方式保持一致,更符合当前这套 ES mapping 的灌入方式。
ES 的 text 字段本身支持数组,所以像 value.zh: ["舒适", "无鞋带设计"] 这种写法可以正常入库;nested 只是外层的对象数组,不影响内部语言字段存数组。
Showing
4 changed files
with
34 additions
and
24 deletions
Show diff stats
indexer/product_enrich.py
| @@ -203,21 +203,32 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value: | @@ -203,21 +203,32 @@ def _append_lang_phrase_map(target: Dict[str, List[str]], lang: str, raw_value: | ||
| 203 | target[lang] = merged | 203 | target[lang] = merged |
| 204 | 204 | ||
| 205 | 205 | ||
| 206 | -def _append_enriched_attribute( | 206 | +def _get_or_create_named_value_entry( |
| 207 | + target: List[Dict[str, Any]], | ||
| 208 | + name: str, | ||
| 209 | + *, | ||
| 210 | + default_value: Optional[Dict[str, Any]] = None, | ||
| 211 | +) -> Dict[str, Any]: | ||
| 212 | + for item in target: | ||
| 213 | + if item.get("name") == name: | ||
| 214 | + value = item.get("value") | ||
| 215 | + if isinstance(value, dict): | ||
| 216 | + return item | ||
| 217 | + break | ||
| 218 | + | ||
| 219 | + entry = {"name": name, "value": default_value or {}} | ||
| 220 | + target.append(entry) | ||
| 221 | + return entry | ||
| 222 | + | ||
| 223 | + | ||
| 224 | +def _append_named_lang_phrase_map( | ||
| 207 | target: List[Dict[str, Any]], | 225 | target: List[Dict[str, Any]], |
| 208 | name: str, | 226 | name: str, |
| 209 | lang: str, | 227 | lang: str, |
| 210 | raw_value: Any, | 228 | raw_value: Any, |
| 211 | ) -> None: | 229 | ) -> None: |
| 212 | - for value in split_multi_value_field(raw_value): | ||
| 213 | - if any( | ||
| 214 | - item.get("name") == name | ||
| 215 | - and isinstance(item.get("value"), dict) | ||
| 216 | - and item["value"].get(lang) == value | ||
| 217 | - for item in target | ||
| 218 | - ): | ||
| 219 | - continue | ||
| 220 | - target.append({"name": name, "value": {lang: value}}) | 230 | + entry = _get_or_create_named_value_entry(target, name=name, default_value={}) |
| 231 | + _append_lang_phrase_map(entry["value"], lang=lang, raw_value=raw_value) | ||
| 221 | 232 | ||
| 222 | 233 | ||
| 223 | def _get_product_id(product: Dict[str, Any]) -> str: | 234 | def _get_product_id(product: Dict[str, Any]) -> str: |
| @@ -306,7 +317,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang: | @@ -306,7 +317,7 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang: | ||
| 306 | raw = _get_analysis_field_value(row, source_name) | 317 | raw = _get_analysis_field_value(row, source_name) |
| 307 | if not raw: | 318 | if not raw: |
| 308 | continue | 319 | continue |
| 309 | - _append_enriched_attribute( | 320 | + _append_named_lang_phrase_map( |
| 310 | result["enriched_attributes"], | 321 | result["enriched_attributes"], |
| 311 | name=output_name, | 322 | name=output_name, |
| 312 | lang=lang, | 323 | lang=lang, |
tests/ci/test_service_api_contracts.py
| @@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch | @@ -356,8 +356,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch | ||
| 356 | }, | 356 | }, |
| 357 | "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]}, | 357 | "enriched_tags": {"zh": ["tag1", "tag2"], "en": ["tag1", "tag2"]}, |
| 358 | "enriched_attributes": [ | 358 | "enriched_attributes": [ |
| 359 | - {"name": "enriched_tags", "value": {"zh": "tag1"}}, | ||
| 360 | - {"name": "enriched_tags", "value": {"en": "tag1"}}, | 359 | + {"name": "enriched_tags", "value": {"zh": ["tag1"], "en": ["tag1"]}}, |
| 361 | ], | 360 | ], |
| 362 | } | 361 | } |
| 363 | for p in items | 362 | for p in items |
| @@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch | @@ -387,7 +386,7 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch | ||
| 387 | assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"] | 386 | assert data["results"][0]["enriched_tags"]["en"] == ["tag1", "tag2"] |
| 388 | assert data["results"][0]["enriched_attributes"][0] == { | 387 | assert data["results"][0]["enriched_attributes"][0] == { |
| 389 | "name": "enriched_tags", | 388 | "name": "enriched_tags", |
| 390 | - "value": {"zh": "tag1"}, | 389 | + "value": {"zh": ["tag1"], "en": ["tag1"]}, |
| 391 | } | 390 | } |
| 392 | 391 | ||
| 393 | 392 |
tests/test_llm_enrichment_batch_fill.py
| @@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | @@ -21,8 +21,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | ||
| 21 | }, | 21 | }, |
| 22 | "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]}, | 22 | "tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]}, |
| 23 | "enriched_attributes": [ | 23 | "enriched_attributes": [ |
| 24 | - {"name": "tags", "value": {"zh": "t1"}}, | ||
| 25 | - {"name": "tags", "value": {"en": "t1"}}, | 24 | + {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}}, |
| 26 | ], | 25 | ], |
| 27 | } | 26 | } |
| 28 | for item in items | 27 | for item in items |
| @@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | @@ -57,5 +56,4 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | ||
| 57 | assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] | 56 | assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] |
| 58 | assert docs[0]["tags"]["zh"] == ["t1", "t2"] | 57 | assert docs[0]["tags"]["zh"] == ["t1", "t2"] |
| 59 | assert docs[0]["tags"]["en"] == ["t1", "t2"] | 58 | assert docs[0]["tags"]["en"] == ["t1", "t2"] |
| 60 | - assert {"name": "tags", "value": {"zh": "t1"}} in docs[0]["enriched_attributes"] | ||
| 61 | - assert {"name": "tags", "value": {"en": "t1"}} in docs[0]["enriched_attributes"] | 59 | + assert {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}} in docs[0]["enriched_attributes"] |
tests/test_product_enrich_partial_mode.py
| @@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() | @@ -414,12 +414,14 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() | ||
| 414 | "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]}, | 414 | "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]}, |
| 415 | "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]}, | 415 | "enriched_tags": {"zh": ["zh-tag1", "zh-tag2"], "en": ["en-tag1", "en-tag2"]}, |
| 416 | "enriched_attributes": [ | 416 | "enriched_attributes": [ |
| 417 | - {"name": "enriched_tags", "value": {"zh": "zh-tag1"}}, | ||
| 418 | - {"name": "enriched_tags", "value": {"zh": "zh-tag2"}}, | ||
| 419 | - {"name": "target_audience", "value": {"zh": "zh-audience"}}, | ||
| 420 | - {"name": "enriched_tags", "value": {"en": "en-tag1"}}, | ||
| 421 | - {"name": "enriched_tags", "value": {"en": "en-tag2"}}, | ||
| 422 | - {"name": "target_audience", "value": {"en": "en-audience"}}, | 417 | + { |
| 418 | + "name": "enriched_tags", | ||
| 419 | + "value": { | ||
| 420 | + "zh": ["zh-tag1", "zh-tag2"], | ||
| 421 | + "en": ["en-tag1", "en-tag2"], | ||
| 422 | + }, | ||
| 423 | + }, | ||
| 424 | + {"name": "target_audience", "value": {"zh": ["zh-audience"], "en": ["en-audience"]}}, | ||
| 423 | ], | 425 | ], |
| 424 | } | 426 | } |
| 425 | ] | 427 | ] |