Commit 048631be73b69068b8fc2d139a1a9f769704e34a
1 parent
dabd52a5
1. 新增说明文档《product_enrich模块说明.md》
2. 删掉自动推断 taxonomy profile 的逻辑,build_index_content_fields() 只接受显式传入的 category_taxonomy_profile 3. 所有 taxonomy profile 都输出 zh/en,并把按行业切语言的逻辑去掉
Showing
10 changed files
with
427 additions
and
409 deletions
Show diff stats
config/config.yaml
| @@ -114,6 +114,7 @@ field_boosts: | @@ -114,6 +114,7 @@ field_boosts: | ||
| 114 | qanchors: 1.0 | 114 | qanchors: 1.0 |
| 115 | enriched_tags: 1.0 | 115 | enriched_tags: 1.0 |
| 116 | enriched_attributes.value: 1.5 | 116 | enriched_attributes.value: 1.5 |
| 117 | + enriched_taxonomy_attributes.value: 0.3 | ||
| 117 | category_name_text: 2.0 | 118 | category_name_text: 2.0 |
| 118 | category_path: 2.0 | 119 | category_path: 2.0 |
| 119 | keywords: 2.0 | 120 | keywords: 2.0 |
| @@ -194,6 +195,7 @@ query_config: | @@ -194,6 +195,7 @@ query_config: | ||
| 194 | - qanchors | 195 | - qanchors |
| 195 | - enriched_tags | 196 | - enriched_tags |
| 196 | - enriched_attributes.value | 197 | - enriched_attributes.value |
| 198 | + - enriched_taxonomy_attributes.value | ||
| 197 | - option1_values | 199 | - option1_values |
| 198 | - option2_values | 200 | - option2_values |
| 199 | - option3_values | 201 | - option3_values |
| @@ -252,6 +254,7 @@ query_config: | @@ -252,6 +254,7 @@ query_config: | ||
| 252 | # - qanchors | 254 | # - qanchors |
| 253 | # - enriched_tags | 255 | # - enriched_tags |
| 254 | # - enriched_attributes | 256 | # - enriched_attributes |
| 257 | + # - enriched_taxonomy_attributes.value | ||
| 255 | - min_price | 258 | - min_price |
| 256 | - compare_at_price | 259 | - compare_at_price |
| 257 | - image_url | 260 | - image_url |
docs/搜索API对接指南-05-索引接口(Indexer).md
| @@ -668,9 +668,9 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | @@ -668,9 +668,9 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | ||
| 668 | - `others` | 668 | - `others` |
| 669 | 669 | ||
| 670 | 说明: | 670 | 说明: |
| 671 | -- `apparel` 仍返回 `zh` + `en` 两种 taxonomy 值。 | ||
| 672 | -- 其余 profile 的 `enriched_taxonomy_attributes.value` 只返回 `en`,以控制字段体积并保持结构简单。 | ||
| 673 | -- Indexer 内部构建 ES 文档时,如果调用链没有显式指定 profile,会优先根据商品的类目字段自动推断 taxonomy profile;外部调用 `/indexer/enrich-content` 时仍以请求中的 `category_taxonomy_profile` 为准。 | 671 | +- 所有 profile 的 `enriched_taxonomy_attributes.value` 都统一返回 `zh` + `en`。 |
| 672 | +- 外部调用 `/indexer/enrich-content` 时,以请求中的 `category_taxonomy_profile` 为准。 | ||
| 673 | +- 当前 Indexer 内部构建 ES 文档时,taxonomy profile 暂时固定使用 `apparel`;代码里已保留 TODO,后续从数据库读取该租户真实所属行业后再替换。 | ||
| 674 | 674 | ||
| 675 | #### 请求参数 | 675 | #### 请求参数 |
| 676 | 676 | ||
| @@ -726,8 +726,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | @@ -726,8 +726,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | ||
| 726 | 726 | ||
| 727 | - 接口不接受语言控制参数。 | 727 | - 接口不接受语言控制参数。 |
| 728 | - 返回哪些语言、返回哪些语义维度,统一由 `indexer.product_enrich` 内部逻辑决定。 | 728 | - 返回哪些语言、返回哪些语义维度,统一由 `indexer.product_enrich` 内部逻辑决定。 |
| 729 | -- 当前为了与 `search_products` mapping 对齐,通用增强字段只包含核心索引语言 `zh`、`en`。 | ||
| 730 | -- taxonomy 字段中,`apparel` 返回 `zh`、`en`;其他 profile 仅返回 `en`。 | 729 | +- 当前为了与 `search_products` mapping 对齐,通用增强字段与 taxonomy 字段都统一只返回核心索引语言 `zh`、`en`。 |
| 731 | 730 | ||
| 732 | 批量请求建议: | 731 | 批量请求建议: |
| 733 | - **全量**:强烈建议 尽可能 **20 个 SPU/doc** 攒成一个批次后再请求一次。 | 732 | - **全量**:强烈建议 尽可能 **20 个 SPU/doc** 攒成一个批次后再请求一次。 |
| @@ -787,7 +786,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | @@ -787,7 +786,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | ||
| 787 | | `results[].qanchors` | object | 与 ES `qanchors` 字段同结构,按语言键返回短语数组 | | 786 | | `results[].qanchors` | object | 与 ES `qanchors` 字段同结构,按语言键返回短语数组 | |
| 788 | | `results[].enriched_tags` | object | 与 ES `enriched_tags` 字段同结构,按语言键返回标签数组 | | 787 | | `results[].enriched_tags` | object | 与 ES `enriched_tags` 字段同结构,按语言键返回标签数组 | |
| 789 | | `results[].enriched_attributes` | array | 与 ES `enriched_attributes` nested 字段同结构,每项为 `{ "name", "value": { "zh"?: "...", "en"?: "..." } }` | | 788 | | `results[].enriched_attributes` | array | 与 ES `enriched_attributes` nested 字段同结构,每项为 `{ "name", "value": { "zh"?: "...", "en"?: "..." } }` | |
| 790 | -| `results[].enriched_taxonomy_attributes` | array | 与 ES `enriched_taxonomy_attributes` nested 字段同结构。`apparel` 每项通常为 `{ "name", "value": { "zh"?: [...], "en"?: [...] } }`;其他 profile 仅返回 `{ "name", "value": { "en": [...] } }` | | 789 | +| `results[].enriched_taxonomy_attributes` | array | 与 ES `enriched_taxonomy_attributes` nested 字段同结构。每项通常为 `{ "name", "value": { "zh"?: [...], "en"?: [...] } }` | |
| 791 | | `results[].error` | string | 若该条处理失败(如 LLM 异常),会在此字段返回错误信息 | | 790 | | `results[].error` | string | 若该条处理失败(如 LLM 异常),会在此字段返回错误信息 | |
| 792 | 791 | ||
| 793 | **错误响应**: | 792 | **错误响应**: |
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
| @@ -444,7 +444,7 @@ curl "http://localhost:6006/health" | @@ -444,7 +444,7 @@ curl "http://localhost:6006/health" | ||
| 444 | 444 | ||
| 445 | - **Base URL**: Indexer 服务地址,如 `http://localhost:6004` | 445 | - **Base URL**: Indexer 服务地址,如 `http://localhost:6004` |
| 446 | - **路径**: `POST /indexer/enrich-content` | 446 | - **路径**: `POST /indexer/enrich-content` |
| 447 | -- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `enrichment_scopes` 选择执行 `generic` / `category_taxonomy`,并通过 `category_taxonomy_profile` 选择对应大类的 taxonomy prompt/profile;默认执行 `generic + category_taxonomy(apparel)`。当前支持的 taxonomy profile 包括 `apparel`、`3c`、`bags`、`pet_supplies`、`electronics`、`outdoor`、`home_appliances`、`home_living`、`wigs`、`beauty`、`accessories`、`toys`、`shoes`、`sports`、`others`。其中 `apparel` 的 taxonomy 输出为 `zh` + `en`,其余 profile 的 taxonomy 输出仅返回 `en`。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。 | 447 | +- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `enrichment_scopes` 选择执行 `generic` / `category_taxonomy`,并通过 `category_taxonomy_profile` 选择对应大类的 taxonomy prompt/profile;默认执行 `generic + category_taxonomy(apparel)`。当前支持的 taxonomy profile 包括 `apparel`、`3c`、`bags`、`pet_supplies`、`electronics`、`outdoor`、`home_appliances`、`home_living`、`wigs`、`beauty`、`accessories`、`toys`、`shoes`、`sports`、`others`。所有 profile 的 taxonomy 输出都统一返回 `zh` + `en`,`category_taxonomy_profile` 只决定字段集合。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。 |
| 448 | 448 | ||
| 449 | 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。 | 449 | 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。 |
| 450 | 450 |
indexer/document_transformer.py
| @@ -259,13 +259,6 @@ class SPUDocumentTransformer: | @@ -259,13 +259,6 @@ class SPUDocumentTransformer: | ||
| 259 | title = str(row.get("title") or "").strip() | 259 | title = str(row.get("title") or "").strip() |
| 260 | if not spu_id or not title: | 260 | if not spu_id or not title: |
| 261 | continue | 261 | continue |
| 262 | - category_path_obj = docs[i].get("category_path") or {} | ||
| 263 | - resolved_category_path = "" | ||
| 264 | - if isinstance(category_path_obj, dict): | ||
| 265 | - resolved_category_path = next( | ||
| 266 | - (str(value).strip() for value in category_path_obj.values() if str(value).strip()), | ||
| 267 | - "", | ||
| 268 | - ) | ||
| 269 | id_to_idx[spu_id] = i | 262 | id_to_idx[spu_id] = i |
| 270 | items.append( | 263 | items.append( |
| 271 | { | 264 | { |
| @@ -274,9 +267,6 @@ class SPUDocumentTransformer: | @@ -274,9 +267,6 @@ class SPUDocumentTransformer: | ||
| 274 | "brief": str(row.get("brief") or "").strip(), | 267 | "brief": str(row.get("brief") or "").strip(), |
| 275 | "description": str(row.get("description") or "").strip(), | 268 | "description": str(row.get("description") or "").strip(), |
| 276 | "image_url": str(row.get("image_src") or "").strip(), | 269 | "image_url": str(row.get("image_src") or "").strip(), |
| 277 | - "category": str(row.get("category") or "").strip(), | ||
| 278 | - "category_path": resolved_category_path, | ||
| 279 | - "category1_name": str(docs[i].get("category1_name") or "").strip(), | ||
| 280 | } | 270 | } |
| 281 | ) | 271 | ) |
| 282 | if not items: | 272 | if not items: |
| @@ -284,7 +274,12 @@ class SPUDocumentTransformer: | @@ -284,7 +274,12 @@ class SPUDocumentTransformer: | ||
| 284 | 274 | ||
| 285 | tenant_id = str(docs[0].get("tenant_id") or "").strip() or None | 275 | tenant_id = str(docs[0].get("tenant_id") or "").strip() or None |
| 286 | try: | 276 | try: |
| 287 | - results = build_index_content_fields(items=items, tenant_id=tenant_id) | 277 | + # TODO: 从数据库读取该 tenant 的真实行业,并据此替换当前默认的 apparel profile。 |
| 278 | + results = build_index_content_fields( | ||
| 279 | + items=items, | ||
| 280 | + tenant_id=tenant_id, | ||
| 281 | + category_taxonomy_profile="apparel", | ||
| 282 | + ) | ||
| 288 | except Exception as e: | 283 | except Exception as e: |
| 289 | logger.warning("LLM batch attribute fill failed: %s", e) | 284 | logger.warning("LLM batch attribute fill failed: %s", e) |
| 290 | return | 285 | return |
| @@ -679,6 +674,7 @@ class SPUDocumentTransformer: | @@ -679,6 +674,7 @@ class SPUDocumentTransformer: | ||
| 679 | 674 | ||
| 680 | tenant_id = doc.get("tenant_id") | 675 | tenant_id = doc.get("tenant_id") |
| 681 | try: | 676 | try: |
| 677 | + # TODO: 从数据库读取该 tenant 的真实行业,并据此替换当前默认的 apparel profile。 | ||
| 682 | results = build_index_content_fields( | 678 | results = build_index_content_fields( |
| 683 | items=[ | 679 | items=[ |
| 684 | { | 680 | { |
| @@ -687,19 +683,10 @@ class SPUDocumentTransformer: | @@ -687,19 +683,10 @@ class SPUDocumentTransformer: | ||
| 687 | "brief": str(spu_row.get("brief") or "").strip(), | 683 | "brief": str(spu_row.get("brief") or "").strip(), |
| 688 | "description": str(spu_row.get("description") or "").strip(), | 684 | "description": str(spu_row.get("description") or "").strip(), |
| 689 | "image_url": str(spu_row.get("image_src") or "").strip(), | 685 | "image_url": str(spu_row.get("image_src") or "").strip(), |
| 690 | - "category": str(spu_row.get("category") or "").strip(), | ||
| 691 | - "category_path": next( | ||
| 692 | - ( | ||
| 693 | - str(value).strip() | ||
| 694 | - for value in (doc.get("category_path") or {}).values() | ||
| 695 | - if str(value).strip() | ||
| 696 | - ), | ||
| 697 | - "", | ||
| 698 | - ), | ||
| 699 | - "category1_name": str(doc.get("category1_name") or "").strip(), | ||
| 700 | } | 686 | } |
| 701 | ], | 687 | ], |
| 702 | tenant_id=str(tenant_id), | 688 | tenant_id=str(tenant_id), |
| 689 | + category_taxonomy_profile="apparel", | ||
| 703 | ) | 690 | ) |
| 704 | except Exception as e: | 691 | except Exception as e: |
| 705 | logger.warning("LLM attribute fill failed for SPU %s: %s", spu_id, e) | 692 | logger.warning("LLM attribute fill failed for SPU %s: %s", spu_id, e) |
indexer/product_enrich.py
| @@ -18,7 +18,7 @@ from dataclasses import dataclass, field | @@ -18,7 +18,7 @@ from dataclasses import dataclass, field | ||
| 18 | from collections import OrderedDict | 18 | from collections import OrderedDict |
| 19 | from datetime import datetime | 19 | from datetime import datetime |
| 20 | from concurrent.futures import ThreadPoolExecutor | 20 | from concurrent.futures import ThreadPoolExecutor |
| 21 | -from typing import List, Dict, Tuple, Any, Optional | 21 | +from typing import List, Dict, Tuple, Any, Optional, FrozenSet |
| 22 | 22 | ||
| 23 | import redis | 23 | import redis |
| 24 | import requests | 24 | import requests |
| @@ -146,8 +146,22 @@ if _missing_prompt_langs: | @@ -146,8 +146,22 @@ if _missing_prompt_langs: | ||
| 146 | ) | 146 | ) |
| 147 | 147 | ||
| 148 | 148 | ||
| 149 | -# 多值字段分隔:英文逗号、中文逗号、顿号,及历史约定的 ; | / 与空白 | 149 | +# 多值字段分隔 |
| 150 | _MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+") | 150 | _MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+") |
| 151 | +# 表格单元格中视为「无内容」的占位 | ||
| 152 | +_MARKDOWN_EMPTY_CELL_LITERALS: Tuple[str, ...] = ("-","–", "—", "none", "null", "n/a", "无") | ||
| 153 | +_MARKDOWN_EMPTY_CELL_TOKENS_CF: FrozenSet[str] = frozenset( | ||
| 154 | + lit.casefold() for lit in _MARKDOWN_EMPTY_CELL_LITERALS | ||
| 155 | +) | ||
| 156 | + | ||
| 157 | +def _normalize_markdown_table_cell(raw: Optional[str]) -> str: | ||
| 158 | + """strip;将占位符统一视为空字符串。""" | ||
| 159 | + s = str(raw or "").strip() | ||
| 160 | + if not s: | ||
| 161 | + return "" | ||
| 162 | + if s.casefold() in _MARKDOWN_EMPTY_CELL_TOKENS_CF: | ||
| 163 | + return "" | ||
| 164 | + return s | ||
| 151 | _CORE_INDEX_LANGUAGES = ("zh", "en") | 165 | _CORE_INDEX_LANGUAGES = ("zh", "en") |
| 152 | _DEFAULT_ENRICHMENT_SCOPES = ("generic", "category_taxonomy") | 166 | _DEFAULT_ENRICHMENT_SCOPES = ("generic", "category_taxonomy") |
| 153 | _DEFAULT_CATEGORY_TAXONOMY_PROFILE = "apparel" | 167 | _DEFAULT_CATEGORY_TAXONOMY_PROFILE = "apparel" |
| @@ -195,19 +209,12 @@ class AnalysisSchema: | @@ -195,19 +209,12 @@ class AnalysisSchema: | ||
| 195 | markdown_table_headers: Dict[str, List[str]] | 209 | markdown_table_headers: Dict[str, List[str]] |
| 196 | result_fields: Tuple[str, ...] | 210 | result_fields: Tuple[str, ...] |
| 197 | meaningful_fields: Tuple[str, ...] | 211 | meaningful_fields: Tuple[str, ...] |
| 198 | - output_languages: Tuple[str, ...] = ("zh", "en") | ||
| 199 | cache_version: str = "v1" | 212 | cache_version: str = "v1" |
| 200 | field_aliases: Dict[str, Tuple[str, ...]] = field(default_factory=dict) | 213 | field_aliases: Dict[str, Tuple[str, ...]] = field(default_factory=dict) |
| 201 | - fallback_headers: Optional[List[str]] = None | ||
| 202 | quality_fields: Tuple[str, ...] = () | 214 | quality_fields: Tuple[str, ...] = () |
| 203 | 215 | ||
| 204 | def get_headers(self, target_lang: str) -> Optional[List[str]]: | 216 | def get_headers(self, target_lang: str) -> Optional[List[str]]: |
| 205 | - headers = self.markdown_table_headers.get(target_lang) | ||
| 206 | - if headers: | ||
| 207 | - return headers | ||
| 208 | - if self.fallback_headers: | ||
| 209 | - return self.fallback_headers | ||
| 210 | - return None | 217 | + return self.markdown_table_headers.get(target_lang) |
| 211 | 218 | ||
| 212 | 219 | ||
| 213 | _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { | 220 | _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { |
| @@ -217,7 +224,6 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { | @@ -217,7 +224,6 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { | ||
| 217 | markdown_table_headers=LANGUAGE_MARKDOWN_TABLE_HEADERS, | 224 | markdown_table_headers=LANGUAGE_MARKDOWN_TABLE_HEADERS, |
| 218 | result_fields=_CONTENT_ANALYSIS_RESULT_FIELDS, | 225 | result_fields=_CONTENT_ANALYSIS_RESULT_FIELDS, |
| 219 | meaningful_fields=_CONTENT_ANALYSIS_MEANINGFUL_FIELDS, | 226 | meaningful_fields=_CONTENT_ANALYSIS_MEANINGFUL_FIELDS, |
| 220 | - output_languages=_CORE_INDEX_LANGUAGES, | ||
| 221 | cache_version="v2", | 227 | cache_version="v2", |
| 222 | field_aliases=_CONTENT_ANALYSIS_FIELD_ALIASES, | 228 | field_aliases=_CONTENT_ANALYSIS_FIELD_ALIASES, |
| 223 | quality_fields=_CONTENT_ANALYSIS_QUALITY_FIELDS, | 229 | quality_fields=_CONTENT_ANALYSIS_QUALITY_FIELDS, |
| @@ -225,17 +231,13 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { | @@ -225,17 +231,13 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { | ||
| 225 | } | 231 | } |
| 226 | 232 | ||
| 227 | def _build_taxonomy_profile_schema(profile: str, config: Dict[str, Any]) -> AnalysisSchema: | 233 | def _build_taxonomy_profile_schema(profile: str, config: Dict[str, Any]) -> AnalysisSchema: |
| 228 | - result_fields = tuple(field["key"] for field in config["fields"]) | ||
| 229 | - headers = config["markdown_table_headers"] | ||
| 230 | return AnalysisSchema( | 234 | return AnalysisSchema( |
| 231 | name=f"taxonomy:{profile}", | 235 | name=f"taxonomy:{profile}", |
| 232 | shared_instruction=config["shared_instruction"], | 236 | shared_instruction=config["shared_instruction"], |
| 233 | - markdown_table_headers=headers, | ||
| 234 | - result_fields=result_fields, | ||
| 235 | - meaningful_fields=result_fields, | ||
| 236 | - output_languages=tuple(config["output_languages"]), | 237 | + markdown_table_headers=config["markdown_table_headers"], |
| 238 | + result_fields=tuple(field["key"] for field in config["fields"]), | ||
| 239 | + meaningful_fields=tuple(field["key"] for field in config["fields"]), | ||
| 237 | cache_version="v1", | 240 | cache_version="v1", |
| 238 | - fallback_headers=headers.get("en") if len(headers) > 1 else None, | ||
| 239 | ) | 241 | ) |
| 240 | 242 | ||
| 241 | 243 | ||
| @@ -254,29 +256,6 @@ def get_supported_category_taxonomy_profiles() -> Tuple[str, ...]: | @@ -254,29 +256,6 @@ def get_supported_category_taxonomy_profiles() -> Tuple[str, ...]: | ||
| 254 | return tuple(_CATEGORY_TAXONOMY_PROFILE_SCHEMAS.keys()) | 256 | return tuple(_CATEGORY_TAXONOMY_PROFILE_SCHEMAS.keys()) |
| 255 | 257 | ||
| 256 | 258 | ||
| 257 | -def _normalize_category_hint(text: Any) -> str: | ||
| 258 | - value = str(text or "").strip().lower() | ||
| 259 | - if not value: | ||
| 260 | - return "" | ||
| 261 | - value = value.replace("_", " ").replace(">", " ").replace("/", " ") | ||
| 262 | - value = re.sub(r"\s+", " ", value) | ||
| 263 | - return value | ||
| 264 | - | ||
| 265 | - | ||
| 266 | -_CATEGORY_TAXONOMY_PROFILE_ALIAS_MATCHERS: Tuple[Tuple[str, str], ...] = tuple( | ||
| 267 | - sorted( | ||
| 268 | - ( | ||
| 269 | - (_normalize_category_hint(alias), profile) | ||
| 270 | - for profile, config in CATEGORY_TAXONOMY_PROFILES.items() | ||
| 271 | - for alias in (profile, *tuple(config.get("aliases") or ())) | ||
| 272 | - if _normalize_category_hint(alias) | ||
| 273 | - ), | ||
| 274 | - key=lambda item: len(item[0]), | ||
| 275 | - reverse=True, | ||
| 276 | - ) | ||
| 277 | -) | ||
| 278 | - | ||
| 279 | - | ||
| 280 | def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str] = None) -> str: | 259 | def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str] = None) -> str: |
| 281 | profile = str(category_taxonomy_profile or _DEFAULT_CATEGORY_TAXONOMY_PROFILE).strip() | 260 | profile = str(category_taxonomy_profile or _DEFAULT_CATEGORY_TAXONOMY_PROFILE).strip() |
| 282 | if profile not in _CATEGORY_TAXONOMY_PROFILE_SCHEMAS: | 261 | if profile not in _CATEGORY_TAXONOMY_PROFILE_SCHEMAS: |
| @@ -287,41 +266,6 @@ def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str | @@ -287,41 +266,6 @@ def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str | ||
| 287 | return profile | 266 | return profile |
| 288 | 267 | ||
| 289 | 268 | ||
| 290 | -def detect_category_taxonomy_profile(item: Dict[str, Any]) -> Optional[str]: | ||
| 291 | - """ | ||
| 292 | - 根据商品已有类目信息猜测 taxonomy profile。 | ||
| 293 | - 未命中时返回 None,由上层决定是否回退到默认 profile。 | ||
| 294 | - """ | ||
| 295 | - category_hints = ( | ||
| 296 | - item.get("category_taxonomy_profile"), | ||
| 297 | - item.get("category1_name"), | ||
| 298 | - item.get("category_name_text"), | ||
| 299 | - item.get("category"), | ||
| 300 | - item.get("category_path"), | ||
| 301 | - ) | ||
| 302 | - for hint in category_hints: | ||
| 303 | - normalized_hint = _normalize_category_hint(hint) | ||
| 304 | - if not normalized_hint: | ||
| 305 | - continue | ||
| 306 | - for alias, profile in _CATEGORY_TAXONOMY_PROFILE_ALIAS_MATCHERS: | ||
| 307 | - if alias and alias in normalized_hint: | ||
| 308 | - return profile | ||
| 309 | - return None | ||
| 310 | - | ||
| 311 | - | ||
| 312 | -def _resolve_category_taxonomy_profile( | ||
| 313 | - item: Dict[str, Any], | ||
| 314 | - fallback_profile: Optional[str] = None, | ||
| 315 | -) -> str: | ||
| 316 | - explicit_profile = str(item.get("category_taxonomy_profile") or "").strip() | ||
| 317 | - if explicit_profile: | ||
| 318 | - return _normalize_category_taxonomy_profile(explicit_profile) | ||
| 319 | - detected_profile = detect_category_taxonomy_profile(item) | ||
| 320 | - if detected_profile: | ||
| 321 | - return detected_profile | ||
| 322 | - return _normalize_category_taxonomy_profile(fallback_profile) | ||
| 323 | - | ||
| 324 | - | ||
| 325 | def _get_analysis_schema( | 269 | def _get_analysis_schema( |
| 326 | analysis_kind: str, | 270 | analysis_kind: str, |
| 327 | *, | 271 | *, |
| @@ -342,17 +286,6 @@ def _get_taxonomy_attribute_field_map( | @@ -342,17 +286,6 @@ def _get_taxonomy_attribute_field_map( | ||
| 342 | return _CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS[profile] | 286 | return _CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS[profile] |
| 343 | 287 | ||
| 344 | 288 | ||
| 345 | -def _get_analysis_output_languages( | ||
| 346 | - analysis_kind: str, | ||
| 347 | - *, | ||
| 348 | - category_taxonomy_profile: Optional[str] = None, | ||
| 349 | -) -> Tuple[str, ...]: | ||
| 350 | - return _get_analysis_schema( | ||
| 351 | - analysis_kind, | ||
| 352 | - category_taxonomy_profile=category_taxonomy_profile, | ||
| 353 | - ).output_languages | ||
| 354 | - | ||
| 355 | - | ||
| 356 | def _normalize_enrichment_scopes( | 289 | def _normalize_enrichment_scopes( |
| 357 | enrichment_scopes: Optional[List[str]] = None, | 290 | enrichment_scopes: Optional[List[str]] = None, |
| 358 | ) -> Tuple[str, ...]: | 291 | ) -> Tuple[str, ...]: |
| @@ -562,11 +495,6 @@ def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]: | @@ -562,11 +495,6 @@ def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]: | ||
| 562 | "brief": str(item.get("brief") or "").strip(), | 495 | "brief": str(item.get("brief") or "").strip(), |
| 563 | "description": str(item.get("description") or "").strip(), | 496 | "description": str(item.get("description") or "").strip(), |
| 564 | "image_url": str(item.get("image_url") or "").strip(), | 497 | "image_url": str(item.get("image_url") or "").strip(), |
| 565 | - "category": str(item.get("category") or "").strip(), | ||
| 566 | - "category_path": str(item.get("category_path") or "").strip(), | ||
| 567 | - "category_name_text": str(item.get("category_name_text") or "").strip(), | ||
| 568 | - "category1_name": str(item.get("category1_name") or "").strip(), | ||
| 569 | - "category_taxonomy_profile": str(item.get("category_taxonomy_profile") or "").strip(), | ||
| 570 | } | 498 | } |
| 571 | 499 | ||
| 572 | 500 | ||
| @@ -584,8 +512,7 @@ def build_index_content_fields( | @@ -584,8 +512,7 @@ def build_index_content_fields( | ||
| 584 | - `title` | 512 | - `title` |
| 585 | - 可选 `brief` / `description` / `image_url` | 513 | - 可选 `brief` / `description` / `image_url` |
| 586 | - 可选 `enrichment_scopes`,默认同时执行 `generic` 与 `category_taxonomy` | 514 | - 可选 `enrichment_scopes`,默认同时执行 `generic` 与 `category_taxonomy` |
| 587 | - - 可选 `category_taxonomy_profile`;若不传,则优先根据 item 自带的类目字段推断,否则回退到默认 `apparel` | ||
| 588 | - - 可选类目提示字段:`category` / `category_path` / `category_name_text` / `category1_name` | 515 | + - 可选 `category_taxonomy_profile`,默认 `apparel` |
| 589 | 516 | ||
| 590 | 返回项结构: | 517 | 返回项结构: |
| 591 | - `id` | 518 | - `id` |
| @@ -600,21 +527,10 @@ def build_index_content_fields( | @@ -600,21 +527,10 @@ def build_index_content_fields( | ||
| 600 | - `enriched_tags.{lang}` 为标签数组 | 527 | - `enriched_tags.{lang}` 为标签数组 |
| 601 | """ | 528 | """ |
| 602 | requested_enrichment_scopes = _normalize_enrichment_scopes(enrichment_scopes) | 529 | requested_enrichment_scopes = _normalize_enrichment_scopes(enrichment_scopes) |
| 603 | - fallback_taxonomy_profile = ( | ||
| 604 | - _normalize_category_taxonomy_profile(category_taxonomy_profile) | ||
| 605 | - if category_taxonomy_profile | ||
| 606 | - else None | ||
| 607 | - ) | 530 | + normalized_taxonomy_profile = _normalize_category_taxonomy_profile(category_taxonomy_profile) |
| 608 | normalized_items = [_normalize_index_content_item(item) for item in items] | 531 | normalized_items = [_normalize_index_content_item(item) for item in items] |
| 609 | if not normalized_items: | 532 | if not normalized_items: |
| 610 | return [] | 533 | return [] |
| 611 | - taxonomy_profile_by_id = { | ||
| 612 | - item["id"]: _resolve_category_taxonomy_profile( | ||
| 613 | - item, | ||
| 614 | - fallback_profile=fallback_taxonomy_profile, | ||
| 615 | - ) | ||
| 616 | - for item in normalized_items | ||
| 617 | - } | ||
| 618 | 534 | ||
| 619 | results_by_id: Dict[str, Dict[str, Any]] = { | 535 | results_by_id: Dict[str, Dict[str, Any]] = { |
| 620 | item["id"]: { | 536 | item["id"]: { |
| @@ -627,7 +543,7 @@ def build_index_content_fields( | @@ -627,7 +543,7 @@ def build_index_content_fields( | ||
| 627 | for item in normalized_items | 543 | for item in normalized_items |
| 628 | } | 544 | } |
| 629 | 545 | ||
| 630 | - for lang in _get_analysis_output_languages("content"): | 546 | + for lang in _CORE_INDEX_LANGUAGES: |
| 631 | if "generic" in requested_enrichment_scopes: | 547 | if "generic" in requested_enrichment_scopes: |
| 632 | try: | 548 | try: |
| 633 | rows = analyze_products( | 549 | rows = analyze_products( |
| @@ -636,7 +552,7 @@ def build_index_content_fields( | @@ -636,7 +552,7 @@ def build_index_content_fields( | ||
| 636 | batch_size=BATCH_SIZE, | 552 | batch_size=BATCH_SIZE, |
| 637 | tenant_id=tenant_id, | 553 | tenant_id=tenant_id, |
| 638 | analysis_kind="content", | 554 | analysis_kind="content", |
| 639 | - category_taxonomy_profile=fallback_taxonomy_profile, | 555 | + category_taxonomy_profile=normalized_taxonomy_profile, |
| 640 | ) | 556 | ) |
| 641 | except Exception as e: | 557 | except Exception as e: |
| 642 | logger.warning("build_index_content_fields content enrichment failed for lang=%s: %s", lang, e) | 558 | logger.warning("build_index_content_fields content enrichment failed for lang=%s: %s", lang, e) |
| @@ -654,48 +570,40 @@ def build_index_content_fields( | @@ -654,48 +570,40 @@ def build_index_content_fields( | ||
| 654 | _apply_index_content_row(results_by_id[item_id], row=row, lang=lang) | 570 | _apply_index_content_row(results_by_id[item_id], row=row, lang=lang) |
| 655 | 571 | ||
| 656 | if "category_taxonomy" in requested_enrichment_scopes: | 572 | if "category_taxonomy" in requested_enrichment_scopes: |
| 657 | - items_by_profile: Dict[str, List[Dict[str, str]]] = {} | ||
| 658 | - for item in normalized_items: | ||
| 659 | - items_by_profile.setdefault(taxonomy_profile_by_id[item["id"]], []).append(item) | ||
| 660 | - | ||
| 661 | - for taxonomy_profile, profile_items in items_by_profile.items(): | ||
| 662 | - for lang in _get_analysis_output_languages( | ||
| 663 | - "taxonomy", | ||
| 664 | - category_taxonomy_profile=taxonomy_profile, | ||
| 665 | - ): | ||
| 666 | - try: | ||
| 667 | - taxonomy_rows = analyze_products( | ||
| 668 | - products=profile_items, | ||
| 669 | - target_lang=lang, | ||
| 670 | - batch_size=BATCH_SIZE, | ||
| 671 | - tenant_id=tenant_id, | ||
| 672 | - analysis_kind="taxonomy", | ||
| 673 | - category_taxonomy_profile=taxonomy_profile, | ||
| 674 | - ) | ||
| 675 | - except Exception as e: | ||
| 676 | - logger.warning( | ||
| 677 | - "build_index_content_fields taxonomy enrichment failed for profile=%s lang=%s: %s", | ||
| 678 | - taxonomy_profile, | ||
| 679 | - lang, | ||
| 680 | - e, | ||
| 681 | - ) | ||
| 682 | - for item in profile_items: | ||
| 683 | - results_by_id[item["id"]].setdefault("error", str(e)) | ||
| 684 | - continue | 573 | + for lang in _CORE_INDEX_LANGUAGES: |
| 574 | + try: | ||
| 575 | + taxonomy_rows = analyze_products( | ||
| 576 | + products=normalized_items, | ||
| 577 | + target_lang=lang, | ||
| 578 | + batch_size=BATCH_SIZE, | ||
| 579 | + tenant_id=tenant_id, | ||
| 580 | + analysis_kind="taxonomy", | ||
| 581 | + category_taxonomy_profile=normalized_taxonomy_profile, | ||
| 582 | + ) | ||
| 583 | + except Exception as e: | ||
| 584 | + logger.warning( | ||
| 585 | + "build_index_content_fields taxonomy enrichment failed for profile=%s lang=%s: %s", | ||
| 586 | + normalized_taxonomy_profile, | ||
| 587 | + lang, | ||
| 588 | + e, | ||
| 589 | + ) | ||
| 590 | + for item in normalized_items: | ||
| 591 | + results_by_id[item["id"]].setdefault("error", str(e)) | ||
| 592 | + continue | ||
| 685 | 593 | ||
| 686 | - for row in taxonomy_rows or []: | ||
| 687 | - item_id = str(row.get("id") or "").strip() | ||
| 688 | - if not item_id or item_id not in results_by_id: | ||
| 689 | - continue | ||
| 690 | - if row.get("error"): | ||
| 691 | - results_by_id[item_id].setdefault("error", row["error"]) | ||
| 692 | - continue | ||
| 693 | - _apply_index_taxonomy_row( | ||
| 694 | - results_by_id[item_id], | ||
| 695 | - row=row, | ||
| 696 | - lang=lang, | ||
| 697 | - category_taxonomy_profile=taxonomy_profile, | ||
| 698 | - ) | 594 | + for row in taxonomy_rows or []: |
| 595 | + item_id = str(row.get("id") or "").strip() | ||
| 596 | + if not item_id or item_id not in results_by_id: | ||
| 597 | + continue | ||
| 598 | + if row.get("error"): | ||
| 599 | + results_by_id[item_id].setdefault("error", row["error"]) | ||
| 600 | + continue | ||
| 601 | + _apply_index_taxonomy_row( | ||
| 602 | + results_by_id[item_id], | ||
| 603 | + row=row, | ||
| 604 | + lang=lang, | ||
| 605 | + category_taxonomy_profile=normalized_taxonomy_profile, | ||
| 606 | + ) | ||
| 699 | 607 | ||
| 700 | return [results_by_id[item["id"]] for item in normalized_items] | 608 | return [results_by_id[item["id"]] for item in normalized_items] |
| 701 | 609 | ||
| @@ -1173,7 +1081,8 @@ def parse_markdown_table( | @@ -1173,7 +1081,8 @@ def parse_markdown_table( | ||
| 1173 | if len(parts) >= 2: | 1081 | if len(parts) >= 2: |
| 1174 | row = {"seq_no": parts[0]} | 1082 | row = {"seq_no": parts[0]} |
| 1175 | for field_index, field_name in enumerate(schema.result_fields, start=1): | 1083 | for field_index, field_name in enumerate(schema.result_fields, start=1): |
| 1176 | - row[field_name] = parts[field_index] if len(parts) > field_index else "" | 1084 | + cell = parts[field_index] if len(parts) > field_index else "" |
| 1085 | + row[field_name] = _normalize_markdown_table_cell(cell) | ||
| 1177 | data.append(row) | 1086 | data.append(row) |
| 1178 | 1087 | ||
| 1179 | return data | 1088 | return data |
indexer/product_enrich_prompts.py
| @@ -60,11 +60,9 @@ def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[st | @@ -60,11 +60,9 @@ def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[st | ||
| 60 | "", | 60 | "", |
| 61 | "Rules:", | 61 | "Rules:", |
| 62 | "- Keep the same row order and row count as input.", | 62 | "- Keep the same row order and row count as input.", |
| 63 | - "- Infer only from the provided product text.", | ||
| 64 | - "- Leave blank if not applicable or not reasonably supported.", | 63 | + "- Leave blank if not applicable, unmentioned, or unsupported.", |
| 65 | "- Use concise, standardized ecommerce wording.", | 64 | "- Use concise, standardized ecommerce wording.", |
| 66 | - "- Do not combine different attribute dimensions in one field.", | ||
| 67 | - "- If multiple values are needed, use the delimiter required by the localization setting.", | 65 | + "- If multiple values, separate with commas.", |
| 68 | "", | 66 | "", |
| 69 | "Input product list:", | 67 | "Input product list:", |
| 70 | ] | 68 | ] |
| @@ -75,19 +73,14 @@ def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[st | @@ -75,19 +73,14 @@ def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[st | ||
| 75 | def _make_taxonomy_profile( | 73 | def _make_taxonomy_profile( |
| 76 | profile_label: str, | 74 | profile_label: str, |
| 77 | fields: Tuple[Dict[str, str], ...], | 75 | fields: Tuple[Dict[str, str], ...], |
| 78 | - *, | ||
| 79 | - aliases: Tuple[str, ...], | ||
| 80 | - output_languages: Tuple[str, ...] = ("en",), | ||
| 81 | - zh_headers: Tuple[str, ...] = (), | ||
| 82 | ) -> Dict[str, Any]: | 76 | ) -> Dict[str, Any]: |
| 83 | - headers = {"en": ["No.", *[field["label"] for field in fields]]} | ||
| 84 | - if zh_headers: | ||
| 85 | - headers["zh"] = ["序号", *zh_headers] | 77 | + headers = { |
| 78 | + "en": ["No.", *[field["label"] for field in fields]], | ||
| 79 | + "zh": ["序号", *[field["zh_label"] for field in fields]], | ||
| 80 | + } | ||
| 86 | return { | 81 | return { |
| 87 | "profile_label": profile_label, | 82 | "profile_label": profile_label, |
| 88 | "fields": fields, | 83 | "fields": fields, |
| 89 | - "aliases": aliases, | ||
| 90 | - "output_languages": output_languages, | ||
| 91 | "shared_instruction": _build_taxonomy_shared_instruction(profile_label, fields), | 84 | "shared_instruction": _build_taxonomy_shared_instruction(profile_label, fields), |
| 92 | "markdown_table_headers": headers, | 85 | "markdown_table_headers": headers, |
| 93 | } | 86 | } |
| @@ -123,268 +116,250 @@ APPAREL_TAXONOMY_FIELDS = ( | @@ -123,268 +116,250 @@ APPAREL_TAXONOMY_FIELDS = ( | ||
| 123 | ) | 116 | ) |
| 124 | 117 | ||
| 125 | THREE_C_TAXONOMY_FIELDS = ( | 118 | THREE_C_TAXONOMY_FIELDS = ( |
| 126 | - _taxonomy_field("product_type", "Product Type", "concise 3C accessory or peripheral category label"), | ||
| 127 | - _taxonomy_field("compatible_device", "Compatible Device / Model", "supported device family, series, model, or form factor when clearly stated"), | ||
| 128 | - _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, wireless, Bluetooth, Wi-Fi, NFC, or 2.4G"), | ||
| 129 | - _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant connector or port, e.g. USB-C, Lightning, HDMI, AUX, RJ45"), | ||
| 130 | - _taxonomy_field("power_charging", "Power Source / Charging", "charging or power mode, e.g. battery powered, fast charging, rechargeable, plug-in"), | ||
| 131 | - _taxonomy_field("key_features", "Key Features", "primary hardware features such as noise cancelling, foldable, magnetic, backlit, waterproof"), | ||
| 132 | - _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported"), | ||
| 133 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 134 | - _taxonomy_field("pack_size", "Pack Size", "unit count or bundle size when stated"), | ||
| 135 | - _taxonomy_field("use_case", "Use Case", "intended usage such as travel, office, gaming, car, charging, streaming"), | 119 | + _taxonomy_field("product_type", "Product Type", "concise 3C accessory or peripheral category label", "品类"), |
| 120 | + _taxonomy_field("compatible_device", "Compatible Device / Model", "supported device family, series, model, or form factor when clearly stated", "适配设备 / 型号"), | ||
| 121 | + _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, wireless, Bluetooth, Wi-Fi, NFC, or 2.4G", "连接方式"), | ||
| 122 | + _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant connector or port, e.g. USB-C, Lightning, HDMI, AUX, RJ45", "接口 / 端口类型"), | ||
| 123 | + _taxonomy_field("power_charging", "Power Source / Charging", "charging or power mode, e.g. battery powered, fast charging, rechargeable, plug-in", "供电 / 充电方式"), | ||
| 124 | + _taxonomy_field("key_features", "Key Features", "primary hardware features such as noise cancelling, foldable, magnetic, backlit, waterproof", "关键特征"), | ||
| 125 | + _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported", "材质 / 表面处理"), | ||
| 126 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 127 | + _taxonomy_field("pack_size", "Pack Size", "unit count or bundle size when stated", "包装规格"), | ||
| 128 | + _taxonomy_field("use_case", "Use Case", "intended usage such as travel, office, gaming, car, charging, streaming", "使用场景"), | ||
| 136 | ) | 129 | ) |
| 137 | 130 | ||
| 138 | BAGS_TAXONOMY_FIELDS = ( | 131 | BAGS_TAXONOMY_FIELDS = ( |
| 139 | - _taxonomy_field("product_type", "Product Type", "concise bag category such as backpack, tote bag, crossbody bag, luggage, or wallet"), | ||
| 140 | - _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied"), | ||
| 141 | - _taxonomy_field("carry_style", "Carry Style", "how the bag is worn or carried, e.g. handheld, shoulder, crossbody, backpack"), | ||
| 142 | - _taxonomy_field("size_capacity", "Size / Capacity", "size tier or capacity when supported, e.g. mini, large capacity, 20L"), | ||
| 143 | - _taxonomy_field("material", "Material", "main bag material such as leather, nylon, canvas, PU, straw"), | ||
| 144 | - _taxonomy_field("closure_type", "Closure Type", "bag closure such as zipper, flap, buckle, drawstring, magnetic snap"), | ||
| 145 | - _taxonomy_field("structure_compartments", "Structure / Compartments", "organizational structure such as multi-pocket, laptop sleeve, card slots, expandable"), | ||
| 146 | - _taxonomy_field("strap_handle_type", "Strap / Handle Type", "strap or handle design such as chain strap, top handle, adjustable strap"), | ||
| 147 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 148 | - _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as commute, travel, evening, school, casual"), | 132 | + _taxonomy_field("product_type", "Product Type", "concise bag category such as backpack, tote bag, crossbody bag, luggage, or wallet", "品类"), |
| 133 | + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"), | ||
| 134 | + _taxonomy_field("carry_style", "Carry Style", "how the bag is worn or carried, e.g. handheld, shoulder, crossbody, backpack", "携带方式"), | ||
| 135 | + _taxonomy_field("size_capacity", "Size / Capacity", "size tier or capacity when supported, e.g. mini, large capacity, 20L", "尺寸 / 容量"), | ||
| 136 | + _taxonomy_field("material", "Material", "main bag material such as leather, nylon, canvas, PU, straw", "材质"), | ||
| 137 | + _taxonomy_field("closure_type", "Closure Type", "bag closure such as zipper, flap, buckle, drawstring, magnetic snap", "闭合方式"), | ||
| 138 | + _taxonomy_field("structure_compartments", "Structure / Compartments", "organizational structure such as multi-pocket, laptop sleeve, card slots, expandable", "结构 / 分层"), | ||
| 139 | + _taxonomy_field("strap_handle_type", "Strap / Handle Type", "strap or handle design such as chain strap, top handle, adjustable strap", "肩带 / 提手类型"), | ||
| 140 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 141 | + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as commute, travel, evening, school, casual", "适用场景"), | ||
| 149 | ) | 142 | ) |
| 150 | 143 | ||
| 151 | PET_SUPPLIES_TAXONOMY_FIELDS = ( | 144 | PET_SUPPLIES_TAXONOMY_FIELDS = ( |
| 152 | - _taxonomy_field("product_type", "Product Type", "concise pet supplies category label"), | ||
| 153 | - _taxonomy_field("pet_type", "Pet Type", "target pet such as dog, cat, bird, fish, hamster"), | ||
| 154 | - _taxonomy_field("breed_size", "Breed Size", "pet size or breed size when stated, e.g. small breed, large dogs"), | ||
| 155 | - _taxonomy_field("life_stage", "Life Stage", "pet age stage when supported, e.g. puppy, kitten, adult, senior"), | ||
| 156 | - _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredient composition when supported"), | ||
| 157 | - _taxonomy_field("flavor_scent", "Flavor / Scent", "flavor or scent when applicable"), | ||
| 158 | - _taxonomy_field("key_features", "Key Features", "primary attributes such as interactive, leak-proof, orthopedic, washable, elevated"), | ||
| 159 | - _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as dental care, calming, digestion support, joint support"), | ||
| 160 | - _taxonomy_field("size_capacity", "Size / Capacity", "size, count, or net content when stated"), | ||
| 161 | - _taxonomy_field("use_scenario", "Use Scenario", "usage such as feeding, training, grooming, travel, indoor play"), | 145 | + _taxonomy_field("product_type", "Product Type", "concise pet supplies category label", "品类"), |
| 146 | + _taxonomy_field("pet_type", "Pet Type", "target pet such as dog, cat, bird, fish, hamster", "宠物类型"), | ||
| 147 | + _taxonomy_field("breed_size", "Breed Size", "pet size or breed size when stated, e.g. small breed, large dogs", "体型 / 品种大小"), | ||
| 148 | + _taxonomy_field("life_stage", "Life Stage", "pet age stage when supported, e.g. puppy, kitten, adult, senior", "成长阶段"), | ||
| 149 | + _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredient composition when supported", "材质 / 成分"), | ||
| 150 | + _taxonomy_field("flavor_scent", "Flavor / Scent", "flavor or scent when applicable", "口味 / 气味"), | ||
| 151 | + _taxonomy_field("key_features", "Key Features", "primary attributes such as interactive, leak-proof, orthopedic, washable, elevated", "关键特征"), | ||
| 152 | + _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as dental care, calming, digestion support, joint support", "功能"), | ||
| 153 | + _taxonomy_field("size_capacity", "Size / Capacity", "size, count, or net content when stated", "尺寸 / 容量"), | ||
| 154 | + _taxonomy_field("use_scenario", "Use Scenario", "usage such as feeding, training, grooming, travel, indoor play", "使用场景"), | ||
| 162 | ) | 155 | ) |
| 163 | 156 | ||
| 164 | ELECTRONICS_TAXONOMY_FIELDS = ( | 157 | ELECTRONICS_TAXONOMY_FIELDS = ( |
| 165 | - _taxonomy_field("product_type", "Product Type", "concise electronics device or component category label"), | ||
| 166 | - _taxonomy_field("device_category", "Device Category / Compatibility", "supported platform, component class, or compatible device family when stated"), | ||
| 167 | - _taxonomy_field("power_voltage", "Power / Voltage", "power, voltage, wattage, or battery spec when supported"), | ||
| 168 | - _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, Bluetooth, Wi-Fi, RF, or smart app control"), | ||
| 169 | - _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant port or interface such as USB-C, AC plug type, HDMI, SATA"), | ||
| 170 | - _taxonomy_field("capacity_storage", "Capacity / Storage", "capacity or storage spec such as 256GB, 2TB, 5000mAh"), | ||
| 171 | - _taxonomy_field("key_features", "Key Features", "main product features such as touch control, HD display, noise reduction, smart control"), | ||
| 172 | - _taxonomy_field("material_finish", "Material / Finish", "main housing material or finish when supported"), | ||
| 173 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 174 | - _taxonomy_field("use_case", "Use Case", "intended use such as home entertainment, office, charging, security, repair"), | 158 | + _taxonomy_field("product_type", "Product Type", "concise electronics device or component category label", "品类"), |
| 159 | + _taxonomy_field("device_category", "Device Category / Compatibility", "supported platform, component class, or compatible device family when stated", "设备类别 / 兼容性"), | ||
| 160 | + _taxonomy_field("power_voltage", "Power / Voltage", "power, voltage, wattage, or battery spec when supported", "功率 / 电压"), | ||
| 161 | + _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, Bluetooth, Wi-Fi, RF, or smart app control", "连接方式"), | ||
| 162 | + _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant port or interface such as USB-C, AC plug type, HDMI, SATA", "接口 / 端口类型"), | ||
| 163 | + _taxonomy_field("capacity_storage", "Capacity / Storage", "capacity or storage spec such as 256GB, 2TB, 5000mAh", "容量 / 存储"), | ||
| 164 | + _taxonomy_field("key_features", "Key Features", "main product features such as touch control, HD display, noise reduction, smart control", "关键特征"), | ||
| 165 | + _taxonomy_field("material_finish", "Material / Finish", "main housing material or finish when supported", "材质 / 表面处理"), | ||
| 166 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 167 | + _taxonomy_field("use_case", "Use Case", "intended use such as home entertainment, office, charging, security, repair", "使用场景"), | ||
| 175 | ) | 168 | ) |
| 176 | 169 | ||
| 177 | OUTDOOR_TAXONOMY_FIELDS = ( | 170 | OUTDOOR_TAXONOMY_FIELDS = ( |
| 178 | - _taxonomy_field("product_type", "Product Type", "concise outdoor gear category label"), | ||
| 179 | - _taxonomy_field("activity_type", "Activity Type", "primary outdoor activity such as camping, hiking, fishing, climbing, travel"), | ||
| 180 | - _taxonomy_field("season_weather", "Season / Weather", "season or weather suitability when supported"), | ||
| 181 | - _taxonomy_field("material", "Material", "main material such as aluminum, ripstop nylon, stainless steel, EVA"), | ||
| 182 | - _taxonomy_field("capacity_size", "Capacity / Size", "size, length, or capacity when stated"), | ||
| 183 | - _taxonomy_field("protection_resistance", "Protection / Resistance", "resistance or protection such as waterproof, UV resistant, windproof"), | ||
| 184 | - _taxonomy_field("key_features", "Key Features", "primary gear attributes such as foldable, lightweight, insulated, non-slip"), | ||
| 185 | - _taxonomy_field("portability_packability", "Portability / Packability", "carry or storage trait such as collapsible, compact, ultralight, packable"), | ||
| 186 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 187 | - _taxonomy_field("use_scenario", "Use Scenario", "likely use setting such as campsite, trail, survival kit, beach, picnic"), | 171 | + _taxonomy_field("product_type", "Product Type", "concise outdoor gear category label", "品类"), |
| 172 | + _taxonomy_field("activity_type", "Activity Type", "primary outdoor activity such as camping, hiking, fishing, climbing, travel", "活动类型"), | ||
| 173 | + _taxonomy_field("season_weather", "Season / Weather", "season or weather suitability when supported", "适用季节 / 天气"), | ||
| 174 | + _taxonomy_field("material", "Material", "main material such as aluminum, ripstop nylon, stainless steel, EVA", "材质"), | ||
| 175 | + _taxonomy_field("capacity_size", "Capacity / Size", "size, length, or capacity when stated", "容量 / 尺寸"), | ||
| 176 | + _taxonomy_field("protection_resistance", "Protection / Resistance", "resistance or protection such as waterproof, UV resistant, windproof", "防护 / 耐受性"), | ||
| 177 | + _taxonomy_field("key_features", "Key Features", "primary gear attributes such as foldable, lightweight, insulated, non-slip", "关键特征"), | ||
| 178 | + _taxonomy_field("portability_packability", "Portability / Packability", "carry or storage trait such as collapsible, compact, ultralight, packable", "便携 / 收纳性"), | ||
| 179 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 180 | + _taxonomy_field("use_scenario", "Use Scenario", "likely use setting such as campsite, trail, survival kit, beach, picnic", "使用场景"), | ||
| 188 | ) | 181 | ) |
| 189 | 182 | ||
| 190 | HOME_APPLIANCES_TAXONOMY_FIELDS = ( | 183 | HOME_APPLIANCES_TAXONOMY_FIELDS = ( |
| 191 | - _taxonomy_field("product_type", "Product Type", "concise home appliance category label"), | ||
| 192 | - _taxonomy_field("appliance_category", "Appliance Category", "functional class such as kitchen appliance, cleaning appliance, personal care appliance"), | ||
| 193 | - _taxonomy_field("power_voltage", "Power / Voltage", "wattage, voltage, plug type, or power supply when supported"), | ||
| 194 | - _taxonomy_field("capacity_coverage", "Capacity / Coverage", "capacity or coverage metric such as 1.5L, 20L, 40sqm"), | ||
| 195 | - _taxonomy_field("control_method", "Control Method", "operation method such as touch, knob, remote, app control"), | ||
| 196 | - _taxonomy_field("installation_type", "Installation Type", "setup style such as countertop, handheld, portable, wall-mounted, built-in"), | ||
| 197 | - _taxonomy_field("key_features", "Key Features", "main product features such as timer, steam, HEPA filter, self-cleaning"), | ||
| 198 | - _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported"), | ||
| 199 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 200 | - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as cooking, cleaning, grooming, cooling, air treatment"), | 184 | + _taxonomy_field("product_type", "Product Type", "concise home appliance category label", "品类"), |
| 185 | + _taxonomy_field("appliance_category", "Appliance Category", "functional class such as kitchen appliance, cleaning appliance, personal care appliance", "家电类别"), | ||
| 186 | + _taxonomy_field("power_voltage", "Power / Voltage", "wattage, voltage, plug type, or power supply when supported", "功率 / 电压"), | ||
| 187 | + _taxonomy_field("capacity_coverage", "Capacity / Coverage", "capacity or coverage metric such as 1.5L, 20L, 40sqm", "容量 / 覆盖范围"), | ||
| 188 | + _taxonomy_field("control_method", "Control Method", "operation method such as touch, knob, remote, app control", "控制方式"), | ||
| 189 | + _taxonomy_field("installation_type", "Installation Type", "setup style such as countertop, handheld, portable, wall-mounted, built-in", "安装方式"), | ||
| 190 | + _taxonomy_field("key_features", "Key Features", "main product features such as timer, steam, HEPA filter, self-cleaning", "关键特征"), | ||
| 191 | + _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported", "材质 / 表面处理"), | ||
| 192 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 193 | + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as cooking, cleaning, grooming, cooling, air treatment", "使用场景"), | ||
| 201 | ) | 194 | ) |
| 202 | 195 | ||
| 203 | HOME_LIVING_TAXONOMY_FIELDS = ( | 196 | HOME_LIVING_TAXONOMY_FIELDS = ( |
| 204 | - _taxonomy_field("product_type", "Product Type", "concise home and living category label"), | ||
| 205 | - _taxonomy_field("room_placement", "Room / Placement", "intended room or placement such as bedroom, kitchen, bathroom, desktop"), | ||
| 206 | - _taxonomy_field("material", "Material", "main material such as wood, ceramic, cotton, glass, metal"), | ||
| 207 | - _taxonomy_field("style", "Style", "home style such as modern, farmhouse, minimalist, boho, Nordic"), | ||
| 208 | - _taxonomy_field("size_dimensions", "Size / Dimensions", "size or dimensions when stated"), | ||
| 209 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 210 | - _taxonomy_field("pattern_finish", "Pattern / Finish", "surface pattern or finish such as solid, marble, matte, ribbed"), | ||
| 211 | - _taxonomy_field("key_features", "Key Features", "main product features such as stackable, washable, blackout, space-saving"), | ||
| 212 | - _taxonomy_field("assembly_installation", "Assembly / Installation", "assembly or installation trait when supported"), | ||
| 213 | - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as storage, dining, decor, sleep, organization"), | 197 | + _taxonomy_field("product_type", "Product Type", "concise home and living category label", "品类"), |
| 198 | + _taxonomy_field("room_placement", "Room / Placement", "intended room or placement such as bedroom, kitchen, bathroom, desktop", "适用空间 / 摆放位置"), | ||
| 199 | + _taxonomy_field("material", "Material", "main material such as wood, ceramic, cotton, glass, metal", "材质"), | ||
| 200 | + _taxonomy_field("style", "Style", "home style such as modern, farmhouse, minimalist, boho, Nordic", "风格"), | ||
| 201 | + _taxonomy_field("size_dimensions", "Size / Dimensions", "size or dimensions when stated", "尺寸 / 规格"), | ||
| 202 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 203 | + _taxonomy_field("pattern_finish", "Pattern / Finish", "surface pattern or finish such as solid, marble, matte, ribbed", "图案 / 表面处理"), | ||
| 204 | + _taxonomy_field("key_features", "Key Features", "main product features such as stackable, washable, blackout, space-saving", "关键特征"), | ||
| 205 | + _taxonomy_field("assembly_installation", "Assembly / Installation", "assembly or installation trait when supported", "组装 / 安装"), | ||
| 206 | + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as storage, dining, decor, sleep, organization", "使用场景"), | ||
| 214 | ) | 207 | ) |
| 215 | 208 | ||
| 216 | WIGS_TAXONOMY_FIELDS = ( | 209 | WIGS_TAXONOMY_FIELDS = ( |
| 217 | - _taxonomy_field("product_type", "Product Type", "concise wig or hairpiece category label"), | ||
| 218 | - _taxonomy_field("hair_material", "Hair Material", "hair material such as human hair, synthetic fiber, heat-resistant fiber"), | ||
| 219 | - _taxonomy_field("hair_texture", "Hair Texture", "texture or curl pattern such as straight, body wave, curly, kinky"), | ||
| 220 | - _taxonomy_field("hair_length", "Hair Length", "hair length when stated"), | ||
| 221 | - _taxonomy_field("hair_color", "Hair Color", "specific hair color or blend when available"), | ||
| 222 | - _taxonomy_field("cap_construction", "Cap Construction", "cap type such as full lace, lace front, glueless, U part"), | ||
| 223 | - _taxonomy_field("lace_area_part_type", "Lace Area / Part Type", "lace size or part style such as 13x4 lace, middle part, T part"), | ||
| 224 | - _taxonomy_field("density_volume", "Density / Volume", "hair density or fullness when supported"), | ||
| 225 | - _taxonomy_field("style_bang_type", "Style / Bang Type", "style cue such as bob, pixie, layered, with bangs"), | ||
| 226 | - _taxonomy_field("occasion_end_use", "Occasion / End Use", "intended use such as daily wear, cosplay, protective style, party"), | 210 | + _taxonomy_field("product_type", "Product Type", "concise wig or hairpiece category label", "品类"), |
| 211 | + _taxonomy_field("hair_material", "Hair Material", "hair material such as human hair, synthetic fiber, heat-resistant fiber", "发丝材质"), | ||
| 212 | + _taxonomy_field("hair_texture", "Hair Texture", "texture or curl pattern such as straight, body wave, curly, kinky", "发质纹理"), | ||
| 213 | + _taxonomy_field("hair_length", "Hair Length", "hair length when stated", "发长"), | ||
| 214 | + _taxonomy_field("hair_color", "Hair Color", "specific hair color or blend when available", "发色"), | ||
| 215 | + _taxonomy_field("cap_construction", "Cap Construction", "cap type such as full lace, lace front, glueless, U part", "帽网结构"), | ||
| 216 | + _taxonomy_field("lace_area_part_type", "Lace Area / Part Type", "lace size or part style such as 13x4 lace, middle part, T part", "蕾丝面积 / 分缝类型"), | ||
| 217 | + _taxonomy_field("density_volume", "Density / Volume", "hair density or fullness when supported", "密度 / 发量"), | ||
| 218 | + _taxonomy_field("style_bang_type", "Style / Bang Type", "style cue such as bob, pixie, layered, with bangs", "款式 / 刘海类型"), | ||
| 219 | + _taxonomy_field("occasion_end_use", "Occasion / End Use", "intended use such as daily wear, cosplay, protective style, party", "适用场景"), | ||
| 227 | ) | 220 | ) |
| 228 | 221 | ||
| 229 | BEAUTY_TAXONOMY_FIELDS = ( | 222 | BEAUTY_TAXONOMY_FIELDS = ( |
| 230 | - _taxonomy_field("product_type", "Product Type", "concise beauty or cosmetics category label"), | ||
| 231 | - _taxonomy_field("target_area", "Target Area", "target area such as face, lips, eyes, nails, hair, body"), | ||
| 232 | - _taxonomy_field("skin_hair_type", "Skin Type / Hair Type", "suitable skin or hair type when supported"), | ||
| 233 | - _taxonomy_field("finish_effect", "Finish / Effect", "cosmetic finish or effect such as matte, dewy, volumizing, brightening"), | ||
| 234 | - _taxonomy_field("key_ingredients", "Key Ingredients", "notable ingredients when stated"), | ||
| 235 | - _taxonomy_field("shade_color", "Shade / Color", "specific shade or color when available"), | ||
| 236 | - _taxonomy_field("scent", "Scent", "fragrance or scent only when supported"), | ||
| 237 | - _taxonomy_field("formulation", "Formulation", "product form such as cream, serum, powder, gel, stick"), | ||
| 238 | - _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as hydration, anti-aging, long-wear, repair, sun protection"), | ||
| 239 | - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as daily routine, salon, travel, evening makeup"), | 223 | + _taxonomy_field("product_type", "Product Type", "concise beauty or cosmetics category label", "品类"), |
| 224 | + _taxonomy_field("target_area", "Target Area", "target area such as face, lips, eyes, nails, hair, body", "适用部位"), | ||
| 225 | + _taxonomy_field("skin_hair_type", "Skin Type / Hair Type", "suitable skin or hair type when supported", "肤质 / 发质"), | ||
| 226 | + _taxonomy_field("finish_effect", "Finish / Effect", "cosmetic finish or effect such as matte, dewy, volumizing, brightening", "妆效 / 效果"), | ||
| 227 | + _taxonomy_field("key_ingredients", "Key Ingredients", "notable ingredients when stated", "关键成分"), | ||
| 228 | + _taxonomy_field("shade_color", "Shade / Color", "specific shade or color when available", "色号 / 颜色"), | ||
| 229 | + _taxonomy_field("scent", "Scent", "fragrance or scent only when supported", "香味"), | ||
| 230 | + _taxonomy_field("formulation", "Formulation", "product form such as cream, serum, powder, gel, stick", "剂型 / 形态"), | ||
| 231 | + _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as hydration, anti-aging, long-wear, repair, sun protection", "功能"), | ||
| 232 | + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as daily routine, salon, travel, evening makeup", "使用场景"), | ||
| 240 | ) | 233 | ) |
| 241 | 234 | ||
| 242 | ACCESSORIES_TAXONOMY_FIELDS = ( | 235 | ACCESSORIES_TAXONOMY_FIELDS = ( |
| 243 | - _taxonomy_field("product_type", "Product Type", "concise accessory category label such as necklace, watch, belt, hat, or sunglasses"), | ||
| 244 | - _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied"), | ||
| 245 | - _taxonomy_field("material", "Material", "main material such as alloy, leather, stainless steel, acetate, fabric"), | ||
| 246 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 247 | - _taxonomy_field("pattern_finish", "Pattern / Finish", "surface treatment or style finish such as polished, textured, braided, rhinestone"), | ||
| 248 | - _taxonomy_field("closure_fastening", "Closure / Fastening", "fastening method when applicable"), | ||
| 249 | - _taxonomy_field("size_fit", "Size / Fit", "size or fit information such as adjustable, one size, 42mm"), | ||
| 250 | - _taxonomy_field("style", "Style", "style cue such as minimalist, vintage, statement, sporty"), | ||
| 251 | - _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as daily wear, formal, party, travel, sun protection"), | ||
| 252 | - _taxonomy_field("set_pack_size", "Set / Pack Size", "set count or pack size when stated"), | 236 | + _taxonomy_field("product_type", "Product Type", "concise accessory category label such as necklace, watch, belt, hat, or sunglasses", "品类"), |
| 237 | + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"), | ||
| 238 | + _taxonomy_field("material", "Material", "main material such as alloy, leather, stainless steel, acetate, fabric", "材质"), | ||
| 239 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 240 | + _taxonomy_field("pattern_finish", "Pattern / Finish", "surface treatment or style finish such as polished, textured, braided, rhinestone", "图案 / 表面处理"), | ||
| 241 | + _taxonomy_field("closure_fastening", "Closure / Fastening", "fastening method when applicable", "闭合 / 固定方式"), | ||
| 242 | + _taxonomy_field("size_fit", "Size / Fit", "size or fit information such as adjustable, one size, 42mm", "尺寸 / 适配"), | ||
| 243 | + _taxonomy_field("style", "Style", "style cue such as minimalist, vintage, statement, sporty", "风格"), | ||
| 244 | + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as daily wear, formal, party, travel, sun protection", "适用场景"), | ||
| 245 | + _taxonomy_field("set_pack_size", "Set / Pack Size", "set count or pack size when stated", "套装 / 规格"), | ||
| 253 | ) | 246 | ) |
| 254 | 247 | ||
| 255 | TOYS_TAXONOMY_FIELDS = ( | 248 | TOYS_TAXONOMY_FIELDS = ( |
| 256 | - _taxonomy_field("product_type", "Product Type", "concise toy category label"), | ||
| 257 | - _taxonomy_field("age_group", "Age Group", "intended age group when clearly implied"), | ||
| 258 | - _taxonomy_field("character_theme", "Character / Theme", "licensed character, theme, or play theme when supported"), | ||
| 259 | - _taxonomy_field("material", "Material", "main toy material such as plush, plastic, wood, silicone"), | ||
| 260 | - _taxonomy_field("power_source", "Power Source", "battery, rechargeable, wind-up, or non-powered when supported"), | ||
| 261 | - _taxonomy_field("interactive_features", "Interactive Features", "interactive functions such as sound, lights, remote control, motion"), | ||
| 262 | - _taxonomy_field("educational_play_value", "Educational / Play Value", "play value such as STEM, pretend play, sensory, puzzle solving"), | ||
| 263 | - _taxonomy_field("piece_count_size", "Piece Count / Size", "piece count or size when stated"), | ||
| 264 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 265 | - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as indoor play, bath time, party favor, outdoor play"), | 249 | + _taxonomy_field("product_type", "Product Type", "concise toy category label", "品类"), |
| 250 | + _taxonomy_field("age_group", "Age Group", "intended age group when clearly implied", "年龄段"), | ||
| 251 | + _taxonomy_field("character_theme", "Character / Theme", "licensed character, theme, or play theme when supported", "角色 / 主题"), | ||
| 252 | + _taxonomy_field("material", "Material", "main toy material such as plush, plastic, wood, silicone", "材质"), | ||
| 253 | + _taxonomy_field("power_source", "Power Source", "battery, rechargeable, wind-up, or non-powered when supported", "供电方式"), | ||
| 254 | + _taxonomy_field("interactive_features", "Interactive Features", "interactive functions such as sound, lights, remote control, motion", "互动功能"), | ||
| 255 | + _taxonomy_field("educational_play_value", "Educational / Play Value", "play value such as STEM, pretend play, sensory, puzzle solving", "教育 / 可玩性"), | ||
| 256 | + _taxonomy_field("piece_count_size", "Piece Count / Size", "piece count or size when stated", "件数 / 尺寸"), | ||
| 257 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 258 | + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as indoor play, bath time, party favor, outdoor play", "使用场景"), | ||
| 266 | ) | 259 | ) |
| 267 | 260 | ||
| 268 | SHOES_TAXONOMY_FIELDS = ( | 261 | SHOES_TAXONOMY_FIELDS = ( |
| 269 | - _taxonomy_field("product_type", "Product Type", "concise footwear category label"), | ||
| 270 | - _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied"), | ||
| 271 | - _taxonomy_field("age_group", "Age Group", "only if clearly implied"), | ||
| 272 | - _taxonomy_field("closure_type", "Closure Type", "fastening method such as lace-up, slip-on, buckle, hook-and-loop"), | ||
| 273 | - _taxonomy_field("toe_shape", "Toe Shape", "toe shape when applicable, e.g. round toe, pointed toe, open toe"), | ||
| 274 | - _taxonomy_field("heel_sole_type", "Heel Height / Sole Type", "heel or sole profile such as flat, block heel, wedge, platform, thick sole"), | ||
| 275 | - _taxonomy_field("upper_material", "Upper Material", "main upper material such as leather, knit, canvas, mesh"), | ||
| 276 | - _taxonomy_field("lining_insole_material", "Lining / Insole Material", "lining or insole material when supported"), | ||
| 277 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 278 | - _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as running, casual, office, hiking, formal"), | 262 | + _taxonomy_field("product_type", "Product Type", "concise footwear category label", "品类"), |
| 263 | + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"), | ||
| 264 | + _taxonomy_field("age_group", "Age Group", "only if clearly implied", "年龄段"), | ||
| 265 | + _taxonomy_field("closure_type", "Closure Type", "fastening method such as lace-up, slip-on, buckle, hook-and-loop", "闭合方式"), | ||
| 266 | + _taxonomy_field("toe_shape", "Toe Shape", "toe shape when applicable, e.g. round toe, pointed toe, open toe", "鞋头形状"), | ||
| 267 | + _taxonomy_field("heel_sole_type", "Heel Height / Sole Type", "heel or sole profile such as flat, block heel, wedge, platform, thick sole", "跟高 / 鞋底类型"), | ||
| 268 | + _taxonomy_field("upper_material", "Upper Material", "main upper material such as leather, knit, canvas, mesh", "鞋面材质"), | ||
| 269 | + _taxonomy_field("lining_insole_material", "Lining / Insole Material", "lining or insole material when supported", "里料 / 鞋垫材质"), | ||
| 270 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 271 | + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as running, casual, office, hiking, formal", "适用场景"), | ||
| 279 | ) | 272 | ) |
| 280 | 273 | ||
| 281 | SPORTS_TAXONOMY_FIELDS = ( | 274 | SPORTS_TAXONOMY_FIELDS = ( |
| 282 | - _taxonomy_field("product_type", "Product Type", "concise sports product category label"), | ||
| 283 | - _taxonomy_field("sport_activity", "Sport / Activity", "primary sport or activity such as fitness, yoga, basketball, cycling, swimming"), | ||
| 284 | - _taxonomy_field("skill_level", "Skill Level", "target user level when supported, e.g. beginner, training, professional"), | ||
| 285 | - _taxonomy_field("material", "Material", "main material such as EVA, carbon fiber, neoprene, latex"), | ||
| 286 | - _taxonomy_field("size_capacity", "Size / Capacity", "size, weight, resistance level, or capacity when stated"), | ||
| 287 | - _taxonomy_field("protection_support", "Protection / Support", "support or protection function such as ankle support, shock absorption, impact protection"), | ||
| 288 | - _taxonomy_field("key_features", "Key Features", "main features such as anti-slip, adjustable, foldable, quick-dry"), | ||
| 289 | - _taxonomy_field("power_source", "Power Source", "battery, electric, or non-powered when applicable"), | ||
| 290 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 291 | - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as gym, home workout, field training, competition"), | 275 | + _taxonomy_field("product_type", "Product Type", "concise sports product category label", "品类"), |
| 276 | + _taxonomy_field("sport_activity", "Sport / Activity", "primary sport or activity such as fitness, yoga, basketball, cycling, swimming", "运动 / 活动"), | ||
| 277 | + _taxonomy_field("skill_level", "Skill Level", "target user level when supported, e.g. beginner, training, professional", "适用水平"), | ||
| 278 | + _taxonomy_field("material", "Material", "main material such as EVA, carbon fiber, neoprene, latex", "材质"), | ||
| 279 | + _taxonomy_field("size_capacity", "Size / Capacity", "size, weight, resistance level, or capacity when stated", "尺寸 / 容量"), | ||
| 280 | + _taxonomy_field("protection_support", "Protection / Support", "support or protection function such as ankle support, shock absorption, impact protection", "防护 / 支撑"), | ||
| 281 | + _taxonomy_field("key_features", "Key Features", "main features such as anti-slip, adjustable, foldable, quick-dry", "关键特征"), | ||
| 282 | + _taxonomy_field("power_source", "Power Source", "battery, electric, or non-powered when applicable", "供电方式"), | ||
| 283 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 284 | + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as gym, home workout, field training, competition", "使用场景"), | ||
| 292 | ) | 285 | ) |
| 293 | 286 | ||
| 294 | OTHERS_TAXONOMY_FIELDS = ( | 287 | OTHERS_TAXONOMY_FIELDS = ( |
| 295 | - _taxonomy_field("product_type", "Product Type", "concise product category label, not a full marketing title"), | ||
| 296 | - _taxonomy_field("product_category", "Product Category", "broader retail grouping when the specific product type is narrow"), | ||
| 297 | - _taxonomy_field("target_user", "Target User", "intended user, audience, or recipient when clearly implied"), | ||
| 298 | - _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredients when supported"), | ||
| 299 | - _taxonomy_field("key_features", "Key Features", "primary product attributes or standout features"), | ||
| 300 | - _taxonomy_field("functional_benefits", "Functional Benefits", "practical benefits or performance advantages when supported"), | ||
| 301 | - _taxonomy_field("size_capacity", "Size / Capacity", "size, count, weight, or capacity when stated"), | ||
| 302 | - _taxonomy_field("color", "Color", "specific color name when available"), | ||
| 303 | - _taxonomy_field("style_theme", "Style / Theme", "overall style, design theme, or visual direction when supported"), | ||
| 304 | - _taxonomy_field("use_scenario", "Use Scenario", "likely use occasion or application setting when supported"), | 288 | + _taxonomy_field("product_type", "Product Type", "concise product category label, not a full marketing title", "品类"), |
| 289 | + _taxonomy_field("product_category", "Product Category", "broader retail grouping when the specific product type is narrow", "商品类别"), | ||
| 290 | + _taxonomy_field("target_user", "Target User", "intended user, audience, or recipient when clearly implied", "适用人群"), | ||
| 291 | + _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredients when supported", "材质 / 成分"), | ||
| 292 | + _taxonomy_field("key_features", "Key Features", "primary product attributes or standout features", "关键特征"), | ||
| 293 | + _taxonomy_field("functional_benefits", "Functional Benefits", "practical benefits or performance advantages when supported", "功能"), | ||
| 294 | + _taxonomy_field("size_capacity", "Size / Capacity", "size, count, weight, or capacity when stated", "尺寸 / 容量"), | ||
| 295 | + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), | ||
| 296 | + _taxonomy_field("style_theme", "Style / Theme", "overall style, design theme, or visual direction when supported", "风格 / 主题"), | ||
| 297 | + _taxonomy_field("use_scenario", "Use Scenario", "likely use occasion or application setting when supported", "使用场景"), | ||
| 305 | ) | 298 | ) |
| 306 | 299 | ||
| 307 | CATEGORY_TAXONOMY_PROFILES: Dict[str, Dict[str, Any]] = { | 300 | CATEGORY_TAXONOMY_PROFILES: Dict[str, Dict[str, Any]] = { |
| 308 | "apparel": _make_taxonomy_profile( | 301 | "apparel": _make_taxonomy_profile( |
| 309 | "apparel", | 302 | "apparel", |
| 310 | APPAREL_TAXONOMY_FIELDS, | 303 | APPAREL_TAXONOMY_FIELDS, |
| 311 | - aliases=("服装", "服饰", "apparel", "clothing", "fashion"), | ||
| 312 | - output_languages=("zh", "en"), | ||
| 313 | - zh_headers=tuple(field["zh_label"] for field in APPAREL_TAXONOMY_FIELDS), | ||
| 314 | ), | 304 | ), |
| 315 | "3c": _make_taxonomy_profile( | 305 | "3c": _make_taxonomy_profile( |
| 316 | "3C", | 306 | "3C", |
| 317 | THREE_C_TAXONOMY_FIELDS, | 307 | THREE_C_TAXONOMY_FIELDS, |
| 318 | - aliases=("3c", "数码", "phone accessories", "computer peripherals", "smart wearables", "audio", "gaming gear"), | ||
| 319 | ), | 308 | ), |
| 320 | "bags": _make_taxonomy_profile( | 309 | "bags": _make_taxonomy_profile( |
| 321 | "bags", | 310 | "bags", |
| 322 | BAGS_TAXONOMY_FIELDS, | 311 | BAGS_TAXONOMY_FIELDS, |
| 323 | - aliases=("bags", "bag", "包", "箱包", "handbag", "backpack", "wallet", "luggage"), | ||
| 324 | ), | 312 | ), |
| 325 | "pet_supplies": _make_taxonomy_profile( | 313 | "pet_supplies": _make_taxonomy_profile( |
| 326 | "pet supplies", | 314 | "pet supplies", |
| 327 | PET_SUPPLIES_TAXONOMY_FIELDS, | 315 | PET_SUPPLIES_TAXONOMY_FIELDS, |
| 328 | - aliases=("pet", "宠物", "pet supplies", "pet food", "pet toys", "pet care"), | ||
| 329 | ), | 316 | ), |
| 330 | "electronics": _make_taxonomy_profile( | 317 | "electronics": _make_taxonomy_profile( |
| 331 | "electronics", | 318 | "electronics", |
| 332 | ELECTRONICS_TAXONOMY_FIELDS, | 319 | ELECTRONICS_TAXONOMY_FIELDS, |
| 333 | - aliases=("electronics", "电子", "electronic components", "consumer electronics", "digital devices"), | ||
| 334 | ), | 320 | ), |
| 335 | "outdoor": _make_taxonomy_profile( | 321 | "outdoor": _make_taxonomy_profile( |
| 336 | "outdoor products", | 322 | "outdoor products", |
| 337 | OUTDOOR_TAXONOMY_FIELDS, | 323 | OUTDOOR_TAXONOMY_FIELDS, |
| 338 | - aliases=("outdoor", "户外", "camping", "hiking", "fishing", "travel accessories"), | ||
| 339 | ), | 324 | ), |
| 340 | "home_appliances": _make_taxonomy_profile( | 325 | "home_appliances": _make_taxonomy_profile( |
| 341 | "home appliances", | 326 | "home appliances", |
| 342 | HOME_APPLIANCES_TAXONOMY_FIELDS, | 327 | HOME_APPLIANCES_TAXONOMY_FIELDS, |
| 343 | - aliases=("home appliances", "家电", "电器", "kitchen appliances", "cleaning appliances", "smart home devices"), | ||
| 344 | ), | 328 | ), |
| 345 | "home_living": _make_taxonomy_profile( | 329 | "home_living": _make_taxonomy_profile( |
| 346 | "home and living", | 330 | "home and living", |
| 347 | HOME_LIVING_TAXONOMY_FIELDS, | 331 | HOME_LIVING_TAXONOMY_FIELDS, |
| 348 | - aliases=("home", "living", "家居", "家具", "家纺", "home decor", "kitchenware"), | ||
| 349 | ), | 332 | ), |
| 350 | "wigs": _make_taxonomy_profile( | 333 | "wigs": _make_taxonomy_profile( |
| 351 | "wigs", | 334 | "wigs", |
| 352 | WIGS_TAXONOMY_FIELDS, | 335 | WIGS_TAXONOMY_FIELDS, |
| 353 | - aliases=("wig", "wigs", "假发", "hairpiece"), | ||
| 354 | ), | 336 | ), |
| 355 | "beauty": _make_taxonomy_profile( | 337 | "beauty": _make_taxonomy_profile( |
| 356 | "beauty and cosmetics", | 338 | "beauty and cosmetics", |
| 357 | BEAUTY_TAXONOMY_FIELDS, | 339 | BEAUTY_TAXONOMY_FIELDS, |
| 358 | - aliases=("beauty", "cosmetics", "美容", "美妆", "makeup", "skincare", "nail care"), | ||
| 359 | ), | 340 | ), |
| 360 | "accessories": _make_taxonomy_profile( | 341 | "accessories": _make_taxonomy_profile( |
| 361 | "accessories", | 342 | "accessories", |
| 362 | ACCESSORIES_TAXONOMY_FIELDS, | 343 | ACCESSORIES_TAXONOMY_FIELDS, |
| 363 | - aliases=("accessories", "配饰", "jewelry", "watches", "belts", "scarves", "hats", "sunglasses"), | ||
| 364 | ), | 344 | ), |
| 365 | "toys": _make_taxonomy_profile( | 345 | "toys": _make_taxonomy_profile( |
| 366 | "toys", | 346 | "toys", |
| 367 | TOYS_TAXONOMY_FIELDS, | 347 | TOYS_TAXONOMY_FIELDS, |
| 368 | - aliases=("toys", "toy", "玩具", "plush", "action figures", "puzzles", "educational toys"), | ||
| 369 | ), | 348 | ), |
| 370 | "shoes": _make_taxonomy_profile( | 349 | "shoes": _make_taxonomy_profile( |
| 371 | "shoes", | 350 | "shoes", |
| 372 | SHOES_TAXONOMY_FIELDS, | 351 | SHOES_TAXONOMY_FIELDS, |
| 373 | - aliases=("shoes", "shoe", "鞋", "sneakers", "boots", "sandals", "heels"), | ||
| 374 | ), | 352 | ), |
| 375 | "sports": _make_taxonomy_profile( | 353 | "sports": _make_taxonomy_profile( |
| 376 | "sports products", | 354 | "sports products", |
| 377 | SPORTS_TAXONOMY_FIELDS, | 355 | SPORTS_TAXONOMY_FIELDS, |
| 378 | - aliases=("sports", "sport", "运动", "fitness", "cycling", "team sports", "water sports"), | ||
| 379 | ), | 356 | ), |
| 380 | "others": _make_taxonomy_profile( | 357 | "others": _make_taxonomy_profile( |
| 381 | "general merchandise", | 358 | "general merchandise", |
| 382 | OTHERS_TAXONOMY_FIELDS, | 359 | OTHERS_TAXONOMY_FIELDS, |
| 383 | - aliases=("others", "other", "其他", "general merchandise"), | ||
| 384 | ), | 360 | ), |
| 385 | } | 361 | } |
| 386 | 362 | ||
| 387 | -CATEGORY_TAXONOMY_PROFILE_NAMES = tuple(CATEGORY_TAXONOMY_PROFILES.keys()) | ||
| 388 | TAXONOMY_SHARED_ANALYSIS_INSTRUCTION = CATEGORY_TAXONOMY_PROFILES["apparel"]["shared_instruction"] | 363 | TAXONOMY_SHARED_ANALYSIS_INSTRUCTION = CATEGORY_TAXONOMY_PROFILES["apparel"]["shared_instruction"] |
| 389 | TAXONOMY_MARKDOWN_TABLE_HEADERS_EN = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"]["en"] | 364 | TAXONOMY_MARKDOWN_TABLE_HEADERS_EN = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"]["en"] |
| 390 | TAXONOMY_LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"] | 365 | TAXONOMY_LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"] |
| @@ -0,0 +1,173 @@ | @@ -0,0 +1,173 @@ | ||
| 1 | +# 内容富化模块说明 | ||
| 2 | + | ||
| 3 | +本文说明商品内容富化模块的职责、入口、输出结构,以及当前 taxonomy profile 的设计约束。 | ||
| 4 | + | ||
| 5 | +## 1. 模块目标 | ||
| 6 | + | ||
| 7 | +内容富化模块负责基于商品文本调用 LLM,生成以下索引字段: | ||
| 8 | + | ||
| 9 | +- `qanchors` | ||
| 10 | +- `enriched_tags` | ||
| 11 | +- `enriched_attributes` | ||
| 12 | +- `enriched_taxonomy_attributes` | ||
| 13 | + | ||
| 14 | +模块追求的设计原则: | ||
| 15 | + | ||
| 16 | +- 单一职责:只负责内容理解与结构化输出,不负责 CSV 读写 | ||
| 17 | +- 输出对齐 ES mapping:返回结构可直接写入 `search_products` | ||
| 18 | +- 配置化扩展:taxonomy profile 通过数据配置扩展,而不是散落条件分支 | ||
| 19 | +- 代码精简:只面向正常使用方式,避免为兼容不合理的调用方式而堆叠补丁逻辑 | ||
| 20 | + | ||
| 21 | +## 2. 主要文件 | ||
| 22 | + | ||
| 23 | +- [product_enrich.py](/data/saas-search/indexer/product_enrich.py) | ||
| 24 | + 运行时主逻辑,负责批处理、缓存、prompt 组装、LLM 调用、markdown 解析、输出整理 | ||
| 25 | +- [product_enrich_prompts.py](/data/saas-search/indexer/product_enrich_prompts.py) | ||
| 26 | + prompt 模板与 taxonomy profile 配置 | ||
| 27 | +- [document_transformer.py](/data/saas-search/indexer/document_transformer.py) | ||
| 28 | + 在内部索引构建链路中调用内容富化模块,把结果回填到 ES doc | ||
| 29 | +- [taxonomy.md](/data/saas-search/indexer/taxonomy.md) | ||
| 30 | + taxonomy 设计说明与字段清单 | ||
| 31 | + | ||
| 32 | +## 3. 对外入口 | ||
| 33 | + | ||
| 34 | +### 3.1 Python 入口 | ||
| 35 | + | ||
| 36 | +核心入口: | ||
| 37 | + | ||
| 38 | +```python | ||
| 39 | +build_index_content_fields( | ||
| 40 | + items, | ||
| 41 | + tenant_id=None, | ||
| 42 | + enrichment_scopes=None, | ||
| 43 | + category_taxonomy_profile=None, | ||
| 44 | +) | ||
| 45 | +``` | ||
| 46 | + | ||
| 47 | +输入最小要求: | ||
| 48 | + | ||
| 49 | +- `id` 或 `spu_id` | ||
| 50 | +- `title` | ||
| 51 | + | ||
| 52 | +可选输入: | ||
| 53 | + | ||
| 54 | +- `brief` | ||
| 55 | +- `description` | ||
| 56 | +- `image_url` | ||
| 57 | + | ||
| 58 | +关键参数: | ||
| 59 | + | ||
| 60 | +- `enrichment_scopes` | ||
| 61 | + 可选 `generic`、`category_taxonomy` | ||
| 62 | +- `category_taxonomy_profile` | ||
| 63 | + 指定使用的 taxonomy profile(可选值见第 5 节);不传时默认使用 `apparel` | ||
| 64 | + | ||
| 65 | +### 3.2 HTTP 入口 | ||
| 66 | + | ||
| 67 | +API 路由: | ||
| 68 | + | ||
| 69 | +- `POST /indexer/enrich-content` | ||
| 70 | + | ||
| 71 | +对应文档: | ||
| 72 | + | ||
| 73 | +- [搜索API对接指南-05-索引接口(Indexer)](/data/saas-search/docs/搜索API对接指南-05-索引接口(Indexer).md) | ||
| 74 | +- [搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation)](/data/saas-search/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md) | ||
| 75 | + | ||
| 76 | +## 4. 输出结构 | ||
| 77 | + | ||
| 78 | +返回结果与 ES mapping 对齐: | ||
| 79 | + | ||
| 80 | +```json | ||
| 81 | +{ | ||
| 82 | + "id": "223167", | ||
| 83 | + "qanchors": { | ||
| 84 | + "zh": ["短袖T恤", "纯棉"], | ||
| 85 | + "en": ["t-shirt", "cotton"] | ||
| 86 | + }, | ||
| 87 | + "enriched_tags": { | ||
| 88 | + "zh": ["短袖", "纯棉"], | ||
| 89 | + "en": ["short sleeve", "cotton"] | ||
| 90 | + }, | ||
| 91 | + "enriched_attributes": [ | ||
| 92 | + { | ||
| 93 | + "name": "enriched_tags", | ||
| 94 | + "value": { | ||
| 95 | + "zh": ["短袖", "纯棉"], | ||
| 96 | + "en": ["short sleeve", "cotton"] | ||
| 97 | + } | ||
| 98 | + } | ||
| 99 | + ], | ||
| 100 | + "enriched_taxonomy_attributes": [ | ||
| 101 | + { | ||
| 102 | + "name": "Product Type", | ||
| 103 | + "value": { | ||
| 104 | + "zh": ["T恤"], | ||
| 105 | + "en": ["t-shirt"] | ||
| 106 | + } | ||
| 107 | + } | ||
| 108 | + ] | ||
| 109 | +} | ||
| 110 | +``` | ||
| 111 | + | ||
| 112 | +说明: | ||
| 113 | + | ||
| 114 | +- `generic` 部分固定输出核心索引语言 `zh`、`en` | ||
| 115 | +- `category_taxonomy` 部分同样统一输出 `zh`、`en`,与所选 profile 无关 | ||
| 116 | + | ||
| 117 | +## 5. Taxonomy profile | ||
| 118 | + | ||
| 119 | +当前支持: | ||
| 120 | + | ||
| 121 | +- `apparel` | ||
| 122 | +- `3c` | ||
| 123 | +- `bags` | ||
| 124 | +- `pet_supplies` | ||
| 125 | +- `electronics` | ||
| 126 | +- `outdoor` | ||
| 127 | +- `home_appliances` | ||
| 128 | +- `home_living` | ||
| 129 | +- `wigs` | ||
| 130 | +- `beauty` | ||
| 131 | +- `accessories` | ||
| 132 | +- `toys` | ||
| 133 | +- `shoes` | ||
| 134 | +- `sports` | ||
| 135 | +- `others` | ||
| 136 | + | ||
| 137 | +统一约束: | ||
| 138 | + | ||
| 139 | +- 所有 profile 都返回 `zh` + `en` | ||
| 140 | +- profile 只决定 taxonomy 字段集合,不再决定输出语言 | ||
| 141 | +- 所有 profile 都配置中英文字段名,prompt/header 结构保持一致 | ||
| 142 | + | ||
| 143 | +## 6. 内部索引链路的当前约束 | ||
| 144 | + | ||
| 145 | +在内部 ES 文档构建链路里,`document_transformer` 当前调用内容富化时,taxonomy profile 暂时固定传: | ||
| 146 | + | ||
| 147 | +```python | ||
| 148 | +category_taxonomy_profile="apparel" | ||
| 149 | +``` | ||
| 150 | + | ||
| 151 | +这是一种显式、可控、代码更干净的临时策略。 | ||
| 152 | + | ||
| 153 | +当前代码里已保留 TODO: | ||
| 154 | + | ||
| 155 | +- 后续从数据库读取租户真实所属行业 | ||
| 156 | +- 再用该行业替换固定的 `apparel` | ||
| 157 | + | ||
| 158 | +当前不做“根据商品类目文本自动猜 profile”的隐式逻辑,避免增加冗余代码与不必要的不确定性。 | ||
| 159 | + | ||
| 160 | +## 7. 缓存与批处理 | ||
| 161 | + | ||
| 162 | +缓存键由以下信息共同决定: | ||
| 163 | + | ||
| 164 | +- `analysis_kind` | ||
| 165 | +- `target_lang` | ||
| 166 | +- prompt/schema 版本指纹 | ||
| 167 | +- prompt 实际输入文本 | ||
| 168 | + | ||
| 169 | +批处理规则: | ||
| 170 | + | ||
| 171 | +- 单次 LLM 调用最多 20 条 | ||
| 172 | +- 上层允许传更大批次,模块内部自动拆批 | ||
| 173 | +- 未命中缓存的批次(uncached batch)可并发执行 |
indexer/taxonomy.md
| @@ -175,8 +175,7 @@ Input product list: | @@ -175,8 +175,7 @@ Input product list: | ||
| 175 | ## 2. Other taxonomy profiles | 175 | ## 2. Other taxonomy profiles |
| 176 | 176 | ||
| 177 | 说明: | 177 | 说明: |
| 178 | -- `apparel` 继续返回 `zh` + `en`。 | ||
| 179 | -- 其他 profile 只返回 `en`,并且只定义英文列名。 | 178 | +- 所有 profile 统一返回 `zh` + `en`。 |
| 180 | - 代码中的 profile slug 与下面保持一致。 | 179 | - 代码中的 profile slug 与下面保持一致。 |
| 181 | 180 | ||
| 182 | | Profile | Core columns (`en`) | | 181 | | Profile | Core columns (`en`) | |
tests/test_llm_enrichment_batch_fill.py
| @@ -10,8 +10,14 @@ from indexer.document_transformer import SPUDocumentTransformer | @@ -10,8 +10,14 @@ from indexer.document_transformer import SPUDocumentTransformer | ||
| 10 | def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | 10 | def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): |
| 11 | seen_calls: List[Dict[str, Any]] = [] | 11 | seen_calls: List[Dict[str, Any]] = [] |
| 12 | 12 | ||
| 13 | - def _fake_build_index_content_fields(items, tenant_id=None): | ||
| 14 | - seen_calls.append({"n": len(items), "tenant_id": tenant_id}) | 13 | + def _fake_build_index_content_fields(items, tenant_id=None, category_taxonomy_profile=None): |
| 14 | + seen_calls.append( | ||
| 15 | + { | ||
| 16 | + "n": len(items), | ||
| 17 | + "tenant_id": tenant_id, | ||
| 18 | + "category_taxonomy_profile": category_taxonomy_profile, | ||
| 19 | + } | ||
| 20 | + ) | ||
| 15 | return [ | 21 | return [ |
| 16 | { | 22 | { |
| 17 | "id": item["id"], | 23 | "id": item["id"], |
| @@ -53,7 +59,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | @@ -53,7 +59,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): | ||
| 53 | 59 | ||
| 54 | transformer.fill_llm_attributes_batch(docs, rows) | 60 | transformer.fill_llm_attributes_batch(docs, rows) |
| 55 | 61 | ||
| 56 | - assert seen_calls == [{"n": 45, "tenant_id": "162"}] | 62 | + assert seen_calls == [{"n": 45, "tenant_id": "162", "category_taxonomy_profile": "apparel"}] |
| 57 | 63 | ||
| 58 | assert docs[0]["qanchors"]["zh"] == ["zh-anchor-0"] | 64 | assert docs[0]["qanchors"]["zh"] == ["zh-anchor-0"] |
| 59 | assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] | 65 | assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] |
tests/test_product_enrich_partial_mode.py
| @@ -559,15 +559,7 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() | @@ -559,15 +559,7 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() | ||
| 559 | ], | 559 | ], |
| 560 | } | 560 | } |
| 561 | ] | 561 | ] |
| 562 | - | ||
| 563 | - | ||
| 564 | -def test_detect_category_taxonomy_profile_matches_category_hints(): | ||
| 565 | - assert product_enrich.detect_category_taxonomy_profile({"category1_name": "玩具"}) == "toys" | ||
| 566 | - assert product_enrich.detect_category_taxonomy_profile({"category": "Beauty & Cosmetics"}) == "beauty" | ||
| 567 | - assert product_enrich.detect_category_taxonomy_profile({"category_path": "Home Appliances / Kitchen"}) == "home_appliances" | ||
| 568 | - | ||
| 569 | - | ||
| 570 | -def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_apparel_returns_en_only(): | 562 | +def test_build_index_content_fields_non_apparel_taxonomy_returns_en_only(): |
| 571 | seen_calls = [] | 563 | seen_calls = [] |
| 572 | 564 | ||
| 573 | def fake_analyze_products( | 565 | def fake_analyze_products( |
| @@ -580,40 +572,6 @@ def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_appa | @@ -580,40 +572,6 @@ def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_appa | ||
| 580 | ): | 572 | ): |
| 581 | seen_calls.append((analysis_kind, target_lang, category_taxonomy_profile, tuple(p["id"] for p in products))) | 573 | seen_calls.append((analysis_kind, target_lang, category_taxonomy_profile, tuple(p["id"] for p in products))) |
| 582 | if analysis_kind == "taxonomy": | 574 | if analysis_kind == "taxonomy": |
| 583 | - if category_taxonomy_profile == "apparel": | ||
| 584 | - return [ | ||
| 585 | - { | ||
| 586 | - "id": products[0]["id"], | ||
| 587 | - "lang": target_lang, | ||
| 588 | - "title_input": products[0]["title"], | ||
| 589 | - "product_type": f"{target_lang}-dress", | ||
| 590 | - "target_gender": f"{target_lang}-women", | ||
| 591 | - "age_group": "", | ||
| 592 | - "season": "", | ||
| 593 | - "fit": "", | ||
| 594 | - "silhouette": "", | ||
| 595 | - "neckline": "", | ||
| 596 | - "sleeve_length_type": "", | ||
| 597 | - "sleeve_style": "", | ||
| 598 | - "strap_type": "", | ||
| 599 | - "rise_waistline": "", | ||
| 600 | - "leg_shape": "", | ||
| 601 | - "skirt_shape": "", | ||
| 602 | - "length_type": "", | ||
| 603 | - "closure_type": "", | ||
| 604 | - "design_details": "", | ||
| 605 | - "fabric": "", | ||
| 606 | - "material_composition": "", | ||
| 607 | - "fabric_properties": "", | ||
| 608 | - "clothing_features": "", | ||
| 609 | - "functional_benefits": "", | ||
| 610 | - "color": "", | ||
| 611 | - "color_family": "", | ||
| 612 | - "print_pattern": "", | ||
| 613 | - "occasion_end_use": "", | ||
| 614 | - "style_aesthetic": "", | ||
| 615 | - } | ||
| 616 | - ] | ||
| 617 | assert category_taxonomy_profile == "toys" | 575 | assert category_taxonomy_profile == "toys" |
| 618 | assert target_lang == "en" | 576 | assert target_lang == "en" |
| 619 | return [ | 577 | return [ |
| @@ -655,21 +613,30 @@ def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_appa | @@ -655,21 +613,30 @@ def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_appa | ||
| 655 | 613 | ||
| 656 | with mock.patch.object(product_enrich, "analyze_products", side_effect=fake_analyze_products): | 614 | with mock.patch.object(product_enrich, "analyze_products", side_effect=fake_analyze_products): |
| 657 | result = product_enrich.build_index_content_fields( | 615 | result = product_enrich.build_index_content_fields( |
| 658 | - items=[ | ||
| 659 | - {"spu_id": "1", "title": "dress", "category_taxonomy_profile": "apparel"}, | ||
| 660 | - {"spu_id": "2", "title": "toy", "category_taxonomy_profile": "toys"}, | ||
| 661 | - ], | 616 | + items=[{"spu_id": "2", "title": "toy"}], |
| 662 | tenant_id="170", | 617 | tenant_id="170", |
| 663 | - category_taxonomy_profile="apparel", | 618 | + category_taxonomy_profile="toys", |
| 664 | ) | 619 | ) |
| 665 | 620 | ||
| 666 | - assert result[0]["enriched_taxonomy_attributes"] == [ | ||
| 667 | - {"name": "Product Type", "value": {"zh": ["zh-dress"], "en": ["en-dress"]}}, | ||
| 668 | - {"name": "Target Gender", "value": {"zh": ["zh-women"], "en": ["en-women"]}}, | ||
| 669 | - ] | ||
| 670 | - assert result[1]["enriched_taxonomy_attributes"] == [ | ||
| 671 | - {"name": "Product Type", "value": {"en": ["doll set"]}}, | ||
| 672 | - {"name": "Age Group", "value": {"en": ["kids"]}}, | 621 | + assert result == [ |
| 622 | + { | ||
| 623 | + "id": "2", | ||
| 624 | + "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]}, | ||
| 625 | + "enriched_tags": {"zh": ["zh-tag"], "en": ["en-tag"]}, | ||
| 626 | + "enriched_attributes": [ | ||
| 627 | + { | ||
| 628 | + "name": "enriched_tags", | ||
| 629 | + "value": { | ||
| 630 | + "zh": ["zh-tag"], | ||
| 631 | + "en": ["en-tag"], | ||
| 632 | + }, | ||
| 633 | + } | ||
| 634 | + ], | ||
| 635 | + "enriched_taxonomy_attributes": [ | ||
| 636 | + {"name": "Product Type", "value": {"en": ["doll set"]}}, | ||
| 637 | + {"name": "Age Group", "value": {"en": ["kids"]}}, | ||
| 638 | + ], | ||
| 639 | + } | ||
| 673 | ] | 640 | ] |
| 674 | assert ("taxonomy", "zh", "toys", ("2",)) not in seen_calls | 641 | assert ("taxonomy", "zh", "toys", ("2",)) not in seen_calls |
| 675 | assert ("taxonomy", "en", "toys", ("2",)) in seen_calls | 642 | assert ("taxonomy", "en", "toys", ("2",)) in seen_calls |