diff --git a/config/config.yaml b/config/config.yaml index ef77711..578bd67 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -114,6 +114,7 @@ field_boosts: qanchors: 1.0 enriched_tags: 1.0 enriched_attributes.value: 1.5 + enriched_taxonomy_attributes.value: 0.3 category_name_text: 2.0 category_path: 2.0 keywords: 2.0 @@ -194,6 +195,7 @@ query_config: - qanchors - enriched_tags - enriched_attributes.value + - enriched_taxonomy_attributes.value - option1_values - option2_values - option3_values @@ -252,6 +254,7 @@ query_config: # - qanchors # - enriched_tags # - enriched_attributes + # - enriched_taxonomy_attributes.value - min_price - compare_at_price - image_url diff --git a/docs/搜索API对接指南-05-索引接口(Indexer).md b/docs/搜索API对接指南-05-索引接口(Indexer).md index 0b444c5..70a65ee 100644 --- a/docs/搜索API对接指南-05-索引接口(Indexer).md +++ b/docs/搜索API对接指南-05-索引接口(Indexer).md @@ -668,9 +668,9 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ - `others` 说明: -- `apparel` 仍返回 `zh` + `en` 两种 taxonomy 值。 -- 其余 profile 的 `enriched_taxonomy_attributes.value` 只返回 `en`,以控制字段体积并保持结构简单。 -- Indexer 内部构建 ES 文档时,如果调用链没有显式指定 profile,会优先根据商品的类目字段自动推断 taxonomy profile;外部调用 `/indexer/enrich-content` 时仍以请求中的 `category_taxonomy_profile` 为准。 +- 所有 profile 的 `enriched_taxonomy_attributes.value` 都统一返回 `zh` + `en`。 +- 外部调用 `/indexer/enrich-content` 时,以请求中的 `category_taxonomy_profile` 为准。 +- 当前 Indexer 内部构建 ES 文档时,taxonomy profile 暂时固定使用 `apparel`;代码里已保留 TODO,后续从数据库读取该租户真实所属行业后再替换。 #### 请求参数 @@ -726,8 +726,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ - 接口不接受语言控制参数。 - 返回哪些语言、返回哪些语义维度,统一由 `indexer.product_enrich` 内部逻辑决定。 -- 当前为了与 `search_products` mapping 对齐,通用增强字段只包含核心索引语言 `zh`、`en`。 -- taxonomy 字段中,`apparel` 返回 `zh`、`en`;其他 profile 仅返回 `en`。 +- 当前为了与 `search_products` mapping 对齐,通用增强字段与 taxonomy 字段都统一只返回核心索引语言 `zh`、`en`。 批量请求建议: - **全量**:强烈建议 尽可能 **20 个 SPU/doc** 攒成一个批次后再请求一次。 @@ -787,7 +786,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | `results[].qanchors` | object | 与 ES `qanchors` 字段同结构,按语言键返回短语数组 | | `results[].enriched_tags` | object | 与 ES `enriched_tags` 字段同结构,按语言键返回标签数组 | | `results[].enriched_attributes` | array | 与 ES `enriched_attributes` nested 字段同结构,每项为 `{ "name", "value": { "zh"?: "...", "en"?: "..." } }` | -| `results[].enriched_taxonomy_attributes` | array | 与 ES `enriched_taxonomy_attributes` nested 字段同结构。`apparel` 每项通常为 `{ "name", "value": { "zh"?: [...], "en"?: [...] } }`;其他 profile 仅返回 `{ "name", "value": { "en": [...] } }` | +| `results[].enriched_taxonomy_attributes` | array | 与 ES `enriched_taxonomy_attributes` nested 字段同结构。每项通常为 `{ "name", "value": { "zh"?: [...], "en"?: [...] } }` | | `results[].error` | string | 若该条处理失败(如 LLM 异常),会在此字段返回错误信息 | **错误响应**: diff --git a/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md b/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md index b3167e1..30ee77c 100644 --- a/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md +++ b/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @@ -444,7 +444,7 @@ curl "http://localhost:6006/health" - **Base URL**: Indexer 服务地址,如 `http://localhost:6004` - **路径**: `POST /indexer/enrich-content` -- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `enrichment_scopes` 选择执行 `generic` / `category_taxonomy`,并通过 `category_taxonomy_profile` 选择对应大类的 taxonomy prompt/profile;默认执行 `generic + category_taxonomy(apparel)`。当前支持的 taxonomy profile 包括 `apparel`、`3c`、`bags`、`pet_supplies`、`electronics`、`outdoor`、`home_appliances`、`home_living`、`wigs`、`beauty`、`accessories`、`toys`、`shoes`、`sports`、`others`。其中 `apparel` 的 taxonomy 输出为 `zh` + `en`,其余 profile 的 taxonomy 输出仅返回 `en`。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。 +- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `enrichment_scopes` 选择执行 `generic` / `category_taxonomy`,并通过 `category_taxonomy_profile` 选择对应大类的 taxonomy prompt/profile;默认执行 `generic + category_taxonomy(apparel)`。当前支持的 taxonomy profile 包括 `apparel`、`3c`、`bags`、`pet_supplies`、`electronics`、`outdoor`、`home_appliances`、`home_living`、`wigs`、`beauty`、`accessories`、`toys`、`shoes`、`sports`、`others`。所有 profile 的 taxonomy 输出都统一返回 `zh` + `en`,`category_taxonomy_profile` 只决定字段集合。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。 diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py index e0fb19e..72dcdf3 100644 --- a/indexer/document_transformer.py +++ b/indexer/document_transformer.py @@ -259,13 +259,6 @@ class SPUDocumentTransformer: title = str(row.get("title") or "").strip() if not spu_id or not title: continue - category_path_obj = docs[i].get("category_path") or {} - resolved_category_path = "" - if isinstance(category_path_obj, dict): - resolved_category_path = next( - (str(value).strip() for value in category_path_obj.values() if str(value).strip()), - "", - ) id_to_idx[spu_id] = i items.append( { @@ -274,9 +267,6 @@ class SPUDocumentTransformer: "brief": str(row.get("brief") or "").strip(), "description": str(row.get("description") or "").strip(), "image_url": str(row.get("image_src") or "").strip(), - "category": str(row.get("category") or "").strip(), - "category_path": resolved_category_path, - "category1_name": str(docs[i].get("category1_name") or "").strip(), } ) if not items: @@ -284,7 +274,12 @@ class SPUDocumentTransformer: tenant_id = str(docs[0].get("tenant_id") or "").strip() or None try: - results = build_index_content_fields(items=items, tenant_id=tenant_id) + # TODO: 从数据库读取该 tenant 的真实行业,并据此替换当前默认的 apparel profile。 + results = build_index_content_fields( + items=items, + tenant_id=tenant_id, + category_taxonomy_profile="apparel", + ) except Exception as e: logger.warning("LLM batch attribute fill failed: %s", e) return @@ -679,6 +674,7 @@ class SPUDocumentTransformer: tenant_id = doc.get("tenant_id") try: + # TODO: 从数据库读取该 tenant 的真实行业,并据此替换当前默认的 apparel profile。 results = build_index_content_fields( items=[ { @@ -687,19 +683,10 @@ class SPUDocumentTransformer: "brief": str(spu_row.get("brief") or "").strip(), "description": str(spu_row.get("description") or "").strip(), "image_url": str(spu_row.get("image_src") or "").strip(), - "category": str(spu_row.get("category") or "").strip(), - "category_path": next( - ( - str(value).strip() - for value in (doc.get("category_path") or {}).values() - if str(value).strip() - ), - "", - ), - "category1_name": str(doc.get("category1_name") or "").strip(), } ], tenant_id=str(tenant_id), + category_taxonomy_profile="apparel", ) except Exception as e: logger.warning("LLM attribute fill failed for SPU %s: %s", spu_id, e) diff --git a/indexer/product_enrich.py b/indexer/product_enrich.py index 848287b..7dca838 100644 --- a/indexer/product_enrich.py +++ b/indexer/product_enrich.py @@ -18,7 +18,7 @@ from dataclasses import dataclass, field from collections import OrderedDict from datetime import datetime from concurrent.futures import ThreadPoolExecutor -from typing import List, Dict, Tuple, Any, Optional +from typing import List, Dict, Tuple, Any, Optional, FrozenSet import redis import requests @@ -146,8 +146,22 @@ if _missing_prompt_langs: ) -# 多值字段分隔:英文逗号、中文逗号、顿号,及历史约定的 ; | / 与空白 +# 多值字段分隔 _MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+") +# 表格单元格中视为「无内容」的占位 +_MARKDOWN_EMPTY_CELL_LITERALS: Tuple[str, ...] = ("-","–", "—", "none", "null", "n/a", "无") +_MARKDOWN_EMPTY_CELL_TOKENS_CF: FrozenSet[str] = frozenset( + lit.casefold() for lit in _MARKDOWN_EMPTY_CELL_LITERALS +) + +def _normalize_markdown_table_cell(raw: Optional[str]) -> str: + """strip;将占位符统一视为空字符串。""" + s = str(raw or "").strip() + if not s: + return "" + if s.casefold() in _MARKDOWN_EMPTY_CELL_TOKENS_CF: + return "" + return s _CORE_INDEX_LANGUAGES = ("zh", "en") _DEFAULT_ENRICHMENT_SCOPES = ("generic", "category_taxonomy") _DEFAULT_CATEGORY_TAXONOMY_PROFILE = "apparel" @@ -195,19 +209,12 @@ class AnalysisSchema: markdown_table_headers: Dict[str, List[str]] result_fields: Tuple[str, ...] meaningful_fields: Tuple[str, ...] - output_languages: Tuple[str, ...] = ("zh", "en") cache_version: str = "v1" field_aliases: Dict[str, Tuple[str, ...]] = field(default_factory=dict) - fallback_headers: Optional[List[str]] = None quality_fields: Tuple[str, ...] = () def get_headers(self, target_lang: str) -> Optional[List[str]]: - headers = self.markdown_table_headers.get(target_lang) - if headers: - return headers - if self.fallback_headers: - return self.fallback_headers - return None + return self.markdown_table_headers.get(target_lang) _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { @@ -217,7 +224,6 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { markdown_table_headers=LANGUAGE_MARKDOWN_TABLE_HEADERS, result_fields=_CONTENT_ANALYSIS_RESULT_FIELDS, meaningful_fields=_CONTENT_ANALYSIS_MEANINGFUL_FIELDS, - output_languages=_CORE_INDEX_LANGUAGES, cache_version="v2", field_aliases=_CONTENT_ANALYSIS_FIELD_ALIASES, quality_fields=_CONTENT_ANALYSIS_QUALITY_FIELDS, @@ -225,17 +231,13 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = { } def _build_taxonomy_profile_schema(profile: str, config: Dict[str, Any]) -> AnalysisSchema: - result_fields = tuple(field["key"] for field in config["fields"]) - headers = config["markdown_table_headers"] return AnalysisSchema( name=f"taxonomy:{profile}", shared_instruction=config["shared_instruction"], - markdown_table_headers=headers, - result_fields=result_fields, - meaningful_fields=result_fields, - output_languages=tuple(config["output_languages"]), + markdown_table_headers=config["markdown_table_headers"], + result_fields=tuple(field["key"] for field in config["fields"]), + meaningful_fields=tuple(field["key"] for field in config["fields"]), cache_version="v1", - fallback_headers=headers.get("en") if len(headers) > 1 else None, ) @@ -254,29 +256,6 @@ def get_supported_category_taxonomy_profiles() -> Tuple[str, ...]: return tuple(_CATEGORY_TAXONOMY_PROFILE_SCHEMAS.keys()) -def _normalize_category_hint(text: Any) -> str: - value = str(text or "").strip().lower() - if not value: - return "" - value = value.replace("_", " ").replace(">", " ").replace("/", " ") - value = re.sub(r"\s+", " ", value) - return value - - -_CATEGORY_TAXONOMY_PROFILE_ALIAS_MATCHERS: Tuple[Tuple[str, str], ...] = tuple( - sorted( - ( - (_normalize_category_hint(alias), profile) - for profile, config in CATEGORY_TAXONOMY_PROFILES.items() - for alias in (profile, *tuple(config.get("aliases") or ())) - if _normalize_category_hint(alias) - ), - key=lambda item: len(item[0]), - reverse=True, - ) -) - - def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str] = None) -> str: profile = str(category_taxonomy_profile or _DEFAULT_CATEGORY_TAXONOMY_PROFILE).strip() if profile not in _CATEGORY_TAXONOMY_PROFILE_SCHEMAS: @@ -287,41 +266,6 @@ def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str return profile -def detect_category_taxonomy_profile(item: Dict[str, Any]) -> Optional[str]: - """ - 根据商品已有类目信息猜测 taxonomy profile。 - 未命中时返回 None,由上层决定是否回退到默认 profile。 - """ - category_hints = ( - item.get("category_taxonomy_profile"), - item.get("category1_name"), - item.get("category_name_text"), - item.get("category"), - item.get("category_path"), - ) - for hint in category_hints: - normalized_hint = _normalize_category_hint(hint) - if not normalized_hint: - continue - for alias, profile in _CATEGORY_TAXONOMY_PROFILE_ALIAS_MATCHERS: - if alias and alias in normalized_hint: - return profile - return None - - -def _resolve_category_taxonomy_profile( - item: Dict[str, Any], - fallback_profile: Optional[str] = None, -) -> str: - explicit_profile = str(item.get("category_taxonomy_profile") or "").strip() - if explicit_profile: - return _normalize_category_taxonomy_profile(explicit_profile) - detected_profile = detect_category_taxonomy_profile(item) - if detected_profile: - return detected_profile - return _normalize_category_taxonomy_profile(fallback_profile) - - def _get_analysis_schema( analysis_kind: str, *, @@ -342,17 +286,6 @@ def _get_taxonomy_attribute_field_map( return _CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS[profile] -def _get_analysis_output_languages( - analysis_kind: str, - *, - category_taxonomy_profile: Optional[str] = None, -) -> Tuple[str, ...]: - return _get_analysis_schema( - analysis_kind, - category_taxonomy_profile=category_taxonomy_profile, - ).output_languages - - def _normalize_enrichment_scopes( enrichment_scopes: Optional[List[str]] = None, ) -> Tuple[str, ...]: @@ -562,11 +495,6 @@ def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]: "brief": str(item.get("brief") or "").strip(), "description": str(item.get("description") or "").strip(), "image_url": str(item.get("image_url") or "").strip(), - "category": str(item.get("category") or "").strip(), - "category_path": str(item.get("category_path") or "").strip(), - "category_name_text": str(item.get("category_name_text") or "").strip(), - "category1_name": str(item.get("category1_name") or "").strip(), - "category_taxonomy_profile": str(item.get("category_taxonomy_profile") or "").strip(), } @@ -584,8 +512,7 @@ def build_index_content_fields( - `title` - 可选 `brief` / `description` / `image_url` - 可选 `enrichment_scopes`,默认同时执行 `generic` 与 `category_taxonomy` - - 可选 `category_taxonomy_profile`;若不传,则优先根据 item 自带的类目字段推断,否则回退到默认 `apparel` - - 可选类目提示字段:`category` / `category_path` / `category_name_text` / `category1_name` + - 可选 `category_taxonomy_profile`,默认 `apparel` 返回项结构: - `id` @@ -600,21 +527,10 @@ def build_index_content_fields( - `enriched_tags.{lang}` 为标签数组 """ requested_enrichment_scopes = _normalize_enrichment_scopes(enrichment_scopes) - fallback_taxonomy_profile = ( - _normalize_category_taxonomy_profile(category_taxonomy_profile) - if category_taxonomy_profile - else None - ) + normalized_taxonomy_profile = _normalize_category_taxonomy_profile(category_taxonomy_profile) normalized_items = [_normalize_index_content_item(item) for item in items] if not normalized_items: return [] - taxonomy_profile_by_id = { - item["id"]: _resolve_category_taxonomy_profile( - item, - fallback_profile=fallback_taxonomy_profile, - ) - for item in normalized_items - } results_by_id: Dict[str, Dict[str, Any]] = { item["id"]: { @@ -627,7 +543,7 @@ def build_index_content_fields( for item in normalized_items } - for lang in _get_analysis_output_languages("content"): + for lang in _CORE_INDEX_LANGUAGES: if "generic" in requested_enrichment_scopes: try: rows = analyze_products( @@ -636,7 +552,7 @@ def build_index_content_fields( batch_size=BATCH_SIZE, tenant_id=tenant_id, analysis_kind="content", - category_taxonomy_profile=fallback_taxonomy_profile, + category_taxonomy_profile=normalized_taxonomy_profile, ) except Exception as e: logger.warning("build_index_content_fields content enrichment failed for lang=%s: %s", lang, e) @@ -654,48 +570,40 @@ def build_index_content_fields( _apply_index_content_row(results_by_id[item_id], row=row, lang=lang) if "category_taxonomy" in requested_enrichment_scopes: - items_by_profile: Dict[str, List[Dict[str, str]]] = {} - for item in normalized_items: - items_by_profile.setdefault(taxonomy_profile_by_id[item["id"]], []).append(item) - - for taxonomy_profile, profile_items in items_by_profile.items(): - for lang in _get_analysis_output_languages( - "taxonomy", - category_taxonomy_profile=taxonomy_profile, - ): - try: - taxonomy_rows = analyze_products( - products=profile_items, - target_lang=lang, - batch_size=BATCH_SIZE, - tenant_id=tenant_id, - analysis_kind="taxonomy", - category_taxonomy_profile=taxonomy_profile, - ) - except Exception as e: - logger.warning( - "build_index_content_fields taxonomy enrichment failed for profile=%s lang=%s: %s", - taxonomy_profile, - lang, - e, - ) - for item in profile_items: - results_by_id[item["id"]].setdefault("error", str(e)) - continue + for lang in _CORE_INDEX_LANGUAGES: + try: + taxonomy_rows = analyze_products( + products=normalized_items, + target_lang=lang, + batch_size=BATCH_SIZE, + tenant_id=tenant_id, + analysis_kind="taxonomy", + category_taxonomy_profile=normalized_taxonomy_profile, + ) + except Exception as e: + logger.warning( + "build_index_content_fields taxonomy enrichment failed for profile=%s lang=%s: %s", + normalized_taxonomy_profile, + lang, + e, + ) + for item in normalized_items: + results_by_id[item["id"]].setdefault("error", str(e)) + continue - for row in taxonomy_rows or []: - item_id = str(row.get("id") or "").strip() - if not item_id or item_id not in results_by_id: - continue - if row.get("error"): - results_by_id[item_id].setdefault("error", row["error"]) - continue - _apply_index_taxonomy_row( - results_by_id[item_id], - row=row, - lang=lang, - category_taxonomy_profile=taxonomy_profile, - ) + for row in taxonomy_rows or []: + item_id = str(row.get("id") or "").strip() + if not item_id or item_id not in results_by_id: + continue + if row.get("error"): + results_by_id[item_id].setdefault("error", row["error"]) + continue + _apply_index_taxonomy_row( + results_by_id[item_id], + row=row, + lang=lang, + category_taxonomy_profile=normalized_taxonomy_profile, + ) return [results_by_id[item["id"]] for item in normalized_items] @@ -1173,7 +1081,8 @@ def parse_markdown_table( if len(parts) >= 2: row = {"seq_no": parts[0]} for field_index, field_name in enumerate(schema.result_fields, start=1): - row[field_name] = parts[field_index] if len(parts) > field_index else "" + cell = parts[field_index] if len(parts) > field_index else "" + row[field_name] = _normalize_markdown_table_cell(cell) data.append(row) return data diff --git a/indexer/product_enrich_prompts.py b/indexer/product_enrich_prompts.py index 8c328cd..6c2d05a 100644 --- a/indexer/product_enrich_prompts.py +++ b/indexer/product_enrich_prompts.py @@ -60,11 +60,9 @@ def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[st "", "Rules:", "- Keep the same row order and row count as input.", - "- Infer only from the provided product text.", - "- Leave blank if not applicable or not reasonably supported.", + "- Leave blank if not applicable, unmentioned, or unsupported.", "- Use concise, standardized ecommerce wording.", - "- Do not combine different attribute dimensions in one field.", - "- If multiple values are needed, use the delimiter required by the localization setting.", + "- If multiple values, separate with commas.", "", "Input product list:", ] @@ -75,19 +73,14 @@ def _build_taxonomy_shared_instruction(profile_label: str, fields: Tuple[Dict[st def _make_taxonomy_profile( profile_label: str, fields: Tuple[Dict[str, str], ...], - *, - aliases: Tuple[str, ...], - output_languages: Tuple[str, ...] = ("en",), - zh_headers: Tuple[str, ...] = (), ) -> Dict[str, Any]: - headers = {"en": ["No.", *[field["label"] for field in fields]]} - if zh_headers: - headers["zh"] = ["序号", *zh_headers] + headers = { + "en": ["No.", *[field["label"] for field in fields]], + "zh": ["序号", *[field["zh_label"] for field in fields]], + } return { "profile_label": profile_label, "fields": fields, - "aliases": aliases, - "output_languages": output_languages, "shared_instruction": _build_taxonomy_shared_instruction(profile_label, fields), "markdown_table_headers": headers, } @@ -123,268 +116,250 @@ APPAREL_TAXONOMY_FIELDS = ( ) THREE_C_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise 3C accessory or peripheral category label"), - _taxonomy_field("compatible_device", "Compatible Device / Model", "supported device family, series, model, or form factor when clearly stated"), - _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, wireless, Bluetooth, Wi-Fi, NFC, or 2.4G"), - _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant connector or port, e.g. USB-C, Lightning, HDMI, AUX, RJ45"), - _taxonomy_field("power_charging", "Power Source / Charging", "charging or power mode, e.g. battery powered, fast charging, rechargeable, plug-in"), - _taxonomy_field("key_features", "Key Features", "primary hardware features such as noise cancelling, foldable, magnetic, backlit, waterproof"), - _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("pack_size", "Pack Size", "unit count or bundle size when stated"), - _taxonomy_field("use_case", "Use Case", "intended usage such as travel, office, gaming, car, charging, streaming"), + _taxonomy_field("product_type", "Product Type", "concise 3C accessory or peripheral category label", "品类"), + _taxonomy_field("compatible_device", "Compatible Device / Model", "supported device family, series, model, or form factor when clearly stated", "适配设备 / 型号"), + _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, wireless, Bluetooth, Wi-Fi, NFC, or 2.4G", "连接方式"), + _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant connector or port, e.g. USB-C, Lightning, HDMI, AUX, RJ45", "接口 / 端口类型"), + _taxonomy_field("power_charging", "Power Source / Charging", "charging or power mode, e.g. battery powered, fast charging, rechargeable, plug-in", "供电 / 充电方式"), + _taxonomy_field("key_features", "Key Features", "primary hardware features such as noise cancelling, foldable, magnetic, backlit, waterproof", "关键特征"), + _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported", "材质 / 表面处理"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("pack_size", "Pack Size", "unit count or bundle size when stated", "包装规格"), + _taxonomy_field("use_case", "Use Case", "intended usage such as travel, office, gaming, car, charging, streaming", "使用场景"), ) BAGS_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise bag category such as backpack, tote bag, crossbody bag, luggage, or wallet"), - _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied"), - _taxonomy_field("carry_style", "Carry Style", "how the bag is worn or carried, e.g. handheld, shoulder, crossbody, backpack"), - _taxonomy_field("size_capacity", "Size / Capacity", "size tier or capacity when supported, e.g. mini, large capacity, 20L"), - _taxonomy_field("material", "Material", "main bag material such as leather, nylon, canvas, PU, straw"), - _taxonomy_field("closure_type", "Closure Type", "bag closure such as zipper, flap, buckle, drawstring, magnetic snap"), - _taxonomy_field("structure_compartments", "Structure / Compartments", "organizational structure such as multi-pocket, laptop sleeve, card slots, expandable"), - _taxonomy_field("strap_handle_type", "Strap / Handle Type", "strap or handle design such as chain strap, top handle, adjustable strap"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as commute, travel, evening, school, casual"), + _taxonomy_field("product_type", "Product Type", "concise bag category such as backpack, tote bag, crossbody bag, luggage, or wallet", "品类"), + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"), + _taxonomy_field("carry_style", "Carry Style", "how the bag is worn or carried, e.g. handheld, shoulder, crossbody, backpack", "携带方式"), + _taxonomy_field("size_capacity", "Size / Capacity", "size tier or capacity when supported, e.g. mini, large capacity, 20L", "尺寸 / 容量"), + _taxonomy_field("material", "Material", "main bag material such as leather, nylon, canvas, PU, straw", "材质"), + _taxonomy_field("closure_type", "Closure Type", "bag closure such as zipper, flap, buckle, drawstring, magnetic snap", "闭合方式"), + _taxonomy_field("structure_compartments", "Structure / Compartments", "organizational structure such as multi-pocket, laptop sleeve, card slots, expandable", "结构 / 分层"), + _taxonomy_field("strap_handle_type", "Strap / Handle Type", "strap or handle design such as chain strap, top handle, adjustable strap", "肩带 / 提手类型"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as commute, travel, evening, school, casual", "适用场景"), ) PET_SUPPLIES_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise pet supplies category label"), - _taxonomy_field("pet_type", "Pet Type", "target pet such as dog, cat, bird, fish, hamster"), - _taxonomy_field("breed_size", "Breed Size", "pet size or breed size when stated, e.g. small breed, large dogs"), - _taxonomy_field("life_stage", "Life Stage", "pet age stage when supported, e.g. puppy, kitten, adult, senior"), - _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredient composition when supported"), - _taxonomy_field("flavor_scent", "Flavor / Scent", "flavor or scent when applicable"), - _taxonomy_field("key_features", "Key Features", "primary attributes such as interactive, leak-proof, orthopedic, washable, elevated"), - _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as dental care, calming, digestion support, joint support"), - _taxonomy_field("size_capacity", "Size / Capacity", "size, count, or net content when stated"), - _taxonomy_field("use_scenario", "Use Scenario", "usage such as feeding, training, grooming, travel, indoor play"), + _taxonomy_field("product_type", "Product Type", "concise pet supplies category label", "品类"), + _taxonomy_field("pet_type", "Pet Type", "target pet such as dog, cat, bird, fish, hamster", "宠物类型"), + _taxonomy_field("breed_size", "Breed Size", "pet size or breed size when stated, e.g. small breed, large dogs", "体型 / 品种大小"), + _taxonomy_field("life_stage", "Life Stage", "pet age stage when supported, e.g. puppy, kitten, adult, senior", "成长阶段"), + _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredient composition when supported", "材质 / 成分"), + _taxonomy_field("flavor_scent", "Flavor / Scent", "flavor or scent when applicable", "口味 / 气味"), + _taxonomy_field("key_features", "Key Features", "primary attributes such as interactive, leak-proof, orthopedic, washable, elevated", "关键特征"), + _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as dental care, calming, digestion support, joint support", "功能"), + _taxonomy_field("size_capacity", "Size / Capacity", "size, count, or net content when stated", "尺寸 / 容量"), + _taxonomy_field("use_scenario", "Use Scenario", "usage such as feeding, training, grooming, travel, indoor play", "使用场景"), ) ELECTRONICS_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise electronics device or component category label"), - _taxonomy_field("device_category", "Device Category / Compatibility", "supported platform, component class, or compatible device family when stated"), - _taxonomy_field("power_voltage", "Power / Voltage", "power, voltage, wattage, or battery spec when supported"), - _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, Bluetooth, Wi-Fi, RF, or smart app control"), - _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant port or interface such as USB-C, AC plug type, HDMI, SATA"), - _taxonomy_field("capacity_storage", "Capacity / Storage", "capacity or storage spec such as 256GB, 2TB, 5000mAh"), - _taxonomy_field("key_features", "Key Features", "main product features such as touch control, HD display, noise reduction, smart control"), - _taxonomy_field("material_finish", "Material / Finish", "main housing material or finish when supported"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("use_case", "Use Case", "intended use such as home entertainment, office, charging, security, repair"), + _taxonomy_field("product_type", "Product Type", "concise electronics device or component category label", "品类"), + _taxonomy_field("device_category", "Device Category / Compatibility", "supported platform, component class, or compatible device family when stated", "设备类别 / 兼容性"), + _taxonomy_field("power_voltage", "Power / Voltage", "power, voltage, wattage, or battery spec when supported", "功率 / 电压"), + _taxonomy_field("connectivity", "Connectivity", "connection method such as wired, Bluetooth, Wi-Fi, RF, or smart app control", "连接方式"), + _taxonomy_field("interface_port_type", "Interface / Port Type", "relevant port or interface such as USB-C, AC plug type, HDMI, SATA", "接口 / 端口类型"), + _taxonomy_field("capacity_storage", "Capacity / Storage", "capacity or storage spec such as 256GB, 2TB, 5000mAh", "容量 / 存储"), + _taxonomy_field("key_features", "Key Features", "main product features such as touch control, HD display, noise reduction, smart control", "关键特征"), + _taxonomy_field("material_finish", "Material / Finish", "main housing material or finish when supported", "材质 / 表面处理"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("use_case", "Use Case", "intended use such as home entertainment, office, charging, security, repair", "使用场景"), ) OUTDOOR_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise outdoor gear category label"), - _taxonomy_field("activity_type", "Activity Type", "primary outdoor activity such as camping, hiking, fishing, climbing, travel"), - _taxonomy_field("season_weather", "Season / Weather", "season or weather suitability when supported"), - _taxonomy_field("material", "Material", "main material such as aluminum, ripstop nylon, stainless steel, EVA"), - _taxonomy_field("capacity_size", "Capacity / Size", "size, length, or capacity when stated"), - _taxonomy_field("protection_resistance", "Protection / Resistance", "resistance or protection such as waterproof, UV resistant, windproof"), - _taxonomy_field("key_features", "Key Features", "primary gear attributes such as foldable, lightweight, insulated, non-slip"), - _taxonomy_field("portability_packability", "Portability / Packability", "carry or storage trait such as collapsible, compact, ultralight, packable"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("use_scenario", "Use Scenario", "likely use setting such as campsite, trail, survival kit, beach, picnic"), + _taxonomy_field("product_type", "Product Type", "concise outdoor gear category label", "品类"), + _taxonomy_field("activity_type", "Activity Type", "primary outdoor activity such as camping, hiking, fishing, climbing, travel", "活动类型"), + _taxonomy_field("season_weather", "Season / Weather", "season or weather suitability when supported", "适用季节 / 天气"), + _taxonomy_field("material", "Material", "main material such as aluminum, ripstop nylon, stainless steel, EVA", "材质"), + _taxonomy_field("capacity_size", "Capacity / Size", "size, length, or capacity when stated", "容量 / 尺寸"), + _taxonomy_field("protection_resistance", "Protection / Resistance", "resistance or protection such as waterproof, UV resistant, windproof", "防护 / 耐受性"), + _taxonomy_field("key_features", "Key Features", "primary gear attributes such as foldable, lightweight, insulated, non-slip", "关键特征"), + _taxonomy_field("portability_packability", "Portability / Packability", "carry or storage trait such as collapsible, compact, ultralight, packable", "便携 / 收纳性"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("use_scenario", "Use Scenario", "likely use setting such as campsite, trail, survival kit, beach, picnic", "使用场景"), ) HOME_APPLIANCES_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise home appliance category label"), - _taxonomy_field("appliance_category", "Appliance Category", "functional class such as kitchen appliance, cleaning appliance, personal care appliance"), - _taxonomy_field("power_voltage", "Power / Voltage", "wattage, voltage, plug type, or power supply when supported"), - _taxonomy_field("capacity_coverage", "Capacity / Coverage", "capacity or coverage metric such as 1.5L, 20L, 40sqm"), - _taxonomy_field("control_method", "Control Method", "operation method such as touch, knob, remote, app control"), - _taxonomy_field("installation_type", "Installation Type", "setup style such as countertop, handheld, portable, wall-mounted, built-in"), - _taxonomy_field("key_features", "Key Features", "main product features such as timer, steam, HEPA filter, self-cleaning"), - _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as cooking, cleaning, grooming, cooling, air treatment"), + _taxonomy_field("product_type", "Product Type", "concise home appliance category label", "品类"), + _taxonomy_field("appliance_category", "Appliance Category", "functional class such as kitchen appliance, cleaning appliance, personal care appliance", "家电类别"), + _taxonomy_field("power_voltage", "Power / Voltage", "wattage, voltage, plug type, or power supply when supported", "功率 / 电压"), + _taxonomy_field("capacity_coverage", "Capacity / Coverage", "capacity or coverage metric such as 1.5L, 20L, 40sqm", "容量 / 覆盖范围"), + _taxonomy_field("control_method", "Control Method", "operation method such as touch, knob, remote, app control", "控制方式"), + _taxonomy_field("installation_type", "Installation Type", "setup style such as countertop, handheld, portable, wall-mounted, built-in", "安装方式"), + _taxonomy_field("key_features", "Key Features", "main product features such as timer, steam, HEPA filter, self-cleaning", "关键特征"), + _taxonomy_field("material_finish", "Material / Finish", "main material or exterior finish when supported", "材质 / 表面处理"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as cooking, cleaning, grooming, cooling, air treatment", "使用场景"), ) HOME_LIVING_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise home and living category label"), - _taxonomy_field("room_placement", "Room / Placement", "intended room or placement such as bedroom, kitchen, bathroom, desktop"), - _taxonomy_field("material", "Material", "main material such as wood, ceramic, cotton, glass, metal"), - _taxonomy_field("style", "Style", "home style such as modern, farmhouse, minimalist, boho, Nordic"), - _taxonomy_field("size_dimensions", "Size / Dimensions", "size or dimensions when stated"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("pattern_finish", "Pattern / Finish", "surface pattern or finish such as solid, marble, matte, ribbed"), - _taxonomy_field("key_features", "Key Features", "main product features such as stackable, washable, blackout, space-saving"), - _taxonomy_field("assembly_installation", "Assembly / Installation", "assembly or installation trait when supported"), - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as storage, dining, decor, sleep, organization"), + _taxonomy_field("product_type", "Product Type", "concise home and living category label", "品类"), + _taxonomy_field("room_placement", "Room / Placement", "intended room or placement such as bedroom, kitchen, bathroom, desktop", "适用空间 / 摆放位置"), + _taxonomy_field("material", "Material", "main material such as wood, ceramic, cotton, glass, metal", "材质"), + _taxonomy_field("style", "Style", "home style such as modern, farmhouse, minimalist, boho, Nordic", "风格"), + _taxonomy_field("size_dimensions", "Size / Dimensions", "size or dimensions when stated", "尺寸 / 规格"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("pattern_finish", "Pattern / Finish", "surface pattern or finish such as solid, marble, matte, ribbed", "图案 / 表面处理"), + _taxonomy_field("key_features", "Key Features", "main product features such as stackable, washable, blackout, space-saving", "关键特征"), + _taxonomy_field("assembly_installation", "Assembly / Installation", "assembly or installation trait when supported", "组装 / 安装"), + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as storage, dining, decor, sleep, organization", "使用场景"), ) WIGS_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise wig or hairpiece category label"), - _taxonomy_field("hair_material", "Hair Material", "hair material such as human hair, synthetic fiber, heat-resistant fiber"), - _taxonomy_field("hair_texture", "Hair Texture", "texture or curl pattern such as straight, body wave, curly, kinky"), - _taxonomy_field("hair_length", "Hair Length", "hair length when stated"), - _taxonomy_field("hair_color", "Hair Color", "specific hair color or blend when available"), - _taxonomy_field("cap_construction", "Cap Construction", "cap type such as full lace, lace front, glueless, U part"), - _taxonomy_field("lace_area_part_type", "Lace Area / Part Type", "lace size or part style such as 13x4 lace, middle part, T part"), - _taxonomy_field("density_volume", "Density / Volume", "hair density or fullness when supported"), - _taxonomy_field("style_bang_type", "Style / Bang Type", "style cue such as bob, pixie, layered, with bangs"), - _taxonomy_field("occasion_end_use", "Occasion / End Use", "intended use such as daily wear, cosplay, protective style, party"), + _taxonomy_field("product_type", "Product Type", "concise wig or hairpiece category label", "品类"), + _taxonomy_field("hair_material", "Hair Material", "hair material such as human hair, synthetic fiber, heat-resistant fiber", "发丝材质"), + _taxonomy_field("hair_texture", "Hair Texture", "texture or curl pattern such as straight, body wave, curly, kinky", "发质纹理"), + _taxonomy_field("hair_length", "Hair Length", "hair length when stated", "发长"), + _taxonomy_field("hair_color", "Hair Color", "specific hair color or blend when available", "发色"), + _taxonomy_field("cap_construction", "Cap Construction", "cap type such as full lace, lace front, glueless, U part", "帽网结构"), + _taxonomy_field("lace_area_part_type", "Lace Area / Part Type", "lace size or part style such as 13x4 lace, middle part, T part", "蕾丝面积 / 分缝类型"), + _taxonomy_field("density_volume", "Density / Volume", "hair density or fullness when supported", "密度 / 发量"), + _taxonomy_field("style_bang_type", "Style / Bang Type", "style cue such as bob, pixie, layered, with bangs", "款式 / 刘海类型"), + _taxonomy_field("occasion_end_use", "Occasion / End Use", "intended use such as daily wear, cosplay, protective style, party", "适用场景"), ) BEAUTY_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise beauty or cosmetics category label"), - _taxonomy_field("target_area", "Target Area", "target area such as face, lips, eyes, nails, hair, body"), - _taxonomy_field("skin_hair_type", "Skin Type / Hair Type", "suitable skin or hair type when supported"), - _taxonomy_field("finish_effect", "Finish / Effect", "cosmetic finish or effect such as matte, dewy, volumizing, brightening"), - _taxonomy_field("key_ingredients", "Key Ingredients", "notable ingredients when stated"), - _taxonomy_field("shade_color", "Shade / Color", "specific shade or color when available"), - _taxonomy_field("scent", "Scent", "fragrance or scent only when supported"), - _taxonomy_field("formulation", "Formulation", "product form such as cream, serum, powder, gel, stick"), - _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as hydration, anti-aging, long-wear, repair, sun protection"), - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as daily routine, salon, travel, evening makeup"), + _taxonomy_field("product_type", "Product Type", "concise beauty or cosmetics category label", "品类"), + _taxonomy_field("target_area", "Target Area", "target area such as face, lips, eyes, nails, hair, body", "适用部位"), + _taxonomy_field("skin_hair_type", "Skin Type / Hair Type", "suitable skin or hair type when supported", "肤质 / 发质"), + _taxonomy_field("finish_effect", "Finish / Effect", "cosmetic finish or effect such as matte, dewy, volumizing, brightening", "妆效 / 效果"), + _taxonomy_field("key_ingredients", "Key Ingredients", "notable ingredients when stated", "关键成分"), + _taxonomy_field("shade_color", "Shade / Color", "specific shade or color when available", "色号 / 颜色"), + _taxonomy_field("scent", "Scent", "fragrance or scent only when supported", "香味"), + _taxonomy_field("formulation", "Formulation", "product form such as cream, serum, powder, gel, stick", "剂型 / 形态"), + _taxonomy_field("functional_benefits", "Functional Benefits", "benefits such as hydration, anti-aging, long-wear, repair, sun protection", "功能"), + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as daily routine, salon, travel, evening makeup", "使用场景"), ) ACCESSORIES_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise accessory category label such as necklace, watch, belt, hat, or sunglasses"), - _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied"), - _taxonomy_field("material", "Material", "main material such as alloy, leather, stainless steel, acetate, fabric"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("pattern_finish", "Pattern / Finish", "surface treatment or style finish such as polished, textured, braided, rhinestone"), - _taxonomy_field("closure_fastening", "Closure / Fastening", "fastening method when applicable"), - _taxonomy_field("size_fit", "Size / Fit", "size or fit information such as adjustable, one size, 42mm"), - _taxonomy_field("style", "Style", "style cue such as minimalist, vintage, statement, sporty"), - _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as daily wear, formal, party, travel, sun protection"), - _taxonomy_field("set_pack_size", "Set / Pack Size", "set count or pack size when stated"), + _taxonomy_field("product_type", "Product Type", "concise accessory category label such as necklace, watch, belt, hat, or sunglasses", "品类"), + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"), + _taxonomy_field("material", "Material", "main material such as alloy, leather, stainless steel, acetate, fabric", "材质"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("pattern_finish", "Pattern / Finish", "surface treatment or style finish such as polished, textured, braided, rhinestone", "图案 / 表面处理"), + _taxonomy_field("closure_fastening", "Closure / Fastening", "fastening method when applicable", "闭合 / 固定方式"), + _taxonomy_field("size_fit", "Size / Fit", "size or fit information such as adjustable, one size, 42mm", "尺寸 / 适配"), + _taxonomy_field("style", "Style", "style cue such as minimalist, vintage, statement, sporty", "风格"), + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as daily wear, formal, party, travel, sun protection", "适用场景"), + _taxonomy_field("set_pack_size", "Set / Pack Size", "set count or pack size when stated", "套装 / 规格"), ) TOYS_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise toy category label"), - _taxonomy_field("age_group", "Age Group", "intended age group when clearly implied"), - _taxonomy_field("character_theme", "Character / Theme", "licensed character, theme, or play theme when supported"), - _taxonomy_field("material", "Material", "main toy material such as plush, plastic, wood, silicone"), - _taxonomy_field("power_source", "Power Source", "battery, rechargeable, wind-up, or non-powered when supported"), - _taxonomy_field("interactive_features", "Interactive Features", "interactive functions such as sound, lights, remote control, motion"), - _taxonomy_field("educational_play_value", "Educational / Play Value", "play value such as STEM, pretend play, sensory, puzzle solving"), - _taxonomy_field("piece_count_size", "Piece Count / Size", "piece count or size when stated"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as indoor play, bath time, party favor, outdoor play"), + _taxonomy_field("product_type", "Product Type", "concise toy category label", "品类"), + _taxonomy_field("age_group", "Age Group", "intended age group when clearly implied", "年龄段"), + _taxonomy_field("character_theme", "Character / Theme", "licensed character, theme, or play theme when supported", "角色 / 主题"), + _taxonomy_field("material", "Material", "main toy material such as plush, plastic, wood, silicone", "材质"), + _taxonomy_field("power_source", "Power Source", "battery, rechargeable, wind-up, or non-powered when supported", "供电方式"), + _taxonomy_field("interactive_features", "Interactive Features", "interactive functions such as sound, lights, remote control, motion", "互动功能"), + _taxonomy_field("educational_play_value", "Educational / Play Value", "play value such as STEM, pretend play, sensory, puzzle solving", "教育 / 可玩性"), + _taxonomy_field("piece_count_size", "Piece Count / Size", "piece count or size when stated", "件数 / 尺寸"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as indoor play, bath time, party favor, outdoor play", "使用场景"), ) SHOES_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise footwear category label"), - _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied"), - _taxonomy_field("age_group", "Age Group", "only if clearly implied"), - _taxonomy_field("closure_type", "Closure Type", "fastening method such as lace-up, slip-on, buckle, hook-and-loop"), - _taxonomy_field("toe_shape", "Toe Shape", "toe shape when applicable, e.g. round toe, pointed toe, open toe"), - _taxonomy_field("heel_sole_type", "Heel Height / Sole Type", "heel or sole profile such as flat, block heel, wedge, platform, thick sole"), - _taxonomy_field("upper_material", "Upper Material", "main upper material such as leather, knit, canvas, mesh"), - _taxonomy_field("lining_insole_material", "Lining / Insole Material", "lining or insole material when supported"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as running, casual, office, hiking, formal"), + _taxonomy_field("product_type", "Product Type", "concise footwear category label", "品类"), + _taxonomy_field("target_gender", "Target Gender", "intended gender only if clearly implied", "目标性别"), + _taxonomy_field("age_group", "Age Group", "only if clearly implied", "年龄段"), + _taxonomy_field("closure_type", "Closure Type", "fastening method such as lace-up, slip-on, buckle, hook-and-loop", "闭合方式"), + _taxonomy_field("toe_shape", "Toe Shape", "toe shape when applicable, e.g. round toe, pointed toe, open toe", "鞋头形状"), + _taxonomy_field("heel_sole_type", "Heel Height / Sole Type", "heel or sole profile such as flat, block heel, wedge, platform, thick sole", "跟高 / 鞋底类型"), + _taxonomy_field("upper_material", "Upper Material", "main upper material such as leather, knit, canvas, mesh", "鞋面材质"), + _taxonomy_field("lining_insole_material", "Lining / Insole Material", "lining or insole material when supported", "里料 / 鞋垫材质"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("occasion_end_use", "Occasion / End Use", "likely use such as running, casual, office, hiking, formal", "适用场景"), ) SPORTS_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise sports product category label"), - _taxonomy_field("sport_activity", "Sport / Activity", "primary sport or activity such as fitness, yoga, basketball, cycling, swimming"), - _taxonomy_field("skill_level", "Skill Level", "target user level when supported, e.g. beginner, training, professional"), - _taxonomy_field("material", "Material", "main material such as EVA, carbon fiber, neoprene, latex"), - _taxonomy_field("size_capacity", "Size / Capacity", "size, weight, resistance level, or capacity when stated"), - _taxonomy_field("protection_support", "Protection / Support", "support or protection function such as ankle support, shock absorption, impact protection"), - _taxonomy_field("key_features", "Key Features", "main features such as anti-slip, adjustable, foldable, quick-dry"), - _taxonomy_field("power_source", "Power Source", "battery, electric, or non-powered when applicable"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("use_scenario", "Use Scenario", "intended use such as gym, home workout, field training, competition"), + _taxonomy_field("product_type", "Product Type", "concise sports product category label", "品类"), + _taxonomy_field("sport_activity", "Sport / Activity", "primary sport or activity such as fitness, yoga, basketball, cycling, swimming", "运动 / 活动"), + _taxonomy_field("skill_level", "Skill Level", "target user level when supported, e.g. beginner, training, professional", "适用水平"), + _taxonomy_field("material", "Material", "main material such as EVA, carbon fiber, neoprene, latex", "材质"), + _taxonomy_field("size_capacity", "Size / Capacity", "size, weight, resistance level, or capacity when stated", "尺寸 / 容量"), + _taxonomy_field("protection_support", "Protection / Support", "support or protection function such as ankle support, shock absorption, impact protection", "防护 / 支撑"), + _taxonomy_field("key_features", "Key Features", "main features such as anti-slip, adjustable, foldable, quick-dry", "关键特征"), + _taxonomy_field("power_source", "Power Source", "battery, electric, or non-powered when applicable", "供电方式"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("use_scenario", "Use Scenario", "intended use such as gym, home workout, field training, competition", "使用场景"), ) OTHERS_TAXONOMY_FIELDS = ( - _taxonomy_field("product_type", "Product Type", "concise product category label, not a full marketing title"), - _taxonomy_field("product_category", "Product Category", "broader retail grouping when the specific product type is narrow"), - _taxonomy_field("target_user", "Target User", "intended user, audience, or recipient when clearly implied"), - _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredients when supported"), - _taxonomy_field("key_features", "Key Features", "primary product attributes or standout features"), - _taxonomy_field("functional_benefits", "Functional Benefits", "practical benefits or performance advantages when supported"), - _taxonomy_field("size_capacity", "Size / Capacity", "size, count, weight, or capacity when stated"), - _taxonomy_field("color", "Color", "specific color name when available"), - _taxonomy_field("style_theme", "Style / Theme", "overall style, design theme, or visual direction when supported"), - _taxonomy_field("use_scenario", "Use Scenario", "likely use occasion or application setting when supported"), + _taxonomy_field("product_type", "Product Type", "concise product category label, not a full marketing title", "品类"), + _taxonomy_field("product_category", "Product Category", "broader retail grouping when the specific product type is narrow", "商品类别"), + _taxonomy_field("target_user", "Target User", "intended user, audience, or recipient when clearly implied", "适用人群"), + _taxonomy_field("material_ingredients", "Material / Ingredients", "main material or ingredients when supported", "材质 / 成分"), + _taxonomy_field("key_features", "Key Features", "primary product attributes or standout features", "关键特征"), + _taxonomy_field("functional_benefits", "Functional Benefits", "practical benefits or performance advantages when supported", "功能"), + _taxonomy_field("size_capacity", "Size / Capacity", "size, count, weight, or capacity when stated", "尺寸 / 容量"), + _taxonomy_field("color", "Color", "specific color name when available", "主颜色"), + _taxonomy_field("style_theme", "Style / Theme", "overall style, design theme, or visual direction when supported", "风格 / 主题"), + _taxonomy_field("use_scenario", "Use Scenario", "likely use occasion or application setting when supported", "使用场景"), ) CATEGORY_TAXONOMY_PROFILES: Dict[str, Dict[str, Any]] = { "apparel": _make_taxonomy_profile( "apparel", APPAREL_TAXONOMY_FIELDS, - aliases=("服装", "服饰", "apparel", "clothing", "fashion"), - output_languages=("zh", "en"), - zh_headers=tuple(field["zh_label"] for field in APPAREL_TAXONOMY_FIELDS), ), "3c": _make_taxonomy_profile( "3C", THREE_C_TAXONOMY_FIELDS, - aliases=("3c", "数码", "phone accessories", "computer peripherals", "smart wearables", "audio", "gaming gear"), ), "bags": _make_taxonomy_profile( "bags", BAGS_TAXONOMY_FIELDS, - aliases=("bags", "bag", "包", "箱包", "handbag", "backpack", "wallet", "luggage"), ), "pet_supplies": _make_taxonomy_profile( "pet supplies", PET_SUPPLIES_TAXONOMY_FIELDS, - aliases=("pet", "宠物", "pet supplies", "pet food", "pet toys", "pet care"), ), "electronics": _make_taxonomy_profile( "electronics", ELECTRONICS_TAXONOMY_FIELDS, - aliases=("electronics", "电子", "electronic components", "consumer electronics", "digital devices"), ), "outdoor": _make_taxonomy_profile( "outdoor products", OUTDOOR_TAXONOMY_FIELDS, - aliases=("outdoor", "户外", "camping", "hiking", "fishing", "travel accessories"), ), "home_appliances": _make_taxonomy_profile( "home appliances", HOME_APPLIANCES_TAXONOMY_FIELDS, - aliases=("home appliances", "家电", "电器", "kitchen appliances", "cleaning appliances", "smart home devices"), ), "home_living": _make_taxonomy_profile( "home and living", HOME_LIVING_TAXONOMY_FIELDS, - aliases=("home", "living", "家居", "家具", "家纺", "home decor", "kitchenware"), ), "wigs": _make_taxonomy_profile( "wigs", WIGS_TAXONOMY_FIELDS, - aliases=("wig", "wigs", "假发", "hairpiece"), ), "beauty": _make_taxonomy_profile( "beauty and cosmetics", BEAUTY_TAXONOMY_FIELDS, - aliases=("beauty", "cosmetics", "美容", "美妆", "makeup", "skincare", "nail care"), ), "accessories": _make_taxonomy_profile( "accessories", ACCESSORIES_TAXONOMY_FIELDS, - aliases=("accessories", "配饰", "jewelry", "watches", "belts", "scarves", "hats", "sunglasses"), ), "toys": _make_taxonomy_profile( "toys", TOYS_TAXONOMY_FIELDS, - aliases=("toys", "toy", "玩具", "plush", "action figures", "puzzles", "educational toys"), ), "shoes": _make_taxonomy_profile( "shoes", SHOES_TAXONOMY_FIELDS, - aliases=("shoes", "shoe", "鞋", "sneakers", "boots", "sandals", "heels"), ), "sports": _make_taxonomy_profile( "sports products", SPORTS_TAXONOMY_FIELDS, - aliases=("sports", "sport", "运动", "fitness", "cycling", "team sports", "water sports"), ), "others": _make_taxonomy_profile( "general merchandise", OTHERS_TAXONOMY_FIELDS, - aliases=("others", "other", "其他", "general merchandise"), ), } -CATEGORY_TAXONOMY_PROFILE_NAMES = tuple(CATEGORY_TAXONOMY_PROFILES.keys()) TAXONOMY_SHARED_ANALYSIS_INSTRUCTION = CATEGORY_TAXONOMY_PROFILES["apparel"]["shared_instruction"] TAXONOMY_MARKDOWN_TABLE_HEADERS_EN = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"]["en"] TAXONOMY_LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = CATEGORY_TAXONOMY_PROFILES["apparel"]["markdown_table_headers"] diff --git a/indexer/product_enrich模块说明.md b/indexer/product_enrich模块说明.md new file mode 100644 index 0000000..1428746 --- /dev/null +++ b/indexer/product_enrich模块说明.md @@ -0,0 +1,173 @@ +# 内容富化模块说明 + +本文说明商品内容富化模块的职责、入口、输出结构,以及当前 taxonomy profile 的设计约束。 + +## 1. 模块目标 + +内容富化模块负责基于商品文本调用 LLM,生成以下索引字段: + +- `qanchors` +- `enriched_tags` +- `enriched_attributes` +- `enriched_taxonomy_attributes` + +模块追求的设计原则: + +- 单一职责:只负责内容理解与结构化输出,不负责 CSV 读写 +- 输出对齐 ES mapping:返回结构可直接写入 `search_products` +- 配置化扩展:taxonomy profile 通过数据配置扩展,而不是散落条件分支 +- 代码精简:只面向正常使用方式,避免为了不合理调用堆叠补丁逻辑 + +## 2. 主要文件 + +- [product_enrich.py](/data/saas-search/indexer/product_enrich.py) + 运行时主逻辑,负责批处理、缓存、prompt 组装、LLM 调用、markdown 解析、输出整理 +- [product_enrich_prompts.py](/data/saas-search/indexer/product_enrich_prompts.py) + prompt 模板与 taxonomy profile 配置 +- [document_transformer.py](/data/saas-search/indexer/document_transformer.py) + 在内部索引构建链路中调用内容富化模块,把结果回填到 ES doc +- [taxonomy.md](/data/saas-search/indexer/taxonomy.md) + taxonomy 设计说明与字段清单 + +## 3. 对外入口 + +### 3.1 Python 入口 + +核心入口: + +```python +build_index_content_fields( + items, + tenant_id=None, + enrichment_scopes=None, + category_taxonomy_profile=None, +) +``` + +输入最小要求: + +- `id` 或 `spu_id` +- `title` + +可选输入: + +- `brief` +- `description` +- `image_url` + +关键参数: + +- `enrichment_scopes` + 可选 `generic`、`category_taxonomy` +- `category_taxonomy_profile` + taxonomy profile;默认 `apparel` + +### 3.2 HTTP 入口 + +API 路由: + +- `POST /indexer/enrich-content` + +对应文档: + +- [搜索API对接指南-05-索引接口(Indexer)](/data/saas-search/docs/搜索API对接指南-05-索引接口(Indexer).md) +- [搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation)](/data/saas-search/docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md) + +## 4. 输出结构 + +返回结果与 ES mapping 对齐: + +```json +{ + "id": "223167", + "qanchors": { + "zh": ["短袖T恤", "纯棉"], + "en": ["t-shirt", "cotton"] + }, + "enriched_tags": { + "zh": ["短袖", "纯棉"], + "en": ["short sleeve", "cotton"] + }, + "enriched_attributes": [ + { + "name": "enriched_tags", + "value": { + "zh": ["短袖", "纯棉"], + "en": ["short sleeve", "cotton"] + } + } + ], + "enriched_taxonomy_attributes": [ + { + "name": "Product Type", + "value": { + "zh": ["T恤"], + "en": ["t-shirt"] + } + } + ] +} +``` + +说明: + +- `generic` 部分固定输出核心索引语言 `zh`、`en` +- `taxonomy` 部分同样统一输出 `zh`、`en` + +## 5. Taxonomy profile + +当前支持: + +- `apparel` +- `3c` +- `bags` +- `pet_supplies` +- `electronics` +- `outdoor` +- `home_appliances` +- `home_living` +- `wigs` +- `beauty` +- `accessories` +- `toys` +- `shoes` +- `sports` +- `others` + +统一约束: + +- 所有 profile 都返回 `zh` + `en` +- profile 只决定 taxonomy 字段集合,不再决定输出语言 +- 所有 profile 都配置中英文字段名,prompt/header 结构保持一致 + +## 6. 内部索引链路的当前约束 + +在内部 ES 文档构建链路里,`document_transformer` 当前调用内容富化时,taxonomy profile 暂时固定传: + +```python +category_taxonomy_profile="apparel" +``` + +这是一种显式、可控、代码更干净的临时策略。 + +当前代码里已保留 TODO: + +- 后续从数据库读取租户真实所属行业 +- 再用该行业替换固定的 `apparel` + +当前不做“根据商品类目文本自动猜 profile”的隐式逻辑,避免增加冗余代码与不必要的不确定性。 + +## 7. 缓存与批处理 + +缓存键由以下信息共同决定: + +- `analysis_kind` +- `target_lang` +- prompt/schema 版本指纹 +- prompt 实际输入文本 + +批处理规则: + +- 单次 LLM 调用最多 20 条 +- 上层允许传更大批次,模块内部自动拆批 +- uncached batch 可并发执行 diff --git a/indexer/taxonomy.md b/indexer/taxonomy.md index 7dfd351..fa78a77 100644 --- a/indexer/taxonomy.md +++ b/indexer/taxonomy.md @@ -175,8 +175,7 @@ Input product list: ## 2. Other taxonomy profiles 说明: -- `apparel` 继续返回 `zh` + `en`。 -- 其他 profile 只返回 `en`,并且只定义英文列名。 +- 所有 profile 统一返回 `zh` + `en`。 - 代码中的 profile slug 与下面保持一致。 | Profile | Core columns (`en`) | diff --git a/tests/test_llm_enrichment_batch_fill.py b/tests/test_llm_enrichment_batch_fill.py index eee5b20..5d882c0 100644 --- a/tests/test_llm_enrichment_batch_fill.py +++ b/tests/test_llm_enrichment_batch_fill.py @@ -10,8 +10,14 @@ from indexer.document_transformer import SPUDocumentTransformer def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): seen_calls: List[Dict[str, Any]] = [] - def _fake_build_index_content_fields(items, tenant_id=None): - seen_calls.append({"n": len(items), "tenant_id": tenant_id}) + def _fake_build_index_content_fields(items, tenant_id=None, category_taxonomy_profile=None): + seen_calls.append( + { + "n": len(items), + "tenant_id": tenant_id, + "category_taxonomy_profile": category_taxonomy_profile, + } + ) return [ { "id": item["id"], @@ -53,7 +59,7 @@ def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch): transformer.fill_llm_attributes_batch(docs, rows) - assert seen_calls == [{"n": 45, "tenant_id": "162"}] + assert seen_calls == [{"n": 45, "tenant_id": "162", "category_taxonomy_profile": "apparel"}] assert docs[0]["qanchors"]["zh"] == ["zh-anchor-0"] assert docs[0]["qanchors"]["en"] == ["en-anchor-0"] diff --git a/tests/test_product_enrich_partial_mode.py b/tests/test_product_enrich_partial_mode.py index 7405f90..156145b 100644 --- a/tests/test_product_enrich_partial_mode.py +++ b/tests/test_product_enrich_partial_mode.py @@ -559,15 +559,7 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output() ], } ] - - -def test_detect_category_taxonomy_profile_matches_category_hints(): - assert product_enrich.detect_category_taxonomy_profile({"category1_name": "玩具"}) == "toys" - assert product_enrich.detect_category_taxonomy_profile({"category": "Beauty & Cosmetics"}) == "beauty" - assert product_enrich.detect_category_taxonomy_profile({"category_path": "Home Appliances / Kitchen"}) == "home_appliances" - - -def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_apparel_returns_en_only(): +def test_build_index_content_fields_non_apparel_taxonomy_returns_en_only(): seen_calls = [] def fake_analyze_products( @@ -580,40 +572,6 @@ def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_appa ): seen_calls.append((analysis_kind, target_lang, category_taxonomy_profile, tuple(p["id"] for p in products))) if analysis_kind == "taxonomy": - if category_taxonomy_profile == "apparel": - return [ - { - "id": products[0]["id"], - "lang": target_lang, - "title_input": products[0]["title"], - "product_type": f"{target_lang}-dress", - "target_gender": f"{target_lang}-women", - "age_group": "", - "season": "", - "fit": "", - "silhouette": "", - "neckline": "", - "sleeve_length_type": "", - "sleeve_style": "", - "strap_type": "", - "rise_waistline": "", - "leg_shape": "", - "skirt_shape": "", - "length_type": "", - "closure_type": "", - "design_details": "", - "fabric": "", - "material_composition": "", - "fabric_properties": "", - "clothing_features": "", - "functional_benefits": "", - "color": "", - "color_family": "", - "print_pattern": "", - "occasion_end_use": "", - "style_aesthetic": "", - } - ] assert category_taxonomy_profile == "toys" assert target_lang == "en" return [ @@ -655,21 +613,30 @@ def test_build_index_content_fields_routes_taxonomy_by_item_profile_and_non_appa with mock.patch.object(product_enrich, "analyze_products", side_effect=fake_analyze_products): result = product_enrich.build_index_content_fields( - items=[ - {"spu_id": "1", "title": "dress", "category_taxonomy_profile": "apparel"}, - {"spu_id": "2", "title": "toy", "category_taxonomy_profile": "toys"}, - ], + items=[{"spu_id": "2", "title": "toy"}], tenant_id="170", - category_taxonomy_profile="apparel", + category_taxonomy_profile="toys", ) - assert result[0]["enriched_taxonomy_attributes"] == [ - {"name": "Product Type", "value": {"zh": ["zh-dress"], "en": ["en-dress"]}}, - {"name": "Target Gender", "value": {"zh": ["zh-women"], "en": ["en-women"]}}, - ] - assert result[1]["enriched_taxonomy_attributes"] == [ - {"name": "Product Type", "value": {"en": ["doll set"]}}, - {"name": "Age Group", "value": {"en": ["kids"]}}, + assert result == [ + { + "id": "2", + "qanchors": {"zh": ["zh-anchor"], "en": ["en-anchor"]}, + "enriched_tags": {"zh": ["zh-tag"], "en": ["en-tag"]}, + "enriched_attributes": [ + { + "name": "enriched_tags", + "value": { + "zh": ["zh-tag"], + "en": ["en-tag"], + }, + } + ], + "enriched_taxonomy_attributes": [ + {"name": "Product Type", "value": {"en": ["doll set"]}}, + {"name": "Age Group", "value": {"en": ["kids"]}}, + ], + } ] assert ("taxonomy", "zh", "toys", ("2",)) not in seen_calls assert ("taxonomy", "en", "toys", ("2",)) in seen_calls -- libgit2 0.21.2