Commit 2703b6ea8b5d2ecdc442f11ad26279cc2dce41e6

Authored by tangwang
1 parent 5aaf0c7d

refactor(indexer): 将 analysis_kinds 拆分为 enrichment_scopes +

category_taxonomy_profile

- 原 analysis_kinds
  混用了“增强类型”(content/taxonomy)与“品类特定配置”,不利于扩展不同品类的
taxonomy 分析(如 3C、家居等)
- 新增 enrichment_scopes 参数:支持 generic(通用增强,产出
  qanchors/enriched_tags/enriched_attributes)和
category_taxonomy(品类增强,产出 enriched_taxonomy_attributes)
- 新增 category_taxonomy_profile 参数:指定品类增强使用哪套
  profile(当前内置 apparel),每套 profile 包含独立的
prompt、输出列定义、解析规则及缓存版本
- 保留 analysis_kinds 作为兼容别名,避免破坏现有调用方
- 重构内部 taxonomy 分析为 profile registry 模式:新增
  _CATEGORY_TAXONOMY_PROFILE_SCHEMAS 注册表,并扩展 _get_analysis_schema(analysis_kind,
  category_taxonomy_profile=...),根据 profile 动态返回对应的 AnalysisSchema
- 缓存 key 现在按“分析类型 + profile + schema 指纹 +
  输入字段哈希”隔离,确保不同品类、不同 prompt 版本自动失效
- 更新 API 文档及微服务接口文档,明确新参数语义与使用示例

技术细节:
- 修改入口:api/routes/indexer.py 中 enrich-content
  端点,解析新参数并向下传递
- 核心逻辑:indexer/product_enrich.py 中 build_index_content_fields 增加
  enrichment_scopes 与 category_taxonomy_profile 参数;analyze_products /
  process_batch 按 analysis_kind 和 profile 动态获取 schema
- 兼容层:若请求同时提供 analysis_kinds,则映射为
  enrichment_scopes(content→generic,taxonomy→category_taxonomy),category_taxonomy_profile
默认为 "apparel"
- 测试覆盖:新增 enrichment_scopes 组合、profile 切换及兼容模式测试
api/routes/indexer.py
... ... @@ -88,20 +88,41 @@ class EnrichContentItem(BaseModel):
88 88  
89 89 class EnrichContentRequest(BaseModel):
90 90 """
91   - 内容理解字段生成请求:根据商品标题批量生成 qanchors、enriched_attributes、tags、taxonomy attributes
  91 + 内容理解字段生成请求:根据商品标题批量生成通用增强字段与品类 taxonomy 字段
92 92 供外部 indexer 在自行组织 doc 时调用,与翻译、向量化等微服务并列。
93 93 """
94 94 tenant_id: str = Field(..., description="租户 ID,用于请求路由与结果归属,不参与缓存键")
95 95 items: List[EnrichContentItem] = Field(..., description="待分析的 SPU 列表(spu_id + title,可附带 brief/description/image_url)")
96   - analysis_kinds: List[Literal["content", "taxonomy"]] = Field(
97   - default_factory=lambda: ["content", "taxonomy"],
  96 + enrichment_scopes: Optional[List[Literal["generic", "category_taxonomy"]]] = Field(
  97 + default=None,
98 98 description=(
99   - "要执行的分析族。"
100   - "`content` 返回 qanchors/enriched_tags/enriched_attributes;"
101   - "`taxonomy` 返回 enriched_taxonomy_attributes。"
  99 + "要执行的增强范围。"
  100 + "`generic` 返回 qanchors/enriched_tags/enriched_attributes;"
  101 + "`category_taxonomy` 返回 enriched_taxonomy_attributes。"
102 102 "默认两者都执行。"
103 103 ),
104 104 )
  105 + category_taxonomy_profile: str = Field(
  106 + "apparel",
  107 + description=(
  108 + "品类 taxonomy profile。当前默认且已支持的是 `apparel`。"
  109 + "未来可扩展为 `electronics` 等。"
  110 + ),
  111 + )
  112 + analysis_kinds: Optional[List[Literal["content", "taxonomy"]]] = Field(
  113 + default=None,
  114 + description="Deprecated alias of enrichment_scopes. `content` -> `generic`, `taxonomy` -> `category_taxonomy`.",
  115 + )
  116 +
  117 + def resolved_enrichment_scopes(self) -> List[str]:
  118 + if self.enrichment_scopes:
  119 + return list(self.enrichment_scopes)
  120 + if self.analysis_kinds:
  121 + mapped = []
  122 + for item in self.analysis_kinds:
  123 + mapped.append("generic" if item == "content" else "category_taxonomy")
  124 + return mapped
  125 + return ["generic", "category_taxonomy"]
105 126  
106 127  
107 128 @router.post("/reindex")
... ... @@ -452,7 +473,8 @@ async def build_docs_from_db(request: BuildDocsFromDbRequest):
452 473 def _run_enrich_content(
453 474 tenant_id: str,
454 475 items: List[Dict[str, str]],
455   - analysis_kinds: Optional[List[str]] = None,
  476 + enrichment_scopes: Optional[List[str]] = None,
  477 + category_taxonomy_profile: str = "apparel",
456 478 ) -> List[Dict[str, Any]]:
457 479 """
458 480 同步执行内容理解,返回与 ES mapping 对齐的字段结构。
... ... @@ -463,7 +485,8 @@ def _run_enrich_content(
463 485 results = build_index_content_fields(
464 486 items=items,
465 487 tenant_id=tenant_id,
466   - analysis_kinds=analysis_kinds,
  488 + enrichment_scopes=enrichment_scopes,
  489 + category_taxonomy_profile=category_taxonomy_profile,
467 490 )
468 491 return [
469 492 {
... ... @@ -481,7 +504,7 @@ def _run_enrich_content(
481 504 @router.post("/enrich-content")
482 505 async def enrich_content(request: EnrichContentRequest):
483 506 """
484   - 内容理解字段生成接口:根据商品标题批量生成 qanchors、enriched_attributes、tags、taxonomy attributes
  507 + 内容理解字段生成接口:根据商品标题批量生成通用增强字段与品类 taxonomy 字段
485 508  
486 509 使用场景:
487 510 - 外部 indexer 采用「微服务组合」方式自己组织 doc 时,可调用本接口获取 LLM 生成的
... ... @@ -511,17 +534,20 @@ async def enrich_content(request: EnrichContentRequest):
511 534 for it in request.items
512 535 ]
513 536 loop = asyncio.get_event_loop()
  537 + enrichment_scopes = request.resolved_enrichment_scopes()
514 538 result = await loop.run_in_executor(
515 539 None,
516 540 lambda: _run_enrich_content(
517 541 tenant_id=request.tenant_id,
518 542 items=items_payload,
519   - analysis_kinds=request.analysis_kinds,
  543 + enrichment_scopes=enrichment_scopes,
  544 + category_taxonomy_profile=request.category_taxonomy_profile,
520 545 ),
521 546 )
522 547 return {
523 548 "tenant_id": request.tenant_id,
524   - "analysis_kinds": request.analysis_kinds,
  549 + "enrichment_scopes": enrichment_scopes,
  550 + "category_taxonomy_profile": request.category_taxonomy_profile,
525 551 "results": result,
526 552 "total": len(result),
527 553 }
... ...
docs/issues/issue-2026-04-08-eval框架主指标ERR的问题以及bm25调参-done-0408.md
... ... @@ -92,3 +92,29 @@ curl -s -u "$AUTH" -X GET "$ES/${INDEX}/_settings?filter_path=**.similarity&pret
92 92 ```bash
93 93 curl -s -u "$AUTH" -X GET "$ES/_cat/indices/search_products_tenant_*?h=index&v"
94 94 ```
  95 +
  96 +
  97 +
  98 +实验结果:
  99 +已完成四轮实验,并把 `search_products_tenant_163` 当前索引参数留在了最优组:`b=0.5, k1=1.0`。刚刚已校验生效。
  100 +
  101 +对比结果如下:
  102 +
  103 +- 基线当前线上组 `b=0.10, k1=0.30`:`0.604555`
  104 + 报告:[batch_20260408T055948Z_00b6a8aa3d.md](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T055948Z_00b6a8aa3d.md)
  105 +- 复跑 `b=0.10, k1=0.30`:`0.604298`
  106 + 报告:[batch_20260408T064011Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064011Z_00b6a8aa3d.json)
  107 +- 实验 `b=0.20, k1=0.60`:`0.604274`
  108 + 报告:[batch_20260408T064123Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064123Z_00b6a8aa3d.json)
  109 +- 实验 `b=0.50, k1=1.0`:`0.606041`
  110 + 报告:[batch_20260408T064216Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064216Z_00b6a8aa3d.json)
  111 +- 实验 `b=0.10, k1=0.75`:`0.603438`
  112 + 报告:[batch_20260408T064304Z_00b6a8aa3d.json](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260408T064304Z_00b6a8aa3d.json)
  113 +- 老基线 `b=0.0, k1=0.0`:`0.602598`
  114 + 报告:[batch_20260407T150946Z_00b6a8aa3d.md](/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260407T150946Z_00b6a8aa3d.md)
  115 +
  116 +结论是这四组里 `b=0.50, k1=1.0` 最好,相比你给的当前基线 `0.604555` 提升了 `+0.001486`,相比 `0/0` 提升了 `+0.003443`。`0.20/0.60` 基本和 `0.10/0.30` 持平,`0.10/0.75` 反而更差,说明这轮里单独加大 `k1` 不如同时把 `b` 和 `k1` 一起放宽。
  117 +
  118 +补充一点:我只改了现有索引 `search_products_tenant_163` 的 index-level setting,没有改仓库里的默认映射。[search_products.json](/data/saas-search/mappings/search_products.json) 现在仍是 `0.1/0.3`。如果你要,我下一步可以直接帮你把映射默认值也改成 `0.5/1.0`,并顺手批量同步其他 `search_products_tenant_*` 索引。
  119 +
  120 +
... ...
docs/搜索API对接指南-05-索引接口(Indexer).md
... ... @@ -655,7 +655,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
655 655 ```json
656 656 {
657 657 "tenant_id": "170",
658   - "analysis_kinds": ["content", "taxonomy"],
  658 + "enrichment_scopes": ["generic", "category_taxonomy"],
  659 + "category_taxonomy_profile": "apparel",
659 660 "items": [
660 661 {
661 662 "spu_id": "223167",
... ... @@ -676,7 +677,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
676 677 | 参数 | 类型 | 必填 | 默认值 | 说明 |
677 678 |------|------|------|--------|------|
678 679 | `tenant_id` | string | Y | - | 租户 ID。目前仅用于记录日志,不产生实际作用|
679   -| `analysis_kinds` | array[string] | N | `["content", "taxonomy"]` | 选择要执行的分析族。`content` 生成 `qanchors`/`enriched_tags`/`enriched_attributes`,`taxonomy` 生成 `enriched_taxonomy_attributes` |
  680 +| `enrichment_scopes` | array[string] | N | `["generic", "category_taxonomy"]` | 选择要执行的增强范围。`generic` 生成 `qanchors`/`enriched_tags`/`enriched_attributes`,`category_taxonomy` 生成 `enriched_taxonomy_attributes` |
  681 +| `category_taxonomy_profile` | string | N | `apparel` | 品类 taxonomy profile。当前内置为服装大类 `apparel`,后续可扩展到其他大类 |
680 682 | `items` | array | Y | - | 待分析列表;**单次最多 50 条** |
681 683  
682 684 `items[]` 字段说明:
... ... @@ -691,8 +693,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
691 693  
692 694 缓存说明:
693 695  
694   -- 内容缓存按 **分析族拆分**,即 `content` 与 `taxonomy` 使用不同的缓存命名空间,互不污染、可独立演进。
695   -- 缓存键由 `analysis_kind + target_lang + prompt/schema 版本指纹 + prompt 输入文本 hash` 构成。
  696 +- 内容缓存按 **增强范围 + taxonomy profile** 拆分;`generic` 与 `category_taxonomy:apparel` 等使用不同缓存命名空间,互不污染、可独立演进。
  697 +- 缓存键由 `analysis_kind + target_lang + prompt/schema 版本指纹 + prompt 输入文本 hash` 构成;对 category taxonomy 来说,profile 会进入 schema 标识与版本指纹。
696 698 - 当前真正参与 prompt 输入的字段是:`title`、`brief`、`description`;这些字段任一变化,都会落到新的缓存 key。
697 699 - `prompt/schema 版本指纹` 会综合 system prompt、shared instruction、localized table headers、result fields、user instruction template 等信息生成;因此只要提示词或输出契约变化,旧缓存会自然失效。
698 700 - `tenant_id`、`spu_id` 只用于请求归属与结果回填,不参与缓存键。
... ... @@ -714,7 +716,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
714 716 ```json
715 717 {
716 718 "tenant_id": "170",
717   - "analysis_kinds": ["content", "taxonomy"],
  719 + "enrichment_scopes": ["generic", "category_taxonomy"],
  720 + "category_taxonomy_profile": "apparel",
718 721 "total": 2,
719 722 "results": [
720 723 {
... ... @@ -755,7 +758,8 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \
755 758  
756 759 | 字段 | 类型 | 说明 |
757 760 |------|------|------|
758   -| `analysis_kinds` | array | 实际执行的分析族列表 |
  761 +| `enrichment_scopes` | array | 实际执行的增强范围列表 |
  762 +| `category_taxonomy_profile` | string | 实际使用的品类 taxonomy profile |
759 763 | `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes` |
760 764 | `results[].qanchors` | object | 与 ES `qanchors` 字段同结构,按语言键返回短语数组 |
761 765 | `results[].enriched_tags` | object | 与 ES `enriched_tags` 字段同结构,按语言键返回标签数组 |
... ... @@ -774,7 +778,8 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \
774 778 -H "Content-Type: application/json" \
775 779 -d '{
776 780 "tenant_id": "163",
777   - "analysis_kinds": ["content", "taxonomy"],
  781 + "enrichment_scopes": ["generic", "category_taxonomy"],
  782 + "category_taxonomy_profile": "apparel",
778 783 "items": [
779 784 {
780 785 "spu_id": "223167",
... ...
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
... ... @@ -444,7 +444,7 @@ curl "http://localhost:6006/health"
444 444  
445 445 - **Base URL**: Indexer 服务地址,如 `http://localhost:6004`
446 446 - **路径**: `POST /indexer/enrich-content`
447   -- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `analysis_kinds` 选择执行 `content` / `taxonomy`;默认两者都执行。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。
  447 +- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`enriched_tags`、`enriched_taxonomy_attributes`,用于拼装 ES 文档。支持通过 `enrichment_scopes` 选择执行 `generic` / `category_taxonomy`,并通过 `category_taxonomy_profile` 选择对应大类的 taxonomy prompt/profile;默认执行 `generic + category_taxonomy(apparel)`。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。
448 448  
449 449 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。
450 450  
... ...
indexer/product_enrich.py
... ... @@ -151,7 +151,8 @@ if _missing_prompt_langs:
151 151 # 多值字段分隔:英文逗号、中文逗号、顿号,及历史约定的 ; | / 与空白
152 152 _MULTI_VALUE_FIELD_SPLIT_RE = re.compile(r"[,、,;|/\n\t]+")
153 153 _CORE_INDEX_LANGUAGES = ("zh", "en")
154   -_DEFAULT_ANALYSIS_KINDS = ("content", "taxonomy")
  154 +_DEFAULT_ENRICHMENT_SCOPES = ("generic", "category_taxonomy")
  155 +_DEFAULT_CATEGORY_TAXONOMY_PROFILE = "apparel"
155 156 _CONTENT_ANALYSIS_ATTRIBUTE_FIELD_MAP = (
156 157 ("tags", "enriched_tags"),
157 158 ("target_audience", "target_audience"),
... ... @@ -187,7 +188,7 @@ _CONTENT_ANALYSIS_FIELD_ALIASES = {
187 188 "tags": ("tags", "enriched_tags"),
188 189 }
189 190 _CONTENT_ANALYSIS_QUALITY_FIELDS = ("title", "category_path", "anchor_text")
190   -_TAXONOMY_ANALYSIS_ATTRIBUTE_FIELD_MAP = (
  191 +_APPAREL_TAXONOMY_ATTRIBUTE_FIELD_MAP = (
191 192 ("product_type", "Product Type"),
192 193 ("target_gender", "Target Gender"),
193 194 ("age_group", "Age Group"),
... ... @@ -215,8 +216,8 @@ _TAXONOMY_ANALYSIS_ATTRIBUTE_FIELD_MAP = (
215 216 ("occasion_end_use", "Occasion / End Use"),
216 217 ("style_aesthetic", "Style Aesthetic"),
217 218 )
218   -_TAXONOMY_ANALYSIS_RESULT_FIELDS = tuple(
219   - field_name for field_name, _ in _TAXONOMY_ANALYSIS_ATTRIBUTE_FIELD_MAP
  219 +_APPAREL_TAXONOMY_ANALYSIS_RESULT_FIELDS = tuple(
  220 + field_name for field_name, _ in _APPAREL_TAXONOMY_ATTRIBUTE_FIELD_MAP
220 221 )
221 222  
222 223  
... ... @@ -252,37 +253,66 @@ _ANALYSIS_SCHEMAS: Dict[str, AnalysisSchema] = {
252 253 field_aliases=_CONTENT_ANALYSIS_FIELD_ALIASES,
253 254 quality_fields=_CONTENT_ANALYSIS_QUALITY_FIELDS,
254 255 ),
255   - "taxonomy": AnalysisSchema(
256   - name="taxonomy",
  256 +}
  257 +
  258 +_CATEGORY_TAXONOMY_PROFILE_SCHEMAS: Dict[str, AnalysisSchema] = {
  259 + "apparel": AnalysisSchema(
  260 + name="taxonomy:apparel",
257 261 shared_instruction=TAXONOMY_SHARED_ANALYSIS_INSTRUCTION,
258 262 markdown_table_headers=TAXONOMY_LANGUAGE_MARKDOWN_TABLE_HEADERS,
259   - result_fields=_TAXONOMY_ANALYSIS_RESULT_FIELDS,
260   - meaningful_fields=_TAXONOMY_ANALYSIS_RESULT_FIELDS,
  263 + result_fields=_APPAREL_TAXONOMY_ANALYSIS_RESULT_FIELDS,
  264 + meaningful_fields=_APPAREL_TAXONOMY_ANALYSIS_RESULT_FIELDS,
261 265 cache_version="v1",
262 266 fallback_headers=TAXONOMY_MARKDOWN_TABLE_HEADERS_EN,
263 267 ),
264 268 }
265 269  
  270 +_CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS: Dict[str, Tuple[Tuple[str, str], ...]] = {
  271 + "apparel": _APPAREL_TAXONOMY_ATTRIBUTE_FIELD_MAP,
  272 +}
  273 +
266 274  
267   -def _get_analysis_schema(analysis_kind: str) -> AnalysisSchema:
268   - schema = _ANALYSIS_SCHEMAS.get(analysis_kind)
269   - if schema is None:
270   - raise ValueError(f"Unsupported analysis_kind: {analysis_kind}")
271   - return schema
  275 +def _normalize_category_taxonomy_profile(category_taxonomy_profile: Optional[str] = None) -> str:
  276 + profile = str(category_taxonomy_profile or _DEFAULT_CATEGORY_TAXONOMY_PROFILE).strip()
  277 + if profile not in _CATEGORY_TAXONOMY_PROFILE_SCHEMAS:
  278 + raise ValueError(f"Unsupported category_taxonomy_profile: {profile}")
  279 + return profile
272 280  
273 281  
274   -def _normalize_analysis_kinds(
275   - analysis_kinds: Optional[List[str]] = None,
  282 +def _get_analysis_schema(
  283 + analysis_kind: str,
  284 + *,
  285 + category_taxonomy_profile: Optional[str] = None,
  286 +) -> AnalysisSchema:
  287 + if analysis_kind == "content":
  288 + return _ANALYSIS_SCHEMAS["content"]
  289 + if analysis_kind == "taxonomy":
  290 + profile = _normalize_category_taxonomy_profile(category_taxonomy_profile)
  291 + return _CATEGORY_TAXONOMY_PROFILE_SCHEMAS[profile]
  292 + raise ValueError(f"Unsupported analysis_kind: {analysis_kind}")
  293 +
  294 +
  295 +def _get_taxonomy_attribute_field_map(
  296 + category_taxonomy_profile: Optional[str] = None,
  297 +) -> Tuple[Tuple[str, str], ...]:
  298 + profile = _normalize_category_taxonomy_profile(category_taxonomy_profile)
  299 + return _CATEGORY_TAXONOMY_PROFILE_ATTRIBUTE_FIELD_MAPS[profile]
  300 +
  301 +
  302 +def _normalize_enrichment_scopes(
  303 + enrichment_scopes: Optional[List[str]] = None,
276 304 ) -> Tuple[str, ...]:
277   - requested = _DEFAULT_ANALYSIS_KINDS if not analysis_kinds else tuple(analysis_kinds)
  305 + requested = _DEFAULT_ENRICHMENT_SCOPES if not enrichment_scopes else tuple(enrichment_scopes)
278 306 normalized: List[str] = []
279 307 seen = set()
280   - for analysis_kind in requested:
281   - schema = _get_analysis_schema(str(analysis_kind).strip())
282   - if schema.name in seen:
  308 + for enrichment_scope in requested:
  309 + scope = str(enrichment_scope).strip()
  310 + if scope not in {"generic", "category_taxonomy"}:
  311 + raise ValueError(f"Unsupported enrichment_scope: {scope}")
  312 + if scope in seen:
283 313 continue
284   - seen.add(schema.name)
285   - normalized.append(schema.name)
  314 + seen.add(scope)
  315 + normalized.append(scope)
286 316 return tuple(normalized)
287 317  
288 318  
... ... @@ -448,7 +478,13 @@ def _apply_index_content_row(result: Dict[str, Any], row: Dict[str, Any], lang:
448 478 _append_lang_phrase_map(result["enriched_tags"], lang=lang, raw_value=raw)
449 479  
450 480  
451   -def _apply_index_taxonomy_row(result: Dict[str, Any], row: Dict[str, Any], lang: str) -> None:
  481 +def _apply_index_taxonomy_row(
  482 + result: Dict[str, Any],
  483 + row: Dict[str, Any],
  484 + lang: str,
  485 + *,
  486 + category_taxonomy_profile: Optional[str] = None,
  487 +) -> None:
452 488 if not row or row.get("error"):
453 489 return
454 490  
... ... @@ -456,8 +492,11 @@ def _apply_index_taxonomy_row(result: Dict[str, Any], row: Dict[str, Any], lang:
456 492 result["enriched_taxonomy_attributes"],
457 493 row=row,
458 494 lang=lang,
459   - schema=_get_analysis_schema("taxonomy"),
460   - field_map=_TAXONOMY_ANALYSIS_ATTRIBUTE_FIELD_MAP,
  495 + schema=_get_analysis_schema(
  496 + "taxonomy",
  497 + category_taxonomy_profile=category_taxonomy_profile,
  498 + ),
  499 + field_map=_get_taxonomy_attribute_field_map(category_taxonomy_profile),
461 500 )
462 501  
463 502  
... ... @@ -475,7 +514,8 @@ def _normalize_index_content_item(item: Dict[str, Any]) -> Dict[str, str]:
475 514 def build_index_content_fields(
476 515 items: List[Dict[str, Any]],
477 516 tenant_id: Optional[str] = None,
478   - analysis_kinds: Optional[List[str]] = None,
  517 + enrichment_scopes: Optional[List[str]] = None,
  518 + category_taxonomy_profile: Optional[str] = None,
479 519 ) -> List[Dict[str, Any]]:
480 520 """
481 521 高层入口:生成与 ES mapping 对齐的内容理解字段。
... ... @@ -484,7 +524,8 @@ def build_index_content_fields(
484 524 - `id` 或 `spu_id`
485 525 - `title`
486 526 - 可选 `brief` / `description` / `image_url`
487   - - 可选 `analysis_kinds`,默认同时执行 `content` 与 `taxonomy`
  527 + - 可选 `enrichment_scopes`,默认同时执行 `generic` 与 `category_taxonomy`
  528 + - 可选 `category_taxonomy_profile`,默认 `apparel`
488 529  
489 530 返回项结构:
490 531 - `id`
... ... @@ -498,7 +539,8 @@ def build_index_content_fields(
498 539 - `qanchors.{lang}` 为短语数组
499 540 - `enriched_tags.{lang}` 为标签数组
500 541 """
501   - requested_analysis_kinds = _normalize_analysis_kinds(analysis_kinds)
  542 + requested_enrichment_scopes = _normalize_enrichment_scopes(enrichment_scopes)
  543 + normalized_taxonomy_profile = _normalize_category_taxonomy_profile(category_taxonomy_profile)
502 544 normalized_items = [_normalize_index_content_item(item) for item in items]
503 545 if not normalized_items:
504 546 return []
... ... @@ -515,7 +557,7 @@ def build_index_content_fields(
515 557 }
516 558  
517 559 for lang in _CORE_INDEX_LANGUAGES:
518   - if "content" in requested_analysis_kinds:
  560 + if "generic" in requested_enrichment_scopes:
519 561 try:
520 562 rows = analyze_products(
521 563 products=normalized_items,
... ... @@ -523,6 +565,7 @@ def build_index_content_fields(
523 565 batch_size=BATCH_SIZE,
524 566 tenant_id=tenant_id,
525 567 analysis_kind="content",
  568 + category_taxonomy_profile=normalized_taxonomy_profile,
526 569 )
527 570 except Exception as e:
528 571 logger.warning("build_index_content_fields content enrichment failed for lang=%s: %s", lang, e)
... ... @@ -539,7 +582,7 @@ def build_index_content_fields(
539 582 continue
540 583 _apply_index_content_row(results_by_id[item_id], row=row, lang=lang)
541 584  
542   - if "taxonomy" in requested_analysis_kinds:
  585 + if "category_taxonomy" in requested_enrichment_scopes:
543 586 try:
544 587 taxonomy_rows = analyze_products(
545 588 products=normalized_items,
... ... @@ -547,6 +590,7 @@ def build_index_content_fields(
547 590 batch_size=BATCH_SIZE,
548 591 tenant_id=tenant_id,
549 592 analysis_kind="taxonomy",
  593 + category_taxonomy_profile=normalized_taxonomy_profile,
550 594 )
551 595 except Exception as e:
552 596 logger.warning(
... ... @@ -565,7 +609,12 @@ def build_index_content_fields(
565 609 if row.get("error"):
566 610 results_by_id[item_id].setdefault("error", row["error"])
567 611 continue
568   - _apply_index_taxonomy_row(results_by_id[item_id], row=row, lang=lang)
  612 + _apply_index_taxonomy_row(
  613 + results_by_id[item_id],
  614 + row=row,
  615 + lang=lang,
  616 + category_taxonomy_profile=normalized_taxonomy_profile,
  617 + )
569 618  
570 619 return [results_by_id[item["id"]] for item in normalized_items]
571 620  
... ... @@ -636,9 +685,13 @@ def _make_analysis_cache_key(
636 685 product: Dict[str, Any],
637 686 target_lang: str,
638 687 analysis_kind: str,
  688 + category_taxonomy_profile: Optional[str] = None,
639 689 ) -> str:
640 690 """构造缓存 key,仅由分析类型、prompt 实际输入文本内容与目标语言决定。"""
641   - schema = _get_analysis_schema(analysis_kind)
  691 + schema = _get_analysis_schema(
  692 + analysis_kind,
  693 + category_taxonomy_profile=category_taxonomy_profile,
  694 + )
642 695 prompt_input = _build_prompt_input_text(product)
643 696 h = hashlib.md5(prompt_input.encode("utf-8")).hexdigest()
644 697 prompt_contract = {
... ... @@ -672,12 +725,21 @@ def _get_cached_analysis_result(
672 725 product: Dict[str, Any],
673 726 target_lang: str,
674 727 analysis_kind: str,
  728 + category_taxonomy_profile: Optional[str] = None,
675 729 ) -> Optional[Dict[str, Any]]:
676 730 if not _anchor_redis:
677 731 return None
678   - schema = _get_analysis_schema(analysis_kind)
  732 + schema = _get_analysis_schema(
  733 + analysis_kind,
  734 + category_taxonomy_profile=category_taxonomy_profile,
  735 + )
679 736 try:
680   - key = _make_analysis_cache_key(product, target_lang, analysis_kind)
  737 + key = _make_analysis_cache_key(
  738 + product,
  739 + target_lang,
  740 + analysis_kind,
  741 + category_taxonomy_profile=category_taxonomy_profile,
  742 + )
681 743 raw = _anchor_redis.get(key)
682 744 if not raw:
683 745 return None
... ... @@ -707,10 +769,14 @@ def _set_cached_analysis_result(
707 769 target_lang: str,
708 770 result: Dict[str, Any],
709 771 analysis_kind: str,
  772 + category_taxonomy_profile: Optional[str] = None,
710 773 ) -> None:
711 774 if not _anchor_redis:
712 775 return
713   - schema = _get_analysis_schema(analysis_kind)
  776 + schema = _get_analysis_schema(
  777 + analysis_kind,
  778 + category_taxonomy_profile=category_taxonomy_profile,
  779 + )
714 780 try:
715 781 normalized = _normalize_analysis_result(
716 782 result,
... ... @@ -720,7 +786,12 @@ def _set_cached_analysis_result(
720 786 )
721 787 if not _has_meaningful_analysis_content(normalized, schema):
722 788 return
723   - key = _make_analysis_cache_key(product, target_lang, analysis_kind)
  789 + key = _make_analysis_cache_key(
  790 + product,
  791 + target_lang,
  792 + analysis_kind,
  793 + category_taxonomy_profile=category_taxonomy_profile,
  794 + )
724 795 ttl = ANCHOR_CACHE_EXPIRE_DAYS * 24 * 3600
725 796 _anchor_redis.setex(key, ttl, json.dumps(normalized, ensure_ascii=False))
726 797 except Exception as e:
... ... @@ -775,9 +846,13 @@ def create_prompt(
775 846 products: List[Dict[str, str]],
776 847 target_lang: str = "zh",
777 848 analysis_kind: str = "content",
  849 + category_taxonomy_profile: Optional[str] = None,
778 850 ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
779 851 """根据目标语言创建共享上下文、本地化输出要求和 Partial Mode assistant 前缀。"""
780   - schema = _get_analysis_schema(analysis_kind)
  852 + schema = _get_analysis_schema(
  853 + analysis_kind,
  854 + category_taxonomy_profile=category_taxonomy_profile,
  855 + )
781 856 markdown_table_headers = schema.get_headers(target_lang)
782 857 if not markdown_table_headers:
783 858 logger.warning(
... ... @@ -978,9 +1053,13 @@ def call_llm(
978 1053 def parse_markdown_table(
979 1054 markdown_content: str,
980 1055 analysis_kind: str = "content",
  1056 + category_taxonomy_profile: Optional[str] = None,
981 1057 ) -> List[Dict[str, str]]:
982 1058 """解析markdown表格内容"""
983   - schema = _get_analysis_schema(analysis_kind)
  1059 + schema = _get_analysis_schema(
  1060 + analysis_kind,
  1061 + category_taxonomy_profile=category_taxonomy_profile,
  1062 + )
984 1063 lines = markdown_content.strip().split("\n")
985 1064 data = []
986 1065 data_started = False
... ... @@ -1025,8 +1104,12 @@ def _log_parsed_result_quality(
1025 1104 target_lang: str,
1026 1105 batch_num: int,
1027 1106 analysis_kind: str,
  1107 + category_taxonomy_profile: Optional[str] = None,
1028 1108 ) -> None:
1029   - schema = _get_analysis_schema(analysis_kind)
  1109 + schema = _get_analysis_schema(
  1110 + analysis_kind,
  1111 + category_taxonomy_profile=category_taxonomy_profile,
  1112 + )
1030 1113 expected = len(batch_data)
1031 1114 actual = len(parsed_results)
1032 1115 if actual != expected:
... ... @@ -1071,9 +1154,13 @@ def process_batch(
1071 1154 batch_num: int,
1072 1155 target_lang: str = "zh",
1073 1156 analysis_kind: str = "content",
  1157 + category_taxonomy_profile: Optional[str] = None,
1074 1158 ) -> List[Dict[str, Any]]:
1075 1159 """处理一个批次的数据"""
1076   - schema = _get_analysis_schema(analysis_kind)
  1160 + schema = _get_analysis_schema(
  1161 + analysis_kind,
  1162 + category_taxonomy_profile=category_taxonomy_profile,
  1163 + )
1077 1164 logger.info(f"\n{'#' * 80}")
1078 1165 logger.info(
1079 1166 "Processing Batch %s (%s items, kind=%s)",
... ... @@ -1087,6 +1174,7 @@ def process_batch(
1087 1174 batch_data,
1088 1175 target_lang=target_lang,
1089 1176 analysis_kind=analysis_kind,
  1177 + category_taxonomy_profile=category_taxonomy_profile,
1090 1178 )
1091 1179  
1092 1180 # 如果提示词创建失败(例如不支持的 target_lang),本次批次整体失败,不再继续调用 LLM
... ... @@ -1119,13 +1207,18 @@ def process_batch(
1119 1207 )
1120 1208  
1121 1209 # 解析结果
1122   - parsed_results = parse_markdown_table(raw_response, analysis_kind=analysis_kind)
  1210 + parsed_results = parse_markdown_table(
  1211 + raw_response,
  1212 + analysis_kind=analysis_kind,
  1213 + category_taxonomy_profile=category_taxonomy_profile,
  1214 + )
1123 1215 _log_parsed_result_quality(
1124 1216 batch_data,
1125 1217 parsed_results,
1126 1218 target_lang,
1127 1219 batch_num,
1128 1220 analysis_kind,
  1221 + category_taxonomy_profile,
1129 1222 )
1130 1223  
1131 1224 logger.info(f"\nParsed Results ({len(parsed_results)} items):")
... ... @@ -1190,6 +1283,7 @@ def analyze_products(
1190 1283 batch_size: Optional[int] = None,
1191 1284 tenant_id: Optional[str] = None,
1192 1285 analysis_kind: str = "content",
  1286 + category_taxonomy_profile: Optional[str] = None,
1193 1287 ) -> List[Dict[str, Any]]:
1194 1288 """
1195 1289 库调用入口:根据输入+语言,返回锚文本及各维度信息。
... ... @@ -1205,7 +1299,10 @@ def analyze_products(
1205 1299 if not products:
1206 1300 return []
1207 1301  
1208   - _get_analysis_schema(analysis_kind)
  1302 + _get_analysis_schema(
  1303 + analysis_kind,
  1304 + category_taxonomy_profile=category_taxonomy_profile,
  1305 + )
1209 1306 results_by_index: List[Optional[Dict[str, Any]]] = [None] * len(products)
1210 1307 uncached_items: List[Tuple[int, Dict[str, str]]] = []
1211 1308  
... ... @@ -1215,7 +1312,12 @@ def analyze_products(
1215 1312 uncached_items.append((idx, product))
1216 1313 continue
1217 1314  
1218   - cached = _get_cached_analysis_result(product, target_lang, analysis_kind)
  1315 + cached = _get_cached_analysis_result(
  1316 + product,
  1317 + target_lang,
  1318 + analysis_kind,
  1319 + category_taxonomy_profile=category_taxonomy_profile,
  1320 + )
1219 1321 if cached:
1220 1322 logger.info(
1221 1323 f"[analyze_products] Cache hit for title='{title[:50]}...', "
... ... @@ -1255,6 +1357,7 @@ def analyze_products(
1255 1357 batch_num=batch_num,
1256 1358 target_lang=target_lang,
1257 1359 analysis_kind=analysis_kind,
  1360 + category_taxonomy_profile=category_taxonomy_profile,
1258 1361 )
1259 1362  
1260 1363 for (original_idx, product), item in zip(batch_slice, batch_results):
... ... @@ -1266,7 +1369,13 @@ def analyze_products(
1266 1369 # 不缓存错误结果,避免放大临时故障
1267 1370 continue
1268 1371 try:
1269   - _set_cached_analysis_result(product, target_lang, item, analysis_kind)
  1372 + _set_cached_analysis_result(
  1373 + product,
  1374 + target_lang,
  1375 + item,
  1376 + analysis_kind,
  1377 + category_taxonomy_profile=category_taxonomy_profile,
  1378 + )
1270 1379 except Exception:
1271 1380 # 已在内部记录 warning
1272 1381 pass
... ... @@ -1293,6 +1402,7 @@ def analyze_products(
1293 1402 batch_num=batch_num,
1294 1403 target_lang=target_lang,
1295 1404 analysis_kind=analysis_kind,
  1405 + category_taxonomy_profile=category_taxonomy_profile,
1296 1406 )
1297 1407  
1298 1408 # 按 batch_num 回填,确保输出稳定(results_by_index 是按原始 input index 映射的)
... ... @@ -1307,7 +1417,13 @@ def analyze_products(
1307 1417 # 不缓存错误结果,避免放大临时故障
1308 1418 continue
1309 1419 try:
1310   - _set_cached_analysis_result(product, target_lang, item, analysis_kind)
  1420 + _set_cached_analysis_result(
  1421 + product,
  1422 + target_lang,
  1423 + item,
  1424 + analysis_kind,
  1425 + category_taxonomy_profile=category_taxonomy_profile,
  1426 + )
1311 1427 except Exception:
1312 1428 # 已在内部记录 warning
1313 1429 pass
... ...
indexer/taxonomy.md
1 1  
2   -服装大类的taxonomy
  2 +# Cross-Border E-commerce Core Categories 大类
  3 +
  4 +## 1. 3C
  5 +Phone accessories, computer peripherals, smart wearables, audio & video, smart home, gaming gear. 手机配件、电脑周边、智能穿戴、影音娱乐、智能家居、游戏设备。
  6 +
  7 +## 2. Bags 包
  8 +Handbags, backpacks, wallets, luggage, crossbody bags, tote bags. 手提包、双肩包、钱包、行李箱、斜挎包、托特包。
  9 +
  10 +## 3. Pet Supplies 宠物用品
  11 +Pet food, pet toys, pet care products, pet grooming, pet clothing, smart pet devices. 宠物食品、宠物玩具、宠物护理用品、宠物美容、宠物服装、智能宠物设备。
  12 +
  13 +## 4. Electronics 电子产品
  14 +Consumer electronics, home appliances, digital devices, cables & chargers, batteries, electronic components. 消费电子产品、家用电器、数码设备、线材充电器、电池、电子元器件。
  15 +
  16 +## 5. Clothing 服装
  17 +Women's wear, men's wear, kid's wear, underwear, outerwear, activewear. 女装、男装、童装、内衣、外套、运动服装。
  18 +
  19 +## 6. Outdoor 户外用品
  20 +Camping gear, hiking equipment, fishing supplies, outdoor clothing, travel accessories, survival tools. 露营装备、徒步用品、渔具、户外服装、旅行配件、求生工具。
  21 +
  22 +## 7. Home Appliances 家电/电器
  23 +Kitchen appliances, cleaning appliances, personal care appliances, heating & cooling, smart home devices. 厨房电器、清洁电器、个护电器、冷暖设备、智能家居设备。
  24 +
  25 +## 8. Home & Living 家居
  26 +Furniture, home textiles, lighting, kitchenware, storage, home decor. 家具、家纺、灯具、厨具、收纳、家居装饰。
  27 +
  28 +## 9. Wigs 假发
  29 +
  30 +## 10. Beauty & Cosmetics 美容美妆
  31 +Skincare, makeup, nail care, beauty tools, hair care, fragrances. 护肤品、彩妆、美甲、美容工具、护发、香水。
  32 +
  33 +## 11. Accessories 配饰
  34 +Jewelry, watches, belts, scarves, hats, sunglasses, hair accessories. 珠宝、手表、腰带、围巾、帽子、太阳镜、发饰。
  35 +
  36 +## 12. Toys 玩具
  37 +Educational toys, plush toys, action figures, puzzles, outdoor toys, DIY toys. 益智玩具、毛绒玩具、可动人偶、拼图、户外玩具、DIY玩具。
  38 +
  39 +## 13. Shoes 鞋子
  40 +Sneakers, boots, sandals, heels, flats, sports shoes. 运动鞋、靴子、凉鞋、高跟鞋、平底鞋、球鞋。
  41 +
  42 +## 14. Sports 运动产品
  43 +Fitness equipment, sports gear, team sports, racquet sports, water sports, cycling. 健身器材、运动装备、团队运动、球拍运动、水上运动、骑行。
  44 +
  45 +## 15. Others 其他
  46 +
  47 +# 各个大类的taxonomy
  48 +## 1. Clothing & Apparel 服装
3 49  
4 50 ### A. Product Classification
5 51  
... ...
tests/ci/test_service_api_contracts.py
... ... @@ -348,10 +348,12 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
348 348 def _fake_build_index_content_fields(
349 349 items: List[Dict[str, str]],
350 350 tenant_id: str | None = None,
351   - analysis_kinds: List[str] | None = None,
  351 + enrichment_scopes: List[str] | None = None,
  352 + category_taxonomy_profile: str = "apparel",
352 353 ):
353 354 assert tenant_id == "162"
354   - assert analysis_kinds == ["content", "taxonomy"]
  355 + assert enrichment_scopes == ["generic", "category_taxonomy"]
  356 + assert category_taxonomy_profile == "apparel"
355 357 return [
356 358 {
357 359 "id": p["spu_id"],
... ... @@ -376,6 +378,8 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
376 378 "/indexer/enrich-content",
377 379 json={
378 380 "tenant_id": "162",
  381 + "enrichment_scopes": ["generic", "category_taxonomy"],
  382 + "category_taxonomy_profile": "apparel",
379 383 "items": [
380 384 {"spu_id": "1001", "title": "T-shirt"},
381 385 {"spu_id": "1002", "title": "Toy"},
... ... @@ -385,7 +389,8 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
385 389 assert response.status_code == 200
386 390 data = response.json()
387 391 assert data["tenant_id"] == "162"
388   - assert data["analysis_kinds"] == ["content", "taxonomy"]
  392 + assert data["enrichment_scopes"] == ["generic", "category_taxonomy"]
  393 + assert data["category_taxonomy_profile"] == "apparel"
389 394 assert data["total"] == 2
390 395 assert len(data["results"]) == 2
391 396 assert data["results"][0]["spu_id"] == "1001"
... ... @@ -403,6 +408,52 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
403 408 }
404 409  
405 410  
  411 +def test_indexer_enrich_content_contract_accepts_deprecated_analysis_kinds(indexer_client: TestClient, monkeypatch):
  412 + import indexer.product_enrich as process_products
  413 +
  414 + seen: Dict[str, Any] = {}
  415 +
  416 + def _fake_build_index_content_fields(
  417 + items: List[Dict[str, str]],
  418 + tenant_id: str | None = None,
  419 + enrichment_scopes: List[str] | None = None,
  420 + category_taxonomy_profile: str = "apparel",
  421 + ):
  422 + seen["tenant_id"] = tenant_id
  423 + seen["enrichment_scopes"] = enrichment_scopes
  424 + seen["category_taxonomy_profile"] = category_taxonomy_profile
  425 + return [
  426 + {
  427 + "id": items[0]["spu_id"],
  428 + "qanchors": {},
  429 + "enriched_tags": {},
  430 + "enriched_attributes": [],
  431 + "enriched_taxonomy_attributes": [],
  432 + }
  433 + ]
  434 +
  435 + monkeypatch.setattr(process_products, "build_index_content_fields", _fake_build_index_content_fields)
  436 +
  437 + response = indexer_client.post(
  438 + "/indexer/enrich-content",
  439 + json={
  440 + "tenant_id": "162",
  441 + "analysis_kinds": ["taxonomy"],
  442 + "items": [{"spu_id": "1001", "title": "T-shirt"}],
  443 + },
  444 + )
  445 +
  446 + assert response.status_code == 200
  447 + data = response.json()
  448 + assert seen == {
  449 + "tenant_id": "162",
  450 + "enrichment_scopes": ["category_taxonomy"],
  451 + "category_taxonomy_profile": "apparel",
  452 + }
  453 + assert data["enrichment_scopes"] == ["category_taxonomy"]
  454 + assert data["category_taxonomy_profile"] == "apparel"
  455 +
  456 +
406 457 def test_indexer_documents_contract(indexer_client: TestClient):
407 458 """POST /indexer/documents: tenant_id + spu_ids, returns success/failed lists (no ES write)."""
408 459 response = indexer_client.post(
... ...
tests/test_process_products_batching.py
... ... @@ -18,8 +18,10 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch):
18 18 batch_num: int,
19 19 target_lang: str = "zh",
20 20 analysis_kind: str = "content",
  21 + category_taxonomy_profile=None,
21 22 ):
22 23 assert analysis_kind == "content"
  24 + assert category_taxonomy_profile is None
23 25 seen_batch_sizes.append(len(batch_data))
24 26 return [
25 27 {
... ... @@ -64,8 +66,10 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch):
64 66 batch_num: int,
65 67 target_lang: str = "zh",
66 68 analysis_kind: str = "content",
  69 + category_taxonomy_profile=None,
67 70 ):
68 71 assert analysis_kind == "content"
  72 + assert category_taxonomy_profile is None
69 73 seen_batch_sizes.append(len(batch_data))
70 74 return [
71 75 {
... ...
tests/test_product_enrich_partial_mode.py
... ... @@ -295,16 +295,36 @@ def test_analyze_products_uses_product_level_cache_across_batch_requests():
295 295 product.get("image_url", ""),
296 296 )
297 297  
298   - def fake_get_cached_analysis_result(product, target_lang, analysis_kind="content"):
  298 + def fake_get_cached_analysis_result(
  299 + product,
  300 + target_lang,
  301 + analysis_kind="content",
  302 + category_taxonomy_profile=None,
  303 + ):
299 304 assert analysis_kind == "content"
  305 + assert category_taxonomy_profile is None
300 306 return cache_store.get(_cache_key(product, target_lang))
301 307  
302   - def fake_set_cached_analysis_result(product, target_lang, result, analysis_kind="content"):
  308 + def fake_set_cached_analysis_result(
  309 + product,
  310 + target_lang,
  311 + result,
  312 + analysis_kind="content",
  313 + category_taxonomy_profile=None,
  314 + ):
303 315 assert analysis_kind == "content"
  316 + assert category_taxonomy_profile is None
304 317 cache_store[_cache_key(product, target_lang)] = result
305 318  
306   - def fake_process_batch(batch_data, batch_num, target_lang="zh", analysis_kind="content"):
  319 + def fake_process_batch(
  320 + batch_data,
  321 + batch_num,
  322 + target_lang="zh",
  323 + analysis_kind="content",
  324 + category_taxonomy_profile=None,
  325 + ):
307 326 assert analysis_kind == "content"
  327 + assert category_taxonomy_profile is None
308 328 process_calls.append(
309 329 {
310 330 "batch_num": batch_num,
... ... @@ -400,7 +420,7 @@ def test_analyze_products_reuses_cached_content_with_current_product_identity():
400 420 with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object(
401 421 product_enrich,
402 422 "_get_cached_analysis_result",
403   - wraps=lambda product, target_lang, analysis_kind="content": product_enrich._normalize_analysis_result(
  423 + wraps=lambda product, target_lang, analysis_kind="content", category_taxonomy_profile=None: product_enrich._normalize_analysis_result(
404 424 cached_result,
405 425 product=product,
406 426 target_lang=target_lang,
... ... @@ -443,8 +463,10 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output()
443 463 batch_size=None,
444 464 tenant_id=None,
445 465 analysis_kind="content",
  466 + category_taxonomy_profile=None,
446 467 ):
447 468 if analysis_kind == "taxonomy":
  469 + assert category_taxonomy_profile == "apparel"
448 470 return [
449 471 {
450 472 "id": products[0]["id"],
... ... @@ -478,6 +500,7 @@ def test_build_index_content_fields_maps_internal_tags_to_enriched_tags_output()
478 500 "style_aesthetic": "",
479 501 }
480 502 ]
  503 + assert category_taxonomy_profile == "apparel"
481 504 return [
482 505 {
483 506 "id": products[0]["id"],
... ...