Commit d7d48f5238c6c83665e359cda931dbc17dc748f8

Authored by tangwang
1 parent 62b7972c

改动(mapping + 灌入结构)

mappings/search_products.json:把原来的 title_zh/title_en/brief_zh/... 改成 按语言 key 的对象结构( /products/_doc/1 { "title": {"en":...} } )
同时在这些字段下 预置了全部 analyzer 语言:
arabic, armenian, basque, brazilian, bulgarian, catalan, chinese, cjk, czech, danish, dutch, english, finnish, french, galician, german, greek, hindi, hungarian, indonesian, italian, norwegian, persian, portuguese, romanian, russian, spanish, swedish, turkish, thai

实现为 type: object + properties,同时满足“按语言灌入”和“按语言 analyzer”。
索引灌入(全量/增量/transformer)已同步改完
indexer/document_transformer.py:输出从 title_zh/title_en/... 改为:
title: {<primary_lang>: 原文, en?: 翻译, zh?: 翻译}
brief/description/vendor 同理
category_path/category_name_text 也改为语言对象(避免查询侧继续依赖旧字段)
indexer/incremental_service.py:embedding 取值从 title_en/title_zh 改为从 title 对象里优先取 en,否则取 zh,否则取任一可用语言。
查询侧与配置、API/文档已同步
search/es_query_builder.py:查询字段统一改成点路径:title.zh / title.en / vendor.zh / vendor.zh.keyword / category_name_text.zh 等。
config/config.yaml:field boosts / indexes 里的字段名同步为新点路径。
API & formatter:
api/result_formatter.py 已支持新结构(并保留对旧 *_zh/_en 的兼容兜底)。
api/models.py、相关 docs/examples 里的 vendor_zh.keyword 等已更新为 vendor.zh.keyword。
文档/脚本:docs/、README.md、scripts/ 里所有旧字段名引用已批量替换为新结构。
CLAUDE.md
... ... @@ -256,12 +256,12 @@ The system uses centralized configuration through `config/config.yaml`:
256 256 {
257 257 "tenant_id": "keyword", // Multi-tenant isolation
258 258 "spu_id": "keyword", // Product identifier
259   - "title_zh/en": "text", // Multi-language titles
260   - "brief_zh/en": "text", // Short descriptions
261   - "description_zh/en": "text", // Detailed descriptions
262   - "vendor_zh/en": "text", // Supplier/brand with keyword subfield
263   - "category_path_zh/en": "text", // Hierarchical category paths
264   - "category_name_zh/en": "text", // Category names for search
  259 + "title.zh/en": "text", // Multi-language titles
  260 + "brief.zh/en": "text", // Short descriptions
  261 + "description.zh/en": "text", // Detailed descriptions
  262 + "vendor.zh/en": "text", // Supplier/brand with keyword subfield
  263 + "category_path.zh/en": "text", // Hierarchical category paths
  264 + "category_name_text.zh/en": "text", // Category names for search
265 265 "category1/2/3_name": "keyword", // Multi-level category filtering
266 266 "tags": "keyword", // Product tags
267 267 "specifications": "nested", // Product variants (color, size, etc.)
... ... @@ -318,11 +318,11 @@ The system uses centralized configuration through `config/config.yaml`:
318 318 **Field Boost Configuration**:
319 319 ```yaml
320 320 field_boosts:
321   - title_zh/en: 3.0 # Highest priority
322   - brief_zh/en: 1.5 # Medium priority
323   - description_zh/en: 1.0 # Lower priority
324   - vendor_zh/en: 1.5 # Brand emphasis
325   - category_path_zh/en: 1.5 # Category relevance
  321 + title.zh/en: 3.0 # Highest priority
  322 + brief.zh/en: 1.5 # Medium priority
  323 + description.zh/en: 1.0 # Lower priority
  324 + vendor.zh/en: 1.5 # Brand emphasis
  325 + category_path.zh/en: 1.5 # Category relevance
326 326 tags: 1.0 # Tag matching
327 327 ```
328 328  
... ...
README.md
... ... @@ -15,8 +15,8 @@
15 15 "query" : {
16 16 "dis_max" : {
17 17 "queries" : [
18   - {"match" : { "title_en" : xxx }},
19   - {"match" : { "title_zh" : xxx }},
  18 + {"match" : { "title.en" : xxx }},
  19 + {"match" : { "title.zh" : xxx }},
20 20 {"match" : { "title_xx" : xxx }}
21 21 ],
22 22 "tie_breaker" : 0.8
... ...
api/models.py
... ... @@ -86,7 +86,7 @@ class SearchRequest(BaseModel):
86 86 "examples": [
87 87 {
88 88 "category_name": ["手机", "电子产品"],
89   - "vendor_zh.keyword": "奇乐",
  89 + "vendor.zh.keyword": "奇乐",
90 90 "specifications": {"name": "颜色", "value": "白色"}
91 91 },
92 92 {
... ...
api/result_formatter.py
... ... @@ -32,13 +32,22 @@ class ResultFormatter:
32 32 lang = "en"
33 33  
34 34 def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]:
35   - """从 *_zh / *_en 字段中按语言选择一个值,若目标语言缺失则回退到另一种。"""
36   - zh_val = src.get(f"{base}_zh")
37   - en_val = src.get(f"{base}_en")
  35 + """
  36 + 从多语言对象字段中按语言选择一个值:
  37 + - 新结构: {base: {"zh": "...", "en": "...", ...}}
  38 + - 兼容旧结构: {base_zh: "...", base_en: "..."}
  39 + 若目标语言缺失则回退到另一种。
  40 + """
  41 + obj = src.get(base)
  42 + if isinstance(obj, dict):
  43 + zh_val = obj.get("zh")
  44 + en_val = obj.get("en")
  45 + else:
  46 + zh_val = src.get(f"{base}_zh")
  47 + en_val = src.get(f"{base}_en")
38 48 if lang == "zh":
39 49 return zh_val or en_val
40   - else:
41   - return en_val or zh_val
  50 + return en_val or zh_val
42 51  
43 52 for hit in es_hits:
44 53 source = hit.get('_source', {})
... ... @@ -60,7 +69,7 @@ class ResultFormatter:
60 69 description = pick_lang_field(source, "description")
61 70 vendor = pick_lang_field(source, "vendor")
62 71 category_path = pick_lang_field(source, "category_path")
63   - category_name = pick_lang_field(source, "category_name")
  72 + category_name = pick_lang_field(source, "category_name_text") or source.get("category_name")
64 73  
65 74 # Extract SKUs
66 75 skus = []
... ...
config/config.yaml
... ... @@ -15,20 +15,20 @@ es_settings:
15 15 # 只配置权重,不配置字段结构(字段结构由 mappings/search_products.json 定义)
16 16 field_boosts:
17 17 # 文本相关性字段
18   - title_zh: 3.0
19   - brief_zh: 1.5
20   - description_zh: 1.0
21   - vendor_zh: 1.5
22   - title_en: 3.0
23   - brief_en: 1.5
24   - description_en: 1.0
25   - vendor_en: 1.5
  18 + "title.zh": 3.0
  19 + "brief.zh": 1.5
  20 + "description.zh": 1.0
  21 + "vendor.zh": 1.5
  22 + "title.en": 3.0
  23 + "brief.en": 1.5
  24 + "description.en": 1.0
  25 + "vendor.en": 1.5
26 26  
27 27 # 分类相关字段
28   - category_path_zh: 1.5
29   - category_name_zh: 1.5
30   - category_path_en: 1.5
31   - category_name_en: 1.5
  28 + "category_path.zh": 1.5
  29 + "category_name_text.zh": 1.5
  30 + "category_path.en": 1.5
  31 + "category_name_text.en": 1.5
32 32  
33 33 # 标签和属性值字段
34 34 tags: 1.0
... ... @@ -42,33 +42,33 @@ indexes:
42 42 - name: "default"
43 43 label: "默认搜索"
44 44 fields:
45   - - "title_zh"
46   - - "brief_zh"
47   - - "description_zh"
48   - - "vendor_zh"
  45 + - "title.zh"
  46 + - "brief.zh"
  47 + - "description.zh"
  48 + - "vendor.zh"
49 49 - "tags"
50   - - "category_path_zh"
51   - - "category_name_zh"
  50 + - "category_path.zh"
  51 + - "category_name_text.zh"
52 52 - "option1_values"
53 53 boost: 1.0
54 54  
55 55 - name: "title"
56 56 label: "标题搜索"
57 57 fields:
58   - - "title_zh"
  58 + - "title.zh"
59 59 boost: 2.0
60 60  
61 61 - name: "vendor"
62 62 label: "品牌搜索"
63 63 fields:
64   - - "vendor_zh"
  64 + - "vendor.zh"
65 65 boost: 1.5
66 66  
67 67 - name: "category"
68 68 label: "类目搜索"
69 69 fields:
70   - - "category_path_zh"
71   - - "category_name_zh"
  70 + - "category_path.zh"
  71 + - "category_name_text.zh"
72 72 boost: 1.5
73 73  
74 74 - name: "tags"
... ...
config/utils.py
... ... @@ -17,7 +17,7 @@ def get_match_fields_for_index(config: SearchConfig, index_name: str = "default"
17 17 index_name: Name of the index domain (default: "default")
18 18  
19 19 Returns:
20   - List of field names with boost, e.g., ["title_zh^3.0", "brief_zh^1.5"]
  20 + List of field names with boost, e.g., ["title.zh^3.0", "brief.zh^1.5"]
21 21 """
22 22 # Find the index config
23 23 index_config = None
... ...
docs/MySQL到ES字段映射说明-业务版.md
... ... @@ -28,14 +28,14 @@
28 28  
29 29 | ES 字段 | 数据来源表 | 表中字段 | 转换说明 |
30 30 |---------|-----------|----------|----------|
31   -| `title_zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 |
32   -| `title_en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 |
33   -| `brief_zh` | SPU 表 | `brief` | 同上 |
34   -| `brief_en` | SPU 表 | `brief` | 同上 |
35   -| `description_zh` | SPU 表 | `description` | 同上 |
36   -| `description_en` | SPU 表 | `description` | 同上 |
37   -| `vendor_zh` | SPU 表 | `vendor` | 同上 |
38   -| `vendor_en` | SPU 表 | `vendor` | 同上 |
  31 +| `title.zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 |
  32 +| `title.en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 |
  33 +| `brief.zh` | SPU 表 | `brief` | 同上 |
  34 +| `brief.en` | SPU 表 | `brief` | 同上 |
  35 +| `description.zh` | SPU 表 | `description` | 同上 |
  36 +| `description.en` | SPU 表 | `description` | 同上 |
  37 +| `vendor.zh` | SPU 表 | `vendor` | 同上 |
  38 +| `vendor.en` | SPU 表 | `vendor` | 同上 |
39 39  
40 40 **翻译规则:**
41 41 - 根据租户配置的 `primary_language` 确定主语言
... ... @@ -104,7 +104,7 @@ category_ids = ["1", "2", "3"]
104 104  
105 105 | ES 字段 | 数据来源 | 转换说明 |
106 106 |---------|----------|----------|
107   -| `category_path_zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` |
  107 +| `category_path.zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` |
108 108 | `category1_name` | 类目名称列表[0] | 一级类目:`"电子产品"` |
109 109 | `category2_name` | 类目名称列表[1] | 二级类目:`"手机"` |
110 110 | `category3_name` | 类目名称列表[2] | 三级类目:`"iPhone"` |
... ... @@ -300,13 +300,13 @@ sku_weight_units = 去重后的重量单位列表
300 300  
301 301 | ES 字段 | 数据来源 | 转换说明 |
302 302 |---------|----------|----------|
303   -| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br/>优先使用 `title_en`,如果没有则使用 `title_zh` |
  303 +| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br/>优先使用 `title.en`,如果没有则使用 `title.zh` |
304 304  
305 305 **生成逻辑:**
306 306  
307 307 ```
308 308 如果启用向量搜索:
309   - 文本 = title_en 或 title_zh
  309 + 文本 = title.en 或 title.zh
310 310 向量 = 文本编码器.encode(文本)
311 311 title_embedding = 向量(1024 维浮点数组)
312 312 ```
... ... @@ -322,17 +322,17 @@ sku_weight_units = 去重后的重量单位列表
322 322 | **基础字段** |
323 323 | `tenant_id` | SPU | `tenant_id` | 租户ID |
324 324 | `spu_id` | SPU | `id` | 商品ID |
325   -| `title_zh/en` | SPU | `title` | 标题(多语言) |
326   -| `brief_zh/en` | SPU | `brief` | 简介(多语言) |
327   -| `description_zh/en` | SPU | `description` | 描述(多语言) |
328   -| `vendor_zh/en` | SPU | `vendor` | 品牌(多语言) |
  325 +| `title.zh/en` | SPU | `title` | 标题(多语言) |
  326 +| `brief.zh/en` | SPU | `brief` | 简介(多语言) |
  327 +| `description.zh/en` | SPU | `description` | 描述(多语言) |
  328 +| `vendor.zh/en` | SPU | `vendor` | 品牌(多语言) |
329 329 | `tags` | SPU | `tags` | 标签数组 |
330 330 | `image_url` | SPU | `image_src` | 主图URL |
331 331 | `sales` | SPU | `fake_sales` | 销量 |
332 332 | `create_time` | SPU | `create_time` | 创建时间 |
333 333 | `update_time` | SPU | `update_time` | 更新时间 |
334 334 | **类别字段** |
335   -| `category_path_zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 |
  335 +| `category_path.zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 |
336 336 | `category1_name` | SPU + 类目映射 | `category_path` → 类目名称[0] | 一级类目 |
337 337 | `category2_name` | SPU + 类目映射 | `category_path` → 类目名称[1] | 二级类目 |
338 338 | `category3_name` | SPU + 类目映射 | `category_path` → 类目名称[2] | 三级类目 |
... ... @@ -451,14 +451,14 @@ GET /search_products/_search
451 451 "_source": {
452 452 "tenant_id": "162",
453 453 "spu_id": "74174",
454   - "title_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
455   - "title_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
456   - "brief_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
457   - "brief_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
458   - "description_zh": "<p>实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】</p>",
459   - "description_en": "<p>Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]</p",
460   - "vendor_zh": "顺达",
461   - "vendor_en": "Shunda",
  454 + "title.zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
  455 + "title.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
  456 + "brief.zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
  457 + "brief.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
  458 + "description.zh": "<p>实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】</p>",
  459 + "description.en": "<p>Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]</p",
  460 + "vendor.zh": "顺达",
  461 + "vendor.en": "Shunda",
462 462 "title_embedding": [
463 463 -0.005125104915350676,
464 464 ...
... ... @@ -468,12 +468,12 @@ GET /search_products/_search
468 468 "魔方",
469 469 "ShunDa"
470 470 ],
471   - "category_path_zh": "玩具/汽车",
472   - "category_path_en": null,
  471 + "category_path.zh": "玩具/汽车",
  472 + "category_path.en": null,
473 473 "category1_name": "玩具",
474 474 "category2_name": "汽车",
475   - "category_name_zh": "汽车",
476   - "category_name_en": null,
  475 + "category_name_text.zh": "汽车",
  476 + "category_name_text.en": null,
477 477 "category_name": "汽车",
478 478 "category_id": "593389466647880832",
479 479 "category_level": 2,
... ... @@ -599,7 +599,7 @@ category_ids ["1", "2", "3"]
599 599 ↓ [通过映射表转换]
600 600 category_names ["电子产品", "手机", "iPhone"]
601 601 ↓ [构建字段]
602   -category_path_zh: "电子产品/手机/iPhone"
  602 +category_path.zh: "电子产品/手机/iPhone"
603 603 category1_name: "电子产品"
604 604 category2_name: "手机"
605 605 category3_name: "iPhone"
... ... @@ -644,12 +644,12 @@ sku_prices: [99.99, 109.99, 129.99]
644 644 |--------|---------|------|
645 645 | `id` | `spu_id` | 商品ID |
646 646 | `tenant_id` | `tenant_id` | 租户ID |
647   -| `title` | `title_zh/en` | 标题 |
648   -| `brief` | `brief_zh/en` | 简介 |
649   -| `description` | `description_zh/en` | 描述 |
650   -| `vendor` | `vendor_zh/en` | 品牌 |
  647 +| `title` | `title.zh/en` | 标题 |
  648 +| `brief` | `brief.zh/en` | 简介 |
  649 +| `description` | `description.zh/en` | 描述 |
  650 +| `vendor` | `vendor.zh/en` | 品牌 |
651 651 | `tags` | `tags` | 标签 |
652   -| `category_path` | `category_path_zh` | 类目路径 |
  652 +| `category_path` | `category_path.zh` | 类目路径 |
653 653 | `category_id` | `category_id` | 类目ID |
654 654 | `category_level` | `category_level` | 类目层级 |
655 655 | `image_src` | `image_url` | 主图 |
... ...
docs/MySQL到ES文档映射说明.md
... ... @@ -217,10 +217,10 @@ WHERE deleted = 0 AND category_id IS NOT NULL
217 217 文本字段支持中英文双语,根据租户配置进行自动翻译。
218 218  
219 219 **字段列表:**
220   -- `title_zh` / `title_en`
221   -- `brief_zh` / `brief_en`
222   -- `description_zh` / `description_en`
223   -- `vendor_zh` / `vendor_en`
  220 +- `title.zh` / `title.en`
  221 +- `brief.zh` / `brief.en`
  222 +- `description.zh` / `description.en`
  223 +- `vendor.zh` / `vendor.en`
224 224  
225 225 **填充逻辑:**
226 226  
... ... @@ -244,8 +244,8 @@ WHERE deleted = 0 AND category_id IS NOT NULL
244 244 translate_to_en=translate_to_en,
245 245 translate_to_zh=translate_to_zh,
246 246 )
247   - doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)
248   - doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None)
  247 + doc['title.zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)
  248 + doc['title.en'] = translations.get('en') or (title_text if primary_lang == 'en' else None)
249 249 ```
250 250  
251 251 **代码位置:** `indexer/document_transformer.py:168-298`
... ... @@ -340,7 +340,7 @@ for cid in category_ids:
340 340 if category_names:
341 341 # 构建类目路径字符串
342 342 category_path_str = '/'.join(category_names)
343   - doc['category_path_zh'] = category_path_str # 例如:"电子产品/手机/iPhone"
  343 + doc['category_path.zh'] = category_path_str # 例如:"电子产品/手机/iPhone"
344 344  
345 345 # 填充分层类目名称
346 346 if len(category_names) > 0:
... ... @@ -358,7 +358,7 @@ if category_names:
358 358 ```python
359 359 elif pd.notna(spu_row.get('category')):
360 360 category = str(spu_row['category'])
361   - doc['category_name_zh'] = category
  361 + doc['category_name_text.zh'] = category
362 362  
363 363 # 尝试从 category 字段解析多级分类(如果包含 "/")
364 364 if '/' in category:
... ... @@ -375,8 +375,8 @@ elif pd.notna(spu_row.get('category')):
375 375  
376 376 | ES 字段 | 类型 | 说明 |
377 377 |---------|------|------|
378   -| `category_path_zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") |
379   -| `category_path_en` | text | 类目路径英文(暂未实现) |
  378 +| `category_path.zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") |
  379 +| `category_path.en` | text | 类目路径英文(暂未实现) |
380 380 | `category1_name` | keyword | 一级类目名称 |
381 381 | `category2_name` | keyword | 二级类目名称 |
382 382 | `category3_name` | keyword | 三级类目名称 |
... ... @@ -673,7 +673,7 @@ if enable_embedding and encoder and documents:
673 673 title_texts = []
674 674 title_doc_indices = []
675 675 for i, (_, doc) in enumerate(documents):
676   - title_text = doc.get("title_en") or doc.get("title_zh")
  676 + title_text = doc.get("title.en") or doc.get("title.zh")
677 677 if title_text and str(title_text).strip():
678 678 title_texts.append(str(title_text))
679 679 title_doc_indices.append(i)
... ... @@ -858,16 +858,16 @@ spu_df["_is_deleted"] = spu_df["deleted"].apply(_is_deleted_value)
858 858 {
859 859 "tenant_id": "1",
860 860 "spu_id": "12345",
861   - "title_zh": "iPhone 15 Pro Max",
862   - "title_en": "iPhone 15 Pro Max",
863   - "brief_zh": "最新款 iPhone",
864   - "brief_en": "Latest iPhone",
865   - "description_zh": "详细描述...",
866   - "description_en": "Detailed description...",
867   - "vendor_zh": "Apple",
868   - "vendor_en": "Apple",
  861 + "title.zh": "iPhone 15 Pro Max",
  862 + "title.en": "iPhone 15 Pro Max",
  863 + "brief.zh": "最新款 iPhone",
  864 + "brief.en": "Latest iPhone",
  865 + "description.zh": "详细描述...",
  866 + "description.en": "Detailed description...",
  867 + "vendor.zh": "Apple",
  868 + "vendor.en": "Apple",
869 869 "tags": ["手机", "智能手机", "Apple"],
870   - "category_path_zh": "电子产品/手机/iPhone",
  870 + "category_path.zh": "电子产品/手机/iPhone",
871 871 "category1_name": "电子产品",
872 872 "category2_name": "手机",
873 873 "category3_name": "iPhone",
... ...
docs/Search-API-Examples.md
... ... @@ -132,7 +132,7 @@ curl -X POST "http://localhost:6002/search/" \
132 132 "language": "zh",
133 133 "filters": {
134 134 "category_name": "手机",
135   - "vendor_zh.keyword": "奇乐"
  135 + "vendor.zh.keyword": "奇乐"
136 136 }
137 137 }'
138 138 ```
... ... @@ -294,7 +294,7 @@ curl -X POST "http://localhost:6002/search/" \
294 294 "language": "zh",
295 295 "filters": {
296 296 "category_name": ["手机", "电子产品"],
297   - "vendor_zh.keyword": "品牌A"
  297 + "vendor.zh.keyword": "品牌A"
298 298 },
299 299 "range_filters": {
300 300 "min_price": {
... ...
docs/基础配置指南.md
... ... @@ -26,12 +26,12 @@
26 26 - `create_time`, `update_time` (date) - 时间字段
27 27  
28 28 #### 多语言文本字段
29   -- `title_zh`, `title_en` (text) - 标题(中英文)
30   -- `brief_zh`, `brief_en` (text) - 短描述(中英文)
31   -- `description_zh`, `description_en` (text) - 详细描述(中英文)
32   -- `vendor_zh`, `vendor_en` (text) - 供应商/品牌(中英文,含keyword子字段)
33   -- `category_path_zh`, `category_path_en` (text) - 类目路径(中英文)
34   -- `category_name_zh`, `category_name_en` (text) - 类目名称(中英文)
  29 +- `title.zh`, `title.en` (text) - 标题(中英文)
  30 +- `brief.zh`, `brief.en` (text) - 短描述(中英文)
  31 +- `description.zh`, `description.en` (text) - 详细描述(中英文)
  32 +- `vendor.zh`, `vendor.en` (text) - 供应商/品牌(中英文,含keyword子字段)
  33 +- `category_path.zh`, `category_path.en` (text) - 类目路径(中英文)
  34 +- `category_name_text.zh`, `category_name_text.en` (text) - 类目名称(中英文)
35 35  
36 36 #### 类目字段
37 37 - `category_id` (keyword) - 类目ID
... ... @@ -64,12 +64,12 @@
64 64 ### 文本召回字段
65 65  
66 66 默认同时搜索以下字段(中英文都包含):
67   -- `title_zh^3.0`, `title_en^3.0`
68   -- `brief_zh^1.5`, `brief_en^1.5`
69   -- `description_zh^1.0`, `description_en^1.0`
70   -- `vendor_zh^1.5`, `vendor_en^1.5`
71   -- `category_path_zh^1.5`, `category_path_en^1.5`
72   -- `category_name_zh^1.5`, `category_name_en^1.5`
  67 +- `title.zh^3.0`, `title.en^3.0`
  68 +- `brief.zh^1.5`, `brief.en^1.5`
  69 +- `description.zh^1.0`, `description.en^1.0`
  70 +- `vendor.zh^1.5`, `vendor.en^1.5`
  71 +- `category_path.zh^1.5`, `category_path.en^1.5`
  72 +- `category_name_text.zh^1.5`, `category_name_text.en^1.5`
73 73 - `tags^1.0`
74 74  
75 75 ### 查询架构
... ... @@ -126,12 +126,12 @@
126 126 - `language="en"`: 优先返回 `*_en` 字段,如果为空则回退到 `*_zh` 字段
127 127  
128 128 映射规则:
129   -- `title_zh/en` → `title`
130   -- `brief_zh/en` → `brief`
131   -- `description_zh/en` → `description`
132   -- `vendor_zh/en` → `vendor`
133   -- `category_path_zh/en` → `category_path`
134   -- `category_name_zh/en` → `category_name`
  129 +- `title.zh/en` → `title`
  130 +- `brief.zh/en` → `brief`
  131 +- `description.zh/en` → `description`
  132 +- `vendor.zh/en` → `vendor`
  133 +- `category_path.zh/en` → `category_path`
  134 +- `category_name_text.zh/en` → `category_name`
135 135  
136 136 ## 配置修改
137 137  
... ...
docs/常用查询 - ES.md
... ... @@ -22,7 +22,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_
22 22  
23 23 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
24 24 "size": 100,
25   - "_source": ["title_zh", "title_en"],
  25 + "_source": ["title"],
26 26 "query": {
27 27 "bool": {
28 28 "filter": [
... ... @@ -34,13 +34,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
34 34  
35 35 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
36 36 "size": 1,
37   - "_source": ["title_zh", "title_en"],
  37 + "_source": ["title"],
38 38 "query": {
39 39 "bool": {
40 40 "must": [
41 41 {
42 42 "match": {
43   - "title_zh": {
  43 + "title.zh": {
44 44 "query": "裙子"
45 45 }
46 46 }
... ... @@ -54,7 +54,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
54 54 }'
55 55  
56 56 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{
57   - "analyzer": "query_ansj",
  57 + "analyzer": "icu_analyzer",
58 58 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
59 59 }'
60 60  
... ... @@ -73,13 +73,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
73 73 "multi_match": {
74 74 "_name": "base_query",
75 75 "fields": [
76   - "title_zh^3.0",
77   - "brief_zh^1.5",
78   - "description_zh",
79   - "vendor_zh^1.5",
  76 + "title.zh^3.0",
  77 + "brief.zh^1.5",
  78 + "description.zh",
  79 + "vendor.zh^1.5",
80 80 "tags",
81   - "category_path_zh^1.5",
82   - "category_name_zh^1.5",
  81 + "category_path.zh^1.5",
  82 + "category_name_text.zh^1.5",
83 83 "option1_values^0.5"
84 84 ],
85 85 "minimum_should_match": "75%",
... ... @@ -110,13 +110,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
110 110 "multi_match": {
111 111 "_name": "base_query",
112 112 "fields": [
113   - "title_zh^3.0",
114   - "brief_zh^1.5",
115   - "description_zh",
116   - "vendor_zh^1.5",
  113 + "title.zh^3.0",
  114 + "brief.zh^1.5",
  115 + "description.zh",
  116 + "vendor.zh^1.5",
117 117 "tags",
118   - "category_path_zh^1.5",
119   - "category_name_zh^1.5",
  118 + "category_path.zh^1.5",
  119 + "category_name_text.zh^1.5",
120 120 "option1_values^0.5"
121 121 ],
122 122 "minimum_should_match": "75%",
... ... @@ -271,7 +271,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
271 271 "size": 1,
272 272 "_source": [
273 273 "spu_id",
274   - "title_zh",
  274 + "title",
275 275 "category1_name",
276 276 "category2_name",
277 277 "category3_name",
... ... @@ -552,7 +552,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
552 552 }
553 553 },
554 554 "size": 1,
555   - "_source": ["spu_id", "title_zh", "specifications"]
  555 + "_source": ["spu_id", "title", "specifications"]
556 556 }'
557 557  
558 558 ## 4. 统计查询
... ... @@ -597,7 +597,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
597 597 }
598 598 },
599 599 "size": 10,
600   - "_source": ["spu_id", "title_zh", "category_name_zh", "category_path_zh"]
  600 + "_source": ["spu_id", "title", "category_name_text", "category_path"]
601 601 }'
602 602  
603 603 ### 5.2 查找有option但没有specifications的文档(数据转换问题)
... ... @@ -614,7 +614,32 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
614 614 }
615 615 },
616 616 "size": 10,
617   - "_source": ["spu_id", "title_zh", "option1_name", "option2_name", "option3_name", "specifications"]
  617 + "_source": ["spu_id", "title", "option1_name", "option2_name", "option3_name", "specifications"]
618 618 }'
619 619  
620 620  
  621 +重排序:
  622 +GET /search_products/_search
  623 +{
  624 + "query": {
  625 + "match": {
  626 + "title.en": {
  627 + "query": "quick brown fox",
  628 + "minimum_should_match": "90%"
  629 + }
  630 + }
  631 + },
  632 + "rescore": {
  633 + "window_size": 50,
  634 + "query": {
  635 + "rescore_query": {
  636 + "match_phrase": {
  637 + "title.en": {
  638 + "query": "quick brown fox",
  639 + "slop": 50
  640 + }
  641 + }
  642 + }
  643 + }
  644 + }
  645 +}
... ...
docs/搜索API对接指南.md
... ... @@ -203,7 +203,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
203 203 "category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个
204 204 "category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个
205 205 "category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个
206   - "vendor_zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个
  206 + "vendor.zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个
207 207 "tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个
208 208 // specifications 嵌套过滤(特殊格式)
209 209 "specifications": {
... ... @@ -274,7 +274,7 @@ curl -X POST &quot;http://120.76.41.98:6002/search/&quot; \
274 274 - `category_name`: 类目名称
275 275 - `category1_name`, `category2_name`, `category3_name`: 多级类目
276 276 - `category_id`: 类目ID
277   -- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段)
  277 +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
278 278 - `tags`: 标签(keyword类型,支持数组)
279 279 - `option1_name`, `option2_name`, `option3_name`: 选项名称
280 280 - `specifications`: 规格过滤(嵌套字段,格式见上文)
... ... @@ -602,7 +602,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5"
602 602 {
603 603 "id": "12345",
604 604 "source": {
605   - "title_zh": "芭比时尚娃娃",
  605 + "title": {
  606 + "zh": "芭比时尚娃娃"
  607 + },
606 608 "min_price": 89.99,
607 609 "category1_name": "玩具"
608 610 }
... ... @@ -739,7 +741,7 @@ curl &quot;http://localhost:6002/search/12345&quot;
739 741 | 字段 | 类型 | 说明 |
740 742 |------|------|------|
741 743 | `spu_id` | string | SPU ID |
742   -| `title` | string | 商品标题(根据language参数自动选择title_zh或title_en) |
  744 +| `title` | string | 商品标题(根据language参数自动选择 `title.zh` 或 `title.en`) |
743 745 | `brief` | string | 商品短描述(根据language参数自动选择) |
744 746 | `description` | string | 商品详细描述(根据language参数自动选择) |
745 747 | `vendor` | string | 供应商/品牌(根据language参数自动选择) |
... ... @@ -1088,7 +1090,9 @@ curl -X POST &quot;http://localhost:6004/indexer/index&quot; \
1088 1090 "document": {
1089 1091 "tenant_id": "162",
1090 1092 "spu_id": "123",
1091   - "title_zh": "商品标题",
  1093 + "title": {
  1094 + "zh": "商品标题"
  1095 + },
1092 1096 ...
1093 1097 }
1094 1098 },
... ... @@ -1298,7 +1302,7 @@ curl -X GET &quot;http://localhost:6004/indexer/health&quot;
1298 1302 "size": 20,
1299 1303 "language": "zh",
1300 1304 "filters": {
1301   - "vendor_zh.keyword": ["品牌A", "品牌B"]
  1305 + "vendor.zh.keyword": ["品牌A", "品牌B"]
1302 1306 },
1303 1307 "range_filters": {
1304 1308 "min_price": {
... ... @@ -1505,12 +1509,12 @@ curl -X GET &quot;http://localhost:6004/indexer/health&quot;
1505 1509 |--------|------|------|
1506 1510 | `tenant_id` | keyword | 租户ID(多租户隔离) |
1507 1511 | `spu_id` | keyword | SPU ID |
1508   -| `title_zh`, `title_en` | text | 商品标题(中英文) |
1509   -| `brief_zh`, `brief_en` | text | 商品短描述(中英文) |
1510   -| `description_zh`, `description_en` | text | 商品详细描述(中英文) |
1511   -| `vendor_zh`, `vendor_en` | text | 供应商/品牌(中英文,含keyword子字段) |
1512   -| `category_path_zh`, `category_path_en` | text | 类目路径(中英文,用于搜索) |
1513   -| `category_name_zh`, `category_name_en` | text | 类目名称(中英文,用于搜索) |
  1512 +| `title.<lang>` | object/text | 商品标题(多语言对象,如 `title.zh`, `title.en`) |
  1513 +| `brief.<lang>` | object/text | 商品短描述(多语言对象,如 `brief.zh`, `brief.en`) |
  1514 +| `description.<lang>` | object/text | 商品详细描述(多语言对象,如 `description.zh`, `description.en`) |
  1515 +| `vendor.<lang>` | object/text | 供应商/品牌(多语言对象,且带 keyword 子字段,如 `vendor.zh.keyword`) |
  1516 +| `category_path.<lang>` | object/text | 类目路径(多语言对象,用于搜索,如 `category_path.zh`) |
  1517 +| `category_name_text.<lang>` | object/text | 类目名称(多语言对象,用于搜索,如 `category_name_text.zh`) |
1514 1518 | `category_id` | keyword | 类目ID |
1515 1519 | `category_name` | keyword | 类目名称(用于过滤) |
1516 1520 | `category_level` | integer | 类目层级 |
... ... @@ -1552,7 +1556,7 @@ curl -X GET &quot;http://localhost:6004/indexer/health&quot;
1552 1556 - `category_name`: 类目名称
1553 1557 - `category1_name`, `category2_name`, `category3_name`: 多级类目
1554 1558 - `category_id`: 类目ID
1555   -- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段)
  1559 +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
1556 1560 - `tags`: 标签(keyword类型)
1557 1561 - `option1_name`, `option2_name`, `option3_name`: 选项名称
1558 1562 - `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解))
... ...
docs/搜索API速查表.md
... ... @@ -19,7 +19,7 @@ POST /search/
19 19 "filters": {
20 20 "category_name": "手机", // 单值
21 21 "category1_name": "服装", // 一级类目
22   - "vendor_zh.keyword": ["奇乐", "品牌A"], // 多值(OR)
  22 + "vendor.zh.keyword": ["奇乐", "品牌A"], // 多值(OR)
23 23 "tags": "手机", // 标签
24 24 // specifications 嵌套过滤
25 25 "specifications": {
... ...
docs/相关性检索优化说明.md
... ... @@ -20,7 +20,7 @@
20 20 {
21 21 "multi_match": {
22 22 "query": "戏水动物",
23   - "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
  23 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
24 24 "minimum_should_match": "67%",
25 25 "tie_breaker": 0.9,
26 26 "boost": 1,
... ... @@ -40,7 +40,7 @@
40 40 {
41 41 "multi_match": {
42 42 "_name": "base_query",
43   - "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
  43 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
44 44 "minimum_should_match": "75%",
45 45 "operator": "AND",
46 46 "query": "戏水动物",
... ... @@ -51,7 +51,7 @@
51 51 "multi_match": {
52 52 "_name": "base_query_trans_en",
53 53 "boost": 0.4,
54   - "fields": ["title_en^3.0", ...],
  54 + "fields": ["title.en^3.0", ...],
55 55 "minimum_should_match": "75%",
56 56 "operator": "AND",
57 57 "query": "water sports",
... ... @@ -61,7 +61,7 @@
61 61 {
62 62 "multi_match": {
63 63 "query": "戏水动物",
64   - "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
  64 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
65 65 "type": "phrase",
66 66 "slop": 2,
67 67 "boost": 1.0,
... ... @@ -71,7 +71,7 @@
71 71 {
72 72 "multi_match": {
73 73 "query": "戏水 动物",
74   - "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
  74 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
75 75 "operator": "AND",
76 76 "tie_breaker": 0.9,
77 77 "boost": 0.1,
... ...
docs/系统设计文档.md
... ... @@ -31,18 +31,18 @@
31 31 {
32 32 "tenant_id": "1",
33 33 "spu_id": "123",
34   - "title_zh": "蓝牙耳机",
35   - "title_en": "Bluetooth Headphones",
36   - "brief_zh": "高品质蓝牙耳机",
37   - "brief_en": "High-quality Bluetooth headphones",
  34 + "title.zh": "蓝牙耳机",
  35 + "title.en": "Bluetooth Headphones",
  36 + "brief.zh": "高品质蓝牙耳机",
  37 + "brief.en": "High-quality Bluetooth headphones",
38 38 "category_name": "电子产品",
39   - "category_path_zh": "电子产品/音频设备/耳机",
40   - "category_path_en": "Electronics/Audio/Headphones",
  39 + "category_path.zh": "电子产品/音频设备/耳机",
  40 + "category_path.en": "Electronics/Audio/Headphones",
41 41 "category1_name": "电子产品",
42 42 "category2_name": "音频设备",
43 43 "category3_name": "耳机",
44   - "vendor_zh": "品牌A",
45   - "vendor_en": "Brand A",
  44 + "vendor.zh": "品牌A",
  45 + "vendor.en": "Brand A",
46 46 "min_price": 199.99,
47 47 "max_price": 299.99,
48 48 "option1_name": "color",
... ... @@ -93,7 +93,7 @@
93 93 - **查询配置硬编码**:查询相关配置(字段 boost、查询域等)硬编码在 `search/query_config.py`
94 94  
95 95 **索引结构特点**:
96   -1. **多语言字段**:所有文本字段支持中英文(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`)
  96 +1. **多语言字段**:所有文本字段支持中英文(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`)
97 97 2. **嵌套字段**:
98 98 - `skus`: SKU 嵌套数组(包含价格、库存、选项值等)
99 99 - `specifications`: 规格嵌套数组(包含 name、value、sku_id)
... ... @@ -125,12 +125,12 @@
125 125 - `create_time`, `update_time` (date): 创建和更新时间
126 126  
127 127 #### 多语言文本字段
128   -- `title_zh/en` (text): 标题(中英文)
129   -- `brief_zh/en` (text): 短描述(中英文)
130   -- `description_zh/en` (text): 详细描述(中英文)
131   -- `vendor_zh/en` (text): 供应商/品牌(中英文)
132   -- `category_path_zh/en` (text): 类目路径(中英文)
133   -- `category_name_zh/en` (text): 类目名称(中英文)
  128 +- `title.zh/en` (text): 标题(中英文)
  129 +- `brief.zh/en` (text): 短描述(中英文)
  130 +- `description.zh/en` (text): 详细描述(中英文)
  131 +- `vendor.zh/en` (text): 供应商/品牌(中英文)
  132 +- `category_path.zh/en` (text): 类目路径(中英文)
  133 +- `category_name_text.zh/en` (text): 类目名称(中英文)
134 134  
135 135 **分析器配置**:
136 136 - 中文字段:`hanlp_index`(索引时)/ `hanlp_standard`(查询时)
... ... @@ -282,7 +282,7 @@ indexes:
282 282 2. **数据转换**(`indexer/spu_transformer.py`):
283 283 - 按`spu_id`和`tenant_id`关联SPU和SKU数据
284 284 - **多语言字段映射**:
285   - - MySQL的`title` → ES的`title_zh`(英文字段设为空)
  285 + - MySQL的`title` → ES的`title.zh`(英文字段设为空)
286 286 - 其他文本字段类似处理
287 287 - **分类字段映射**:
288 288 - 从SPU表的`category_path`解析多级类目(`category1_name`, `category2_name`, `category3_name`)
... ... @@ -483,7 +483,7 @@ laptop AND (gaming OR professional) ANDNOT cheap
483 483 - 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系
484 484 - 使用ES的`nested`查询实现
485 485 - **text_recall**: 文本相关性召回
486   - - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`)
  486 + - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`)
487 487 - 使用 `multi_match` 查询,支持字段 boost
488 488 - 中文字段使用中文分词器,英文字段使用英文分析器
489 489 - **embedding_recall**: 向量召回(KNN)
... ... @@ -503,8 +503,8 @@ laptop AND (gaming OR professional) ANDNOT cheap
503 503 "multi_match": {
504 504 "query": "手机",
505 505 "fields": [
506   - "title_zh^3.0", "title_en^3.0",
507   - "brief_zh^1.5", "brief_en^1.5",
  506 + "title.zh^3.0", "title.en^3.0",
  507 + "brief.zh^1.5", "brief.en^1.5",
508 508 ...
509 509 ]
510 510 }
... ...
docs/系统设计文档v1.md
... ... @@ -424,7 +424,7 @@ laptop AND (gaming OR professional) ANDNOT cheap
424 424 - **结构**: `filters AND (text_recall OR embedding_recall)`
425 425 - **filters**: 前端传递的过滤条件(永远起作用,放在 `filter` 中)
426 426 - **text_recall**: 文本相关性召回
427   - - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`)
  427 + - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`)
428 428 - 使用 `multi_match` 查询,支持字段 boost
429 429 - **embedding_recall**: 向量召回(KNN)
430 430 - 使用 `title_embedding` 字段进行 KNN 搜索
... ... @@ -443,8 +443,8 @@ laptop AND (gaming OR professional) ANDNOT cheap
443 443 "multi_match": {
444 444 "query": "手机",
445 445 "fields": [
446   - "title_zh^3.0", "title_en^3.0",
447   - "brief_zh^1.5", "brief_en^1.5",
  446 + "title.zh^3.0", "title.en^3.0",
  447 + "brief.zh^1.5", "brief.en^1.5",
448 448 ...
449 449 ]
450 450 }
... ...
docs/索引字段说明v1.md
... ... @@ -539,7 +539,7 @@ title brief description seo_title seo_description seo_keywords vendor vendor_key
539 539  
540 540 2. tenant - 数据灌入:
541 541 对每个tenant设置一个语言,作为tenant的一个基本配置。
542   -写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title_en)
  542 +写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title.en)
543 543 查询的时候,将query转为商家所用语言,并到对应的field去查。
544 544  
545 545  
... ...
docs/索引字段说明v2-mapping结构.md
... ... @@ -16,22 +16,22 @@
16 16 },
17 17  
18 18 // 文本相关性相关字段
19   - "title_zh": {
  19 + "title.zh": {
20 20 "type": "text",
21 21 "analyzer": "hanlp_index",
22 22 "search_analyzer": "hanlp_standard"
23 23 },
24   - "brief_zh": {
  24 + "brief.zh": {
25 25 "type": "text",
26 26 "analyzer": "hanlp_index",
27 27 "search_analyzer": "hanlp_standard"
28 28 },
29   - "description_zh": {
  29 + "description.zh": {
30 30 "type": "text",
31 31 "analyzer": "hanlp_index",
32 32 "search_analyzer": "hanlp_standard"
33 33 },
34   - "vendor_zh": {
  34 + "vendor.zh": {
35 35 "type": "text",
36 36 "analyzer": "hanlp_index",
37 37 "search_analyzer": "hanlp_standard",
... ... @@ -43,23 +43,23 @@
43 43 }
44 44 },
45 45  
46   - "title_en": {
  46 + "title.en": {
47 47 "type": "text",
48 48 "analyzer": "english",
49 49 "search_analyzer": "english",
50 50 },
51   - "brief_en": {
  51 + "brief.en": {
52 52 "type": "text",
53 53 "analyzer": "english",
54 54 "search_analyzer": "english",
55 55  
56 56 },
57   - "description_en": {
  57 + "description.en": {
58 58 "type": "text",
59 59 "analyzer": "english",
60 60 "search_analyzer": "english",
61 61 },
62   - "vendor_en": {
  62 + "vendor.en": {
63 63 "type": "text",
64 64 "analyzer": "english",
65 65 "search_analyzer": "english",
... ... @@ -103,22 +103,22 @@
103 103 },
104 104  
105 105 // 分类相关
106   - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算
  106 + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
107 107 "type": "text",
108 108 "analyzer": "hanlp_index",
109 109 "search_analyzer": "hanlp_standard"
110 110 },
111   - "category_path_en": { // 提供模糊查询功能,辅助相关性计算
  111 + "category_path.en": { // 提供模糊查询功能,辅助相关性计算
112 112 "type": "text",
113 113 "analyzer": "english",
114 114 "search_analyzer": "english"
115 115 },
116   - "category_name_zh": { // 提供模糊查询功能,辅助相关性计算
  116 + "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
117 117 "type": "text",
118 118 "analyzer": "hanlp_index",
119 119 "search_analyzer": "hanlp_standard"
120 120 },
121   - "category_name_en": { // 提供模糊查询功能,辅助相关性计算
  121 + "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
122 122 "type": "text",
123 123 "analyzer": "english",
124 124 "search_analyzer": "english"
... ...
docs/索引字段说明v2-plan.md
... ... @@ -10,7 +10,7 @@
10 10  
11 11 #### 1.1 多语言文本字段
12 12  
13   -- 为文本字段添加中英文双字段支持(title_zh/title_en, brief_zh/brief_en, description_zh/description_en, vendor_zh/vendor_en)
  13 +- 为文本字段添加中英文双字段支持(title.zh/title.en, brief.zh/brief.en, description.zh/description.en, vendor.zh/vendor.en)
14 14 - 中文字段使用 `index_ansj`/`query_ansj` 分析器(对应文档中的hanlp_index/hanlp_standard)
15 15 - 英文字段使用 `english` 分析器
16 16 - **暂时只填充中文字段,英文字段设为空**(不需要语言检测,每个tenant的语言预先知道)
... ... @@ -28,22 +28,22 @@ category_path varchar(500)
28 28  
29 29 mapping:
30 30  
31   - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算
  31 + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
32 32 "type": "text",
33 33 "analyzer": "hanlp_index",
34 34 "search_analyzer": "hanlp_standard"
35 35 },
36   - "category_path_en": { // 提供模糊查询功能,辅助相关性计算
  36 + "category_path.en": { // 提供模糊查询功能,辅助相关性计算
37 37 "type": "text",
38 38 "analyzer": "english",
39 39 "search_analyzer": "english"
40 40 },
41   - "category_name_zh": { // 提供模糊查询功能,辅助相关性计算
  41 + "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
42 42 "type": "text",
43 43 "analyzer": "hanlp_index",
44 44 "search_analyzer": "hanlp_standard"
45 45 },
46   - "category_name_en": { // 提供模糊查询功能,辅助相关性计算
  46 + "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
47 47 "type": "text",
48 48 "analyzer": "english",
49 49 "search_analyzer": "english"
... ... @@ -104,15 +104,15 @@ mapping:
104 104  
105 105 #### 2.1 多语言文本处理
106 106  
107   -- **简化处理**:暂时只填充中文字段(title_zh, brief_zh, description_zh, vendor_zh)
108   -- 英文字段(title_en, brief_en, description_en, vendor_en)设为空或None
  107 +- **简化处理**:暂时只填充中文字段(title.zh, brief.zh, description.zh, vendor.zh)
  108 +- 英文字段(title.en, brief.en, description.en, vendor.en)设为空或None
109 109 - 不需要语言检测逻辑
110 110  
111 111 #### 2.2 分类路径解析
112 112  
113 113 - 从 `category_path` 字段按 "/" 分割提取分类层级
114 114 - 分割结果赋值给 `category1_name`, `category2_name`, `category3_name`
115   -- 生成 `category_path_zh`(暂时填充,`category_path_en` 设为空)
  115 +- 生成 `category_path.zh`(暂时填充,`category_path.en` 设为空)
116 116  
117 117 #### 2.3 SKU字段展开计算
118 118  
... ... @@ -152,7 +152,7 @@ mapping:
152 152  
153 153 **文件**: `indexer/spu_transformer.py`
154 154  
155   -- **简化多语言处理**:只填充中文字段(title_zh, brief_zh等),英文字段设为空或None
  155 +- **简化多语言处理**:只填充中文字段(title.zh, brief.zh等),英文字段设为空或None
156 156 - 实现分类路径的解析和展开
157 157 - 实现SKU字段的展开计算(价格、重量、库存)
158 158 - 实现选项字段的处理
... ...
docs/索引字段说明v2.md
... ... @@ -28,24 +28,24 @@
28 28  
29 29 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
30 30 |--------|--------|--------|------|----------|
31   -| `title_zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` |
32   -| `title_en` | text | english | 英文标题 | 暂为空(待翻译服务填充) |
  31 +| `title.zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` |
  32 +| `title.en` | text | english | 英文标题 | 暂为空(待翻译服务填充) |
33 33  
34 34 #### 2.2 描述字段
35 35  
36 36 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
37 37 |--------|--------|--------|------|----------|
38   -| `brief_zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` |
39   -| `brief_en` | text | english | 英文短描述 | 暂为空 |
40   -| `description_zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` |
41   -| `description_en` | text | english | 英文详细描述 | 暂为空 |
  38 +| `brief.zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` |
  39 +| `brief.en` | text | english | 英文短描述 | 暂为空 |
  40 +| `description.zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` |
  41 +| `description.en` | text | english | 英文详细描述 | 暂为空 |
42 42  
43 43 #### 2.3 供应商/品牌字段
44 44  
45 45 | 字段名 | ES类型 | 分析器 | 子字段 | 说明 | 数据来源 |
46 46 |--------|--------|--------|--------|------|----------|
47   -| `vendor_zh` | text | hanlp_index / hanlp_standard | `vendor_zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` |
48   -| `vendor_en` | text | english | `vendor_en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 |
  47 +| `vendor.zh` | text | hanlp_index / hanlp_standard | `vendor.zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` |
  48 +| `vendor.en` | text | english | `vendor.en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 |
49 49  
50 50 **用途**:
51 51 - `text` 类型:用于全文搜索(支持模糊匹配)
... ... @@ -65,15 +65,15 @@
65 65  
66 66 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
67 67 |--------|--------|--------|------|----------|
68   -| `category_path_zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` |
69   -| `category_path_en` | text | english | 英文类目路径 | 暂为空 |
  68 +| `category_path.zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` |
  69 +| `category_path.en` | text | english | 英文类目路径 | 暂为空 |
70 70  
71 71 #### 4.2 类目名称(用于搜索)
72 72  
73 73 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
74 74 |--------|--------|--------|------|----------|
75   -| `category_name_zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` |
76   -| `category_name_en` | text | english | 英文类目名称 | 暂为空 |
  75 +| `category_name_text.zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` |
  76 +| `category_name_text.en` | text | english | 英文类目名称 | 暂为空 |
77 77  
78 78 #### 4.3 类目标识(用于过滤和分面)
79 79  
... ... @@ -87,7 +87,7 @@
87 87 | `category3_name` | keyword | 三级类目名称 | 从 `category_path` 解析 |
88 88  
89 89 **用途**:
90   -- `category_path_zh/en`, `category_name_zh/en`: 用于全文搜索,支持模糊匹配
  90 +- `category_path.zh/en`, `category_name_text.zh/en`: 用于全文搜索,支持模糊匹配
91 91 - `category_id`, `category_name`, `category_level`, `category1/2/3_name`: 用于精确过滤和分面聚合
92 92  
93 93 ### 5. 规格字段(Specifications)
... ... @@ -348,20 +348,20 @@
348 348  
349 349 ### 搜索字段(参与相关性计算)
350 350  
351   -- `title_zh`, `title_en` (boost: 3.0)
352   -- `brief_zh`, `brief_en` (boost: 1.5)
353   -- `description_zh`, `description_en` (boost: 1.0)
354   -- `vendor_zh`, `vendor_en` (boost: 1.5)
  351 +- `title.zh`, `title.en` (boost: 3.0)
  352 +- `brief.zh`, `brief.en` (boost: 1.5)
  353 +- `description.zh`, `description.en` (boost: 1.0)
  354 +- `vendor.zh`, `vendor.en` (boost: 1.5)
355 355 - `tags` (boost: 1.0)
356   -- `category_path_zh`, `category_path_en` (boost: 1.5)
357   -- `category_name_zh`, `category_name_en` (boost: 1.5)
  356 +- `category_path.zh`, `category_path.en` (boost: 1.5)
  357 +- `category_name_text.zh`, `category_name_text.en` (boost: 1.5)
358 358 - `title_embedding` (向量召回,boost: 0.2)
359 359  
360 360 ### 过滤字段(精确匹配)
361 361  
362 362 - `tenant_id` (必需,多租户隔离)
363 363 - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name`
364   -- `vendor_zh.keyword`, `vendor_en.keyword`
  364 +- `vendor.zh.keyword`, `vendor.en.keyword`
365 365 - `specifications` (嵌套查询)
366 366 - `min_price`, `max_price` (范围过滤)
367 367 - `sales` (范围过滤)
... ... @@ -395,12 +395,12 @@
395 395 - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段
396 396  
397 397 映射到前端字段:
398   -- `title_zh/en` → `title`
399   -- `brief_zh/en` → `brief`
400   -- `description_zh/en` → `description`
401   -- `vendor_zh/en` → `vendor`
402   -- `category_path_zh/en` → `category_path`
403   -- `category_name_zh/en` → `category_name`
  398 +- `title.zh/en` → `title`
  399 +- `brief.zh/en` → `brief`
  400 +- `description.zh/en` → `description`
  401 +- `vendor.zh/en` → `vendor`
  402 +- `category_path.zh/en` → `category_path`
  403 +- `category_name_text.zh/en` → `category_name`
404 404  
405 405 ### 规格数据构建
406 406  
... ... @@ -434,12 +434,12 @@ filters AND (text_recall OR embedding_recall)
434 434 ### 文本召回字段
435 435  
436 436 根据查询词的语言选择对应的索引字段:
437   -- `title_zh^3.0`, `title_en^3.0`
438   -- `brief_zh^1.5`, `brief_en^1.5`
439   -- `description_zh^1.0`, `description_en^1.0`
440   -- `vendor_zh^1.5`, `vendor_en^1.5`
441   -- `category_path_zh^1.5`, `category_path_en^1.5`
442   -- `category_name_zh^1.5`, `category_name_en^1.5`
  437 +- `title.zh^3.0`, `title.en^3.0`
  438 +- `brief.zh^1.5`, `brief.en^1.5`
  439 +- `description.zh^1.0`, `description.en^1.0`
  440 +- `vendor.zh^1.5`, `vendor.en^1.5`
  441 +- `category_path.zh^1.5`, `category_path.en^1.5`
  442 +- `category_name_text.zh^1.5`, `category_name_text.en^1.5`
443 443 - `tags^1.0`
444 444  
445 445 ## 注意事项
... ...
docs/索引数据接口文档___old.md
... ... @@ -52,12 +52,12 @@ tenant_config:
52 52 ### 配置规则
53 53  
54 54 1. **主语言**:指定SKU表中 `title`、`brief`、`description`、`vendor` 等字段的语言。
55   - - 如果主语言是 `zh`,这些字段的值会填充到 `title_zh`、`brief_zh` 等字段
56   - - 如果主语言是 `en`,这些字段的值会填充到 `title_en`、`brief_en` 等字段
  55 + - 如果主语言是 `zh`,这些字段的值会填充到 `title.zh`、`brief.zh` 等字段
  56 + - 如果主语言是 `en`,这些字段的值会填充到 `title.en`、`brief.en` 等字段
57 57  
58 58 2. **翻译配置**:
59   - - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title_en` 等字段
60   - - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title_zh` 等字段
  59 + - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title.en` 等字段
  60 + - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title.zh` 等字段
61 61 - **注意**:如果主语言本身就是目标语言,则不会触发翻译(例如主语言是英文,`translate_to_en: true` 不会触发翻译)
62 62  
63 63 3. **默认配置**:如果租户ID不在 `tenants` 中,则使用 `default` 配置。
... ... @@ -72,8 +72,8 @@ tenant_config:
72 72 "translate_to_zh": false
73 73 }
74 74 ```
75   -- SKU表的 `title` 字段(中文)→ `title_zh`
76   -- 翻译服务将中文翻译为英文 → `title_en`
  75 +- SKU表的 `title` 字段(中文)→ `title.zh`
  76 +- 翻译服务将中文翻译为英文 → `title.en`
77 77  
78 78 **示例2:英文主语言,需要翻译中文**
79 79 ```json
... ... @@ -83,8 +83,8 @@ tenant_config:
83 83 "translate_to_zh": true
84 84 }
85 85 ```
86   -- SKU表的 `title` 字段(英文)→ `title_en`
87   -- 翻译服务将英文翻译为中文 → `title_zh`
  86 +- SKU表的 `title` 字段(英文)→ `title.en`
  87 +- 翻译服务将英文翻译为中文 → `title.zh`
88 88  
89 89 **示例3:仅使用主语言,不翻译**
90 90 ```json
... ... @@ -94,8 +94,8 @@ tenant_config:
94 94 "translate_to_zh": false
95 95 }
96 96 ```
97   -- SKU表的 `title` 字段(中文)→ `title_zh`
98   -- `title_en` 保持为 `null`
  97 +- SKU表的 `title` 字段(中文)→ `title.zh`
  98 +- `title.en` 保持为 `null`
99 99  
100 100 ### 配置更新
101 101  
... ... @@ -272,19 +272,19 @@ String json = response.body().string();
272 272 {
273 273 "tenant_id": "1",
274 274 "spu_id": "123",
275   - "title_zh": "商品标题",
276   - "title_en": null,
277   - "brief_zh": "商品简介",
278   - "brief_en": null,
279   - "description_zh": "商品详细描述",
280   - "description_en": null,
281   - "vendor_zh": "供应商名称",
282   - "vendor_en": null,
  275 + "title.zh": "商品标题",
  276 + "title.en": null,
  277 + "brief.zh": "商品简介",
  278 + "brief.en": null,
  279 + "description.zh": "商品详细描述",
  280 + "description.en": null,
  281 + "vendor.zh": "供应商名称",
  282 + "vendor.en": null,
283 283 "tags": ["标签1", "标签2"],
284   - "category_path_zh": "类目1/类目2/类目3",
285   - "category_path_en": null,
286   - "category_name_zh": "类目名称",
287   - "category_name_en": null,
  284 + "category_path.zh": "类目1/类目2/类目3",
  285 + "category_path.en": null,
  286 + "category_name_text.zh": "类目名称",
  287 + "category_name_text.en": null,
288 288 "category_id": "100",
289 289 "category_name": "类目名称",
290 290 "category_level": 3,
... ... @@ -453,11 +453,11 @@ for (String spuId : changedSpuIds) {
453 453 |---------|--------|------|------|
454 454 | 基础标识 | `tenant_id` | keyword | 租户ID |
455 455 | 基础标识 | `spu_id` | keyword | SPU ID |
456   -| 文本字段 | `title_zh`, `title_en` | text | 标题(中英文) |
457   -| 文本字段 | `brief_zh`, `brief_en` | text | 简介(中英文) |
458   -| 文本字段 | `description_zh`, `description_en` | text | 描述(中英文) |
459   -| 文本字段 | `vendor_zh`, `vendor_en` | text | 供应商(中英文) |
460   -| 类目字段 | `category_path_zh`, `category_path_en` | text | 类目路径(中英文) |
  456 +| 文本字段 | `title.zh`, `title.en` | text | 标题(中英文) |
  457 +| 文本字段 | `brief.zh`, `brief.en` | text | 简介(中英文) |
  458 +| 文本字段 | `description.zh`, `description.en` | text | 描述(中英文) |
  459 +| 文本字段 | `vendor.zh`, `vendor.en` | text | 供应商(中英文) |
  460 +| 类目字段 | `category_path.zh`, `category_path.en` | text | 类目路径(中英文) |
461 461 | 类目字段 | `category1_name`, `category2_name`, `category3_name` | keyword | 分层类目名称 |
462 462 | 价格字段 | `min_price`, `max_price` | float | 价格范围 |
463 463 | 库存字段 | `total_inventory` | long | 总库存 |
... ...
docs/索引方案.md
... ... @@ -178,12 +178,12 @@ category_path varchar(500)
178 178 方案:采用方案2
179 179 4. categoryPath索引 + Prefix 查询(categoryPath.keyword: "服装/男装")(如果满足条件的key太多的则性能较差,比如 查询的是一级类目,类目树叶子节点太多时性能较差)
180 180 5. categoryPath支撑模糊查询 和 多级cate keyword索引支撑精确查询。 索引阶段冗余,查询性能高。
181   - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算
  181 + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
182 182 "type": "text",
183 183 "analyzer": "hanlp_index",
184 184 "search_analyzer": "hanlp_standard"
185 185 },
186   - "category_path_en": { // 提供模糊查询功能,辅助相关性计算
  186 + "category_path.en": { // 提供模糊查询功能,辅助相关性计算
187 187 "type": "text",
188 188 "analyzer": "english",
189 189 "search_analyzer": "english"
... ...
docs/翻译功能测试说明.md
... ... @@ -22,8 +22,8 @@
22 22 ```yaml
23 23 translation_prompts:
24 24 # 商品标题翻译提示词
25   - product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
26   - product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
  25 + product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
  26 + product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
27 27 # query翻译提示词
28 28 query_zh: "电商领域"
29 29 query_en: "e-commerce domain"
... ... @@ -35,8 +35,8 @@ translation_prompts:
35 35 ### 提示词使用规则
36 36  
37 37 1. **商品标题翻译**:
38   - - 中文→英文:使用 `product_title_en`
39   - - 英文→中文:使用 `product_title_zh`
  38 + - 中文→英文:使用 `product_title_en`
  39 + - 英文→中文:使用 `product_title_zh`
40 40  
41 41 2. **其他字段翻译**(brief, description, vendor):
42 42 - 根据目标语言选择 `default_zh` 或 `default_en`
... ... @@ -72,7 +72,7 @@ translator = Translator(
72 72  
73 73 # 测试商品标题翻译
74 74 text = "蓝牙耳机"
75   -prompt = config.query_config.translation_prompts.get('product_title_en')
  75 +prompt = config.query_config.translation_prompts.get('product_title_en')
76 76 result = translator.translate(
77 77 text,
78 78 target_lang='en',
... ... @@ -140,8 +140,8 @@ doc = transformer.transform_spu_to_doc(
140 140 options=pd.DataFrame()
141 141 )
142 142  
143   -print(f"title_zh: {doc.get('title_zh')}")
144   -print(f"title_en: {doc.get('title_en')}") # 应该包含翻译结果
  143 +print(f"title.zh: {(doc.get('title') or {}).get('zh')}")
  144 +print(f"title.en: {(doc.get('title') or {}).get('en')}") # 应该包含翻译结果
145 145 ```
146 146  
147 147 ### 5. 测试缓存功能
... ...
indexer/document_transformer.py
... ... @@ -184,17 +184,40 @@ class SPUDocumentTransformer:
184 184 translate_to_en = bool(self.tenant_config.get('translate_to_en'))
185 185 translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))
186 186  
  187 + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None):
  188 + """
  189 + Write multilingual text field as an object, e.g.:
  190 + doc[field_name] = {"zh": "...", "en": "..."}
  191 + Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh.
  192 + """
  193 + if not source_text or not str(source_text).strip():
  194 + return
  195 +
  196 + obj: Dict[str, str] = {}
  197 + src = str(source_text)
  198 + obj[primary_lang] = src
  199 +
  200 + tr = translations or {}
  201 + if translate_to_en and primary_lang != "en":
  202 + en_text = tr.get("en")
  203 + if en_text and str(en_text).strip():
  204 + obj["en"] = str(en_text)
  205 + if translate_to_zh and primary_lang != "zh":
  206 + zh_text = tr.get("zh")
  207 + if zh_text and str(zh_text).strip():
  208 + obj["zh"] = str(zh_text)
  209 +
  210 + if obj:
  211 + doc[field_name] = obj
  212 +
187 213 # Title
188 214 if pd.notna(spu_row.get('title')):
189 215 title_text = str(spu_row['title'])
190   -
191   - # 使用translator的translate_for_indexing方法,自动处理多语言翻译
  216 +
  217 + translations: Dict[str, str] = {}
192 218 if self.translator:
193   - # 根据目标语言选择对应的提示词
194 219 prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh')
195 220 prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en')
196   -
197   - # 调用translate_for_indexing,自动处理翻译逻辑
198 221 translations = self.translator.translate_for_indexing(
199 222 title_text,
200 223 shop_language=primary_lang,
... ... @@ -202,26 +225,14 @@ class SPUDocumentTransformer:
202 225 prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
203 226 translate_to_en=translate_to_en,
204 227 translate_to_zh=translate_to_zh,
205   - )
206   -
207   - # 填充翻译结果
208   - doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)
209   - doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None)
210   - else:
211   - # 无翻译器,只填充主语言字段
212   - if primary_lang == 'zh':
213   - doc['title_zh'] = title_text
214   - doc['title_en'] = None
215   - else:
216   - doc['title_zh'] = None
217   - doc['title_en'] = title_text
218   - else:
219   - doc['title_zh'] = None
220   - doc['title_en'] = None
  228 + ) or {}
  229 +
  230 + _set_lang_obj("title", title_text, translations)
221 231  
222 232 # Brief
223 233 if pd.notna(spu_row.get('brief')):
224 234 brief_text = str(spu_row['brief'])
  235 + translations: Dict[str, str] = {}
225 236 if self.translator:
226 237 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
227 238 translations = self.translator.translate_for_indexing(
... ... @@ -231,23 +242,13 @@ class SPUDocumentTransformer:
231 242 prompt=prompt,
232 243 translate_to_en=translate_to_en,
233 244 translate_to_zh=translate_to_zh,
234   - )
235   - doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None)
236   - doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None)
237   - else:
238   - if primary_lang == 'zh':
239   - doc['brief_zh'] = brief_text
240   - doc['brief_en'] = None
241   - else:
242   - doc['brief_zh'] = None
243   - doc['brief_en'] = brief_text
244   - else:
245   - doc['brief_zh'] = None
246   - doc['brief_en'] = None
  245 + ) or {}
  246 + _set_lang_obj("brief", brief_text, translations)
247 247  
248 248 # Description
249 249 if pd.notna(spu_row.get('description')):
250 250 desc_text = str(spu_row['description'])
  251 + translations: Dict[str, str] = {}
251 252 if self.translator:
252 253 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
253 254 translations = self.translator.translate_for_indexing(
... ... @@ -257,23 +258,13 @@ class SPUDocumentTransformer:
257 258 prompt=prompt,
258 259 translate_to_en=translate_to_en,
259 260 translate_to_zh=translate_to_zh,
260   - )
261   - doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None)
262   - doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None)
263   - else:
264   - if primary_lang == 'zh':
265   - doc['description_zh'] = desc_text
266   - doc['description_en'] = None
267   - else:
268   - doc['description_zh'] = None
269   - doc['description_en'] = desc_text
270   - else:
271   - doc['description_zh'] = None
272   - doc['description_en'] = None
  261 + ) or {}
  262 + _set_lang_obj("description", desc_text, translations)
273 263  
274 264 # Vendor
275 265 if pd.notna(spu_row.get('vendor')):
276 266 vendor_text = str(spu_row['vendor'])
  267 + translations: Dict[str, str] = {}
277 268 if self.translator:
278 269 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
279 270 translations = self.translator.translate_for_indexing(
... ... @@ -283,19 +274,8 @@ class SPUDocumentTransformer:
283 274 prompt=prompt,
284 275 translate_to_en=translate_to_en,
285 276 translate_to_zh=translate_to_zh,
286   - )
287   - doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None)
288   - doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None)
289   - else:
290   - if primary_lang == 'zh':
291   - doc['vendor_zh'] = vendor_text
292   - doc['vendor_en'] = None
293   - else:
294   - doc['vendor_zh'] = None
295   - doc['vendor_en'] = vendor_text
296   - else:
297   - doc['vendor_zh'] = None
298   - doc['vendor_en'] = None
  277 + ) or {}
  278 + _set_lang_obj("vendor", vendor_text, translations)
299 279  
300 280 def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series):
301 281 """填充类目相关字段。"""
... ... @@ -303,6 +283,8 @@ class SPUDocumentTransformer:
303 283 # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目)
304 284 # - 仅记录错误日志,不阻塞索引流程
305 285  
  286 + primary_lang = self.tenant_config.get('primary_language', 'zh')
  287 +
306 288 if pd.notna(spu_row.get('category_path')):
307 289 category_path = str(spu_row['category_path'])
308 290  
... ... @@ -329,8 +311,7 @@ class SPUDocumentTransformer:
329 311 # 构建类目路径字符串(用于搜索)
330 312 if category_names:
331 313 category_path_str = '/'.join(category_names)
332   - doc['category_path_zh'] = category_path_str
333   - doc['category_path_en'] = None # 暂时设为空
  314 + doc['category_path'] = {primary_lang: category_path_str}
334 315  
335 316 # 填充分层类目名称
336 317 if len(category_names) > 0:
... ... @@ -342,8 +323,7 @@ class SPUDocumentTransformer:
342 323 elif pd.notna(spu_row.get('category')):
343 324 # 如果category_path为空,使用category字段作为category1_name的备选
344 325 category = str(spu_row['category'])
345   - doc['category_name_zh'] = category
346   - doc['category_name_en'] = None
  326 + doc['category_name_text'] = {primary_lang: category}
347 327 doc['category_name'] = category
348 328  
349 329 # 尝试从category字段解析多级分类
... ... @@ -362,10 +342,8 @@ class SPUDocumentTransformer:
362 342 if pd.notna(spu_row.get('category')):
363 343 # 确保category相关字段都被设置(如果前面没有设置)
364 344 category_name = str(spu_row['category'])
365   - if 'category_name_zh' not in doc:
366   - doc['category_name_zh'] = category_name
367   - if 'category_name_en' not in doc:
368   - doc['category_name_en'] = None
  345 + if 'category_name_text' not in doc:
  346 + doc['category_name_text'] = {primary_lang: category_name}
369 347 if 'category_name' not in doc:
370 348 doc['category_name'] = category_name
371 349  
... ... @@ -587,13 +565,22 @@ class SPUDocumentTransformer:
587 565 """
588 566 填充标题向量化字段。
589 567  
590   - 使用英文标题(title_en)生成embedding。如果title_en不存在,则使用title_zh。
  568 + 使用英文标题(title.en)生成embedding。如果title.en不存在,则使用title.zh。
591 569  
592 570 Args:
593 571 doc: ES文档字典
594 572 """
595   - # 优先使用英文标题,如果没有则使用中文标题
596   - title_text = doc.get('title_en') or doc.get('title_zh')
  573 + # 优先使用英文标题,如果没有则使用中文标题;再没有则取任意可用语言
  574 + title_obj = doc.get("title") or {}
  575 + if isinstance(title_obj, dict):
  576 + title_text = title_obj.get("en") or title_obj.get("zh")
  577 + if not title_text:
  578 + for v in title_obj.values():
  579 + if v and str(v).strip():
  580 + title_text = str(v)
  581 + break
  582 + else:
  583 + title_text = None
597 584  
598 585 if not title_text or not title_text.strip():
599 586 logger.debug(f"No title text available for embedding, SPU: {doc.get('spu_id')}")
... ...
indexer/incremental_service.py
... ... @@ -130,7 +130,15 @@ class IncrementalIndexerService:
130 130  
131 131 # 单条场景下也可补齐 embedding(仍走缓存)
132 132 if enable_embedding and encoder:
133   - title_text = doc.get("title_en") or doc.get("title_zh")
  133 + title_obj = doc.get("title") or {}
  134 + title_text = None
  135 + if isinstance(title_obj, dict):
  136 + title_text = title_obj.get("en") or title_obj.get("zh")
  137 + if not title_text:
  138 + for v in title_obj.values():
  139 + if v and str(v).strip():
  140 + title_text = str(v)
  141 + break
134 142 if title_text and str(title_text).strip():
135 143 try:
136 144 embeddings = encoder.encode(title_text)
... ... @@ -560,7 +568,15 @@ class IncrementalIndexerService:
560 568 title_texts: List[str] = []
561 569 title_doc_indices: List[int] = []
562 570 for i, (_, doc) in enumerate(documents):
563   - title_text = doc.get("title_en") or doc.get("title_zh")
  571 + title_obj = doc.get("title") or {}
  572 + title_text = None
  573 + if isinstance(title_obj, dict):
  574 + title_text = title_obj.get("en") or title_obj.get("zh")
  575 + if not title_text:
  576 + for v in title_obj.values():
  577 + if v and str(v).strip():
  578 + title_text = str(v)
  579 + break
564 580 if title_text and str(title_text).strip():
565 581 title_texts.append(str(title_text))
566 582 title_doc_indices.append(i)
... ...
indexer/test_indexing.py
... ... @@ -114,16 +114,17 @@ def test_full_indexing(tenant_id: str = "162"):
114 114 print(f"\n文档 {i+1}:")
115 115 print(f" SPU ID: {doc.get('spu_id')}")
116 116 print(f" Tenant ID: {doc.get('tenant_id')}")
117   - print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}")
118   - print(f" 标题 (英文): {doc.get('title_en', 'N/A')}")
  117 + title_obj = doc.get("title") or {}
  118 + print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
  119 + print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
119 120  
120 121 # 检查租户162的翻译状态
121 122 if tenant_id == "162":
122   - # 租户162翻译应该关闭,title_en应该为None
123   - if doc.get('title_en') is None:
124   - print(f" ✓ 翻译已关闭(title_en为None)")
  123 + # 租户162翻译应该关闭:只写入主语言,不应出现 title.en
  124 + if isinstance(title_obj, dict) and title_obj.get("en") is None:
  125 + print(f" ✓ 翻译已关闭(title.en为空)")
125 126 else:
126   - print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}")
  127 + print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
127 128  
128 129 return True
129 130  
... ... @@ -192,17 +193,18 @@ def test_incremental_indexing(tenant_id: str = "162"):
192 193 print(f"✓ SPU文档获取成功")
193 194 print(f" SPU ID: {doc.get('spu_id')}")
194 195 print(f" Tenant ID: {doc.get('tenant_id')}")
195   - print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}")
196   - print(f" 标题 (英文): {doc.get('title_en', 'N/A')}")
  196 + title_obj = doc.get("title") or {}
  197 + print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
  198 + print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
197 199 print(f" SKU数量: {len(doc.get('skus', []))}")
198 200 print(f" 规格数量: {len(doc.get('specifications', []))}")
199 201  
200 202 # 检查租户162的翻译状态
201 203 if tenant_id == "162":
202   - if doc.get('title_en') is None:
203   - print(f" ✓ 翻译已关闭(title_en为None)")
  204 + if isinstance(title_obj, dict) and title_obj.get("en") is None:
  205 + print(f" ✓ 翻译已关闭(title.en为空)")
204 206 else:
205   - print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}")
  207 + print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
206 208  
207 209 return True
208 210  
... ... @@ -291,12 +293,13 @@ def test_document_transformer():
291 293  
292 294 if doc:
293 295 print(f"✓ 文档转换成功")
294   - print(f" title_zh: {doc.get('title_zh')}")
295   - print(f" title_en: {doc.get('title_en')}")
  296 + title_obj = doc.get("title") or {}
  297 + print(f" title.zh: {title_obj.get('zh') if isinstance(title_obj, dict) else None}")
  298 + print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
296 299 print(f" SKU数量: {len(doc.get('skus', []))}")
297 300  
298 301 # 验证租户162翻译关闭
299   - if doc.get('title_en') is None:
  302 + if isinstance(title_obj, dict) and title_obj.get("en") is None:
300 303 print(f" ✓ 翻译已关闭(符合租户162配置)")
301 304 else:
302 305 print(f" ⚠ 警告:翻译应该关闭")
... ...
mappings/search_products.json
... ... @@ -4,22 +4,12 @@
4 4 "number_of_replicas": 0,
5 5 "refresh_interval": "30s",
6 6 "analysis": {
7   - "analyzer": {
8   - "hanlp_index": {
9   - "type": "custom",
10   - "tokenizer": "standard",
11   - "filter": ["lowercase", "asciifolding"]
12   - },
13   - "hanlp_standard": {
14   - "type": "custom",
15   - "tokenizer": "standard",
16   - "filter": ["lowercase", "asciifolding"]
17   - }
18   - },
19 7 "normalizer": {
20 8 "lowercase": {
21 9 "type": "custom",
22   - "filter": ["lowercase"]
  10 + "filter": [
  11 + "lowercase"
  12 + ]
23 13 }
24 14 }
25 15 },
... ... @@ -45,75 +35,963 @@
45 35 "update_time": {
46 36 "type": "date"
47 37 },
48   - "title_zh": {
49   - "type": "text",
50   - "analyzer": "hanlp_index",
51   - "search_analyzer": "hanlp_standard"
52   - },
53   - "qanchors_zh": {
54   - "type": "text",
55   - "analyzer": "hanlp_index",
56   - "search_analyzer": "hanlp_standard"
57   - },
58   - "keywords_zh": {
59   - "type": "text",
60   - "analyzer": "hanlp_index",
61   - "search_analyzer": "hanlp_standard"
62   - },
63   - "brief_zh": {
64   - "type": "text",
65   - "analyzer": "hanlp_index",
66   - "search_analyzer": "hanlp_standard"
67   - },
68   - "description_zh": {
69   - "type": "text",
70   - "analyzer": "hanlp_index",
71   - "search_analyzer": "hanlp_standard"
72   - },
73   - "vendor_zh": {
74   - "type": "text",
75   - "analyzer": "hanlp_index",
76   - "search_analyzer": "hanlp_standard",
77   - "fields": {
78   - "keyword": {
79   - "type": "keyword",
80   - "normalizer": "lowercase"
  38 + "title": {
  39 + "type": "object",
  40 + "properties": {
  41 + "ar": {
  42 + "type": "text",
  43 + "analyzer": "arabic"
  44 + },
  45 + "hy": {
  46 + "type": "text",
  47 + "analyzer": "armenian"
  48 + },
  49 + "eu": {
  50 + "type": "text",
  51 + "analyzer": "basque"
  52 + },
  53 + "pt_br": {
  54 + "type": "text",
  55 + "analyzer": "brazilian"
  56 + },
  57 + "bg": {
  58 + "type": "text",
  59 + "analyzer": "bulgarian"
  60 + },
  61 + "ca": {
  62 + "type": "text",
  63 + "analyzer": "catalan"
  64 + },
  65 + "zh": {
  66 + "type": "text",
  67 + "analyzer": "icu_analyzer"
  68 + },
  69 + "chinese": {
  70 + "type": "text",
  71 + "analyzer": "chinese"
  72 + },
  73 + "cjk": {
  74 + "type": "text",
  75 + "analyzer": "cjk"
  76 + },
  77 + "cs": {
  78 + "type": "text",
  79 + "analyzer": "czech"
  80 + },
  81 + "da": {
  82 + "type": "text",
  83 + "analyzer": "danish"
  84 + },
  85 + "nl": {
  86 + "type": "text",
  87 + "analyzer": "dutch"
  88 + },
  89 + "en": {
  90 + "type": "text",
  91 + "analyzer": "english"
  92 + },
  93 + "fi": {
  94 + "type": "text",
  95 + "analyzer": "finnish"
  96 + },
  97 + "fr": {
  98 + "type": "text",
  99 + "analyzer": "french"
  100 + },
  101 + "gl": {
  102 + "type": "text",
  103 + "analyzer": "galician"
  104 + },
  105 + "de": {
  106 + "type": "text",
  107 + "analyzer": "german"
  108 + },
  109 + "el": {
  110 + "type": "text",
  111 + "analyzer": "greek"
  112 + },
  113 + "hi": {
  114 + "type": "text",
  115 + "analyzer": "hindi"
  116 + },
  117 + "hu": {
  118 + "type": "text",
  119 + "analyzer": "hungarian"
  120 + },
  121 + "id": {
  122 + "type": "text",
  123 + "analyzer": "indonesian"
  124 + },
  125 + "it": {
  126 + "type": "text",
  127 + "analyzer": "italian"
  128 + },
  129 + "no": {
  130 + "type": "text",
  131 + "analyzer": "norwegian"
  132 + },
  133 + "fa": {
  134 + "type": "text",
  135 + "analyzer": "persian"
  136 + },
  137 + "pt": {
  138 + "type": "text",
  139 + "analyzer": "portuguese"
  140 + },
  141 + "ro": {
  142 + "type": "text",
  143 + "analyzer": "romanian"
  144 + },
  145 + "ru": {
  146 + "type": "text",
  147 + "analyzer": "russian"
  148 + },
  149 + "es": {
  150 + "type": "text",
  151 + "analyzer": "spanish"
  152 + },
  153 + "sv": {
  154 + "type": "text",
  155 + "analyzer": "swedish"
  156 + },
  157 + "tr": {
  158 + "type": "text",
  159 + "analyzer": "turkish"
  160 + },
  161 + "th": {
  162 + "type": "text",
  163 + "analyzer": "thai"
81 164 }
82 165 }
83 166 },
84   - "title_en": {
85   - "type": "text",
86   - "analyzer": "english",
87   - "search_analyzer": "english"
88   - },
89   - "qanchors_en": {
90   - "type": "text",
91   - "analyzer": "english",
92   - "search_analyzer": "english"
93   - },
94   - "keywords_en": {
95   - "type": "text",
96   - "analyzer": "english",
97   - "search_analyzer": "english"
98   - },
99   - "brief_en": {
100   - "type": "text",
101   - "analyzer": "english",
102   - "search_analyzer": "english"
103   - },
104   - "description_en": {
105   - "type": "text",
106   - "analyzer": "english",
107   - "search_analyzer": "english"
108   - },
109   - "vendor_en": {
110   - "type": "text",
111   - "analyzer": "english",
112   - "search_analyzer": "english",
113   - "fields": {
114   - "keyword": {
115   - "type": "keyword",
116   - "normalizer": "lowercase"
  167 + "qanchors": {
  168 + "type": "object",
  169 + "properties": {
  170 + "ar": {
  171 + "type": "text",
  172 + "analyzer": "arabic"
  173 + },
  174 + "hy": {
  175 + "type": "text",
  176 + "analyzer": "armenian"
  177 + },
  178 + "eu": {
  179 + "type": "text",
  180 + "analyzer": "basque"
  181 + },
  182 + "pt_br": {
  183 + "type": "text",
  184 + "analyzer": "brazilian"
  185 + },
  186 + "bg": {
  187 + "type": "text",
  188 + "analyzer": "bulgarian"
  189 + },
  190 + "ca": {
  191 + "type": "text",
  192 + "analyzer": "catalan"
  193 + },
  194 + "zh": {
  195 + "type": "text",
  196 + "analyzer": "icu_analyzer"
  197 + },
  198 + "chinese": {
  199 + "type": "text",
  200 + "analyzer": "chinese"
  201 + },
  202 + "cjk": {
  203 + "type": "text",
  204 + "analyzer": "cjk"
  205 + },
  206 + "cs": {
  207 + "type": "text",
  208 + "analyzer": "czech"
  209 + },
  210 + "da": {
  211 + "type": "text",
  212 + "analyzer": "danish"
  213 + },
  214 + "nl": {
  215 + "type": "text",
  216 + "analyzer": "dutch"
  217 + },
  218 + "en": {
  219 + "type": "text",
  220 + "analyzer": "english"
  221 + },
  222 + "fi": {
  223 + "type": "text",
  224 + "analyzer": "finnish"
  225 + },
  226 + "fr": {
  227 + "type": "text",
  228 + "analyzer": "french"
  229 + },
  230 + "gl": {
  231 + "type": "text",
  232 + "analyzer": "galician"
  233 + },
  234 + "de": {
  235 + "type": "text",
  236 + "analyzer": "german"
  237 + },
  238 + "el": {
  239 + "type": "text",
  240 + "analyzer": "greek"
  241 + },
  242 + "hi": {
  243 + "type": "text",
  244 + "analyzer": "hindi"
  245 + },
  246 + "hu": {
  247 + "type": "text",
  248 + "analyzer": "hungarian"
  249 + },
  250 + "id": {
  251 + "type": "text",
  252 + "analyzer": "indonesian"
  253 + },
  254 + "it": {
  255 + "type": "text",
  256 + "analyzer": "italian"
  257 + },
  258 + "no": {
  259 + "type": "text",
  260 + "analyzer": "norwegian"
  261 + },
  262 + "fa": {
  263 + "type": "text",
  264 + "analyzer": "persian"
  265 + },
  266 + "pt": {
  267 + "type": "text",
  268 + "analyzer": "portuguese"
  269 + },
  270 + "ro": {
  271 + "type": "text",
  272 + "analyzer": "romanian"
  273 + },
  274 + "ru": {
  275 + "type": "text",
  276 + "analyzer": "russian"
  277 + },
  278 + "es": {
  279 + "type": "text",
  280 + "analyzer": "spanish"
  281 + },
  282 + "sv": {
  283 + "type": "text",
  284 + "analyzer": "swedish"
  285 + },
  286 + "tr": {
  287 + "type": "text",
  288 + "analyzer": "turkish"
  289 + },
  290 + "th": {
  291 + "type": "text",
  292 + "analyzer": "thai"
  293 + }
  294 + }
  295 + },
  296 + "keywords": {
  297 + "type": "object",
  298 + "properties": {
  299 + "ar": {
  300 + "type": "text",
  301 + "analyzer": "arabic"
  302 + },
  303 + "hy": {
  304 + "type": "text",
  305 + "analyzer": "armenian"
  306 + },
  307 + "eu": {
  308 + "type": "text",
  309 + "analyzer": "basque"
  310 + },
  311 + "pt_br": {
  312 + "type": "text",
  313 + "analyzer": "brazilian"
  314 + },
  315 + "bg": {
  316 + "type": "text",
  317 + "analyzer": "bulgarian"
  318 + },
  319 + "ca": {
  320 + "type": "text",
  321 + "analyzer": "catalan"
  322 + },
  323 + "zh": {
  324 + "type": "text",
  325 + "analyzer": "icu_analyzer"
  326 + },
  327 + "chinese": {
  328 + "type": "text",
  329 + "analyzer": "chinese"
  330 + },
  331 + "cjk": {
  332 + "type": "text",
  333 + "analyzer": "cjk"
  334 + },
  335 + "cs": {
  336 + "type": "text",
  337 + "analyzer": "czech"
  338 + },
  339 + "da": {
  340 + "type": "text",
  341 + "analyzer": "danish"
  342 + },
  343 + "nl": {
  344 + "type": "text",
  345 + "analyzer": "dutch"
  346 + },
  347 + "en": {
  348 + "type": "text",
  349 + "analyzer": "english"
  350 + },
  351 + "fi": {
  352 + "type": "text",
  353 + "analyzer": "finnish"
  354 + },
  355 + "fr": {
  356 + "type": "text",
  357 + "analyzer": "french"
  358 + },
  359 + "gl": {
  360 + "type": "text",
  361 + "analyzer": "galician"
  362 + },
  363 + "de": {
  364 + "type": "text",
  365 + "analyzer": "german"
  366 + },
  367 + "el": {
  368 + "type": "text",
  369 + "analyzer": "greek"
  370 + },
  371 + "hi": {
  372 + "type": "text",
  373 + "analyzer": "hindi"
  374 + },
  375 + "hu": {
  376 + "type": "text",
  377 + "analyzer": "hungarian"
  378 + },
  379 + "id": {
  380 + "type": "text",
  381 + "analyzer": "indonesian"
  382 + },
  383 + "it": {
  384 + "type": "text",
  385 + "analyzer": "italian"
  386 + },
  387 + "no": {
  388 + "type": "text",
  389 + "analyzer": "norwegian"
  390 + },
  391 + "fa": {
  392 + "type": "text",
  393 + "analyzer": "persian"
  394 + },
  395 + "pt": {
  396 + "type": "text",
  397 + "analyzer": "portuguese"
  398 + },
  399 + "ro": {
  400 + "type": "text",
  401 + "analyzer": "romanian"
  402 + },
  403 + "ru": {
  404 + "type": "text",
  405 + "analyzer": "russian"
  406 + },
  407 + "es": {
  408 + "type": "text",
  409 + "analyzer": "spanish"
  410 + },
  411 + "sv": {
  412 + "type": "text",
  413 + "analyzer": "swedish"
  414 + },
  415 + "tr": {
  416 + "type": "text",
  417 + "analyzer": "turkish"
  418 + },
  419 + "th": {
  420 + "type": "text",
  421 + "analyzer": "thai"
  422 + }
  423 + }
  424 + },
  425 + "brief": {
  426 + "type": "object",
  427 + "properties": {
  428 + "ar": {
  429 + "type": "text",
  430 + "analyzer": "arabic"
  431 + },
  432 + "hy": {
  433 + "type": "text",
  434 + "analyzer": "armenian"
  435 + },
  436 + "eu": {
  437 + "type": "text",
  438 + "analyzer": "basque"
  439 + },
  440 + "pt_br": {
  441 + "type": "text",
  442 + "analyzer": "brazilian"
  443 + },
  444 + "bg": {
  445 + "type": "text",
  446 + "analyzer": "bulgarian"
  447 + },
  448 + "ca": {
  449 + "type": "text",
  450 + "analyzer": "catalan"
  451 + },
  452 + "zh": {
  453 + "type": "text",
  454 + "analyzer": "icu_analyzer"
  455 + },
  456 + "chinese": {
  457 + "type": "text",
  458 + "analyzer": "chinese"
  459 + },
  460 + "cjk": {
  461 + "type": "text",
  462 + "analyzer": "cjk"
  463 + },
  464 + "cs": {
  465 + "type": "text",
  466 + "analyzer": "czech"
  467 + },
  468 + "da": {
  469 + "type": "text",
  470 + "analyzer": "danish"
  471 + },
  472 + "nl": {
  473 + "type": "text",
  474 + "analyzer": "dutch"
  475 + },
  476 + "en": {
  477 + "type": "text",
  478 + "analyzer": "english"
  479 + },
  480 + "fi": {
  481 + "type": "text",
  482 + "analyzer": "finnish"
  483 + },
  484 + "fr": {
  485 + "type": "text",
  486 + "analyzer": "french"
  487 + },
  488 + "gl": {
  489 + "type": "text",
  490 + "analyzer": "galician"
  491 + },
  492 + "de": {
  493 + "type": "text",
  494 + "analyzer": "german"
  495 + },
  496 + "el": {
  497 + "type": "text",
  498 + "analyzer": "greek"
  499 + },
  500 + "hi": {
  501 + "type": "text",
  502 + "analyzer": "hindi"
  503 + },
  504 + "hu": {
  505 + "type": "text",
  506 + "analyzer": "hungarian"
  507 + },
  508 + "id": {
  509 + "type": "text",
  510 + "analyzer": "indonesian"
  511 + },
  512 + "it": {
  513 + "type": "text",
  514 + "analyzer": "italian"
  515 + },
  516 + "no": {
  517 + "type": "text",
  518 + "analyzer": "norwegian"
  519 + },
  520 + "fa": {
  521 + "type": "text",
  522 + "analyzer": "persian"
  523 + },
  524 + "pt": {
  525 + "type": "text",
  526 + "analyzer": "portuguese"
  527 + },
  528 + "ro": {
  529 + "type": "text",
  530 + "analyzer": "romanian"
  531 + },
  532 + "ru": {
  533 + "type": "text",
  534 + "analyzer": "russian"
  535 + },
  536 + "es": {
  537 + "type": "text",
  538 + "analyzer": "spanish"
  539 + },
  540 + "sv": {
  541 + "type": "text",
  542 + "analyzer": "swedish"
  543 + },
  544 + "tr": {
  545 + "type": "text",
  546 + "analyzer": "turkish"
  547 + },
  548 + "th": {
  549 + "type": "text",
  550 + "analyzer": "thai"
  551 + }
  552 + }
  553 + },
  554 + "description": {
  555 + "type": "object",
  556 + "properties": {
  557 + "ar": {
  558 + "type": "text",
  559 + "analyzer": "arabic"
  560 + },
  561 + "hy": {
  562 + "type": "text",
  563 + "analyzer": "armenian"
  564 + },
  565 + "eu": {
  566 + "type": "text",
  567 + "analyzer": "basque"
  568 + },
  569 + "pt_br": {
  570 + "type": "text",
  571 + "analyzer": "brazilian"
  572 + },
  573 + "bg": {
  574 + "type": "text",
  575 + "analyzer": "bulgarian"
  576 + },
  577 + "ca": {
  578 + "type": "text",
  579 + "analyzer": "catalan"
  580 + },
  581 + "zh": {
  582 + "type": "text",
  583 + "analyzer": "icu_analyzer"
  584 + },
  585 + "chinese": {
  586 + "type": "text",
  587 + "analyzer": "chinese"
  588 + },
  589 + "cjk": {
  590 + "type": "text",
  591 + "analyzer": "cjk"
  592 + },
  593 + "cs": {
  594 + "type": "text",
  595 + "analyzer": "czech"
  596 + },
  597 + "da": {
  598 + "type": "text",
  599 + "analyzer": "danish"
  600 + },
  601 + "nl": {
  602 + "type": "text",
  603 + "analyzer": "dutch"
  604 + },
  605 + "en": {
  606 + "type": "text",
  607 + "analyzer": "english"
  608 + },
  609 + "fi": {
  610 + "type": "text",
  611 + "analyzer": "finnish"
  612 + },
  613 + "fr": {
  614 + "type": "text",
  615 + "analyzer": "french"
  616 + },
  617 + "gl": {
  618 + "type": "text",
  619 + "analyzer": "galician"
  620 + },
  621 + "de": {
  622 + "type": "text",
  623 + "analyzer": "german"
  624 + },
  625 + "el": {
  626 + "type": "text",
  627 + "analyzer": "greek"
  628 + },
  629 + "hi": {
  630 + "type": "text",
  631 + "analyzer": "hindi"
  632 + },
  633 + "hu": {
  634 + "type": "text",
  635 + "analyzer": "hungarian"
  636 + },
  637 + "id": {
  638 + "type": "text",
  639 + "analyzer": "indonesian"
  640 + },
  641 + "it": {
  642 + "type": "text",
  643 + "analyzer": "italian"
  644 + },
  645 + "no": {
  646 + "type": "text",
  647 + "analyzer": "norwegian"
  648 + },
  649 + "fa": {
  650 + "type": "text",
  651 + "analyzer": "persian"
  652 + },
  653 + "pt": {
  654 + "type": "text",
  655 + "analyzer": "portuguese"
  656 + },
  657 + "ro": {
  658 + "type": "text",
  659 + "analyzer": "romanian"
  660 + },
  661 + "ru": {
  662 + "type": "text",
  663 + "analyzer": "russian"
  664 + },
  665 + "es": {
  666 + "type": "text",
  667 + "analyzer": "spanish"
  668 + },
  669 + "sv": {
  670 + "type": "text",
  671 + "analyzer": "swedish"
  672 + },
  673 + "tr": {
  674 + "type": "text",
  675 + "analyzer": "turkish"
  676 + },
  677 + "th": {
  678 + "type": "text",
  679 + "analyzer": "thai"
  680 + }
  681 + }
  682 + },
  683 + "vendor": {
  684 + "type": "object",
  685 + "properties": {
  686 + "ar": {
  687 + "type": "text",
  688 + "analyzer": "arabic",
  689 + "fields": {
  690 + "keyword": {
  691 + "type": "keyword",
  692 + "normalizer": "lowercase"
  693 + }
  694 + }
  695 + },
  696 + "hy": {
  697 + "type": "text",
  698 + "analyzer": "armenian",
  699 + "fields": {
  700 + "keyword": {
  701 + "type": "keyword",
  702 + "normalizer": "lowercase"
  703 + }
  704 + }
  705 + },
  706 + "eu": {
  707 + "type": "text",
  708 + "analyzer": "basque",
  709 + "fields": {
  710 + "keyword": {
  711 + "type": "keyword",
  712 + "normalizer": "lowercase"
  713 + }
  714 + }
  715 + },
  716 + "pt_br": {
  717 + "type": "text",
  718 + "analyzer": "brazilian",
  719 + "fields": {
  720 + "keyword": {
  721 + "type": "keyword",
  722 + "normalizer": "lowercase"
  723 + }
  724 + }
  725 + },
  726 + "bg": {
  727 + "type": "text",
  728 + "analyzer": "bulgarian",
  729 + "fields": {
  730 + "keyword": {
  731 + "type": "keyword",
  732 + "normalizer": "lowercase"
  733 + }
  734 + }
  735 + },
  736 + "ca": {
  737 + "type": "text",
  738 + "analyzer": "catalan",
  739 + "fields": {
  740 + "keyword": {
  741 + "type": "keyword",
  742 + "normalizer": "lowercase"
  743 + }
  744 + }
  745 + },
  746 + "zh": {
  747 + "type": "text",
  748 + "analyzer": "icu_analyzer",
  749 + "fields": {
  750 + "keyword": {
  751 + "type": "keyword",
  752 + "normalizer": "lowercase"
  753 + }
  754 + }
  755 + },
  756 + "chinese": {
  757 + "type": "text",
  758 + "analyzer": "chinese",
  759 + "fields": {
  760 + "keyword": {
  761 + "type": "keyword",
  762 + "normalizer": "lowercase"
  763 + }
  764 + }
  765 + },
  766 + "cjk": {
  767 + "type": "text",
  768 + "analyzer": "cjk",
  769 + "fields": {
  770 + "keyword": {
  771 + "type": "keyword",
  772 + "normalizer": "lowercase"
  773 + }
  774 + }
  775 + },
  776 + "cs": {
  777 + "type": "text",
  778 + "analyzer": "czech",
  779 + "fields": {
  780 + "keyword": {
  781 + "type": "keyword",
  782 + "normalizer": "lowercase"
  783 + }
  784 + }
  785 + },
  786 + "da": {
  787 + "type": "text",
  788 + "analyzer": "danish",
  789 + "fields": {
  790 + "keyword": {
  791 + "type": "keyword",
  792 + "normalizer": "lowercase"
  793 + }
  794 + }
  795 + },
  796 + "nl": {
  797 + "type": "text",
  798 + "analyzer": "dutch",
  799 + "fields": {
  800 + "keyword": {
  801 + "type": "keyword",
  802 + "normalizer": "lowercase"
  803 + }
  804 + }
  805 + },
  806 + "en": {
  807 + "type": "text",
  808 + "analyzer": "english",
  809 + "fields": {
  810 + "keyword": {
  811 + "type": "keyword",
  812 + "normalizer": "lowercase"
  813 + }
  814 + }
  815 + },
  816 + "fi": {
  817 + "type": "text",
  818 + "analyzer": "finnish",
  819 + "fields": {
  820 + "keyword": {
  821 + "type": "keyword",
  822 + "normalizer": "lowercase"
  823 + }
  824 + }
  825 + },
  826 + "fr": {
  827 + "type": "text",
  828 + "analyzer": "french",
  829 + "fields": {
  830 + "keyword": {
  831 + "type": "keyword",
  832 + "normalizer": "lowercase"
  833 + }
  834 + }
  835 + },
  836 + "gl": {
  837 + "type": "text",
  838 + "analyzer": "galician",
  839 + "fields": {
  840 + "keyword": {
  841 + "type": "keyword",
  842 + "normalizer": "lowercase"
  843 + }
  844 + }
  845 + },
  846 + "de": {
  847 + "type": "text",
  848 + "analyzer": "german",
  849 + "fields": {
  850 + "keyword": {
  851 + "type": "keyword",
  852 + "normalizer": "lowercase"
  853 + }
  854 + }
  855 + },
  856 + "el": {
  857 + "type": "text",
  858 + "analyzer": "greek",
  859 + "fields": {
  860 + "keyword": {
  861 + "type": "keyword",
  862 + "normalizer": "lowercase"
  863 + }
  864 + }
  865 + },
  866 + "hi": {
  867 + "type": "text",
  868 + "analyzer": "hindi",
  869 + "fields": {
  870 + "keyword": {
  871 + "type": "keyword",
  872 + "normalizer": "lowercase"
  873 + }
  874 + }
  875 + },
  876 + "hu": {
  877 + "type": "text",
  878 + "analyzer": "hungarian",
  879 + "fields": {
  880 + "keyword": {
  881 + "type": "keyword",
  882 + "normalizer": "lowercase"
  883 + }
  884 + }
  885 + },
  886 + "id": {
  887 + "type": "text",
  888 + "analyzer": "indonesian",
  889 + "fields": {
  890 + "keyword": {
  891 + "type": "keyword",
  892 + "normalizer": "lowercase"
  893 + }
  894 + }
  895 + },
  896 + "it": {
  897 + "type": "text",
  898 + "analyzer": "italian",
  899 + "fields": {
  900 + "keyword": {
  901 + "type": "keyword",
  902 + "normalizer": "lowercase"
  903 + }
  904 + }
  905 + },
  906 + "no": {
  907 + "type": "text",
  908 + "analyzer": "norwegian",
  909 + "fields": {
  910 + "keyword": {
  911 + "type": "keyword",
  912 + "normalizer": "lowercase"
  913 + }
  914 + }
  915 + },
  916 + "fa": {
  917 + "type": "text",
  918 + "analyzer": "persian",
  919 + "fields": {
  920 + "keyword": {
  921 + "type": "keyword",
  922 + "normalizer": "lowercase"
  923 + }
  924 + }
  925 + },
  926 + "pt": {
  927 + "type": "text",
  928 + "analyzer": "portuguese",
  929 + "fields": {
  930 + "keyword": {
  931 + "type": "keyword",
  932 + "normalizer": "lowercase"
  933 + }
  934 + }
  935 + },
  936 + "ro": {
  937 + "type": "text",
  938 + "analyzer": "romanian",
  939 + "fields": {
  940 + "keyword": {
  941 + "type": "keyword",
  942 + "normalizer": "lowercase"
  943 + }
  944 + }
  945 + },
  946 + "ru": {
  947 + "type": "text",
  948 + "analyzer": "russian",
  949 + "fields": {
  950 + "keyword": {
  951 + "type": "keyword",
  952 + "normalizer": "lowercase"
  953 + }
  954 + }
  955 + },
  956 + "es": {
  957 + "type": "text",
  958 + "analyzer": "spanish",
  959 + "fields": {
  960 + "keyword": {
  961 + "type": "keyword",
  962 + "normalizer": "lowercase"
  963 + }
  964 + }
  965 + },
  966 + "sv": {
  967 + "type": "text",
  968 + "analyzer": "swedish",
  969 + "fields": {
  970 + "keyword": {
  971 + "type": "keyword",
  972 + "normalizer": "lowercase"
  973 + }
  974 + }
  975 + },
  976 + "tr": {
  977 + "type": "text",
  978 + "analyzer": "turkish",
  979 + "fields": {
  980 + "keyword": {
  981 + "type": "keyword",
  982 + "normalizer": "lowercase"
  983 + }
  984 + }
  985 + },
  986 + "th": {
  987 + "type": "text",
  988 + "analyzer": "thai",
  989 + "fields": {
  990 + "keyword": {
  991 + "type": "keyword",
  992 + "normalizer": "lowercase"
  993 + }
  994 + }
117 995 }
118 996 }
119 997 },
... ... @@ -158,25 +1036,263 @@
158 1036 }
159 1037 }
160 1038 },
161   - "category_path_zh": {
162   - "type": "text",
163   - "analyzer": "hanlp_index",
164   - "search_analyzer": "hanlp_standard"
165   - },
166   - "category_path_en": {
167   - "type": "text",
168   - "analyzer": "english",
169   - "search_analyzer": "english"
170   - },
171   - "category_name_zh": {
172   - "type": "text",
173   - "analyzer": "hanlp_index",
174   - "search_analyzer": "hanlp_standard"
  1039 + "category_path": {
  1040 + "type": "object",
  1041 + "properties": {
  1042 + "ar": {
  1043 + "type": "text",
  1044 + "analyzer": "arabic"
  1045 + },
  1046 + "hy": {
  1047 + "type": "text",
  1048 + "analyzer": "armenian"
  1049 + },
  1050 + "eu": {
  1051 + "type": "text",
  1052 + "analyzer": "basque"
  1053 + },
  1054 + "pt_br": {
  1055 + "type": "text",
  1056 + "analyzer": "brazilian"
  1057 + },
  1058 + "bg": {
  1059 + "type": "text",
  1060 + "analyzer": "bulgarian"
  1061 + },
  1062 + "ca": {
  1063 + "type": "text",
  1064 + "analyzer": "catalan"
  1065 + },
  1066 + "zh": {
  1067 + "type": "text",
  1068 + "analyzer": "icu_analyzer"
  1069 + },
  1070 + "chinese": {
  1071 + "type": "text",
  1072 + "analyzer": "chinese"
  1073 + },
  1074 + "cjk": {
  1075 + "type": "text",
  1076 + "analyzer": "cjk"
  1077 + },
  1078 + "cs": {
  1079 + "type": "text",
  1080 + "analyzer": "czech"
  1081 + },
  1082 + "da": {
  1083 + "type": "text",
  1084 + "analyzer": "danish"
  1085 + },
  1086 + "nl": {
  1087 + "type": "text",
  1088 + "analyzer": "dutch"
  1089 + },
  1090 + "en": {
  1091 + "type": "text",
  1092 + "analyzer": "english"
  1093 + },
  1094 + "fi": {
  1095 + "type": "text",
  1096 + "analyzer": "finnish"
  1097 + },
  1098 + "fr": {
  1099 + "type": "text",
  1100 + "analyzer": "french"
  1101 + },
  1102 + "gl": {
  1103 + "type": "text",
  1104 + "analyzer": "galician"
  1105 + },
  1106 + "de": {
  1107 + "type": "text",
  1108 + "analyzer": "german"
  1109 + },
  1110 + "el": {
  1111 + "type": "text",
  1112 + "analyzer": "greek"
  1113 + },
  1114 + "hi": {
  1115 + "type": "text",
  1116 + "analyzer": "hindi"
  1117 + },
  1118 + "hu": {
  1119 + "type": "text",
  1120 + "analyzer": "hungarian"
  1121 + },
  1122 + "id": {
  1123 + "type": "text",
  1124 + "analyzer": "indonesian"
  1125 + },
  1126 + "it": {
  1127 + "type": "text",
  1128 + "analyzer": "italian"
  1129 + },
  1130 + "no": {
  1131 + "type": "text",
  1132 + "analyzer": "norwegian"
  1133 + },
  1134 + "fa": {
  1135 + "type": "text",
  1136 + "analyzer": "persian"
  1137 + },
  1138 + "pt": {
  1139 + "type": "text",
  1140 + "analyzer": "portuguese"
  1141 + },
  1142 + "ro": {
  1143 + "type": "text",
  1144 + "analyzer": "romanian"
  1145 + },
  1146 + "ru": {
  1147 + "type": "text",
  1148 + "analyzer": "russian"
  1149 + },
  1150 + "es": {
  1151 + "type": "text",
  1152 + "analyzer": "spanish"
  1153 + },
  1154 + "sv": {
  1155 + "type": "text",
  1156 + "analyzer": "swedish"
  1157 + },
  1158 + "tr": {
  1159 + "type": "text",
  1160 + "analyzer": "turkish"
  1161 + },
  1162 + "th": {
  1163 + "type": "text",
  1164 + "analyzer": "thai"
  1165 + }
  1166 + }
175 1167 },
176   - "category_name_en": {
177   - "type": "text",
178   - "analyzer": "english",
179   - "search_analyzer": "english"
  1168 + "category_name_text": {
  1169 + "type": "object",
  1170 + "properties": {
  1171 + "ar": {
  1172 + "type": "text",
  1173 + "analyzer": "arabic"
  1174 + },
  1175 + "hy": {
  1176 + "type": "text",
  1177 + "analyzer": "armenian"
  1178 + },
  1179 + "eu": {
  1180 + "type": "text",
  1181 + "analyzer": "basque"
  1182 + },
  1183 + "pt_br": {
  1184 + "type": "text",
  1185 + "analyzer": "brazilian"
  1186 + },
  1187 + "bg": {
  1188 + "type": "text",
  1189 + "analyzer": "bulgarian"
  1190 + },
  1191 + "ca": {
  1192 + "type": "text",
  1193 + "analyzer": "catalan"
  1194 + },
  1195 + "zh": {
  1196 + "type": "text",
  1197 + "analyzer": "icu_analyzer"
  1198 + },
  1199 + "chinese": {
  1200 + "type": "text",
  1201 + "analyzer": "chinese"
  1202 + },
  1203 + "cjk": {
  1204 + "type": "text",
  1205 + "analyzer": "cjk"
  1206 + },
  1207 + "cs": {
  1208 + "type": "text",
  1209 + "analyzer": "czech"
  1210 + },
  1211 + "da": {
  1212 + "type": "text",
  1213 + "analyzer": "danish"
  1214 + },
  1215 + "nl": {
  1216 + "type": "text",
  1217 + "analyzer": "dutch"
  1218 + },
  1219 + "en": {
  1220 + "type": "text",
  1221 + "analyzer": "english"
  1222 + },
  1223 + "fi": {
  1224 + "type": "text",
  1225 + "analyzer": "finnish"
  1226 + },
  1227 + "fr": {
  1228 + "type": "text",
  1229 + "analyzer": "french"
  1230 + },
  1231 + "gl": {
  1232 + "type": "text",
  1233 + "analyzer": "galician"
  1234 + },
  1235 + "de": {
  1236 + "type": "text",
  1237 + "analyzer": "german"
  1238 + },
  1239 + "el": {
  1240 + "type": "text",
  1241 + "analyzer": "greek"
  1242 + },
  1243 + "hi": {
  1244 + "type": "text",
  1245 + "analyzer": "hindi"
  1246 + },
  1247 + "hu": {
  1248 + "type": "text",
  1249 + "analyzer": "hungarian"
  1250 + },
  1251 + "id": {
  1252 + "type": "text",
  1253 + "analyzer": "indonesian"
  1254 + },
  1255 + "it": {
  1256 + "type": "text",
  1257 + "analyzer": "italian"
  1258 + },
  1259 + "no": {
  1260 + "type": "text",
  1261 + "analyzer": "norwegian"
  1262 + },
  1263 + "fa": {
  1264 + "type": "text",
  1265 + "analyzer": "persian"
  1266 + },
  1267 + "pt": {
  1268 + "type": "text",
  1269 + "analyzer": "portuguese"
  1270 + },
  1271 + "ro": {
  1272 + "type": "text",
  1273 + "analyzer": "romanian"
  1274 + },
  1275 + "ru": {
  1276 + "type": "text",
  1277 + "analyzer": "russian"
  1278 + },
  1279 + "es": {
  1280 + "type": "text",
  1281 + "analyzer": "spanish"
  1282 + },
  1283 + "sv": {
  1284 + "type": "text",
  1285 + "analyzer": "swedish"
  1286 + },
  1287 + "tr": {
  1288 + "type": "text",
  1289 + "analyzer": "turkish"
  1290 + },
  1291 + "th": {
  1292 + "type": "text",
  1293 + "analyzer": "thai"
  1294 + }
  1295 + }
180 1296 },
181 1297 "category_id": {
182 1298 "type": "keyword"
... ... @@ -294,4 +1410,3 @@
294 1410 }
295 1411 }
296 1412 }
297   -
... ...
scripts/check_es_data.py
... ... @@ -28,12 +28,12 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
28 28 "size": size,
29 29 "_source": [
30 30 "spu_id",
31   - "title_zh",
  31 + "title",
32 32 "category1_name",
33 33 "category2_name",
34 34 "category3_name",
35 35 "category_name",
36   - "category_path_zh",
  36 + "category_path",
37 37 "specifications",
38 38 "option1_name",
39 39 "option2_name",
... ... @@ -51,14 +51,16 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
51 51  
52 52 for i, hit in enumerate(hits, 1):
53 53 source = hit.get('_source', {})
  54 + title_obj = source.get("title") or {}
  55 + category_path_obj = source.get("category_path") or {}
54 56 print(f"文档 {i}:")
55 57 print(f" spu_id: {source.get('spu_id')}")
56   - print(f" title_zh: {source.get('title_zh', '')[:50]}")
  58 + print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}")
57 59 print(f" category1_name: {source.get('category1_name')}")
58 60 print(f" category2_name: {source.get('category2_name')}")
59 61 print(f" category3_name: {source.get('category3_name')}")
60 62 print(f" category_name: {source.get('category_name')}")
61   - print(f" category_path_zh: {source.get('category_path_zh')}")
  63 + print(f" category_path.zh: {category_path_obj.get('zh') if isinstance(category_path_obj, dict) else None}")
62 64 print(f" option1_name: {source.get('option1_name')}")
63 65 print(f" option2_name: {source.get('option2_name')}")
64 66 print(f" option3_name: {source.get('option3_name')}")
... ...
scripts/check_index_mapping.py
... ... @@ -20,11 +20,19 @@ def check_field_mapping(mapping_dict, field_path):
20 20 current = mapping_dict
21 21  
22 22 for part in parts:
23   - if isinstance(current, dict):
24   - current = current.get(part)
25   - if current is None:
26   - return None
27   - else:
  23 + if not isinstance(current, dict):
  24 + return None
  25 +
  26 + # ES mapping nesting: object fields store subfields under "properties"
  27 + if "properties" in current and isinstance(current["properties"], dict):
  28 + current = current["properties"]
  29 +
  30 + # multi-fields store subfields under "fields" (e.g. vendor.zh.keyword)
  31 + if part != parts[0] and "fields" in current and isinstance(current["fields"], dict) and part in current["fields"]:
  32 + current = current["fields"]
  33 +
  34 + current = current.get(part)
  35 + if current is None:
28 36 return None
29 37 return current
30 38  
... ... @@ -70,12 +78,13 @@ def main():
70 78  
71 79 # 检查关键字段
72 80 fields_to_check = [
73   - 'title_zh',
74   - 'brief_zh',
75   - 'description_zh',
76   - 'vendor_zh',
77   - 'category_path_zh',
78   - 'category_name_zh'
  81 + "title.zh",
  82 + "brief.zh",
  83 + "description.zh",
  84 + "vendor.zh",
  85 + "vendor.zh.keyword",
  86 + "category_path.zh",
  87 + "category_name_text.zh"
79 88 ]
80 89  
81 90 print("=" * 80)
... ... @@ -83,7 +92,7 @@ def main():
83 92 print("=" * 80)
84 93  
85 94 for field_name in fields_to_check:
86   - field_mapping = index_mapping.get(field_name)
  95 + field_mapping = check_field_mapping(index_mapping, field_name)
87 96  
88 97 if field_mapping is None:
89 98 print(f"\n❌ {field_name}: 字段不存在")
... ...
search/es_query_builder.py
... ... @@ -367,37 +367,37 @@ class ESQueryBuilder:
367 367 """
368 368 if language == 'zh':
369 369 all_fields = [
370   - "title_zh^3.0",
371   - "brief_zh^1.5",
372   - "description_zh",
373   - "vendor_zh^1.5",
  370 + "title.zh^3.0",
  371 + "brief.zh^1.5",
  372 + "description.zh",
  373 + "vendor.zh^1.5",
374 374 "tags",
375   - "category_path_zh^1.5",
376   - "category_name_zh^1.5",
  375 + "category_path.zh^1.5",
  376 + "category_name_text.zh^1.5",
377 377 "option1_values^0.5"
378 378 ]
379 379 core_fields = [
380   - "title_zh^3.0",
381   - "brief_zh^1.5",
382   - "vendor_zh^1.5",
383   - "category_name_zh^1.5"
  380 + "title.zh^3.0",
  381 + "brief.zh^1.5",
  382 + "vendor.zh^1.5",
  383 + "category_name_text.zh^1.5"
384 384 ]
385 385 else: # en
386 386 all_fields = [
387   - "title_en^3.0",
388   - "brief_en^1.5",
389   - "description_en",
390   - "vendor_en^1.5",
  387 + "title.en^3.0",
  388 + "brief.en^1.5",
  389 + "description.en",
  390 + "vendor.en^1.5",
391 391 "tags",
392   - "category_path_en^1.5",
393   - "category_name_en^1.5",
  392 + "category_path.en^1.5",
  393 + "category_name_text.en^1.5",
394 394 "option1_values^0.5"
395 395 ]
396 396 core_fields = [
397   - "title_en^3.0",
398   - "brief_en^1.5",
399   - "vendor_en^1.5",
400   - "category_name_en^1.5"
  397 + "title.en^3.0",
  398 + "brief.en^1.5",
  399 + "vendor.en^1.5",
  400 + "category_name_text.en^1.5"
401 401 ]
402 402 return all_fields, core_fields
403 403  
... ...
tests/conftest.py
... ... @@ -28,7 +28,7 @@ def sample_index_config() -> IndexConfig:
28 28 return IndexConfig(
29 29 name="default",
30 30 label="默认索引",
31   - fields=["title_zh", "brief_zh", "tags"],
  31 + fields=["title.zh", "brief.zh", "tags"],
32 32 boost=1.0
33 33 )
34 34  
... ... @@ -60,10 +60,10 @@ def sample_search_config(sample_index_config) -> SearchConfig:
60 60 es_index_name="test_products",
61 61 field_boosts={
62 62 "tenant_id": 1.0,
63   - "title_zh": 3.0,
64   - "brief_zh": 1.5,
  63 + "title.zh": 3.0,
  64 + "brief.zh": 1.5,
65 65 "tags": 1.0,
66   - "category_path_zh": 1.5,
  66 + "category_path.zh": 1.5,
67 67 },
68 68 indexes=[sample_index_config],
69 69 query_config=query_config,
... ... @@ -89,8 +89,8 @@ def mock_es_client() -> Mock:
89 89 "_id": "1",
90 90 "_score": 2.5,
91 91 "_source": {
92   - "title_zh": "红色连衣裙",
93   - "vendor_zh": "测试品牌",
  92 + "title": {"zh": "红色连衣裙"},
  93 + "vendor": {"zh": "测试品牌"},
94 94 "min_price": 299.0,
95 95 "category_id": "1"
96 96 }
... ... @@ -99,8 +99,8 @@ def mock_es_client() -&gt; Mock:
99 99 "_id": "2",
100 100 "_score": 2.2,
101 101 "_source": {
102   - "title_zh": "蓝色连衣裙",
103   - "vendor_zh": "测试品牌",
  102 + "title": {"zh": "蓝色连衣裙"},
  103 + "vendor": {"zh": "测试品牌"},
104 104 "min_price": 399.0,
105 105 "category_id": "1"
106 106 }
... ... @@ -142,8 +142,8 @@ def sample_search_results() -> Dict[str, Any]:
142 142 "query": "红色连衣裙",
143 143 "expected_total": 2,
144 144 "expected_products": [
145   - {"title_zh": "红色连衣裙", "min_price": 299.0},
146   - {"title_zh": "蓝色连衣裙", "min_price": 399.0}
  145 + {"title": "红色连衣裙", "min_price": 299.0},
  146 + {"title": "蓝色连衣裙", "min_price": 399.0}
147 147 ]
148 148 }
149 149  
... ... @@ -157,16 +157,16 @@ def temp_config_file() -> Generator[str, None, None]:
157 157 config_data = {
158 158 "es_index_name": "test_products",
159 159 "field_boosts": {
160   - "title_zh": 3.0,
161   - "brief_zh": 1.5,
  160 + "title.zh": 3.0,
  161 + "brief.zh": 1.5,
162 162 "tags": 1.0,
163   - "category_path_zh": 1.5
  163 + "category_path.zh": 1.5
164 164 },
165 165 "indexes": [
166 166 {
167 167 "name": "default",
168 168 "label": "默认索引",
169   - "fields": ["title_zh", "brief_zh", "tags"],
  169 + "fields": ["title.zh", "brief.zh", "tags"],
170 170 "boost": 1.0
171 171 }
172 172 ],
... ...