Commit d7d48f5238c6c83665e359cda931dbc17dc748f8

Authored by tangwang
1 parent 62b7972c

改动(mapping + 灌入结构)

mappings/search_products.json:把原来的 title_zh/title_en/brief_zh/... 等按语言后缀拆分的字段,改成以语言代码为 key 的对象结构(例如 PUT /products/_doc/1 { "title": {"en": ...} })
同时在这些字段下 预置了全部 analyzer 语言:
arabic, armenian, basque, brazilian, bulgarian, catalan, chinese, cjk, czech, danish, dutch, english, finnish, french, galician, german, greek, hindi, hungarian, indonesian, italian, norwegian, persian, portuguese, romanian, russian, spanish, swedish, turkish, thai

实现为 type: object + properties,同时满足“按语言灌入”和“按语言 analyzer”。
索引灌入(全量/增量/transformer)已同步改完
indexer/document_transformer.py:输出从 title_zh/title_en/... 改为:
title: {<primary_lang>: 原文, en?: 翻译, zh?: 翻译}
brief/description/vendor 同理
category_path/category_name_text 也改为语言对象(避免查询侧继续依赖旧字段)
indexer/incremental_service.py:embedding 取值从 title_en/title_zh 改为从 title 对象里优先取 en,否则取 zh,否则取任一可用语言。
查询侧与配置、API/文档已同步
search/es_query_builder.py:查询字段统一改成点路径:title.zh / title.en / vendor.zh / vendor.zh.keyword / category_name_text.zh 等。
config/config.yaml:field boosts / indexes 里的字段名同步为新点路径。
API & formatter:
api/result_formatter.py 已支持新结构(并保留对旧 *_zh/_en 的兼容兜底)。
api/models.py、相关 docs/examples 里的 vendor_zh.keyword 等已更新为 vendor.zh.keyword。
文档/脚本:docs/、README.md、scripts/ 里所有旧字段名引用已批量替换为新结构。
@@ -256,12 +256,12 @@ The system uses centralized configuration through `config/config.yaml`: @@ -256,12 +256,12 @@ The system uses centralized configuration through `config/config.yaml`:
256 { 256 {
257 "tenant_id": "keyword", // Multi-tenant isolation 257 "tenant_id": "keyword", // Multi-tenant isolation
258 "spu_id": "keyword", // Product identifier 258 "spu_id": "keyword", // Product identifier
259 - "title_zh/en": "text", // Multi-language titles  
260 - "brief_zh/en": "text", // Short descriptions  
261 - "description_zh/en": "text", // Detailed descriptions  
262 - "vendor_zh/en": "text", // Supplier/brand with keyword subfield  
263 - "category_path_zh/en": "text", // Hierarchical category paths  
264 - "category_name_zh/en": "text", // Category names for search 259 + "title.zh/en": "text", // Multi-language titles
  260 + "brief.zh/en": "text", // Short descriptions
  261 + "description.zh/en": "text", // Detailed descriptions
  262 + "vendor.zh/en": "text", // Supplier/brand with keyword subfield
  263 + "category_path.zh/en": "text", // Hierarchical category paths
  264 + "category_name_text.zh/en": "text", // Category names for search
265 "category1/2/3_name": "keyword", // Multi-level category filtering 265 "category1/2/3_name": "keyword", // Multi-level category filtering
266 "tags": "keyword", // Product tags 266 "tags": "keyword", // Product tags
267 "specifications": "nested", // Product variants (color, size, etc.) 267 "specifications": "nested", // Product variants (color, size, etc.)
@@ -318,11 +318,11 @@ The system uses centralized configuration through `config/config.yaml`: @@ -318,11 +318,11 @@ The system uses centralized configuration through `config/config.yaml`:
318 **Field Boost Configuration**: 318 **Field Boost Configuration**:
319 ```yaml 319 ```yaml
320 field_boosts: 320 field_boosts:
321 - title_zh/en: 3.0 # Highest priority  
322 - brief_zh/en: 1.5 # Medium priority  
323 - description_zh/en: 1.0 # Lower priority  
324 - vendor_zh/en: 1.5 # Brand emphasis  
325 - category_path_zh/en: 1.5 # Category relevance 321 + title.zh/en: 3.0 # Highest priority
  322 + brief.zh/en: 1.5 # Medium priority
  323 + description.zh/en: 1.0 # Lower priority
  324 + vendor.zh/en: 1.5 # Brand emphasis
  325 + category_path.zh/en: 1.5 # Category relevance
326 tags: 1.0 # Tag matching 326 tags: 1.0 # Tag matching
327 ``` 327 ```
328 328
@@ -15,8 +15,8 @@ @@ -15,8 +15,8 @@
15 "query" : { 15 "query" : {
16 "dis_max" : { 16 "dis_max" : {
17 "queries" : [ 17 "queries" : [
18 - {"match" : { "title_en" : xxx }},  
19 - {"match" : { "title_zh" : xxx }}, 18 + {"match" : { "title.en" : xxx }},
  19 + {"match" : { "title.zh" : xxx }},
20 {"match" : { "title_xx" : xxx }} 20 {"match" : { "title_xx" : xxx }}
21 ], 21 ],
22 "tie_breaker" : 0.8 22 "tie_breaker" : 0.8
@@ -86,7 +86,7 @@ class SearchRequest(BaseModel): @@ -86,7 +86,7 @@ class SearchRequest(BaseModel):
86 "examples": [ 86 "examples": [
87 { 87 {
88 "category_name": ["手机", "电子产品"], 88 "category_name": ["手机", "电子产品"],
89 - "vendor_zh.keyword": "奇乐", 89 + "vendor.zh.keyword": "奇乐",
90 "specifications": {"name": "颜色", "value": "白色"} 90 "specifications": {"name": "颜色", "value": "白色"}
91 }, 91 },
92 { 92 {
api/result_formatter.py
@@ -32,13 +32,22 @@ class ResultFormatter: @@ -32,13 +32,22 @@ class ResultFormatter:
32 lang = "en" 32 lang = "en"
33 33
34 def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]: 34 def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]:
35 - """从 *_zh / *_en 字段中按语言选择一个值,若目标语言缺失则回退到另一种。"""  
36 - zh_val = src.get(f"{base}_zh")  
37 - en_val = src.get(f"{base}_en") 35 + """
  36 + 从多语言对象字段中按语言选择一个值:
  37 + - 新结构: {base: {"zh": "...", "en": "...", ...}}
  38 + - 兼容旧结构: {base_zh: "...", base_en: "..."}
  39 + 若目标语言缺失则回退到另一种。
  40 + """
  41 + obj = src.get(base)
  42 + if isinstance(obj, dict):
  43 + zh_val = obj.get("zh")
  44 + en_val = obj.get("en")
  45 + else:
  46 + zh_val = src.get(f"{base}_zh")
  47 + en_val = src.get(f"{base}_en")
38 if lang == "zh": 48 if lang == "zh":
39 return zh_val or en_val 49 return zh_val or en_val
40 - else:  
41 - return en_val or zh_val 50 + return en_val or zh_val
42 51
43 for hit in es_hits: 52 for hit in es_hits:
44 source = hit.get('_source', {}) 53 source = hit.get('_source', {})
@@ -60,7 +69,7 @@ class ResultFormatter: @@ -60,7 +69,7 @@ class ResultFormatter:
60 description = pick_lang_field(source, "description") 69 description = pick_lang_field(source, "description")
61 vendor = pick_lang_field(source, "vendor") 70 vendor = pick_lang_field(source, "vendor")
62 category_path = pick_lang_field(source, "category_path") 71 category_path = pick_lang_field(source, "category_path")
63 - category_name = pick_lang_field(source, "category_name") 72 + category_name = pick_lang_field(source, "category_name_text") or source.get("category_name")
64 73
65 # Extract SKUs 74 # Extract SKUs
66 skus = [] 75 skus = []
config/config.yaml
@@ -15,20 +15,20 @@ es_settings: @@ -15,20 +15,20 @@ es_settings:
15 # 只配置权重,不配置字段结构(字段结构由 mappings/search_products.json 定义) 15 # 只配置权重,不配置字段结构(字段结构由 mappings/search_products.json 定义)
16 field_boosts: 16 field_boosts:
17 # 文本相关性字段 17 # 文本相关性字段
18 - title_zh: 3.0  
19 - brief_zh: 1.5  
20 - description_zh: 1.0  
21 - vendor_zh: 1.5  
22 - title_en: 3.0  
23 - brief_en: 1.5  
24 - description_en: 1.0  
25 - vendor_en: 1.5 18 + "title.zh": 3.0
  19 + "brief.zh": 1.5
  20 + "description.zh": 1.0
  21 + "vendor.zh": 1.5
  22 + "title.en": 3.0
  23 + "brief.en": 1.5
  24 + "description.en": 1.0
  25 + "vendor.en": 1.5
26 26
27 # 分类相关字段 27 # 分类相关字段
28 - category_path_zh: 1.5  
29 - category_name_zh: 1.5  
30 - category_path_en: 1.5  
31 - category_name_en: 1.5 28 + "category_path.zh": 1.5
  29 + "category_name_text.zh": 1.5
  30 + "category_path.en": 1.5
  31 + "category_name_text.en": 1.5
32 32
33 # 标签和属性值字段 33 # 标签和属性值字段
34 tags: 1.0 34 tags: 1.0
@@ -42,33 +42,33 @@ indexes: @@ -42,33 +42,33 @@ indexes:
42 - name: "default" 42 - name: "default"
43 label: "默认搜索" 43 label: "默认搜索"
44 fields: 44 fields:
45 - - "title_zh"  
46 - - "brief_zh"  
47 - - "description_zh"  
48 - - "vendor_zh" 45 + - "title.zh"
  46 + - "brief.zh"
  47 + - "description.zh"
  48 + - "vendor.zh"
49 - "tags" 49 - "tags"
50 - - "category_path_zh"  
51 - - "category_name_zh" 50 + - "category_path.zh"
  51 + - "category_name_text.zh"
52 - "option1_values" 52 - "option1_values"
53 boost: 1.0 53 boost: 1.0
54 54
55 - name: "title" 55 - name: "title"
56 label: "标题搜索" 56 label: "标题搜索"
57 fields: 57 fields:
58 - - "title_zh" 58 + - "title.zh"
59 boost: 2.0 59 boost: 2.0
60 60
61 - name: "vendor" 61 - name: "vendor"
62 label: "品牌搜索" 62 label: "品牌搜索"
63 fields: 63 fields:
64 - - "vendor_zh" 64 + - "vendor.zh"
65 boost: 1.5 65 boost: 1.5
66 66
67 - name: "category" 67 - name: "category"
68 label: "类目搜索" 68 label: "类目搜索"
69 fields: 69 fields:
70 - - "category_path_zh"  
71 - - "category_name_zh" 70 + - "category_path.zh"
  71 + - "category_name_text.zh"
72 boost: 1.5 72 boost: 1.5
73 73
74 - name: "tags" 74 - name: "tags"
@@ -17,7 +17,7 @@ def get_match_fields_for_index(config: SearchConfig, index_name: str = &quot;default&quot; @@ -17,7 +17,7 @@ def get_match_fields_for_index(config: SearchConfig, index_name: str = &quot;default&quot;
17 index_name: Name of the index domain (default: "default") 17 index_name: Name of the index domain (default: "default")
18 18
19 Returns: 19 Returns:
20 - List of field names with boost, e.g., ["title_zh^3.0", "brief_zh^1.5"] 20 + List of field names with boost, e.g., ["title.zh^3.0", "brief.zh^1.5"]
21 """ 21 """
22 # Find the index config 22 # Find the index config
23 index_config = None 23 index_config = None
docs/MySQL到ES字段映射说明-业务版.md
@@ -28,14 +28,14 @@ @@ -28,14 +28,14 @@
28 28
29 | ES 字段 | 数据来源表 | 表中字段 | 转换说明 | 29 | ES 字段 | 数据来源表 | 表中字段 | 转换说明 |
30 |---------|-----------|----------|----------| 30 |---------|-----------|----------|----------|
31 -| `title_zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 |  
32 -| `title_en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 |  
33 -| `brief_zh` | SPU 表 | `brief` | 同上 |  
34 -| `brief_en` | SPU 表 | `brief` | 同上 |  
35 -| `description_zh` | SPU 表 | `description` | 同上 |  
36 -| `description_en` | SPU 表 | `description` | 同上 |  
37 -| `vendor_zh` | SPU 表 | `vendor` | 同上 |  
38 -| `vendor_en` | SPU 表 | `vendor` | 同上 | 31 +| `title.zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 |
  32 +| `title.en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 |
  33 +| `brief.zh` | SPU 表 | `brief` | 同上 |
  34 +| `brief.en` | SPU 表 | `brief` | 同上 |
  35 +| `description.zh` | SPU 表 | `description` | 同上 |
  36 +| `description.en` | SPU 表 | `description` | 同上 |
  37 +| `vendor.zh` | SPU 表 | `vendor` | 同上 |
  38 +| `vendor.en` | SPU 表 | `vendor` | 同上 |
39 39
40 **翻译规则:** 40 **翻译规则:**
41 - 根据租户配置的 `primary_language` 确定主语言 41 - 根据租户配置的 `primary_language` 确定主语言
@@ -104,7 +104,7 @@ category_ids = [&quot;1&quot;, &quot;2&quot;, &quot;3&quot;] @@ -104,7 +104,7 @@ category_ids = [&quot;1&quot;, &quot;2&quot;, &quot;3&quot;]
104 104
105 | ES 字段 | 数据来源 | 转换说明 | 105 | ES 字段 | 数据来源 | 转换说明 |
106 |---------|----------|----------| 106 |---------|----------|----------|
107 -| `category_path_zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` | 107 +| `category_path.zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` |
108 | `category1_name` | 类目名称列表[0] | 一级类目:`"电子产品"` | 108 | `category1_name` | 类目名称列表[0] | 一级类目:`"电子产品"` |
109 | `category2_name` | 类目名称列表[1] | 二级类目:`"手机"` | 109 | `category2_name` | 类目名称列表[1] | 二级类目:`"手机"` |
110 | `category3_name` | 类目名称列表[2] | 三级类目:`"iPhone"` | 110 | `category3_name` | 类目名称列表[2] | 三级类目:`"iPhone"` |
@@ -300,13 +300,13 @@ sku_weight_units = 去重后的重量单位列表 @@ -300,13 +300,13 @@ sku_weight_units = 去重后的重量单位列表
300 300
301 | ES 字段 | 数据来源 | 转换说明 | 301 | ES 字段 | 数据来源 | 转换说明 |
302 |---------|----------|----------| 302 |---------|----------|----------|
303 -| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br/>优先使用 `title_en`,如果没有则使用 `title_zh` | 303 +| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br/>优先使用 `title.en`,如果没有则使用 `title.zh` |
304 304
305 **生成逻辑:** 305 **生成逻辑:**
306 306
307 ``` 307 ```
308 如果启用向量搜索: 308 如果启用向量搜索:
309 - 文本 = title_en 或 title_zh 309 + 文本 = title.en 或 title.zh
310 向量 = 文本编码器.encode(文本) 310 向量 = 文本编码器.encode(文本)
311 title_embedding = 向量(1024 维浮点数组) 311 title_embedding = 向量(1024 维浮点数组)
312 ``` 312 ```
@@ -322,17 +322,17 @@ sku_weight_units = 去重后的重量单位列表 @@ -322,17 +322,17 @@ sku_weight_units = 去重后的重量单位列表
322 | **基础字段** | 322 | **基础字段** |
323 | `tenant_id` | SPU | `tenant_id` | 租户ID | 323 | `tenant_id` | SPU | `tenant_id` | 租户ID |
324 | `spu_id` | SPU | `id` | 商品ID | 324 | `spu_id` | SPU | `id` | 商品ID |
325 -| `title_zh/en` | SPU | `title` | 标题(多语言) |  
326 -| `brief_zh/en` | SPU | `brief` | 简介(多语言) |  
327 -| `description_zh/en` | SPU | `description` | 描述(多语言) |  
328 -| `vendor_zh/en` | SPU | `vendor` | 品牌(多语言) | 325 +| `title.zh/en` | SPU | `title` | 标题(多语言) |
  326 +| `brief.zh/en` | SPU | `brief` | 简介(多语言) |
  327 +| `description.zh/en` | SPU | `description` | 描述(多语言) |
  328 +| `vendor.zh/en` | SPU | `vendor` | 品牌(多语言) |
329 | `tags` | SPU | `tags` | 标签数组 | 329 | `tags` | SPU | `tags` | 标签数组 |
330 | `image_url` | SPU | `image_src` | 主图URL | 330 | `image_url` | SPU | `image_src` | 主图URL |
331 | `sales` | SPU | `fake_sales` | 销量 | 331 | `sales` | SPU | `fake_sales` | 销量 |
332 | `create_time` | SPU | `create_time` | 创建时间 | 332 | `create_time` | SPU | `create_time` | 创建时间 |
333 | `update_time` | SPU | `update_time` | 更新时间 | 333 | `update_time` | SPU | `update_time` | 更新时间 |
334 | **类别字段** | 334 | **类别字段** |
335 -| `category_path_zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 | 335 +| `category_path.zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 |
336 | `category1_name` | SPU + 类目映射 | `category_path` → 类目名称[0] | 一级类目 | 336 | `category1_name` | SPU + 类目映射 | `category_path` → 类目名称[0] | 一级类目 |
337 | `category2_name` | SPU + 类目映射 | `category_path` → 类目名称[1] | 二级类目 | 337 | `category2_name` | SPU + 类目映射 | `category_path` → 类目名称[1] | 二级类目 |
338 | `category3_name` | SPU + 类目映射 | `category_path` → 类目名称[2] | 三级类目 | 338 | `category3_name` | SPU + 类目映射 | `category_path` → 类目名称[2] | 三级类目 |
@@ -451,14 +451,14 @@ GET /search_products/_search @@ -451,14 +451,14 @@ GET /search_products/_search
451 "_source": { 451 "_source": {
452 "tenant_id": "162", 452 "tenant_id": "162",
453 "spu_id": "74174", 453 "spu_id": "74174",
454 - "title_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",  
455 - "title_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",  
456 - "brief_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",  
457 - "brief_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",  
458 - "description_zh": "<p>实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】</p>",  
459 - "description_en": "<p>Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]</p",  
460 - "vendor_zh": "顺达",  
461 - "vendor_en": "Shunda", 454 + "title.zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
  455 + "title.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
  456 + "brief.zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
  457 + "brief.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
  458 + "description.zh": "<p>实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】</p>",
  459 + "description.en": "<p>Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]</p",
  460 + "vendor.zh": "顺达",
  461 + "vendor.en": "Shunda",
462 "title_embedding": [ 462 "title_embedding": [
463 -0.005125104915350676, 463 -0.005125104915350676,
464 ... 464 ...
@@ -468,12 +468,12 @@ GET /search_products/_search @@ -468,12 +468,12 @@ GET /search_products/_search
468 "魔方", 468 "魔方",
469 "ShunDa" 469 "ShunDa"
470 ], 470 ],
471 - "category_path_zh": "玩具/汽车",  
472 - "category_path_en": null, 471 + "category_path.zh": "玩具/汽车",
  472 + "category_path.en": null,
473 "category1_name": "玩具", 473 "category1_name": "玩具",
474 "category2_name": "汽车", 474 "category2_name": "汽车",
475 - "category_name_zh": "汽车",  
476 - "category_name_en": null, 475 + "category_name_text.zh": "汽车",
  476 + "category_name_text.en": null,
477 "category_name": "汽车", 477 "category_name": "汽车",
478 "category_id": "593389466647880832", 478 "category_id": "593389466647880832",
479 "category_level": 2, 479 "category_level": 2,
@@ -599,7 +599,7 @@ category_ids [&quot;1&quot;, &quot;2&quot;, &quot;3&quot;] @@ -599,7 +599,7 @@ category_ids [&quot;1&quot;, &quot;2&quot;, &quot;3&quot;]
599 ↓ [通过映射表转换] 599 ↓ [通过映射表转换]
600 category_names ["电子产品", "手机", "iPhone"] 600 category_names ["电子产品", "手机", "iPhone"]
601 ↓ [构建字段] 601 ↓ [构建字段]
602 -category_path_zh: "电子产品/手机/iPhone" 602 +category_path.zh: "电子产品/手机/iPhone"
603 category1_name: "电子产品" 603 category1_name: "电子产品"
604 category2_name: "手机" 604 category2_name: "手机"
605 category3_name: "iPhone" 605 category3_name: "iPhone"
@@ -644,12 +644,12 @@ sku_prices: [99.99, 109.99, 129.99] @@ -644,12 +644,12 @@ sku_prices: [99.99, 109.99, 129.99]
644 |--------|---------|------| 644 |--------|---------|------|
645 | `id` | `spu_id` | 商品ID | 645 | `id` | `spu_id` | 商品ID |
646 | `tenant_id` | `tenant_id` | 租户ID | 646 | `tenant_id` | `tenant_id` | 租户ID |
647 -| `title` | `title_zh/en` | 标题 |  
648 -| `brief` | `brief_zh/en` | 简介 |  
649 -| `description` | `description_zh/en` | 描述 |  
650 -| `vendor` | `vendor_zh/en` | 品牌 | 647 +| `title` | `title.zh/en` | 标题 |
  648 +| `brief` | `brief.zh/en` | 简介 |
  649 +| `description` | `description.zh/en` | 描述 |
  650 +| `vendor` | `vendor.zh/en` | 品牌 |
651 | `tags` | `tags` | 标签 | 651 | `tags` | `tags` | 标签 |
652 -| `category_path` | `category_path_zh` | 类目路径 | 652 +| `category_path` | `category_path.zh` | 类目路径 |
653 | `category_id` | `category_id` | 类目ID | 653 | `category_id` | `category_id` | 类目ID |
654 | `category_level` | `category_level` | 类目层级 | 654 | `category_level` | `category_level` | 类目层级 |
655 | `image_src` | `image_url` | 主图 | 655 | `image_src` | `image_url` | 主图 |
docs/MySQL到ES文档映射说明.md
@@ -217,10 +217,10 @@ WHERE deleted = 0 AND category_id IS NOT NULL @@ -217,10 +217,10 @@ WHERE deleted = 0 AND category_id IS NOT NULL
217 文本字段支持中英文双语,根据租户配置进行自动翻译。 217 文本字段支持中英文双语,根据租户配置进行自动翻译。
218 218
219 **字段列表:** 219 **字段列表:**
220 -- `title_zh` / `title_en`  
221 -- `brief_zh` / `brief_en`  
222 -- `description_zh` / `description_en`  
223 -- `vendor_zh` / `vendor_en` 220 +- `title.zh` / `title.en`
  221 +- `brief.zh` / `brief.en`
  222 +- `description.zh` / `description.en`
  223 +- `vendor.zh` / `vendor.en`
224 224
225 **填充逻辑:** 225 **填充逻辑:**
226 226
@@ -244,8 +244,8 @@ WHERE deleted = 0 AND category_id IS NOT NULL @@ -244,8 +244,8 @@ WHERE deleted = 0 AND category_id IS NOT NULL
244 translate_to_en=translate_to_en, 244 translate_to_en=translate_to_en,
245 translate_to_zh=translate_to_zh, 245 translate_to_zh=translate_to_zh,
246 ) 246 )
247 - doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)  
248 - doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None) 247 + doc['title.zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)
  248 + doc['title.en'] = translations.get('en') or (title_text if primary_lang == 'en' else None)
249 ``` 249 ```
250 250
251 **代码位置:** `indexer/document_transformer.py:168-298` 251 **代码位置:** `indexer/document_transformer.py:168-298`
@@ -340,7 +340,7 @@ for cid in category_ids: @@ -340,7 +340,7 @@ for cid in category_ids:
340 if category_names: 340 if category_names:
341 # 构建类目路径字符串 341 # 构建类目路径字符串
342 category_path_str = '/'.join(category_names) 342 category_path_str = '/'.join(category_names)
343 - doc['category_path_zh'] = category_path_str # 例如:"电子产品/手机/iPhone" 343 + doc['category_path.zh'] = category_path_str # 例如:"电子产品/手机/iPhone"
344 344
345 # 填充分层类目名称 345 # 填充分层类目名称
346 if len(category_names) > 0: 346 if len(category_names) > 0:
@@ -358,7 +358,7 @@ if category_names: @@ -358,7 +358,7 @@ if category_names:
358 ```python 358 ```python
359 elif pd.notna(spu_row.get('category')): 359 elif pd.notna(spu_row.get('category')):
360 category = str(spu_row['category']) 360 category = str(spu_row['category'])
361 - doc['category_name_zh'] = category 361 + doc['category_name_text.zh'] = category
362 362
363 # 尝试从 category 字段解析多级分类(如果包含 "/") 363 # 尝试从 category 字段解析多级分类(如果包含 "/")
364 if '/' in category: 364 if '/' in category:
@@ -375,8 +375,8 @@ elif pd.notna(spu_row.get(&#39;category&#39;)): @@ -375,8 +375,8 @@ elif pd.notna(spu_row.get(&#39;category&#39;)):
375 375
376 | ES 字段 | 类型 | 说明 | 376 | ES 字段 | 类型 | 说明 |
377 |---------|------|------| 377 |---------|------|------|
378 -| `category_path_zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") |  
379 -| `category_path_en` | text | 类目路径英文(暂未实现) | 378 +| `category_path.zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") |
  379 +| `category_path.en` | text | 类目路径英文(暂未实现) |
380 | `category1_name` | keyword | 一级类目名称 | 380 | `category1_name` | keyword | 一级类目名称 |
381 | `category2_name` | keyword | 二级类目名称 | 381 | `category2_name` | keyword | 二级类目名称 |
382 | `category3_name` | keyword | 三级类目名称 | 382 | `category3_name` | keyword | 三级类目名称 |
@@ -673,7 +673,7 @@ if enable_embedding and encoder and documents: @@ -673,7 +673,7 @@ if enable_embedding and encoder and documents:
673 title_texts = [] 673 title_texts = []
674 title_doc_indices = [] 674 title_doc_indices = []
675 for i, (_, doc) in enumerate(documents): 675 for i, (_, doc) in enumerate(documents):
676 - title_text = doc.get("title_en") or doc.get("title_zh") 676 + title_text = doc.get("title.en") or doc.get("title.zh")
677 if title_text and str(title_text).strip(): 677 if title_text and str(title_text).strip():
678 title_texts.append(str(title_text)) 678 title_texts.append(str(title_text))
679 title_doc_indices.append(i) 679 title_doc_indices.append(i)
@@ -858,16 +858,16 @@ spu_df[&quot;_is_deleted&quot;] = spu_df[&quot;deleted&quot;].apply(_is_deleted_value) @@ -858,16 +858,16 @@ spu_df[&quot;_is_deleted&quot;] = spu_df[&quot;deleted&quot;].apply(_is_deleted_value)
858 { 858 {
859 "tenant_id": "1", 859 "tenant_id": "1",
860 "spu_id": "12345", 860 "spu_id": "12345",
861 - "title_zh": "iPhone 15 Pro Max",  
862 - "title_en": "iPhone 15 Pro Max",  
863 - "brief_zh": "最新款 iPhone",  
864 - "brief_en": "Latest iPhone",  
865 - "description_zh": "详细描述...",  
866 - "description_en": "Detailed description...",  
867 - "vendor_zh": "Apple",  
868 - "vendor_en": "Apple", 861 + "title.zh": "iPhone 15 Pro Max",
  862 + "title.en": "iPhone 15 Pro Max",
  863 + "brief.zh": "最新款 iPhone",
  864 + "brief.en": "Latest iPhone",
  865 + "description.zh": "详细描述...",
  866 + "description.en": "Detailed description...",
  867 + "vendor.zh": "Apple",
  868 + "vendor.en": "Apple",
869 "tags": ["手机", "智能手机", "Apple"], 869 "tags": ["手机", "智能手机", "Apple"],
870 - "category_path_zh": "电子产品/手机/iPhone", 870 + "category_path.zh": "电子产品/手机/iPhone",
871 "category1_name": "电子产品", 871 "category1_name": "电子产品",
872 "category2_name": "手机", 872 "category2_name": "手机",
873 "category3_name": "iPhone", 873 "category3_name": "iPhone",
docs/Search-API-Examples.md
@@ -132,7 +132,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \ @@ -132,7 +132,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
132 "language": "zh", 132 "language": "zh",
133 "filters": { 133 "filters": {
134 "category_name": "手机", 134 "category_name": "手机",
135 - "vendor_zh.keyword": "奇乐" 135 + "vendor.zh.keyword": "奇乐"
136 } 136 }
137 }' 137 }'
138 ``` 138 ```
@@ -294,7 +294,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \ @@ -294,7 +294,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
294 "language": "zh", 294 "language": "zh",
295 "filters": { 295 "filters": {
296 "category_name": ["手机", "电子产品"], 296 "category_name": ["手机", "电子产品"],
297 - "vendor_zh.keyword": "品牌A" 297 + "vendor.zh.keyword": "品牌A"
298 }, 298 },
299 "range_filters": { 299 "range_filters": {
300 "min_price": { 300 "min_price": {
docs/基础配置指南.md
@@ -26,12 +26,12 @@ @@ -26,12 +26,12 @@
26 - `create_time`, `update_time` (date) - 时间字段 26 - `create_time`, `update_time` (date) - 时间字段
27 27
28 #### 多语言文本字段 28 #### 多语言文本字段
29 -- `title_zh`, `title_en` (text) - 标题(中英文)  
30 -- `brief_zh`, `brief_en` (text) - 短描述(中英文)  
31 -- `description_zh`, `description_en` (text) - 详细描述(中英文)  
32 -- `vendor_zh`, `vendor_en` (text) - 供应商/品牌(中英文,含keyword子字段)  
33 -- `category_path_zh`, `category_path_en` (text) - 类目路径(中英文)  
34 -- `category_name_zh`, `category_name_en` (text) - 类目名称(中英文) 29 +- `title.zh`, `title.en` (text) - 标题(中英文)
  30 +- `brief.zh`, `brief.en` (text) - 短描述(中英文)
  31 +- `description.zh`, `description.en` (text) - 详细描述(中英文)
  32 +- `vendor.zh`, `vendor.en` (text) - 供应商/品牌(中英文,含keyword子字段)
  33 +- `category_path.zh`, `category_path.en` (text) - 类目路径(中英文)
  34 +- `category_name_text.zh`, `category_name_text.en` (text) - 类目名称(中英文)
35 35
36 #### 类目字段 36 #### 类目字段
37 - `category_id` (keyword) - 类目ID 37 - `category_id` (keyword) - 类目ID
@@ -64,12 +64,12 @@ @@ -64,12 +64,12 @@
64 ### 文本召回字段 64 ### 文本召回字段
65 65
66 默认同时搜索以下字段(中英文都包含): 66 默认同时搜索以下字段(中英文都包含):
67 -- `title_zh^3.0`, `title_en^3.0`  
68 -- `brief_zh^1.5`, `brief_en^1.5`  
69 -- `description_zh^1.0`, `description_en^1.0`  
70 -- `vendor_zh^1.5`, `vendor_en^1.5`  
71 -- `category_path_zh^1.5`, `category_path_en^1.5`  
72 -- `category_name_zh^1.5`, `category_name_en^1.5` 67 +- `title.zh^3.0`, `title.en^3.0`
  68 +- `brief.zh^1.5`, `brief.en^1.5`
  69 +- `description.zh^1.0`, `description.en^1.0`
  70 +- `vendor.zh^1.5`, `vendor.en^1.5`
  71 +- `category_path.zh^1.5`, `category_path.en^1.5`
  72 +- `category_name_text.zh^1.5`, `category_name_text.en^1.5`
73 - `tags^1.0` 73 - `tags^1.0`
74 74
75 ### 查询架构 75 ### 查询架构
@@ -126,12 +126,12 @@ @@ -126,12 +126,12 @@
126 - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段 126 - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段
127 127
128 映射规则: 128 映射规则:
129 -- `title_zh/en` → `title`  
130 -- `brief_zh/en` → `brief`  
131 -- `description_zh/en` → `description`  
132 -- `vendor_zh/en` → `vendor`  
133 -- `category_path_zh/en` → `category_path`  
134 -- `category_name_zh/en` → `category_name` 129 +- `title.zh/en` → `title`
  130 +- `brief.zh/en` → `brief`
  131 +- `description.zh/en` → `description`
  132 +- `vendor.zh/en` → `vendor`
  133 +- `category_path.zh/en` → `category_path`
  134 +- `category_name_text.zh/en` → `category_name`
135 135
136 ## 配置修改 136 ## 配置修改
137 137
docs/常用查询 - ES.md
@@ -22,7 +22,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_ @@ -22,7 +22,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products_
22 22
23 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ 23 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
24 "size": 100, 24 "size": 100,
25 - "_source": ["title_zh", "title_en"], 25 + "_source": ["title"],
26 "query": { 26 "query": {
27 "bool": { 27 "bool": {
28 "filter": [ 28 "filter": [
@@ -34,13 +34,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/ @@ -34,13 +34,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
34 34
35 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ 35 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
36 "size": 1, 36 "size": 1,
37 - "_source": ["title_zh", "title_en"], 37 + "_source": ["title"],
38 "query": { 38 "query": {
39 "bool": { 39 "bool": {
40 "must": [ 40 "must": [
41 { 41 {
42 "match": { 42 "match": {
43 - "title_zh": { 43 + "title.zh": {
44 "query": "裙子" 44 "query": "裙子"
45 } 45 }
46 } 46 }
@@ -54,7 +54,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/ @@ -54,7 +54,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
54 }' 54 }'
55 55
56 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ 56 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{
57 - "analyzer": "query_ansj", 57 + "analyzer": "icu_analyzer",
58 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" 58 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
59 }' 59 }'
60 60
@@ -73,13 +73,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/ @@ -73,13 +73,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
73 "multi_match": { 73 "multi_match": {
74 "_name": "base_query", 74 "_name": "base_query",
75 "fields": [ 75 "fields": [
76 - "title_zh^3.0",  
77 - "brief_zh^1.5",  
78 - "description_zh",  
79 - "vendor_zh^1.5", 76 + "title.zh^3.0",
  77 + "brief.zh^1.5",
  78 + "description.zh",
  79 + "vendor.zh^1.5",
80 "tags", 80 "tags",
81 - "category_path_zh^1.5",  
82 - "category_name_zh^1.5", 81 + "category_path.zh^1.5",
  82 + "category_name_text.zh^1.5",
83 "option1_values^0.5" 83 "option1_values^0.5"
84 ], 84 ],
85 "minimum_should_match": "75%", 85 "minimum_should_match": "75%",
@@ -110,13 +110,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/ @@ -110,13 +110,13 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/
110 "multi_match": { 110 "multi_match": {
111 "_name": "base_query", 111 "_name": "base_query",
112 "fields": [ 112 "fields": [
113 - "title_zh^3.0",  
114 - "brief_zh^1.5",  
115 - "description_zh",  
116 - "vendor_zh^1.5", 113 + "title.zh^3.0",
  114 + "brief.zh^1.5",
  115 + "description.zh",
  116 + "vendor.zh^1.5",
117 "tags", 117 "tags",
118 - "category_path_zh^1.5",  
119 - "category_name_zh^1.5", 118 + "category_path.zh^1.5",
  119 + "category_name_text.zh^1.5",
120 "option1_values^0.5" 120 "option1_values^0.5"
121 ], 121 ],
122 "minimum_should_match": "75%", 122 "minimum_should_match": "75%",
@@ -271,7 +271,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s @@ -271,7 +271,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
271 "size": 1, 271 "size": 1,
272 "_source": [ 272 "_source": [
273 "spu_id", 273 "spu_id",
274 - "title_zh", 274 + "title",
275 "category1_name", 275 "category1_name",
276 "category2_name", 276 "category2_name",
277 "category3_name", 277 "category3_name",
@@ -552,7 +552,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s @@ -552,7 +552,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
552 } 552 }
553 }, 553 },
554 "size": 1, 554 "size": 1,
555 - "_source": ["spu_id", "title_zh", "specifications"] 555 + "_source": ["spu_id", "title", "specifications"]
556 }' 556 }'
557 557
558 ## 4. 统计查询 558 ## 4. 统计查询
@@ -597,7 +597,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s @@ -597,7 +597,7 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
597 } 597 }
598 }, 598 },
599 "size": 10, 599 "size": 10,
600 - "_source": ["spu_id", "title_zh", "category_name_zh", "category_path_zh"] 600 + "_source": ["spu_id", "title", "category_name_text", "category_path"]
601 }' 601 }'
602 602
603 ### 5.2 查找有option但没有specifications的文档(数据转换问题) 603 ### 5.2 查找有option但没有specifications的文档(数据转换问题)
@@ -614,7 +614,32 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s @@ -614,7 +614,32 @@ curl -u &#39;essa:4hOaLaf41y2VuI8y&#39; -X GET &#39;http://localhost:9200/search_products/_s
614 } 614 }
615 }, 615 },
616 "size": 10, 616 "size": 10,
617 - "_source": ["spu_id", "title_zh", "option1_name", "option2_name", "option3_name", "specifications"] 617 + "_source": ["spu_id", "title", "option1_name", "option2_name", "option3_name", "specifications"]
618 }' 618 }'
619 619
620 620
  621 +重排序:
  622 +GET /search_products/_search
  623 +{
  624 + "query": {
  625 + "match": {
  626 + "title.en": {
  627 + "query": "quick brown fox",
  628 + "minimum_should_match": "90%"
  629 + }
  630 + }
  631 + },
  632 + "rescore": {
  633 + "window_size": 50,
  634 + "query": {
  635 + "rescore_query": {
  636 + "match_phrase": {
  637 + "title.en": {
  638 + "query": "quick brown fox",
  639 + "slop": 50
  640 + }
  641 + }
  642 + }
  643 + }
  644 + }
  645 +}
docs/搜索API对接指南.md
@@ -203,7 +203,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ @@ -203,7 +203,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
203 "category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个 203 "category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个
204 "category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个 204 "category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个
205 "category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个 205 "category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个
206 - "vendor_zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个 206 + "vendor.zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个
207 "tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个 207 "tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个
208 // specifications 嵌套过滤(特殊格式) 208 // specifications 嵌套过滤(特殊格式)
209 "specifications": { 209 "specifications": {
@@ -274,7 +274,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ @@ -274,7 +274,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
274 - `category_name`: 类目名称 274 - `category_name`: 类目名称
275 - `category1_name`, `category2_name`, `category3_name`: 多级类目 275 - `category1_name`, `category2_name`, `category3_name`: 多级类目
276 - `category_id`: 类目ID 276 - `category_id`: 类目ID
277 -- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段) 277 +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
278 - `tags`: 标签(keyword类型,支持数组) 278 - `tags`: 标签(keyword类型,支持数组)
279 - `option1_name`, `option2_name`, `option3_name`: 选项名称 279 - `option1_name`, `option2_name`, `option3_name`: 选项名称
280 - `specifications`: 规格过滤(嵌套字段,格式见上文) 280 - `specifications`: 规格过滤(嵌套字段,格式见上文)
@@ -602,7 +602,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" @@ -602,7 +602,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5"
602 { 602 {
603 "id": "12345", 603 "id": "12345",
604 "source": { 604 "source": {
605 - "title_zh": "芭比时尚娃娃", 605 + "title": {
  606 + "zh": "芭比时尚娃娃"
  607 + },
606 "min_price": 89.99, 608 "min_price": 89.99,
607 "category1_name": "玩具" 609 "category1_name": "玩具"
608 } 610 }
@@ -739,7 +741,7 @@ curl "http://localhost:6002/search/12345" @@ -739,7 +741,7 @@ curl "http://localhost:6002/search/12345"
739 | 字段 | 类型 | 说明 | 741 | 字段 | 类型 | 说明 |
740 |------|------|------| 742 |------|------|------|
741 | `spu_id` | string | SPU ID | 743 | `spu_id` | string | SPU ID |
742 -| `title` | string | 商品标题(根据language参数自动选择title_zh或title_en) | 744 +| `title` | string | 商品标题(根据language参数自动选择 `title.zh` 或 `title.en`) |
743 | `brief` | string | 商品短描述(根据language参数自动选择) | 745 | `brief` | string | 商品短描述(根据language参数自动选择) |
744 | `description` | string | 商品详细描述(根据language参数自动选择) | 746 | `description` | string | 商品详细描述(根据language参数自动选择) |
745 | `vendor` | string | 供应商/品牌(根据language参数自动选择) | 747 | `vendor` | string | 供应商/品牌(根据language参数自动选择) |
@@ -1088,7 +1090,9 @@ curl -X POST "http://localhost:6004/indexer/index" \ @@ -1088,7 +1090,9 @@ curl -X POST "http://localhost:6004/indexer/index" \
1088 "document": { 1090 "document": {
1089 "tenant_id": "162", 1091 "tenant_id": "162",
1090 "spu_id": "123", 1092 "spu_id": "123",
1091 - "title_zh": "商品标题", 1093 + "title": {
  1094 + "zh": "商品标题"
  1095 + },
1092 ... 1096 ...
1093 } 1097 }
1094 }, 1098 },
@@ -1298,7 +1302,7 @@ curl -X GET "http://localhost:6004/indexer/health" @@ -1298,7 +1302,7 @@ curl -X GET "http://localhost:6004/indexer/health"
1298 "size": 20, 1302 "size": 20,
1299 "language": "zh", 1303 "language": "zh",
1300 "filters": { 1304 "filters": {
1301 - "vendor_zh.keyword": ["品牌A", "品牌B"] 1305 + "vendor.zh.keyword": ["品牌A", "品牌B"]
1302 }, 1306 },
1303 "range_filters": { 1307 "range_filters": {
1304 "min_price": { 1308 "min_price": {
@@ -1505,12 +1509,12 @@ curl -X GET "http://localhost:6004/indexer/health" @@ -1505,12 +1509,12 @@ curl -X GET "http://localhost:6004/indexer/health"
1505 |--------|------|------| 1509 |--------|------|------|
1506 | `tenant_id` | keyword | 租户ID(多租户隔离) | 1510 | `tenant_id` | keyword | 租户ID(多租户隔离) |
1507 | `spu_id` | keyword | SPU ID | 1511 | `spu_id` | keyword | SPU ID |
1508 -| `title_zh`, `title_en` | text | 商品标题(中英文) |  
1509 -| `brief_zh`, `brief_en` | text | 商品短描述(中英文) |  
1510 -| `description_zh`, `description_en` | text | 商品详细描述(中英文) |  
1511 -| `vendor_zh`, `vendor_en` | text | 供应商/品牌(中英文,含keyword子字段) |  
1512 -| `category_path_zh`, `category_path_en` | text | 类目路径(中英文,用于搜索) |  
1513 -| `category_name_zh`, `category_name_en` | text | 类目名称(中英文,用于搜索) | 1512 +| `title.<lang>` | object/text | 商品标题(多语言对象,如 `title.zh`, `title.en`) |
  1513 +| `brief.<lang>` | object/text | 商品短描述(多语言对象,如 `brief.zh`, `brief.en`) |
  1514 +| `description.<lang>` | object/text | 商品详细描述(多语言对象,如 `description.zh`, `description.en`) |
  1515 +| `vendor.<lang>` | object/text | 供应商/品牌(多语言对象,且带 keyword 子字段,如 `vendor.zh.keyword`) |
  1516 +| `category_path.<lang>` | object/text | 类目路径(多语言对象,用于搜索,如 `category_path.zh`) |
  1517 +| `category_name_text.<lang>` | object/text | 类目名称(多语言对象,用于搜索,如 `category_name_text.zh`) |
1514 | `category_id` | keyword | 类目ID | 1518 | `category_id` | keyword | 类目ID |
1515 | `category_name` | keyword | 类目名称(用于过滤) | 1519 | `category_name` | keyword | 类目名称(用于过滤) |
1516 | `category_level` | integer | 类目层级 | 1520 | `category_level` | integer | 类目层级 |
@@ -1552,7 +1556,7 @@ curl -X GET "http://localhost:6004/indexer/health" @@ -1552,7 +1556,7 @@ curl -X GET "http://localhost:6004/indexer/health"
1552 - `category_name`: 类目名称 1556 - `category_name`: 类目名称
1553 - `category1_name`, `category2_name`, `category3_name`: 多级类目 1557 - `category1_name`, `category2_name`, `category3_name`: 多级类目
1554 - `category_id`: 类目ID 1558 - `category_id`: 类目ID
1555 -- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段) 1559 +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
1556 - `tags`: 标签(keyword类型) 1560 - `tags`: 标签(keyword类型)
1557 - `option1_name`, `option2_name`, `option3_name`: 选项名称 1561 - `option1_name`, `option2_name`, `option3_name`: 选项名称
1558 - `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解)) 1562 - `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解))
docs/搜索API速查表.md
@@ -19,7 +19,7 @@ POST /search/ @@ -19,7 +19,7 @@ POST /search/
19 "filters": { 19 "filters": {
20 "category_name": "手机", // 单值 20 "category_name": "手机", // 单值
21 "category1_name": "服装", // 一级类目 21 "category1_name": "服装", // 一级类目
22 - "vendor_zh.keyword": ["奇乐", "品牌A"], // 多值(OR) 22 + "vendor.zh.keyword": ["奇乐", "品牌A"], // 多值(OR)
23 "tags": "手机", // 标签 23 "tags": "手机", // 标签
24 // specifications 嵌套过滤 24 // specifications 嵌套过滤
25 "specifications": { 25 "specifications": {
docs/相关性检索优化说明.md
@@ -20,7 +20,7 @@ @@ -20,7 +20,7 @@
20 { 20 {
21 "multi_match": { 21 "multi_match": {
22 "query": "戏水动物", 22 "query": "戏水动物",
23 - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], 23 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
24 "minimum_should_match": "67%", 24 "minimum_should_match": "67%",
25 "tie_breaker": 0.9, 25 "tie_breaker": 0.9,
26 "boost": 1, 26 "boost": 1,
@@ -40,7 +40,7 @@ @@ -40,7 +40,7 @@
40 { 40 {
41 "multi_match": { 41 "multi_match": {
42 "_name": "base_query", 42 "_name": "base_query",
43 - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], 43 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
44 "minimum_should_match": "75%", 44 "minimum_should_match": "75%",
45 "operator": "AND", 45 "operator": "AND",
46 "query": "戏水动物", 46 "query": "戏水动物",
@@ -51,7 +51,7 @@ @@ -51,7 +51,7 @@
51 "multi_match": { 51 "multi_match": {
52 "_name": "base_query_trans_en", 52 "_name": "base_query_trans_en",
53 "boost": 0.4, 53 "boost": 0.4,
54 - "fields": ["title_en^3.0", ...], 54 + "fields": ["title.en^3.0", ...],
55 "minimum_should_match": "75%", 55 "minimum_should_match": "75%",
56 "operator": "AND", 56 "operator": "AND",
57 "query": "water sports", 57 "query": "water sports",
@@ -61,7 +61,7 @@ @@ -61,7 +61,7 @@
61 { 61 {
62 "multi_match": { 62 "multi_match": {
63 "query": "戏水动物", 63 "query": "戏水动物",
64 - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], 64 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
65 "type": "phrase", 65 "type": "phrase",
66 "slop": 2, 66 "slop": 2,
67 "boost": 1.0, 67 "boost": 1.0,
@@ -71,7 +71,7 @@ @@ -71,7 +71,7 @@
71 { 71 {
72 "multi_match": { 72 "multi_match": {
73 "query": "戏水 动物", 73 "query": "戏水 动物",
74 - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], 74 + "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
75 "operator": "AND", 75 "operator": "AND",
76 "tie_breaker": 0.9, 76 "tie_breaker": 0.9,
77 "boost": 0.1, 77 "boost": 0.1,
docs/系统设计文档.md
@@ -31,18 +31,18 @@ @@ -31,18 +31,18 @@
31 { 31 {
32 "tenant_id": "1", 32 "tenant_id": "1",
33 "spu_id": "123", 33 "spu_id": "123",
34 - "title_zh": "蓝牙耳机",  
35 - "title_en": "Bluetooth Headphones",  
36 - "brief_zh": "高品质蓝牙耳机",  
37 - "brief_en": "High-quality Bluetooth headphones", 34 + "title.zh": "蓝牙耳机",
  35 + "title.en": "Bluetooth Headphones",
  36 + "brief.zh": "高品质蓝牙耳机",
  37 + "brief.en": "High-quality Bluetooth headphones",
38 "category_name": "电子产品", 38 "category_name": "电子产品",
39 - "category_path_zh": "电子产品/音频设备/耳机",  
40 - "category_path_en": "Electronics/Audio/Headphones", 39 + "category_path.zh": "电子产品/音频设备/耳机",
  40 + "category_path.en": "Electronics/Audio/Headphones",
41 "category1_name": "电子产品", 41 "category1_name": "电子产品",
42 "category2_name": "音频设备", 42 "category2_name": "音频设备",
43 "category3_name": "耳机", 43 "category3_name": "耳机",
44 - "vendor_zh": "品牌A",  
45 - "vendor_en": "Brand A", 44 + "vendor.zh": "品牌A",
  45 + "vendor.en": "Brand A",
46 "min_price": 199.99, 46 "min_price": 199.99,
47 "max_price": 299.99, 47 "max_price": 299.99,
48 "option1_name": "color", 48 "option1_name": "color",
@@ -93,7 +93,7 @@ @@ -93,7 +93,7 @@
93 - **查询配置硬编码**:查询相关配置(字段 boost、查询域等)硬编码在 `search/query_config.py` 93 - **查询配置硬编码**:查询相关配置(字段 boost、查询域等)硬编码在 `search/query_config.py`
94 94
95 **索引结构特点**: 95 **索引结构特点**:
96 -1. **多语言字段**:所有文本字段支持中英文(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`) 96 +1. **多语言字段**:所有文本字段支持中英文(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`)
97 2. **嵌套字段**: 97 2. **嵌套字段**:
98 - `skus`: SKU 嵌套数组(包含价格、库存、选项值等) 98 - `skus`: SKU 嵌套数组(包含价格、库存、选项值等)
99 - `specifications`: 规格嵌套数组(包含 name、value、sku_id) 99 - `specifications`: 规格嵌套数组(包含 name、value、sku_id)
@@ -125,12 +125,12 @@ @@ -125,12 +125,12 @@
125 - `create_time`, `update_time` (date): 创建和更新时间 125 - `create_time`, `update_time` (date): 创建和更新时间
126 126
127 #### 多语言文本字段 127 #### 多语言文本字段
128 -- `title_zh/en` (text): 标题(中英文)  
129 -- `brief_zh/en` (text): 短描述(中英文)  
130 -- `description_zh/en` (text): 详细描述(中英文)  
131 -- `vendor_zh/en` (text): 供应商/品牌(中英文)  
132 -- `category_path_zh/en` (text): 类目路径(中英文)  
133 -- `category_name_zh/en` (text): 类目名称(中英文) 128 +- `title.zh/en` (text): 标题(中英文)
  129 +- `brief.zh/en` (text): 短描述(中英文)
  130 +- `description.zh/en` (text): 详细描述(中英文)
  131 +- `vendor.zh/en` (text): 供应商/品牌(中英文)
  132 +- `category_path.zh/en` (text): 类目路径(中英文)
  133 +- `category_name_text.zh/en` (text): 类目名称(中英文)
134 134
135 **分析器配置**: 135 **分析器配置**:
136 - 中文字段:`hanlp_index`(索引时)/ `hanlp_standard`(查询时) 136 - 中文字段:`hanlp_index`(索引时)/ `hanlp_standard`(查询时)
@@ -282,7 +282,7 @@ indexes: @@ -282,7 +282,7 @@ indexes:
282 2. **数据转换**(`indexer/spu_transformer.py`): 282 2. **数据转换**(`indexer/spu_transformer.py`):
283 - 按`spu_id`和`tenant_id`关联SPU和SKU数据 283 - 按`spu_id`和`tenant_id`关联SPU和SKU数据
284 - **多语言字段映射**: 284 - **多语言字段映射**:
285 - - MySQL的`title` → ES的`title_zh`(英文字段设为空) 285 + - MySQL的`title` → ES的`title.zh`(英文字段设为空)
286 - 其他文本字段类似处理 286 - 其他文本字段类似处理
287 - **分类字段映射**: 287 - **分类字段映射**:
288 - 从SPU表的`category_path`解析多级类目(`category1_name`, `category2_name`, `category3_name`) 288 - 从SPU表的`category_path`解析多级类目(`category1_name`, `category2_name`, `category3_name`)
@@ -483,7 +483,7 @@ laptop AND (gaming OR professional) ANDNOT cheap @@ -483,7 +483,7 @@ laptop AND (gaming OR professional) ANDNOT cheap
483 - 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系 483 - 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系
484 - 使用ES的`nested`查询实现 484 - 使用ES的`nested`查询实现
485 - **text_recall**: 文本相关性召回 485 - **text_recall**: 文本相关性召回
486 - - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`) 486 + - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`)
487 - 使用 `multi_match` 查询,支持字段 boost 487 - 使用 `multi_match` 查询,支持字段 boost
488 - 中文字段使用中文分词器,英文字段使用英文分析器 488 - 中文字段使用中文分词器,英文字段使用英文分析器
489 - **embedding_recall**: 向量召回(KNN) 489 - **embedding_recall**: 向量召回(KNN)
@@ -503,8 +503,8 @@ laptop AND (gaming OR professional) ANDNOT cheap @@ -503,8 +503,8 @@ laptop AND (gaming OR professional) ANDNOT cheap
503 "multi_match": { 503 "multi_match": {
504 "query": "手机", 504 "query": "手机",
505 "fields": [ 505 "fields": [
506 - "title_zh^3.0", "title_en^3.0",  
507 - "brief_zh^1.5", "brief_en^1.5", 506 + "title.zh^3.0", "title.en^3.0",
  507 + "brief.zh^1.5", "brief.en^1.5",
508 ... 508 ...
509 ] 509 ]
510 } 510 }
docs/系统设计文档v1.md
@@ -424,7 +424,7 @@ laptop AND (gaming OR professional) ANDNOT cheap @@ -424,7 +424,7 @@ laptop AND (gaming OR professional) ANDNOT cheap
424 - **结构**: `filters AND (text_recall OR embedding_recall)` 424 - **结构**: `filters AND (text_recall OR embedding_recall)`
425 - **filters**: 前端传递的过滤条件(永远起作用,放在 `filter` 中) 425 - **filters**: 前端传递的过滤条件(永远起作用,放在 `filter` 中)
426 - **text_recall**: 文本相关性召回 426 - **text_recall**: 文本相关性召回
427 - - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`) 427 + - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`)
428 - 使用 `multi_match` 查询,支持字段 boost 428 - 使用 `multi_match` 查询,支持字段 boost
429 - **embedding_recall**: 向量召回(KNN) 429 - **embedding_recall**: 向量召回(KNN)
430 - 使用 `title_embedding` 字段进行 KNN 搜索 430 - 使用 `title_embedding` 字段进行 KNN 搜索
@@ -443,8 +443,8 @@ laptop AND (gaming OR professional) ANDNOT cheap @@ -443,8 +443,8 @@ laptop AND (gaming OR professional) ANDNOT cheap
443 "multi_match": { 443 "multi_match": {
444 "query": "手机", 444 "query": "手机",
445 "fields": [ 445 "fields": [
446 - "title_zh^3.0", "title_en^3.0",  
447 - "brief_zh^1.5", "brief_en^1.5", 446 + "title.zh^3.0", "title.en^3.0",
  447 + "brief.zh^1.5", "brief.en^1.5",
448 ... 448 ...
449 ] 449 ]
450 } 450 }
docs/索引字段说明v1.md
@@ -539,7 +539,7 @@ title brief description seo_title seo_description seo_keywords vendor vendor_key @@ -539,7 +539,7 @@ title brief description seo_title seo_description seo_keywords vendor vendor_key
539 539
540 2. tenant - 数据灌入: 540 2. tenant - 数据灌入:
541 对每个tenant设置一个语言,作为tenant的一个基本配置。 541 对每个tenant设置一个语言,作为tenant的一个基本配置。
542 -写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title_en) 542 +写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title.en)
543 查询的时候,将query转为商家所用语言,并到对应的field去查。 543 查询的时候,将query转为商家所用语言,并到对应的field去查。
544 544
545 545
docs/索引字段说明v2-mapping结构.md
@@ -16,22 +16,22 @@ @@ -16,22 +16,22 @@
16 }, 16 },
17 17
18 // 文本相关性相关字段 18 // 文本相关性相关字段
19 - "title_zh": { 19 + "title.zh": {
20 "type": "text", 20 "type": "text",
21 "analyzer": "hanlp_index", 21 "analyzer": "hanlp_index",
22 "search_analyzer": "hanlp_standard" 22 "search_analyzer": "hanlp_standard"
23 }, 23 },
24 - "brief_zh": { 24 + "brief.zh": {
25 "type": "text", 25 "type": "text",
26 "analyzer": "hanlp_index", 26 "analyzer": "hanlp_index",
27 "search_analyzer": "hanlp_standard" 27 "search_analyzer": "hanlp_standard"
28 }, 28 },
29 - "description_zh": { 29 + "description.zh": {
30 "type": "text", 30 "type": "text",
31 "analyzer": "hanlp_index", 31 "analyzer": "hanlp_index",
32 "search_analyzer": "hanlp_standard" 32 "search_analyzer": "hanlp_standard"
33 }, 33 },
34 - "vendor_zh": { 34 + "vendor.zh": {
35 "type": "text", 35 "type": "text",
36 "analyzer": "hanlp_index", 36 "analyzer": "hanlp_index",
37 "search_analyzer": "hanlp_standard", 37 "search_analyzer": "hanlp_standard",
@@ -43,23 +43,23 @@ @@ -43,23 +43,23 @@
43 } 43 }
44 }, 44 },
45 45
46 - "title_en": { 46 + "title.en": {
47 "type": "text", 47 "type": "text",
48 "analyzer": "english", 48 "analyzer": "english",
49 "search_analyzer": "english", 49 "search_analyzer": "english",
50 }, 50 },
51 - "brief_en": { 51 + "brief.en": {
52 "type": "text", 52 "type": "text",
53 "analyzer": "english", 53 "analyzer": "english",
54 "search_analyzer": "english", 54 "search_analyzer": "english",
55 55
56 }, 56 },
57 - "description_en": { 57 + "description.en": {
58 "type": "text", 58 "type": "text",
59 "analyzer": "english", 59 "analyzer": "english",
60 "search_analyzer": "english", 60 "search_analyzer": "english",
61 }, 61 },
62 - "vendor_en": { 62 + "vendor.en": {
63 "type": "text", 63 "type": "text",
64 "analyzer": "english", 64 "analyzer": "english",
65 "search_analyzer": "english", 65 "search_analyzer": "english",
@@ -103,22 +103,22 @@ @@ -103,22 +103,22 @@
103 }, 103 },
104 104
105 // 分类相关 105 // 分类相关
106 - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 106 + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
107 "type": "text", 107 "type": "text",
108 "analyzer": "hanlp_index", 108 "analyzer": "hanlp_index",
109 "search_analyzer": "hanlp_standard" 109 "search_analyzer": "hanlp_standard"
110 }, 110 },
111 - "category_path_en": { // 提供模糊查询功能,辅助相关性计算 111 + "category_path.en": { // 提供模糊查询功能,辅助相关性计算
112 "type": "text", 112 "type": "text",
113 "analyzer": "english", 113 "analyzer": "english",
114 "search_analyzer": "english" 114 "search_analyzer": "english"
115 }, 115 },
116 - "category_name_zh": { // 提供模糊查询功能,辅助相关性计算 116 + "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
117 "type": "text", 117 "type": "text",
118 "analyzer": "hanlp_index", 118 "analyzer": "hanlp_index",
119 "search_analyzer": "hanlp_standard" 119 "search_analyzer": "hanlp_standard"
120 }, 120 },
121 - "category_name_en": { // 提供模糊查询功能,辅助相关性计算 121 + "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
122 "type": "text", 122 "type": "text",
123 "analyzer": "english", 123 "analyzer": "english",
124 "search_analyzer": "english" 124 "search_analyzer": "english"
docs/索引字段说明v2-plan.md
@@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
10 10
11 #### 1.1 多语言文本字段 11 #### 1.1 多语言文本字段
12 12
13 -- 为文本字段添加中英文双字段支持(title_zh/title_en, brief_zh/brief_en, description_zh/description_en, vendor_zh/vendor_en) 13 +- 为文本字段添加中英文双字段支持(title.zh/title.en, brief.zh/brief.en, description.zh/description.en, vendor.zh/vendor.en)
14 - 中文字段使用 `index_ansj`/`query_ansj` 分析器(对应文档中的hanlp_index/hanlp_standard) 14 - 中文字段使用 `index_ansj`/`query_ansj` 分析器(对应文档中的hanlp_index/hanlp_standard)
15 - 英文字段使用 `english` 分析器 15 - 英文字段使用 `english` 分析器
16 - **暂时只填充中文字段,英文字段设为空**(不需要语言检测,每个tenant的语言预先知道) 16 - **暂时只填充中文字段,英文字段设为空**(不需要语言检测,每个tenant的语言预先知道)
@@ -28,22 +28,22 @@ category_path varchar(500) @@ -28,22 +28,22 @@ category_path varchar(500)
28 28
29 mapping: 29 mapping:
30 30
31 - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 31 + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
32 "type": "text", 32 "type": "text",
33 "analyzer": "hanlp_index", 33 "analyzer": "hanlp_index",
34 "search_analyzer": "hanlp_standard" 34 "search_analyzer": "hanlp_standard"
35 }, 35 },
36 - "category_path_en": { // 提供模糊查询功能,辅助相关性计算 36 + "category_path.en": { // 提供模糊查询功能,辅助相关性计算
37 "type": "text", 37 "type": "text",
38 "analyzer": "english", 38 "analyzer": "english",
39 "search_analyzer": "english" 39 "search_analyzer": "english"
40 }, 40 },
41 - "category_name_zh": { // 提供模糊查询功能,辅助相关性计算 41 + "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
42 "type": "text", 42 "type": "text",
43 "analyzer": "hanlp_index", 43 "analyzer": "hanlp_index",
44 "search_analyzer": "hanlp_standard" 44 "search_analyzer": "hanlp_standard"
45 }, 45 },
46 - "category_name_en": { // 提供模糊查询功能,辅助相关性计算 46 + "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
47 "type": "text", 47 "type": "text",
48 "analyzer": "english", 48 "analyzer": "english",
49 "search_analyzer": "english" 49 "search_analyzer": "english"
@@ -104,15 +104,15 @@ mapping: @@ -104,15 +104,15 @@ mapping:
104 104
105 #### 2.1 多语言文本处理 105 #### 2.1 多语言文本处理
106 106
107 -- **简化处理**:暂时只填充中文字段(title_zh, brief_zh, description_zh, vendor_zh)  
108 -- 英文字段(title_en, brief_en, description_en, vendor_en)设为空或None 107 +- **简化处理**:暂时只填充中文字段(title.zh, brief.zh, description.zh, vendor.zh)
  108 +- 英文字段(title.en, brief.en, description.en, vendor.en)设为空或None
109 - 不需要语言检测逻辑 109 - 不需要语言检测逻辑
110 110
111 #### 2.2 分类路径解析 111 #### 2.2 分类路径解析
112 112
113 - 从 `category_path` 字段按 "/" 分割提取分类层级 113 - 从 `category_path` 字段按 "/" 分割提取分类层级
114 - 分割结果赋值给 `category1_name`, `category2_name`, `category3_name` 114 - 分割结果赋值给 `category1_name`, `category2_name`, `category3_name`
115 -- 生成 `category_path_zh`(暂时填充,`category_path_en` 设为空) 115 +- 生成 `category_path.zh`(暂时填充,`category_path.en` 设为空)
116 116
117 #### 2.3 SKU字段展开计算 117 #### 2.3 SKU字段展开计算
118 118
@@ -152,7 +152,7 @@ mapping: @@ -152,7 +152,7 @@ mapping:
152 152
153 **文件**: `indexer/spu_transformer.py` 153 **文件**: `indexer/spu_transformer.py`
154 154
155 -- **简化多语言处理**:只填充中文字段(title_zh, brief_zh等),英文字段设为空或None 155 +- **简化多语言处理**:只填充中文字段(title.zh, brief.zh等),英文字段设为空或None
156 - 实现分类路径的解析和展开 156 - 实现分类路径的解析和展开
157 - 实现SKU字段的展开计算(价格、重量、库存) 157 - 实现SKU字段的展开计算(价格、重量、库存)
158 - 实现选项字段的处理 158 - 实现选项字段的处理
docs/索引字段说明v2.md
@@ -28,24 +28,24 @@ @@ -28,24 +28,24 @@
28 28
29 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | 29 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
30 |--------|--------|--------|------|----------| 30 |--------|--------|--------|------|----------|
31 -| `title_zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` |  
32 -| `title_en` | text | english | 英文标题 | 暂为空(待翻译服务填充) | 31 +| `title.zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` |
  32 +| `title.en` | text | english | 英文标题 | 暂为空(待翻译服务填充) |
33 33
34 #### 2.2 描述字段 34 #### 2.2 描述字段
35 35
36 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | 36 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
37 |--------|--------|--------|------|----------| 37 |--------|--------|--------|------|----------|
38 -| `brief_zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` |  
39 -| `brief_en` | text | english | 英文短描述 | 暂为空 |  
40 -| `description_zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` |  
41 -| `description_en` | text | english | 英文详细描述 | 暂为空 | 38 +| `brief.zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` |
  39 +| `brief.en` | text | english | 英文短描述 | 暂为空 |
  40 +| `description.zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` |
  41 +| `description.en` | text | english | 英文详细描述 | 暂为空 |
42 42
43 #### 2.3 供应商/品牌字段 43 #### 2.3 供应商/品牌字段
44 44
45 | 字段名 | ES类型 | 分析器 | 子字段 | 说明 | 数据来源 | 45 | 字段名 | ES类型 | 分析器 | 子字段 | 说明 | 数据来源 |
46 |--------|--------|--------|--------|------|----------| 46 |--------|--------|--------|--------|------|----------|
47 -| `vendor_zh` | text | hanlp_index / hanlp_standard | `vendor_zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` |  
48 -| `vendor_en` | text | english | `vendor_en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 | 47 +| `vendor.zh` | text | hanlp_index / hanlp_standard | `vendor.zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` |
  48 +| `vendor.en` | text | english | `vendor.en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 |
49 49
50 **用途**: 50 **用途**:
51 - `text` 类型:用于全文搜索(支持模糊匹配) 51 - `text` 类型:用于全文搜索(支持模糊匹配)
@@ -65,15 +65,15 @@ @@ -65,15 +65,15 @@
65 65
66 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | 66 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
67 |--------|--------|--------|------|----------| 67 |--------|--------|--------|------|----------|
68 -| `category_path_zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` |  
69 -| `category_path_en` | text | english | 英文类目路径 | 暂为空 | 68 +| `category_path.zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` |
  69 +| `category_path.en` | text | english | 英文类目路径 | 暂为空 |
70 70
71 #### 4.2 类目名称(用于搜索) 71 #### 4.2 类目名称(用于搜索)
72 72
73 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | 73 | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
74 |--------|--------|--------|------|----------| 74 |--------|--------|--------|------|----------|
75 -| `category_name_zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` |  
76 -| `category_name_en` | text | english | 英文类目名称 | 暂为空 | 75 +| `category_name_text.zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` |
  76 +| `category_name_text.en` | text | english | 英文类目名称 | 暂为空 |
77 77
78 #### 4.3 类目标识(用于过滤和分面) 78 #### 4.3 类目标识(用于过滤和分面)
79 79
@@ -87,7 +87,7 @@ @@ -87,7 +87,7 @@
87 | `category3_name` | keyword | 三级类目名称 | 从 `category_path` 解析 | 87 | `category3_name` | keyword | 三级类目名称 | 从 `category_path` 解析 |
88 88
89 **用途**: 89 **用途**:
90 -- `category_path_zh/en`, `category_name_zh/en`: 用于全文搜索,支持模糊匹配 90 +- `category_path.zh/en`, `category_name_text.zh/en`: 用于全文搜索,支持模糊匹配
91 - `category_id`, `category_name`, `category_level`, `category1/2/3_name`: 用于精确过滤和分面聚合 91 - `category_id`, `category_name`, `category_level`, `category1/2/3_name`: 用于精确过滤和分面聚合
92 92
93 ### 5. 规格字段(Specifications) 93 ### 5. 规格字段(Specifications)
@@ -348,20 +348,20 @@ @@ -348,20 +348,20 @@
348 348
349 ### 搜索字段(参与相关性计算) 349 ### 搜索字段(参与相关性计算)
350 350
351 -- `title_zh`, `title_en` (boost: 3.0)  
352 -- `brief_zh`, `brief_en` (boost: 1.5)  
353 -- `description_zh`, `description_en` (boost: 1.0)  
354 -- `vendor_zh`, `vendor_en` (boost: 1.5) 351 +- `title.zh`, `title.en` (boost: 3.0)
  352 +- `brief.zh`, `brief.en` (boost: 1.5)
  353 +- `description.zh`, `description.en` (boost: 1.0)
  354 +- `vendor.zh`, `vendor.en` (boost: 1.5)
355 - `tags` (boost: 1.0) 355 - `tags` (boost: 1.0)
356 -- `category_path_zh`, `category_path_en` (boost: 1.5)  
357 -- `category_name_zh`, `category_name_en` (boost: 1.5) 356 +- `category_path.zh`, `category_path.en` (boost: 1.5)
  357 +- `category_name_text.zh`, `category_name_text.en` (boost: 1.5)
358 - `title_embedding` (向量召回,boost: 0.2) 358 - `title_embedding` (向量召回,boost: 0.2)
359 359
360 ### 过滤字段(精确匹配) 360 ### 过滤字段(精确匹配)
361 361
362 - `tenant_id` (必需,多租户隔离) 362 - `tenant_id` (必需,多租户隔离)
363 - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name` 363 - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name`
364 -- `vendor_zh.keyword`, `vendor_en.keyword` 364 +- `vendor.zh.keyword`, `vendor.en.keyword`
365 - `specifications` (嵌套查询) 365 - `specifications` (嵌套查询)
366 - `min_price`, `max_price` (范围过滤) 366 - `min_price`, `max_price` (范围过滤)
367 - `sales` (范围过滤) 367 - `sales` (范围过滤)
@@ -395,12 +395,12 @@ @@ -395,12 +395,12 @@
395 - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段 395 - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段
396 396
397 映射到前端字段: 397 映射到前端字段:
398 -- `title_zh/en` → `title`  
399 -- `brief_zh/en` → `brief`  
400 -- `description_zh/en` → `description`  
401 -- `vendor_zh/en` → `vendor`  
402 -- `category_path_zh/en` → `category_path`  
403 -- `category_name_zh/en` → `category_name` 398 +- `title.zh/en` → `title`
  399 +- `brief.zh/en` → `brief`
  400 +- `description.zh/en` → `description`
  401 +- `vendor.zh/en` → `vendor`
  402 +- `category_path.zh/en` → `category_path`
  403 +- `category_name_text.zh/en` → `category_name`
404 404
405 ### 规格数据构建 405 ### 规格数据构建
406 406
@@ -434,12 +434,12 @@ filters AND (text_recall OR embedding_recall) @@ -434,12 +434,12 @@ filters AND (text_recall OR embedding_recall)
434 ### 文本召回字段 434 ### 文本召回字段
435 435
436 根据查询词的语言选择对应的索引字段: 436 根据查询词的语言选择对应的索引字段:
437 -- `title_zh^3.0`, `title_en^3.0`  
438 -- `brief_zh^1.5`, `brief_en^1.5`  
439 -- `description_zh^1.0`, `description_en^1.0`  
440 -- `vendor_zh^1.5`, `vendor_en^1.5`  
441 -- `category_path_zh^1.5`, `category_path_en^1.5`  
442 -- `category_name_zh^1.5`, `category_name_en^1.5` 437 +- `title.zh^3.0`, `title.en^3.0`
  438 +- `brief.zh^1.5`, `brief.en^1.5`
  439 +- `description.zh^1.0`, `description.en^1.0`
  440 +- `vendor.zh^1.5`, `vendor.en^1.5`
  441 +- `category_path.zh^1.5`, `category_path.en^1.5`
  442 +- `category_name_text.zh^1.5`, `category_name_text.en^1.5`
443 - `tags^1.0` 443 - `tags^1.0`
444 444
445 ## 注意事项 445 ## 注意事项
docs/索引数据接口文档___old.md
@@ -52,12 +52,12 @@ tenant_config: @@ -52,12 +52,12 @@ tenant_config:
52 ### 配置规则 52 ### 配置规则
53 53
54 1. **主语言**:指定SKU表中 `title`、`brief`、`description`、`vendor` 等字段的语言。 54 1. **主语言**:指定SKU表中 `title`、`brief`、`description`、`vendor` 等字段的语言。
55 - - 如果主语言是 `zh`,这些字段的值会填充到 `title_zh`、`brief_zh` 等字段  
56 - - 如果主语言是 `en`,这些字段的值会填充到 `title_en`、`brief_en` 等字段 55 + - 如果主语言是 `zh`,这些字段的值会填充到 `title.zh`、`brief.zh` 等字段
  56 + - 如果主语言是 `en`,这些字段的值会填充到 `title.en`、`brief.en` 等字段
57 57
58 2. **翻译配置**: 58 2. **翻译配置**:
59 - - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title_en` 等字段  
60 - - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title_zh` 等字段 59 + - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title.en` 等字段
  60 + - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title.zh` 等字段
61 - **注意**:如果主语言本身就是目标语言,则不会触发翻译(例如主语言是英文,`translate_to_en: true` 不会触发翻译) 61 - **注意**:如果主语言本身就是目标语言,则不会触发翻译(例如主语言是英文,`translate_to_en: true` 不会触发翻译)
62 62
63 3. **默认配置**:如果租户ID不在 `tenants` 中,则使用 `default` 配置。 63 3. **默认配置**:如果租户ID不在 `tenants` 中,则使用 `default` 配置。
@@ -72,8 +72,8 @@ tenant_config: @@ -72,8 +72,8 @@ tenant_config:
72 "translate_to_zh": false 72 "translate_to_zh": false
73 } 73 }
74 ``` 74 ```
75 -- SKU表的 `title` 字段(中文)→ `title_zh`  
76 -- 翻译服务将中文翻译为英文 → `title_en` 75 +- SKU表的 `title` 字段(中文)→ `title.zh`
  76 +- 翻译服务将中文翻译为英文 → `title.en`
77 77
78 **示例2:英文主语言,需要翻译中文** 78 **示例2:英文主语言,需要翻译中文**
79 ```json 79 ```json
@@ -83,8 +83,8 @@ tenant_config: @@ -83,8 +83,8 @@ tenant_config:
83 "translate_to_zh": true 83 "translate_to_zh": true
84 } 84 }
85 ``` 85 ```
86 -- SKU表的 `title` 字段(英文)→ `title_en`  
87 -- 翻译服务将英文翻译为中文 → `title_zh` 86 +- SKU表的 `title` 字段(英文)→ `title.en`
  87 +- 翻译服务将英文翻译为中文 → `title.zh`
88 88
89 **示例3:仅使用主语言,不翻译** 89 **示例3:仅使用主语言,不翻译**
90 ```json 90 ```json
@@ -94,8 +94,8 @@ tenant_config: @@ -94,8 +94,8 @@ tenant_config:
94 "translate_to_zh": false 94 "translate_to_zh": false
95 } 95 }
96 ``` 96 ```
97 -- SKU表的 `title` 字段(中文)→ `title_zh`  
98 -- `title_en` 保持为 `null` 97 +- SKU表的 `title` 字段(中文)→ `title.zh`
  98 +- `title.en` 保持为 `null`
99 99
100 ### 配置更新 100 ### 配置更新
101 101
@@ -272,19 +272,19 @@ String json = response.body().string(); @@ -272,19 +272,19 @@ String json = response.body().string();
272 { 272 {
273 "tenant_id": "1", 273 "tenant_id": "1",
274 "spu_id": "123", 274 "spu_id": "123",
275 - "title_zh": "商品标题",  
276 - "title_en": null,  
277 - "brief_zh": "商品简介",  
278 - "brief_en": null,  
279 - "description_zh": "商品详细描述",  
280 - "description_en": null,  
281 - "vendor_zh": "供应商名称",  
282 - "vendor_en": null, 275 + "title.zh": "商品标题",
  276 + "title.en": null,
  277 + "brief.zh": "商品简介",
  278 + "brief.en": null,
  279 + "description.zh": "商品详细描述",
  280 + "description.en": null,
  281 + "vendor.zh": "供应商名称",
  282 + "vendor.en": null,
283 "tags": ["标签1", "标签2"], 283 "tags": ["标签1", "标签2"],
284 - "category_path_zh": "类目1/类目2/类目3",  
285 - "category_path_en": null,  
286 - "category_name_zh": "类目名称",  
287 - "category_name_en": null, 284 + "category_path.zh": "类目1/类目2/类目3",
  285 + "category_path.en": null,
  286 + "category_name_text.zh": "类目名称",
  287 + "category_name_text.en": null,
288 "category_id": "100", 288 "category_id": "100",
289 "category_name": "类目名称", 289 "category_name": "类目名称",
290 "category_level": 3, 290 "category_level": 3,
@@ -453,11 +453,11 @@ for (String spuId : changedSpuIds) { @@ -453,11 +453,11 @@ for (String spuId : changedSpuIds) {
453 |---------|--------|------|------| 453 |---------|--------|------|------|
454 | 基础标识 | `tenant_id` | keyword | 租户ID | 454 | 基础标识 | `tenant_id` | keyword | 租户ID |
455 | 基础标识 | `spu_id` | keyword | SPU ID | 455 | 基础标识 | `spu_id` | keyword | SPU ID |
456 -| 文本字段 | `title_zh`, `title_en` | text | 标题(中英文) |  
457 -| 文本字段 | `brief_zh`, `brief_en` | text | 简介(中英文) |  
458 -| 文本字段 | `description_zh`, `description_en` | text | 描述(中英文) |  
459 -| 文本字段 | `vendor_zh`, `vendor_en` | text | 供应商(中英文) |  
460 -| 类目字段 | `category_path_zh`, `category_path_en` | text | 类目路径(中英文) | 456 +| 文本字段 | `title.zh`, `title.en` | text | 标题(中英文) |
  457 +| 文本字段 | `brief.zh`, `brief.en` | text | 简介(中英文) |
  458 +| 文本字段 | `description.zh`, `description.en` | text | 描述(中英文) |
  459 +| 文本字段 | `vendor.zh`, `vendor.en` | text | 供应商(中英文) |
  460 +| 类目字段 | `category_path.zh`, `category_path.en` | text | 类目路径(中英文) |
461 | 类目字段 | `category1_name`, `category2_name`, `category3_name` | keyword | 分层类目名称 | 461 | 类目字段 | `category1_name`, `category2_name`, `category3_name` | keyword | 分层类目名称 |
462 | 价格字段 | `min_price`, `max_price` | float | 价格范围 | 462 | 价格字段 | `min_price`, `max_price` | float | 价格范围 |
463 | 库存字段 | `total_inventory` | long | 总库存 | 463 | 库存字段 | `total_inventory` | long | 总库存 |
docs/索引方案.md
@@ -178,12 +178,12 @@ category_path varchar(500) @@ -178,12 +178,12 @@ category_path varchar(500)
178 方案:采用方案2 178 方案:采用方案2
179 4. categoryPath索引 + Prefix 查询(categoryPath.keyword: "服装/男装")(如果满足条件的key太多的则性能较差,比如 查询的是一级类目,类目树叶子节点太多时性能较差) 179 4. categoryPath索引 + Prefix 查询(categoryPath.keyword: "服装/男装")(如果满足条件的key太多的则性能较差,比如 查询的是一级类目,类目树叶子节点太多时性能较差)
180 5. categoryPath支撑模糊查询 和 多级cate keyword索引支撑精确查询。 索引阶段冗余,查询性能高。 180 5. categoryPath支撑模糊查询 和 多级cate keyword索引支撑精确查询。 索引阶段冗余,查询性能高。
181 - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 181 + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
182 "type": "text", 182 "type": "text",
183 "analyzer": "hanlp_index", 183 "analyzer": "hanlp_index",
184 "search_analyzer": "hanlp_standard" 184 "search_analyzer": "hanlp_standard"
185 }, 185 },
186 - "category_path_en": { // 提供模糊查询功能,辅助相关性计算 186 + "category_path.en": { // 提供模糊查询功能,辅助相关性计算
187 "type": "text", 187 "type": "text",
188 "analyzer": "english", 188 "analyzer": "english",
189 "search_analyzer": "english" 189 "search_analyzer": "english"
docs/翻译功能测试说明.md
@@ -22,8 +22,8 @@ @@ -22,8 +22,8 @@
22 ```yaml 22 ```yaml
23 translation_prompts: 23 translation_prompts:
24 # 商品标题翻译提示词 24 # 商品标题翻译提示词
25 - product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"  
26 - product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language." 25 + product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
  26 + product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
27 # query翻译提示词 27 # query翻译提示词
28 query_zh: "电商领域" 28 query_zh: "电商领域"
29 query_en: "e-commerce domain" 29 query_en: "e-commerce domain"
@@ -35,8 +35,8 @@ translation_prompts: @@ -35,8 +35,8 @@ translation_prompts:
35 ### 提示词使用规则 35 ### 提示词使用规则
36 36
37 1. **商品标题翻译**: 37 1. **商品标题翻译**:
38 - - 中文→英文:使用 `product_title_en`  
39 - - 英文→中文:使用 `product_title_zh` 38 + - 中文→英文:使用 `product_title_en`
  39 + - 英文→中文:使用 `product_title_zh`
40 40
41 2. **其他字段翻译**(brief, description, vendor): 41 2. **其他字段翻译**(brief, description, vendor):
42 - 根据目标语言选择 `default_zh` 或 `default_en` 42 - 根据目标语言选择 `default_zh` 或 `default_en`
@@ -72,7 +72,7 @@ translator = Translator( @@ -72,7 +72,7 @@ translator = Translator(
72 72
73 # 测试商品标题翻译 73 # 测试商品标题翻译
74 text = "蓝牙耳机" 74 text = "蓝牙耳机"
75 -prompt = config.query_config.translation_prompts.get('product_title_en') 75 +prompt = config.query_config.translation_prompts.get('product_title_en')
76 result = translator.translate( 76 result = translator.translate(
77 text, 77 text,
78 target_lang='en', 78 target_lang='en',
@@ -140,8 +140,8 @@ doc = transformer.transform_spu_to_doc( @@ -140,8 +140,8 @@ doc = transformer.transform_spu_to_doc(
140 options=pd.DataFrame() 140 options=pd.DataFrame()
141 ) 141 )
142 142
143 -print(f"title_zh: {doc.get('title_zh')}")  
144 -print(f"title_en: {doc.get('title_en')}") # 应该包含翻译结果 143 +print(f"title.zh: {(doc.get('title') or {}).get('zh')}")
  144 +print(f"title.en: {(doc.get('title') or {}).get('en')}") # 应该包含翻译结果
145 ``` 145 ```
146 146
147 ### 5. 测试缓存功能 147 ### 5. 测试缓存功能
indexer/document_transformer.py
@@ -184,17 +184,40 @@ class SPUDocumentTransformer: @@ -184,17 +184,40 @@ class SPUDocumentTransformer:
184 translate_to_en = bool(self.tenant_config.get('translate_to_en')) 184 translate_to_en = bool(self.tenant_config.get('translate_to_en'))
185 translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) 185 translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))
186 186
  187 + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None):
  188 + """
  189 + Write multilingual text field as an object, e.g.:
  190 + doc[field_name] = {"zh": "...", "en": "..."}
  191 + Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh.
  192 + """
  193 + if not source_text or not str(source_text).strip():
  194 + return
  195 +
  196 + obj: Dict[str, str] = {}
  197 + src = str(source_text)
  198 + obj[primary_lang] = src
  199 +
  200 + tr = translations or {}
  201 + if translate_to_en and primary_lang != "en":
  202 + en_text = tr.get("en")
  203 + if en_text and str(en_text).strip():
  204 + obj["en"] = str(en_text)
  205 + if translate_to_zh and primary_lang != "zh":
  206 + zh_text = tr.get("zh")
  207 + if zh_text and str(zh_text).strip():
  208 + obj["zh"] = str(zh_text)
  209 +
  210 + if obj:
  211 + doc[field_name] = obj
  212 +
187 # Title 213 # Title
188 if pd.notna(spu_row.get('title')): 214 if pd.notna(spu_row.get('title')):
189 title_text = str(spu_row['title']) 215 title_text = str(spu_row['title'])
190 -  
191 - # 使用translator的translate_for_indexing方法,自动处理多语言翻译 216 +
  217 + translations: Dict[str, str] = {}
192 if self.translator: 218 if self.translator:
193 - # 根据目标语言选择对应的提示词  
194 prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') 219 prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh')
195 prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') 220 prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en')
196 -  
197 - # 调用translate_for_indexing,自动处理翻译逻辑  
198 translations = self.translator.translate_for_indexing( 221 translations = self.translator.translate_for_indexing(
199 title_text, 222 title_text,
200 shop_language=primary_lang, 223 shop_language=primary_lang,
@@ -202,26 +225,14 @@ class SPUDocumentTransformer: @@ -202,26 +225,14 @@ class SPUDocumentTransformer:
202 prompt=prompt_zh if primary_lang == 'zh' else prompt_en, 225 prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
203 translate_to_en=translate_to_en, 226 translate_to_en=translate_to_en,
204 translate_to_zh=translate_to_zh, 227 translate_to_zh=translate_to_zh,
205 - )  
206 -  
207 - # 填充翻译结果  
208 - doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)  
209 - doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None)  
210 - else:  
211 - # 无翻译器,只填充主语言字段  
212 - if primary_lang == 'zh':  
213 - doc['title_zh'] = title_text  
214 - doc['title_en'] = None  
215 - else:  
216 - doc['title_zh'] = None  
217 - doc['title_en'] = title_text  
218 - else:  
219 - doc['title_zh'] = None  
220 - doc['title_en'] = None 228 + ) or {}
  229 +
  230 + _set_lang_obj("title", title_text, translations)
221 231
222 # Brief 232 # Brief
223 if pd.notna(spu_row.get('brief')): 233 if pd.notna(spu_row.get('brief')):
224 brief_text = str(spu_row['brief']) 234 brief_text = str(spu_row['brief'])
  235 + translations: Dict[str, str] = {}
225 if self.translator: 236 if self.translator:
226 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') 237 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
227 translations = self.translator.translate_for_indexing( 238 translations = self.translator.translate_for_indexing(
@@ -231,23 +242,13 @@ class SPUDocumentTransformer: @@ -231,23 +242,13 @@ class SPUDocumentTransformer:
231 prompt=prompt, 242 prompt=prompt,
232 translate_to_en=translate_to_en, 243 translate_to_en=translate_to_en,
233 translate_to_zh=translate_to_zh, 244 translate_to_zh=translate_to_zh,
234 - )  
235 - doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None)  
236 - doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None)  
237 - else:  
238 - if primary_lang == 'zh':  
239 - doc['brief_zh'] = brief_text  
240 - doc['brief_en'] = None  
241 - else:  
242 - doc['brief_zh'] = None  
243 - doc['brief_en'] = brief_text  
244 - else:  
245 - doc['brief_zh'] = None  
246 - doc['brief_en'] = None 245 + ) or {}
  246 + _set_lang_obj("brief", brief_text, translations)
247 247
248 # Description 248 # Description
249 if pd.notna(spu_row.get('description')): 249 if pd.notna(spu_row.get('description')):
250 desc_text = str(spu_row['description']) 250 desc_text = str(spu_row['description'])
  251 + translations: Dict[str, str] = {}
251 if self.translator: 252 if self.translator:
252 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') 253 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
253 translations = self.translator.translate_for_indexing( 254 translations = self.translator.translate_for_indexing(
@@ -257,23 +258,13 @@ class SPUDocumentTransformer: @@ -257,23 +258,13 @@ class SPUDocumentTransformer:
257 prompt=prompt, 258 prompt=prompt,
258 translate_to_en=translate_to_en, 259 translate_to_en=translate_to_en,
259 translate_to_zh=translate_to_zh, 260 translate_to_zh=translate_to_zh,
260 - )  
261 - doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None)  
262 - doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None)  
263 - else:  
264 - if primary_lang == 'zh':  
265 - doc['description_zh'] = desc_text  
266 - doc['description_en'] = None  
267 - else:  
268 - doc['description_zh'] = None  
269 - doc['description_en'] = desc_text  
270 - else:  
271 - doc['description_zh'] = None  
272 - doc['description_en'] = None 261 + ) or {}
  262 + _set_lang_obj("description", desc_text, translations)
273 263
274 # Vendor 264 # Vendor
275 if pd.notna(spu_row.get('vendor')): 265 if pd.notna(spu_row.get('vendor')):
276 vendor_text = str(spu_row['vendor']) 266 vendor_text = str(spu_row['vendor'])
  267 + translations: Dict[str, str] = {}
277 if self.translator: 268 if self.translator:
278 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') 269 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
279 translations = self.translator.translate_for_indexing( 270 translations = self.translator.translate_for_indexing(
@@ -283,19 +274,8 @@ class SPUDocumentTransformer: @@ -283,19 +274,8 @@ class SPUDocumentTransformer:
283 prompt=prompt, 274 prompt=prompt,
284 translate_to_en=translate_to_en, 275 translate_to_en=translate_to_en,
285 translate_to_zh=translate_to_zh, 276 translate_to_zh=translate_to_zh,
286 - )  
287 - doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None)  
288 - doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None)  
289 - else:  
290 - if primary_lang == 'zh':  
291 - doc['vendor_zh'] = vendor_text  
292 - doc['vendor_en'] = None  
293 - else:  
294 - doc['vendor_zh'] = None  
295 - doc['vendor_en'] = vendor_text  
296 - else:  
297 - doc['vendor_zh'] = None  
298 - doc['vendor_en'] = None 277 + ) or {}
  278 + _set_lang_obj("vendor", vendor_text, translations)
299 279
300 def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series): 280 def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series):
301 """填充类目相关字段。""" 281 """填充类目相关字段。"""
@@ -303,6 +283,8 @@ class SPUDocumentTransformer: @@ -303,6 +283,8 @@ class SPUDocumentTransformer:
303 # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) 283 # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目)
304 # - 仅记录错误日志,不阻塞索引流程 284 # - 仅记录错误日志,不阻塞索引流程
305 285
  286 + primary_lang = self.tenant_config.get('primary_language', 'zh')
  287 +
306 if pd.notna(spu_row.get('category_path')): 288 if pd.notna(spu_row.get('category_path')):
307 category_path = str(spu_row['category_path']) 289 category_path = str(spu_row['category_path'])
308 290
@@ -329,8 +311,7 @@ class SPUDocumentTransformer: @@ -329,8 +311,7 @@ class SPUDocumentTransformer:
329 # 构建类目路径字符串(用于搜索) 311 # 构建类目路径字符串(用于搜索)
330 if category_names: 312 if category_names:
331 category_path_str = '/'.join(category_names) 313 category_path_str = '/'.join(category_names)
332 - doc['category_path_zh'] = category_path_str  
333 - doc['category_path_en'] = None # 暂时设为空 314 + doc['category_path'] = {primary_lang: category_path_str}
334 315
335 # 填充分层类目名称 316 # 填充分层类目名称
336 if len(category_names) > 0: 317 if len(category_names) > 0:
@@ -342,8 +323,7 @@ class SPUDocumentTransformer: @@ -342,8 +323,7 @@ class SPUDocumentTransformer:
342 elif pd.notna(spu_row.get('category')): 323 elif pd.notna(spu_row.get('category')):
343 # 如果category_path为空,使用category字段作为category1_name的备选 324 # 如果category_path为空,使用category字段作为category1_name的备选
344 category = str(spu_row['category']) 325 category = str(spu_row['category'])
345 - doc['category_name_zh'] = category  
346 - doc['category_name_en'] = None 326 + doc['category_name_text'] = {primary_lang: category}
347 doc['category_name'] = category 327 doc['category_name'] = category
348 328
349 # 尝试从category字段解析多级分类 329 # 尝试从category字段解析多级分类
@@ -362,10 +342,8 @@ class SPUDocumentTransformer: @@ -362,10 +342,8 @@ class SPUDocumentTransformer:
362 if pd.notna(spu_row.get('category')): 342 if pd.notna(spu_row.get('category')):
363 # 确保category相关字段都被设置(如果前面没有设置) 343 # 确保category相关字段都被设置(如果前面没有设置)
364 category_name = str(spu_row['category']) 344 category_name = str(spu_row['category'])
365 - if 'category_name_zh' not in doc:  
366 - doc['category_name_zh'] = category_name  
367 - if 'category_name_en' not in doc:  
368 - doc['category_name_en'] = None 345 + if 'category_name_text' not in doc:
  346 + doc['category_name_text'] = {primary_lang: category_name}
369 if 'category_name' not in doc: 347 if 'category_name' not in doc:
370 doc['category_name'] = category_name 348 doc['category_name'] = category_name
371 349
@@ -587,13 +565,22 @@ class SPUDocumentTransformer: @@ -587,13 +565,22 @@ class SPUDocumentTransformer:
587 """ 565 """
588 填充标题向量化字段。 566 填充标题向量化字段。
589 567
590 - 使用英文标题(title_en)生成embedding。如果title_en不存在,则使用title_zh。 568 + 使用英文标题(title.en)生成embedding。如果title.en不存在,则使用title.zh。
591 569
592 Args: 570 Args:
593 doc: ES文档字典 571 doc: ES文档字典
594 """ 572 """
595 - # 优先使用英文标题,如果没有则使用中文标题  
596 - title_text = doc.get('title_en') or doc.get('title_zh') 573 + # 优先使用英文标题,如果没有则使用中文标题;再没有则取任意可用语言
  574 + title_obj = doc.get("title") or {}
  575 + if isinstance(title_obj, dict):
  576 + title_text = title_obj.get("en") or title_obj.get("zh")
  577 + if not title_text:
  578 + for v in title_obj.values():
  579 + if v and str(v).strip():
  580 + title_text = str(v)
  581 + break
  582 + else:
  583 + title_text = None
597 584
598 if not title_text or not title_text.strip(): 585 if not title_text or not title_text.strip():
599 logger.debug(f"No title text available for embedding, SPU: {doc.get('spu_id')}") 586 logger.debug(f"No title text available for embedding, SPU: {doc.get('spu_id')}")
indexer/incremental_service.py
@@ -130,7 +130,15 @@ class IncrementalIndexerService: @@ -130,7 +130,15 @@ class IncrementalIndexerService:
130 130
131 # 单条场景下也可补齐 embedding(仍走缓存) 131 # 单条场景下也可补齐 embedding(仍走缓存)
132 if enable_embedding and encoder: 132 if enable_embedding and encoder:
133 - title_text = doc.get("title_en") or doc.get("title_zh") 133 + title_obj = doc.get("title") or {}
  134 + title_text = None
  135 + if isinstance(title_obj, dict):
  136 + title_text = title_obj.get("en") or title_obj.get("zh")
  137 + if not title_text:
  138 + for v in title_obj.values():
  139 + if v and str(v).strip():
  140 + title_text = str(v)
  141 + break
134 if title_text and str(title_text).strip(): 142 if title_text and str(title_text).strip():
135 try: 143 try:
136 embeddings = encoder.encode(title_text) 144 embeddings = encoder.encode(title_text)
@@ -560,7 +568,15 @@ class IncrementalIndexerService: @@ -560,7 +568,15 @@ class IncrementalIndexerService:
560 title_texts: List[str] = [] 568 title_texts: List[str] = []
561 title_doc_indices: List[int] = [] 569 title_doc_indices: List[int] = []
562 for i, (_, doc) in enumerate(documents): 570 for i, (_, doc) in enumerate(documents):
563 - title_text = doc.get("title_en") or doc.get("title_zh") 571 + title_obj = doc.get("title") or {}
  572 + title_text = None
  573 + if isinstance(title_obj, dict):
  574 + title_text = title_obj.get("en") or title_obj.get("zh")
  575 + if not title_text:
  576 + for v in title_obj.values():
  577 + if v and str(v).strip():
  578 + title_text = str(v)
  579 + break
564 if title_text and str(title_text).strip(): 580 if title_text and str(title_text).strip():
565 title_texts.append(str(title_text)) 581 title_texts.append(str(title_text))
566 title_doc_indices.append(i) 582 title_doc_indices.append(i)
indexer/test_indexing.py
@@ -114,16 +114,17 @@ def test_full_indexing(tenant_id: str = "162"): @@ -114,16 +114,17 @@ def test_full_indexing(tenant_id: str = "162"):
114 print(f"\n文档 {i+1}:") 114 print(f"\n文档 {i+1}:")
115 print(f" SPU ID: {doc.get('spu_id')}") 115 print(f" SPU ID: {doc.get('spu_id')}")
116 print(f" Tenant ID: {doc.get('tenant_id')}") 116 print(f" Tenant ID: {doc.get('tenant_id')}")
117 - print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}")  
118 - print(f" 标题 (英文): {doc.get('title_en', 'N/A')}") 117 + title_obj = doc.get("title") or {}
  118 + print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
  119 + print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
119 120
120 # 检查租户162的翻译状态 121 # 检查租户162的翻译状态
121 if tenant_id == "162": 122 if tenant_id == "162":
122 - # 租户162翻译应该关闭,title_en应该为None  
123 - if doc.get('title_en') is None:  
124 - print(f" ✓ 翻译已关闭(title_en为None)") 123 + # 租户162翻译应该关闭:只写入主语言,不应出现 title.en
  124 + if isinstance(title_obj, dict) and title_obj.get("en") is None:
  125 + print(f" ✓ 翻译已关闭(title.en为空)")
125 else: 126 else:
126 - print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}") 127 + print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
127 128
128 return True 129 return True
129 130
@@ -192,17 +193,18 @@ def test_incremental_indexing(tenant_id: str = "162"): @@ -192,17 +193,18 @@ def test_incremental_indexing(tenant_id: str = "162"):
192 print(f"✓ SPU文档获取成功") 193 print(f"✓ SPU文档获取成功")
193 print(f" SPU ID: {doc.get('spu_id')}") 194 print(f" SPU ID: {doc.get('spu_id')}")
194 print(f" Tenant ID: {doc.get('tenant_id')}") 195 print(f" Tenant ID: {doc.get('tenant_id')}")
195 - print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}")  
196 - print(f" 标题 (英文): {doc.get('title_en', 'N/A')}") 196 + title_obj = doc.get("title") or {}
  197 + print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
  198 + print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
197 print(f" SKU数量: {len(doc.get('skus', []))}") 199 print(f" SKU数量: {len(doc.get('skus', []))}")
198 print(f" 规格数量: {len(doc.get('specifications', []))}") 200 print(f" 规格数量: {len(doc.get('specifications', []))}")
199 201
200 # 检查租户162的翻译状态 202 # 检查租户162的翻译状态
201 if tenant_id == "162": 203 if tenant_id == "162":
202 - if doc.get('title_en') is None:  
203 - print(f" ✓ 翻译已关闭(title_en为None)") 204 + if isinstance(title_obj, dict) and title_obj.get("en") is None:
  205 + print(f" ✓ 翻译已关闭(title.en为空)")
204 else: 206 else:
205 - print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}") 207 + print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
206 208
207 return True 209 return True
208 210
@@ -291,12 +293,13 @@ def test_document_transformer(): @@ -291,12 +293,13 @@ def test_document_transformer():
291 293
292 if doc: 294 if doc:
293 print(f"✓ 文档转换成功") 295 print(f"✓ 文档转换成功")
294 - print(f" title_zh: {doc.get('title_zh')}")  
295 - print(f" title_en: {doc.get('title_en')}") 296 + title_obj = doc.get("title") or {}
  297 + print(f" title.zh: {title_obj.get('zh') if isinstance(title_obj, dict) else None}")
  298 + print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
296 print(f" SKU数量: {len(doc.get('skus', []))}") 299 print(f" SKU数量: {len(doc.get('skus', []))}")
297 300
298 # 验证租户162翻译关闭 301 # 验证租户162翻译关闭
299 - if doc.get('title_en') is None: 302 + if isinstance(title_obj, dict) and title_obj.get("en") is None:
300 print(f" ✓ 翻译已关闭(符合租户162配置)") 303 print(f" ✓ 翻译已关闭(符合租户162配置)")
301 else: 304 else:
302 print(f" ⚠ 警告:翻译应该关闭") 305 print(f" ⚠ 警告:翻译应该关闭")
mappings/search_products.json
@@ -4,22 +4,12 @@ @@ -4,22 +4,12 @@
4 "number_of_replicas": 0, 4 "number_of_replicas": 0,
5 "refresh_interval": "30s", 5 "refresh_interval": "30s",
6 "analysis": { 6 "analysis": {
7 - "analyzer": {  
8 - "hanlp_index": {  
9 - "type": "custom",  
10 - "tokenizer": "standard",  
11 - "filter": ["lowercase", "asciifolding"]  
12 - },  
13 - "hanlp_standard": {  
14 - "type": "custom",  
15 - "tokenizer": "standard",  
16 - "filter": ["lowercase", "asciifolding"]  
17 - }  
18 - },  
19 "normalizer": { 7 "normalizer": {
20 "lowercase": { 8 "lowercase": {
21 "type": "custom", 9 "type": "custom",
22 - "filter": ["lowercase"] 10 + "filter": [
  11 + "lowercase"
  12 + ]
23 } 13 }
24 } 14 }
25 }, 15 },
@@ -45,75 +35,963 @@ @@ -45,75 +35,963 @@
45 "update_time": { 35 "update_time": {
46 "type": "date" 36 "type": "date"
47 }, 37 },
48 - "title_zh": {  
49 - "type": "text",  
50 - "analyzer": "hanlp_index",  
51 - "search_analyzer": "hanlp_standard"  
52 - },  
53 - "qanchors_zh": {  
54 - "type": "text",  
55 - "analyzer": "hanlp_index",  
56 - "search_analyzer": "hanlp_standard"  
57 - },  
58 - "keywords_zh": {  
59 - "type": "text",  
60 - "analyzer": "hanlp_index",  
61 - "search_analyzer": "hanlp_standard"  
62 - },  
63 - "brief_zh": {  
64 - "type": "text",  
65 - "analyzer": "hanlp_index",  
66 - "search_analyzer": "hanlp_standard"  
67 - },  
68 - "description_zh": {  
69 - "type": "text",  
70 - "analyzer": "hanlp_index",  
71 - "search_analyzer": "hanlp_standard"  
72 - },  
73 - "vendor_zh": {  
74 - "type": "text",  
75 - "analyzer": "hanlp_index",  
76 - "search_analyzer": "hanlp_standard",  
77 - "fields": {  
78 - "keyword": {  
79 - "type": "keyword",  
80 - "normalizer": "lowercase" 38 + "title": {
  39 + "type": "object",
  40 + "properties": {
  41 + "ar": {
  42 + "type": "text",
  43 + "analyzer": "arabic"
  44 + },
  45 + "hy": {
  46 + "type": "text",
  47 + "analyzer": "armenian"
  48 + },
  49 + "eu": {
  50 + "type": "text",
  51 + "analyzer": "basque"
  52 + },
  53 + "pt_br": {
  54 + "type": "text",
  55 + "analyzer": "brazilian"
  56 + },
  57 + "bg": {
  58 + "type": "text",
  59 + "analyzer": "bulgarian"
  60 + },
  61 + "ca": {
  62 + "type": "text",
  63 + "analyzer": "catalan"
  64 + },
  65 + "zh": {
  66 + "type": "text",
  67 + "analyzer": "icu_analyzer"
  68 + },
  69 + "chinese": {
  70 + "type": "text",
  71 + "analyzer": "chinese"
  72 + },
  73 + "cjk": {
  74 + "type": "text",
  75 + "analyzer": "cjk"
  76 + },
  77 + "cs": {
  78 + "type": "text",
  79 + "analyzer": "czech"
  80 + },
  81 + "da": {
  82 + "type": "text",
  83 + "analyzer": "danish"
  84 + },
  85 + "nl": {
  86 + "type": "text",
  87 + "analyzer": "dutch"
  88 + },
  89 + "en": {
  90 + "type": "text",
  91 + "analyzer": "english"
  92 + },
  93 + "fi": {
  94 + "type": "text",
  95 + "analyzer": "finnish"
  96 + },
  97 + "fr": {
  98 + "type": "text",
  99 + "analyzer": "french"
  100 + },
  101 + "gl": {
  102 + "type": "text",
  103 + "analyzer": "galician"
  104 + },
  105 + "de": {
  106 + "type": "text",
  107 + "analyzer": "german"
  108 + },
  109 + "el": {
  110 + "type": "text",
  111 + "analyzer": "greek"
  112 + },
  113 + "hi": {
  114 + "type": "text",
  115 + "analyzer": "hindi"
  116 + },
  117 + "hu": {
  118 + "type": "text",
  119 + "analyzer": "hungarian"
  120 + },
  121 + "id": {
  122 + "type": "text",
  123 + "analyzer": "indonesian"
  124 + },
  125 + "it": {
  126 + "type": "text",
  127 + "analyzer": "italian"
  128 + },
  129 + "no": {
  130 + "type": "text",
  131 + "analyzer": "norwegian"
  132 + },
  133 + "fa": {
  134 + "type": "text",
  135 + "analyzer": "persian"
  136 + },
  137 + "pt": {
  138 + "type": "text",
  139 + "analyzer": "portuguese"
  140 + },
  141 + "ro": {
  142 + "type": "text",
  143 + "analyzer": "romanian"
  144 + },
  145 + "ru": {
  146 + "type": "text",
  147 + "analyzer": "russian"
  148 + },
  149 + "es": {
  150 + "type": "text",
  151 + "analyzer": "spanish"
  152 + },
  153 + "sv": {
  154 + "type": "text",
  155 + "analyzer": "swedish"
  156 + },
  157 + "tr": {
  158 + "type": "text",
  159 + "analyzer": "turkish"
  160 + },
  161 + "th": {
  162 + "type": "text",
  163 + "analyzer": "thai"
81 } 164 }
82 } 165 }
83 }, 166 },
84 - "title_en": {  
85 - "type": "text",  
86 - "analyzer": "english",  
87 - "search_analyzer": "english"  
88 - },  
89 - "qanchors_en": {  
90 - "type": "text",  
91 - "analyzer": "english",  
92 - "search_analyzer": "english"  
93 - },  
94 - "keywords_en": {  
95 - "type": "text",  
96 - "analyzer": "english",  
97 - "search_analyzer": "english"  
98 - },  
99 - "brief_en": {  
100 - "type": "text",  
101 - "analyzer": "english",  
102 - "search_analyzer": "english"  
103 - },  
104 - "description_en": {  
105 - "type": "text",  
106 - "analyzer": "english",  
107 - "search_analyzer": "english"  
108 - },  
109 - "vendor_en": {  
110 - "type": "text",  
111 - "analyzer": "english",  
112 - "search_analyzer": "english",  
113 - "fields": {  
114 - "keyword": {  
115 - "type": "keyword",  
116 - "normalizer": "lowercase" 167 + "qanchors": {
  168 + "type": "object",
  169 + "properties": {
  170 + "ar": {
  171 + "type": "text",
  172 + "analyzer": "arabic"
  173 + },
  174 + "hy": {
  175 + "type": "text",
  176 + "analyzer": "armenian"
  177 + },
  178 + "eu": {
  179 + "type": "text",
  180 + "analyzer": "basque"
  181 + },
  182 + "pt_br": {
  183 + "type": "text",
  184 + "analyzer": "brazilian"
  185 + },
  186 + "bg": {
  187 + "type": "text",
  188 + "analyzer": "bulgarian"
  189 + },
  190 + "ca": {
  191 + "type": "text",
  192 + "analyzer": "catalan"
  193 + },
  194 + "zh": {
  195 + "type": "text",
  196 + "analyzer": "icu_analyzer"
  197 + },
  198 + "chinese": {
  199 + "type": "text",
  200 + "analyzer": "chinese"
  201 + },
  202 + "cjk": {
  203 + "type": "text",
  204 + "analyzer": "cjk"
  205 + },
  206 + "cs": {
  207 + "type": "text",
  208 + "analyzer": "czech"
  209 + },
  210 + "da": {
  211 + "type": "text",
  212 + "analyzer": "danish"
  213 + },
  214 + "nl": {
  215 + "type": "text",
  216 + "analyzer": "dutch"
  217 + },
  218 + "en": {
  219 + "type": "text",
  220 + "analyzer": "english"
  221 + },
  222 + "fi": {
  223 + "type": "text",
  224 + "analyzer": "finnish"
  225 + },
  226 + "fr": {
  227 + "type": "text",
  228 + "analyzer": "french"
  229 + },
  230 + "gl": {
  231 + "type": "text",
  232 + "analyzer": "galician"
  233 + },
  234 + "de": {
  235 + "type": "text",
  236 + "analyzer": "german"
  237 + },
  238 + "el": {
  239 + "type": "text",
  240 + "analyzer": "greek"
  241 + },
  242 + "hi": {
  243 + "type": "text",
  244 + "analyzer": "hindi"
  245 + },
  246 + "hu": {
  247 + "type": "text",
  248 + "analyzer": "hungarian"
  249 + },
  250 + "id": {
  251 + "type": "text",
  252 + "analyzer": "indonesian"
  253 + },
  254 + "it": {
  255 + "type": "text",
  256 + "analyzer": "italian"
  257 + },
  258 + "no": {
  259 + "type": "text",
  260 + "analyzer": "norwegian"
  261 + },
  262 + "fa": {
  263 + "type": "text",
  264 + "analyzer": "persian"
  265 + },
  266 + "pt": {
  267 + "type": "text",
  268 + "analyzer": "portuguese"
  269 + },
  270 + "ro": {
  271 + "type": "text",
  272 + "analyzer": "romanian"
  273 + },
  274 + "ru": {
  275 + "type": "text",
  276 + "analyzer": "russian"
  277 + },
  278 + "es": {
  279 + "type": "text",
  280 + "analyzer": "spanish"
  281 + },
  282 + "sv": {
  283 + "type": "text",
  284 + "analyzer": "swedish"
  285 + },
  286 + "tr": {
  287 + "type": "text",
  288 + "analyzer": "turkish"
  289 + },
  290 + "th": {
  291 + "type": "text",
  292 + "analyzer": "thai"
  293 + }
  294 + }
  295 + },
  296 + "keywords": {
  297 + "type": "object",
  298 + "properties": {
  299 + "ar": {
  300 + "type": "text",
  301 + "analyzer": "arabic"
  302 + },
  303 + "hy": {
  304 + "type": "text",
  305 + "analyzer": "armenian"
  306 + },
  307 + "eu": {
  308 + "type": "text",
  309 + "analyzer": "basque"
  310 + },
  311 + "pt_br": {
  312 + "type": "text",
  313 + "analyzer": "brazilian"
  314 + },
  315 + "bg": {
  316 + "type": "text",
  317 + "analyzer": "bulgarian"
  318 + },
  319 + "ca": {
  320 + "type": "text",
  321 + "analyzer": "catalan"
  322 + },
  323 + "zh": {
  324 + "type": "text",
  325 + "analyzer": "icu_analyzer"
  326 + },
  327 + "chinese": {
  328 + "type": "text",
  329 + "analyzer": "chinese"
  330 + },
  331 + "cjk": {
  332 + "type": "text",
  333 + "analyzer": "cjk"
  334 + },
  335 + "cs": {
  336 + "type": "text",
  337 + "analyzer": "czech"
  338 + },
  339 + "da": {
  340 + "type": "text",
  341 + "analyzer": "danish"
  342 + },
  343 + "nl": {
  344 + "type": "text",
  345 + "analyzer": "dutch"
  346 + },
  347 + "en": {
  348 + "type": "text",
  349 + "analyzer": "english"
  350 + },
  351 + "fi": {
  352 + "type": "text",
  353 + "analyzer": "finnish"
  354 + },
  355 + "fr": {
  356 + "type": "text",
  357 + "analyzer": "french"
  358 + },
  359 + "gl": {
  360 + "type": "text",
  361 + "analyzer": "galician"
  362 + },
  363 + "de": {
  364 + "type": "text",
  365 + "analyzer": "german"
  366 + },
  367 + "el": {
  368 + "type": "text",
  369 + "analyzer": "greek"
  370 + },
  371 + "hi": {
  372 + "type": "text",
  373 + "analyzer": "hindi"
  374 + },
  375 + "hu": {
  376 + "type": "text",
  377 + "analyzer": "hungarian"
  378 + },
  379 + "id": {
  380 + "type": "text",
  381 + "analyzer": "indonesian"
  382 + },
  383 + "it": {
  384 + "type": "text",
  385 + "analyzer": "italian"
  386 + },
  387 + "no": {
  388 + "type": "text",
  389 + "analyzer": "norwegian"
  390 + },
  391 + "fa": {
  392 + "type": "text",
  393 + "analyzer": "persian"
  394 + },
  395 + "pt": {
  396 + "type": "text",
  397 + "analyzer": "portuguese"
  398 + },
  399 + "ro": {
  400 + "type": "text",
  401 + "analyzer": "romanian"
  402 + },
  403 + "ru": {
  404 + "type": "text",
  405 + "analyzer": "russian"
  406 + },
  407 + "es": {
  408 + "type": "text",
  409 + "analyzer": "spanish"
  410 + },
  411 + "sv": {
  412 + "type": "text",
  413 + "analyzer": "swedish"
  414 + },
  415 + "tr": {
  416 + "type": "text",
  417 + "analyzer": "turkish"
  418 + },
  419 + "th": {
  420 + "type": "text",
  421 + "analyzer": "thai"
  422 + }
  423 + }
  424 + },
  425 + "brief": {
  426 + "type": "object",
  427 + "properties": {
  428 + "ar": {
  429 + "type": "text",
  430 + "analyzer": "arabic"
  431 + },
  432 + "hy": {
  433 + "type": "text",
  434 + "analyzer": "armenian"
  435 + },
  436 + "eu": {
  437 + "type": "text",
  438 + "analyzer": "basque"
  439 + },
  440 + "pt_br": {
  441 + "type": "text",
  442 + "analyzer": "brazilian"
  443 + },
  444 + "bg": {
  445 + "type": "text",
  446 + "analyzer": "bulgarian"
  447 + },
  448 + "ca": {
  449 + "type": "text",
  450 + "analyzer": "catalan"
  451 + },
  452 + "zh": {
  453 + "type": "text",
  454 + "analyzer": "icu_analyzer"
  455 + },
  456 + "chinese": {
  457 + "type": "text",
  458 + "analyzer": "chinese"
  459 + },
  460 + "cjk": {
  461 + "type": "text",
  462 + "analyzer": "cjk"
  463 + },
  464 + "cs": {
  465 + "type": "text",
  466 + "analyzer": "czech"
  467 + },
  468 + "da": {
  469 + "type": "text",
  470 + "analyzer": "danish"
  471 + },
  472 + "nl": {
  473 + "type": "text",
  474 + "analyzer": "dutch"
  475 + },
  476 + "en": {
  477 + "type": "text",
  478 + "analyzer": "english"
  479 + },
  480 + "fi": {
  481 + "type": "text",
  482 + "analyzer": "finnish"
  483 + },
  484 + "fr": {
  485 + "type": "text",
  486 + "analyzer": "french"
  487 + },
  488 + "gl": {
  489 + "type": "text",
  490 + "analyzer": "galician"
  491 + },
  492 + "de": {
  493 + "type": "text",
  494 + "analyzer": "german"
  495 + },
  496 + "el": {
  497 + "type": "text",
  498 + "analyzer": "greek"
  499 + },
  500 + "hi": {
  501 + "type": "text",
  502 + "analyzer": "hindi"
  503 + },
  504 + "hu": {
  505 + "type": "text",
  506 + "analyzer": "hungarian"
  507 + },
  508 + "id": {
  509 + "type": "text",
  510 + "analyzer": "indonesian"
  511 + },
  512 + "it": {
  513 + "type": "text",
  514 + "analyzer": "italian"
  515 + },
  516 + "no": {
  517 + "type": "text",
  518 + "analyzer": "norwegian"
  519 + },
  520 + "fa": {
  521 + "type": "text",
  522 + "analyzer": "persian"
  523 + },
  524 + "pt": {
  525 + "type": "text",
  526 + "analyzer": "portuguese"
  527 + },
  528 + "ro": {
  529 + "type": "text",
  530 + "analyzer": "romanian"
  531 + },
  532 + "ru": {
  533 + "type": "text",
  534 + "analyzer": "russian"
  535 + },
  536 + "es": {
  537 + "type": "text",
  538 + "analyzer": "spanish"
  539 + },
  540 + "sv": {
  541 + "type": "text",
  542 + "analyzer": "swedish"
  543 + },
  544 + "tr": {
  545 + "type": "text",
  546 + "analyzer": "turkish"
  547 + },
  548 + "th": {
  549 + "type": "text",
  550 + "analyzer": "thai"
  551 + }
  552 + }
  553 + },
  554 + "description": {
  555 + "type": "object",
  556 + "properties": {
  557 + "ar": {
  558 + "type": "text",
  559 + "analyzer": "arabic"
  560 + },
  561 + "hy": {
  562 + "type": "text",
  563 + "analyzer": "armenian"
  564 + },
  565 + "eu": {
  566 + "type": "text",
  567 + "analyzer": "basque"
  568 + },
  569 + "pt_br": {
  570 + "type": "text",
  571 + "analyzer": "brazilian"
  572 + },
  573 + "bg": {
  574 + "type": "text",
  575 + "analyzer": "bulgarian"
  576 + },
  577 + "ca": {
  578 + "type": "text",
  579 + "analyzer": "catalan"
  580 + },
  581 + "zh": {
  582 + "type": "text",
  583 + "analyzer": "icu_analyzer"
  584 + },
  585 + "chinese": {
  586 + "type": "text",
  587 + "analyzer": "chinese"
  588 + },
  589 + "cjk": {
  590 + "type": "text",
  591 + "analyzer": "cjk"
  592 + },
  593 + "cs": {
  594 + "type": "text",
  595 + "analyzer": "czech"
  596 + },
  597 + "da": {
  598 + "type": "text",
  599 + "analyzer": "danish"
  600 + },
  601 + "nl": {
  602 + "type": "text",
  603 + "analyzer": "dutch"
  604 + },
  605 + "en": {
  606 + "type": "text",
  607 + "analyzer": "english"
  608 + },
  609 + "fi": {
  610 + "type": "text",
  611 + "analyzer": "finnish"
  612 + },
  613 + "fr": {
  614 + "type": "text",
  615 + "analyzer": "french"
  616 + },
  617 + "gl": {
  618 + "type": "text",
  619 + "analyzer": "galician"
  620 + },
  621 + "de": {
  622 + "type": "text",
  623 + "analyzer": "german"
  624 + },
  625 + "el": {
  626 + "type": "text",
  627 + "analyzer": "greek"
  628 + },
  629 + "hi": {
  630 + "type": "text",
  631 + "analyzer": "hindi"
  632 + },
  633 + "hu": {
  634 + "type": "text",
  635 + "analyzer": "hungarian"
  636 + },
  637 + "id": {
  638 + "type": "text",
  639 + "analyzer": "indonesian"
  640 + },
  641 + "it": {
  642 + "type": "text",
  643 + "analyzer": "italian"
  644 + },
  645 + "no": {
  646 + "type": "text",
  647 + "analyzer": "norwegian"
  648 + },
  649 + "fa": {
  650 + "type": "text",
  651 + "analyzer": "persian"
  652 + },
  653 + "pt": {
  654 + "type": "text",
  655 + "analyzer": "portuguese"
  656 + },
  657 + "ro": {
  658 + "type": "text",
  659 + "analyzer": "romanian"
  660 + },
  661 + "ru": {
  662 + "type": "text",
  663 + "analyzer": "russian"
  664 + },
  665 + "es": {
  666 + "type": "text",
  667 + "analyzer": "spanish"
  668 + },
  669 + "sv": {
  670 + "type": "text",
  671 + "analyzer": "swedish"
  672 + },
  673 + "tr": {
  674 + "type": "text",
  675 + "analyzer": "turkish"
  676 + },
  677 + "th": {
  678 + "type": "text",
  679 + "analyzer": "thai"
  680 + }
  681 + }
  682 + },
  683 + "vendor": {
  684 + "type": "object",
  685 + "properties": {
  686 + "ar": {
  687 + "type": "text",
  688 + "analyzer": "arabic",
  689 + "fields": {
  690 + "keyword": {
  691 + "type": "keyword",
  692 + "normalizer": "lowercase"
  693 + }
  694 + }
  695 + },
  696 + "hy": {
  697 + "type": "text",
  698 + "analyzer": "armenian",
  699 + "fields": {
  700 + "keyword": {
  701 + "type": "keyword",
  702 + "normalizer": "lowercase"
  703 + }
  704 + }
  705 + },
  706 + "eu": {
  707 + "type": "text",
  708 + "analyzer": "basque",
  709 + "fields": {
  710 + "keyword": {
  711 + "type": "keyword",
  712 + "normalizer": "lowercase"
  713 + }
  714 + }
  715 + },
  716 + "pt_br": {
  717 + "type": "text",
  718 + "analyzer": "brazilian",
  719 + "fields": {
  720 + "keyword": {
  721 + "type": "keyword",
  722 + "normalizer": "lowercase"
  723 + }
  724 + }
  725 + },
  726 + "bg": {
  727 + "type": "text",
  728 + "analyzer": "bulgarian",
  729 + "fields": {
  730 + "keyword": {
  731 + "type": "keyword",
  732 + "normalizer": "lowercase"
  733 + }
  734 + }
  735 + },
  736 + "ca": {
  737 + "type": "text",
  738 + "analyzer": "catalan",
  739 + "fields": {
  740 + "keyword": {
  741 + "type": "keyword",
  742 + "normalizer": "lowercase"
  743 + }
  744 + }
  745 + },
  746 + "zh": {
  747 + "type": "text",
  748 + "analyzer": "icu_analyzer",
  749 + "fields": {
  750 + "keyword": {
  751 + "type": "keyword",
  752 + "normalizer": "lowercase"
  753 + }
  754 + }
  755 + },
  756 + "chinese": {
  757 + "type": "text",
  758 + "analyzer": "chinese",
  759 + "fields": {
  760 + "keyword": {
  761 + "type": "keyword",
  762 + "normalizer": "lowercase"
  763 + }
  764 + }
  765 + },
  766 + "cjk": {
  767 + "type": "text",
  768 + "analyzer": "cjk",
  769 + "fields": {
  770 + "keyword": {
  771 + "type": "keyword",
  772 + "normalizer": "lowercase"
  773 + }
  774 + }
  775 + },
  776 + "cs": {
  777 + "type": "text",
  778 + "analyzer": "czech",
  779 + "fields": {
  780 + "keyword": {
  781 + "type": "keyword",
  782 + "normalizer": "lowercase"
  783 + }
  784 + }
  785 + },
  786 + "da": {
  787 + "type": "text",
  788 + "analyzer": "danish",
  789 + "fields": {
  790 + "keyword": {
  791 + "type": "keyword",
  792 + "normalizer": "lowercase"
  793 + }
  794 + }
  795 + },
  796 + "nl": {
  797 + "type": "text",
  798 + "analyzer": "dutch",
  799 + "fields": {
  800 + "keyword": {
  801 + "type": "keyword",
  802 + "normalizer": "lowercase"
  803 + }
  804 + }
  805 + },
  806 + "en": {
  807 + "type": "text",
  808 + "analyzer": "english",
  809 + "fields": {
  810 + "keyword": {
  811 + "type": "keyword",
  812 + "normalizer": "lowercase"
  813 + }
  814 + }
  815 + },
  816 + "fi": {
  817 + "type": "text",
  818 + "analyzer": "finnish",
  819 + "fields": {
  820 + "keyword": {
  821 + "type": "keyword",
  822 + "normalizer": "lowercase"
  823 + }
  824 + }
  825 + },
  826 + "fr": {
  827 + "type": "text",
  828 + "analyzer": "french",
  829 + "fields": {
  830 + "keyword": {
  831 + "type": "keyword",
  832 + "normalizer": "lowercase"
  833 + }
  834 + }
  835 + },
  836 + "gl": {
  837 + "type": "text",
  838 + "analyzer": "galician",
  839 + "fields": {
  840 + "keyword": {
  841 + "type": "keyword",
  842 + "normalizer": "lowercase"
  843 + }
  844 + }
  845 + },
  846 + "de": {
  847 + "type": "text",
  848 + "analyzer": "german",
  849 + "fields": {
  850 + "keyword": {
  851 + "type": "keyword",
  852 + "normalizer": "lowercase"
  853 + }
  854 + }
  855 + },
  856 + "el": {
  857 + "type": "text",
  858 + "analyzer": "greek",
  859 + "fields": {
  860 + "keyword": {
  861 + "type": "keyword",
  862 + "normalizer": "lowercase"
  863 + }
  864 + }
  865 + },
  866 + "hi": {
  867 + "type": "text",
  868 + "analyzer": "hindi",
  869 + "fields": {
  870 + "keyword": {
  871 + "type": "keyword",
  872 + "normalizer": "lowercase"
  873 + }
  874 + }
  875 + },
  876 + "hu": {
  877 + "type": "text",
  878 + "analyzer": "hungarian",
  879 + "fields": {
  880 + "keyword": {
  881 + "type": "keyword",
  882 + "normalizer": "lowercase"
  883 + }
  884 + }
  885 + },
  886 + "id": {
  887 + "type": "text",
  888 + "analyzer": "indonesian",
  889 + "fields": {
  890 + "keyword": {
  891 + "type": "keyword",
  892 + "normalizer": "lowercase"
  893 + }
  894 + }
  895 + },
  896 + "it": {
  897 + "type": "text",
  898 + "analyzer": "italian",
  899 + "fields": {
  900 + "keyword": {
  901 + "type": "keyword",
  902 + "normalizer": "lowercase"
  903 + }
  904 + }
  905 + },
  906 + "no": {
  907 + "type": "text",
  908 + "analyzer": "norwegian",
  909 + "fields": {
  910 + "keyword": {
  911 + "type": "keyword",
  912 + "normalizer": "lowercase"
  913 + }
  914 + }
  915 + },
  916 + "fa": {
  917 + "type": "text",
  918 + "analyzer": "persian",
  919 + "fields": {
  920 + "keyword": {
  921 + "type": "keyword",
  922 + "normalizer": "lowercase"
  923 + }
  924 + }
  925 + },
  926 + "pt": {
  927 + "type": "text",
  928 + "analyzer": "portuguese",
  929 + "fields": {
  930 + "keyword": {
  931 + "type": "keyword",
  932 + "normalizer": "lowercase"
  933 + }
  934 + }
  935 + },
  936 + "ro": {
  937 + "type": "text",
  938 + "analyzer": "romanian",
  939 + "fields": {
  940 + "keyword": {
  941 + "type": "keyword",
  942 + "normalizer": "lowercase"
  943 + }
  944 + }
  945 + },
  946 + "ru": {
  947 + "type": "text",
  948 + "analyzer": "russian",
  949 + "fields": {
  950 + "keyword": {
  951 + "type": "keyword",
  952 + "normalizer": "lowercase"
  953 + }
  954 + }
  955 + },
  956 + "es": {
  957 + "type": "text",
  958 + "analyzer": "spanish",
  959 + "fields": {
  960 + "keyword": {
  961 + "type": "keyword",
  962 + "normalizer": "lowercase"
  963 + }
  964 + }
  965 + },
  966 + "sv": {
  967 + "type": "text",
  968 + "analyzer": "swedish",
  969 + "fields": {
  970 + "keyword": {
  971 + "type": "keyword",
  972 + "normalizer": "lowercase"
  973 + }
  974 + }
  975 + },
  976 + "tr": {
  977 + "type": "text",
  978 + "analyzer": "turkish",
  979 + "fields": {
  980 + "keyword": {
  981 + "type": "keyword",
  982 + "normalizer": "lowercase"
  983 + }
  984 + }
  985 + },
  986 + "th": {
  987 + "type": "text",
  988 + "analyzer": "thai",
  989 + "fields": {
  990 + "keyword": {
  991 + "type": "keyword",
  992 + "normalizer": "lowercase"
  993 + }
  994 + }
117 } 995 }
118 } 996 }
119 }, 997 },
@@ -158,25 +1036,263 @@ @@ -158,25 +1036,263 @@
158 } 1036 }
159 } 1037 }
160 }, 1038 },
161 - "category_path_zh": {  
162 - "type": "text",  
163 - "analyzer": "hanlp_index",  
164 - "search_analyzer": "hanlp_standard"  
165 - },  
166 - "category_path_en": {  
167 - "type": "text",  
168 - "analyzer": "english",  
169 - "search_analyzer": "english"  
170 - },  
171 - "category_name_zh": {  
172 - "type": "text",  
173 - "analyzer": "hanlp_index",  
174 - "search_analyzer": "hanlp_standard" 1039 + "category_path": {
  1040 + "type": "object",
  1041 + "properties": {
  1042 + "ar": {
  1043 + "type": "text",
  1044 + "analyzer": "arabic"
  1045 + },
  1046 + "hy": {
  1047 + "type": "text",
  1048 + "analyzer": "armenian"
  1049 + },
  1050 + "eu": {
  1051 + "type": "text",
  1052 + "analyzer": "basque"
  1053 + },
  1054 + "pt_br": {
  1055 + "type": "text",
  1056 + "analyzer": "brazilian"
  1057 + },
  1058 + "bg": {
  1059 + "type": "text",
  1060 + "analyzer": "bulgarian"
  1061 + },
  1062 + "ca": {
  1063 + "type": "text",
  1064 + "analyzer": "catalan"
  1065 + },
  1066 + "zh": {
  1067 + "type": "text",
  1068 + "analyzer": "icu_analyzer"
  1069 + },
  1070 + "chinese": {
  1071 + "type": "text",
  1072 + "analyzer": "chinese"
  1073 + },
  1074 + "cjk": {
  1075 + "type": "text",
  1076 + "analyzer": "cjk"
  1077 + },
  1078 + "cs": {
  1079 + "type": "text",
  1080 + "analyzer": "czech"
  1081 + },
  1082 + "da": {
  1083 + "type": "text",
  1084 + "analyzer": "danish"
  1085 + },
  1086 + "nl": {
  1087 + "type": "text",
  1088 + "analyzer": "dutch"
  1089 + },
  1090 + "en": {
  1091 + "type": "text",
  1092 + "analyzer": "english"
  1093 + },
  1094 + "fi": {
  1095 + "type": "text",
  1096 + "analyzer": "finnish"
  1097 + },
  1098 + "fr": {
  1099 + "type": "text",
  1100 + "analyzer": "french"
  1101 + },
  1102 + "gl": {
  1103 + "type": "text",
  1104 + "analyzer": "galician"
  1105 + },
  1106 + "de": {
  1107 + "type": "text",
  1108 + "analyzer": "german"
  1109 + },
  1110 + "el": {
  1111 + "type": "text",
  1112 + "analyzer": "greek"
  1113 + },
  1114 + "hi": {
  1115 + "type": "text",
  1116 + "analyzer": "hindi"
  1117 + },
  1118 + "hu": {
  1119 + "type": "text",
  1120 + "analyzer": "hungarian"
  1121 + },
  1122 + "id": {
  1123 + "type": "text",
  1124 + "analyzer": "indonesian"
  1125 + },
  1126 + "it": {
  1127 + "type": "text",
  1128 + "analyzer": "italian"
  1129 + },
  1130 + "no": {
  1131 + "type": "text",
  1132 + "analyzer": "norwegian"
  1133 + },
  1134 + "fa": {
  1135 + "type": "text",
  1136 + "analyzer": "persian"
  1137 + },
  1138 + "pt": {
  1139 + "type": "text",
  1140 + "analyzer": "portuguese"
  1141 + },
  1142 + "ro": {
  1143 + "type": "text",
  1144 + "analyzer": "romanian"
  1145 + },
  1146 + "ru": {
  1147 + "type": "text",
  1148 + "analyzer": "russian"
  1149 + },
  1150 + "es": {
  1151 + "type": "text",
  1152 + "analyzer": "spanish"
  1153 + },
  1154 + "sv": {
  1155 + "type": "text",
  1156 + "analyzer": "swedish"
  1157 + },
  1158 + "tr": {
  1159 + "type": "text",
  1160 + "analyzer": "turkish"
  1161 + },
  1162 + "th": {
  1163 + "type": "text",
  1164 + "analyzer": "thai"
  1165 + }
  1166 + }
175 }, 1167 },
176 - "category_name_en": {  
177 - "type": "text",  
178 - "analyzer": "english",  
179 - "search_analyzer": "english" 1168 + "category_name_text": {
  1169 + "type": "object",
  1170 + "properties": {
  1171 + "ar": {
  1172 + "type": "text",
  1173 + "analyzer": "arabic"
  1174 + },
  1175 + "hy": {
  1176 + "type": "text",
  1177 + "analyzer": "armenian"
  1178 + },
  1179 + "eu": {
  1180 + "type": "text",
  1181 + "analyzer": "basque"
  1182 + },
  1183 + "pt_br": {
  1184 + "type": "text",
  1185 + "analyzer": "brazilian"
  1186 + },
  1187 + "bg": {
  1188 + "type": "text",
  1189 + "analyzer": "bulgarian"
  1190 + },
  1191 + "ca": {
  1192 + "type": "text",
  1193 + "analyzer": "catalan"
  1194 + },
  1195 + "zh": {
  1196 + "type": "text",
  1197 + "analyzer": "icu_analyzer"
  1198 + },
  1199 + "chinese": {
  1200 + "type": "text",
  1201 + "analyzer": "chinese"
  1202 + },
  1203 + "cjk": {
  1204 + "type": "text",
  1205 + "analyzer": "cjk"
  1206 + },
  1207 + "cs": {
  1208 + "type": "text",
  1209 + "analyzer": "czech"
  1210 + },
  1211 + "da": {
  1212 + "type": "text",
  1213 + "analyzer": "danish"
  1214 + },
  1215 + "nl": {
  1216 + "type": "text",
  1217 + "analyzer": "dutch"
  1218 + },
  1219 + "en": {
  1220 + "type": "text",
  1221 + "analyzer": "english"
  1222 + },
  1223 + "fi": {
  1224 + "type": "text",
  1225 + "analyzer": "finnish"
  1226 + },
  1227 + "fr": {
  1228 + "type": "text",
  1229 + "analyzer": "french"
  1230 + },
  1231 + "gl": {
  1232 + "type": "text",
  1233 + "analyzer": "galician"
  1234 + },
  1235 + "de": {
  1236 + "type": "text",
  1237 + "analyzer": "german"
  1238 + },
  1239 + "el": {
  1240 + "type": "text",
  1241 + "analyzer": "greek"
  1242 + },
  1243 + "hi": {
  1244 + "type": "text",
  1245 + "analyzer": "hindi"
  1246 + },
  1247 + "hu": {
  1248 + "type": "text",
  1249 + "analyzer": "hungarian"
  1250 + },
  1251 + "id": {
  1252 + "type": "text",
  1253 + "analyzer": "indonesian"
  1254 + },
  1255 + "it": {
  1256 + "type": "text",
  1257 + "analyzer": "italian"
  1258 + },
  1259 + "no": {
  1260 + "type": "text",
  1261 + "analyzer": "norwegian"
  1262 + },
  1263 + "fa": {
  1264 + "type": "text",
  1265 + "analyzer": "persian"
  1266 + },
  1267 + "pt": {
  1268 + "type": "text",
  1269 + "analyzer": "portuguese"
  1270 + },
  1271 + "ro": {
  1272 + "type": "text",
  1273 + "analyzer": "romanian"
  1274 + },
  1275 + "ru": {
  1276 + "type": "text",
  1277 + "analyzer": "russian"
  1278 + },
  1279 + "es": {
  1280 + "type": "text",
  1281 + "analyzer": "spanish"
  1282 + },
  1283 + "sv": {
  1284 + "type": "text",
  1285 + "analyzer": "swedish"
  1286 + },
  1287 + "tr": {
  1288 + "type": "text",
  1289 + "analyzer": "turkish"
  1290 + },
  1291 + "th": {
  1292 + "type": "text",
  1293 + "analyzer": "thai"
  1294 + }
  1295 + }
180 }, 1296 },
181 "category_id": { 1297 "category_id": {
182 "type": "keyword" 1298 "type": "keyword"
@@ -294,4 +1410,3 @@ @@ -294,4 +1410,3 @@
294 } 1410 }
295 } 1411 }
296 } 1412 }
297 -  
scripts/check_es_data.py
@@ -28,12 +28,12 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): @@ -28,12 +28,12 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
28 "size": size, 28 "size": size,
29 "_source": [ 29 "_source": [
30 "spu_id", 30 "spu_id",
31 - "title_zh", 31 + "title",
32 "category1_name", 32 "category1_name",
33 "category2_name", 33 "category2_name",
34 "category3_name", 34 "category3_name",
35 "category_name", 35 "category_name",
36 - "category_path_zh", 36 + "category_path",
37 "specifications", 37 "specifications",
38 "option1_name", 38 "option1_name",
39 "option2_name", 39 "option2_name",
@@ -51,14 +51,16 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): @@ -51,14 +51,16 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
51 51
52 for i, hit in enumerate(hits, 1): 52 for i, hit in enumerate(hits, 1):
53 source = hit.get('_source', {}) 53 source = hit.get('_source', {})
  54 + title_obj = source.get("title") or {}
  55 + category_path_obj = source.get("category_path") or {}
54 print(f"文档 {i}:") 56 print(f"文档 {i}:")
55 print(f" spu_id: {source.get('spu_id')}") 57 print(f" spu_id: {source.get('spu_id')}")
56 - print(f" title_zh: {source.get('title_zh', '')[:50]}") 58 + print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}")
57 print(f" category1_name: {source.get('category1_name')}") 59 print(f" category1_name: {source.get('category1_name')}")
58 print(f" category2_name: {source.get('category2_name')}") 60 print(f" category2_name: {source.get('category2_name')}")
59 print(f" category3_name: {source.get('category3_name')}") 61 print(f" category3_name: {source.get('category3_name')}")
60 print(f" category_name: {source.get('category_name')}") 62 print(f" category_name: {source.get('category_name')}")
61 - print(f" category_path_zh: {source.get('category_path_zh')}") 63 + print(f" category_path.zh: {category_path_obj.get('zh') if isinstance(category_path_obj, dict) else None}")
62 print(f" option1_name: {source.get('option1_name')}") 64 print(f" option1_name: {source.get('option1_name')}")
63 print(f" option2_name: {source.get('option2_name')}") 65 print(f" option2_name: {source.get('option2_name')}")
64 print(f" option3_name: {source.get('option3_name')}") 66 print(f" option3_name: {source.get('option3_name')}")
scripts/check_index_mapping.py
@@ -20,11 +20,19 @@ def check_field_mapping(mapping_dict, field_path): @@ -20,11 +20,19 @@ def check_field_mapping(mapping_dict, field_path):
20 current = mapping_dict 20 current = mapping_dict
21 21
22 for part in parts: 22 for part in parts:
23 - if isinstance(current, dict):  
24 - current = current.get(part)  
25 - if current is None:  
26 - return None  
27 - else: 23 + if not isinstance(current, dict):
  24 + return None
  25 +
  26 + # ES mapping nesting: object fields store subfields under "properties"
  27 + if "properties" in current and isinstance(current["properties"], dict):
  28 + current = current["properties"]
  29 +
  30 + # multi-fields store subfields under "fields" (e.g. vendor.zh.keyword)
  31 + if part != parts[0] and "fields" in current and isinstance(current["fields"], dict) and part in current["fields"]:
  32 + current = current["fields"]
  33 +
  34 + current = current.get(part)
  35 + if current is None:
28 return None 36 return None
29 return current 37 return current
30 38
@@ -70,12 +78,13 @@ def main(): @@ -70,12 +78,13 @@ def main():
70 78
71 # 检查关键字段 79 # 检查关键字段
72 fields_to_check = [ 80 fields_to_check = [
73 - 'title_zh',  
74 - 'brief_zh',  
75 - 'description_zh',  
76 - 'vendor_zh',  
77 - 'category_path_zh',  
78 - 'category_name_zh' 81 + "title.zh",
  82 + "brief.zh",
  83 + "description.zh",
  84 + "vendor.zh",
  85 + "vendor.zh.keyword",
  86 + "category_path.zh",
  87 + "category_name_text.zh"
79 ] 88 ]
80 89
81 print("=" * 80) 90 print("=" * 80)
@@ -83,7 +92,7 @@ def main(): @@ -83,7 +92,7 @@ def main():
83 print("=" * 80) 92 print("=" * 80)
84 93
85 for field_name in fields_to_check: 94 for field_name in fields_to_check:
86 - field_mapping = index_mapping.get(field_name) 95 + field_mapping = check_field_mapping(index_mapping, field_name)
87 96
88 if field_mapping is None: 97 if field_mapping is None:
89 print(f"\n❌ {field_name}: 字段不存在") 98 print(f"\n❌ {field_name}: 字段不存在")
search/es_query_builder.py
@@ -367,37 +367,37 @@ class ESQueryBuilder: @@ -367,37 +367,37 @@ class ESQueryBuilder:
367 """ 367 """
368 if language == 'zh': 368 if language == 'zh':
369 all_fields = [ 369 all_fields = [
370 - "title_zh^3.0",  
371 - "brief_zh^1.5",  
372 - "description_zh",  
373 - "vendor_zh^1.5", 370 + "title.zh^3.0",
  371 + "brief.zh^1.5",
  372 + "description.zh",
  373 + "vendor.zh^1.5",
374 "tags", 374 "tags",
375 - "category_path_zh^1.5",  
376 - "category_name_zh^1.5", 375 + "category_path.zh^1.5",
  376 + "category_name_text.zh^1.5",
377 "option1_values^0.5" 377 "option1_values^0.5"
378 ] 378 ]
379 core_fields = [ 379 core_fields = [
380 - "title_zh^3.0",  
381 - "brief_zh^1.5",  
382 - "vendor_zh^1.5",  
383 - "category_name_zh^1.5" 380 + "title.zh^3.0",
  381 + "brief.zh^1.5",
  382 + "vendor.zh^1.5",
  383 + "category_name_text.zh^1.5"
384 ] 384 ]
385 else: # en 385 else: # en
386 all_fields = [ 386 all_fields = [
387 - "title_en^3.0",  
388 - "brief_en^1.5",  
389 - "description_en",  
390 - "vendor_en^1.5", 387 + "title.en^3.0",
  388 + "brief.en^1.5",
  389 + "description.en",
  390 + "vendor.en^1.5",
391 "tags", 391 "tags",
392 - "category_path_en^1.5",  
393 - "category_name_en^1.5", 392 + "category_path.en^1.5",
  393 + "category_name_text.en^1.5",
394 "option1_values^0.5" 394 "option1_values^0.5"
395 ] 395 ]
396 core_fields = [ 396 core_fields = [
397 - "title_en^3.0",  
398 - "brief_en^1.5",  
399 - "vendor_en^1.5",  
400 - "category_name_en^1.5" 397 + "title.en^3.0",
  398 + "brief.en^1.5",
  399 + "vendor.en^1.5",
  400 + "category_name_text.en^1.5"
401 ] 401 ]
402 return all_fields, core_fields 402 return all_fields, core_fields
403 403
@@ -28,7 +28,7 @@ def sample_index_config() -> IndexConfig: @@ -28,7 +28,7 @@ def sample_index_config() -> IndexConfig:
28 return IndexConfig( 28 return IndexConfig(
29 name="default", 29 name="default",
30 label="默认索引", 30 label="默认索引",
31 - fields=["title_zh", "brief_zh", "tags"], 31 + fields=["title.zh", "brief.zh", "tags"],
32 boost=1.0 32 boost=1.0
33 ) 33 )
34 34
@@ -60,10 +60,10 @@ def sample_search_config(sample_index_config) -> SearchConfig: @@ -60,10 +60,10 @@ def sample_search_config(sample_index_config) -> SearchConfig:
60 es_index_name="test_products", 60 es_index_name="test_products",
61 field_boosts={ 61 field_boosts={
62 "tenant_id": 1.0, 62 "tenant_id": 1.0,
63 - "title_zh": 3.0,  
64 - "brief_zh": 1.5, 63 + "title.zh": 3.0,
  64 + "brief.zh": 1.5,
65 "tags": 1.0, 65 "tags": 1.0,
66 - "category_path_zh": 1.5, 66 + "category_path.zh": 1.5,
67 }, 67 },
68 indexes=[sample_index_config], 68 indexes=[sample_index_config],
69 query_config=query_config, 69 query_config=query_config,
@@ -89,8 +89,8 @@ def mock_es_client() -> Mock: @@ -89,8 +89,8 @@ def mock_es_client() -> Mock:
89 "_id": "1", 89 "_id": "1",
90 "_score": 2.5, 90 "_score": 2.5,
91 "_source": { 91 "_source": {
92 - "title_zh": "红色连衣裙",  
93 - "vendor_zh": "测试品牌", 92 + "title": {"zh": "红色连衣裙"},
  93 + "vendor": {"zh": "测试品牌"},
94 "min_price": 299.0, 94 "min_price": 299.0,
95 "category_id": "1" 95 "category_id": "1"
96 } 96 }
@@ -99,8 +99,8 @@ def mock_es_client() -> Mock: @@ -99,8 +99,8 @@ def mock_es_client() -> Mock:
99 "_id": "2", 99 "_id": "2",
100 "_score": 2.2, 100 "_score": 2.2,
101 "_source": { 101 "_source": {
102 - "title_zh": "蓝色连衣裙",  
103 - "vendor_zh": "测试品牌", 102 + "title": {"zh": "蓝色连衣裙"},
  103 + "vendor": {"zh": "测试品牌"},
104 "min_price": 399.0, 104 "min_price": 399.0,
105 "category_id": "1" 105 "category_id": "1"
106 } 106 }
@@ -142,8 +142,8 @@ def sample_search_results() -> Dict[str, Any]: @@ -142,8 +142,8 @@ def sample_search_results() -> Dict[str, Any]:
142 "query": "红色连衣裙", 142 "query": "红色连衣裙",
143 "expected_total": 2, 143 "expected_total": 2,
144 "expected_products": [ 144 "expected_products": [
145 - {"title_zh": "红色连衣裙", "min_price": 299.0},  
146 - {"title_zh": "蓝色连衣裙", "min_price": 399.0} 145 + {"title": "红色连衣裙", "min_price": 299.0},
  146 + {"title": "蓝色连衣裙", "min_price": 399.0}
147 ] 147 ]
148 } 148 }
149 149
@@ -157,16 +157,16 @@ def temp_config_file() -> Generator[str, None, None]: @@ -157,16 +157,16 @@ def temp_config_file() -> Generator[str, None, None]:
157 config_data = { 157 config_data = {
158 "es_index_name": "test_products", 158 "es_index_name": "test_products",
159 "field_boosts": { 159 "field_boosts": {
160 - "title_zh": 3.0,  
161 - "brief_zh": 1.5, 160 + "title.zh": 3.0,
  161 + "brief.zh": 1.5,
162 "tags": 1.0, 162 "tags": 1.0,
163 - "category_path_zh": 1.5 163 + "category_path.zh": 1.5
164 }, 164 },
165 "indexes": [ 165 "indexes": [
166 { 166 {
167 "name": "default", 167 "name": "default",
168 "label": "默认索引", 168 "label": "默认索引",
169 - "fields": ["title_zh", "brief_zh", "tags"], 169 + "fields": ["title.zh", "brief.zh", "tags"],
170 "boost": 1.0 170 "boost": 1.0
171 } 171 }
172 ], 172 ],