diff --git a/CLAUDE.md b/CLAUDE.md
index 78a26b6..c617e62 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -256,12 +256,12 @@ The system uses centralized configuration through `config/config.yaml`:
{
"tenant_id": "keyword", // Multi-tenant isolation
"spu_id": "keyword", // Product identifier
- "title_zh/en": "text", // Multi-language titles
- "brief_zh/en": "text", // Short descriptions
- "description_zh/en": "text", // Detailed descriptions
- "vendor_zh/en": "text", // Supplier/brand with keyword subfield
- "category_path_zh/en": "text", // Hierarchical category paths
- "category_name_zh/en": "text", // Category names for search
+ "title.zh/en": "text", // Multi-language titles
+ "brief.zh/en": "text", // Short descriptions
+ "description.zh/en": "text", // Detailed descriptions
+ "vendor.zh/en": "text", // Supplier/brand with keyword subfield
+ "category_path.zh/en": "text", // Hierarchical category paths
+ "category_name_text.zh/en": "text", // Category names for search
"category1/2/3_name": "keyword", // Multi-level category filtering
"tags": "keyword", // Product tags
"specifications": "nested", // Product variants (color, size, etc.)
@@ -318,11 +318,11 @@ The system uses centralized configuration through `config/config.yaml`:
**Field Boost Configuration**:
```yaml
field_boosts:
- title_zh/en: 3.0 # Highest priority
- brief_zh/en: 1.5 # Medium priority
- description_zh/en: 1.0 # Lower priority
- vendor_zh/en: 1.5 # Brand emphasis
- category_path_zh/en: 1.5 # Category relevance
+ title.zh/en: 3.0 # Highest priority
+ brief.zh/en: 1.5 # Medium priority
+ description.zh/en: 1.0 # Lower priority
+ vendor.zh/en: 1.5 # Brand emphasis
+ category_path.zh/en: 1.5 # Category relevance
tags: 1.0 # Tag matching
```
diff --git a/README.md b/README.md
index 972a7bb..1eef882 100644
--- a/README.md
+++ b/README.md
@@ -15,8 +15,8 @@
"query" : {
"dis_max" : {
"queries" : [
- {"match" : { "title_en" : xxx }},
- {"match" : { "title_zh" : xxx }},
+ {"match" : { "title.en" : xxx }},
+ {"match" : { "title.zh" : xxx }},
{"match" : { "title_xx" : xxx }}
],
"tie_breakler" : 0.8
diff --git a/api/models.py b/api/models.py
index 0b5ab3c..661c24d 100644
--- a/api/models.py
+++ b/api/models.py
@@ -86,7 +86,7 @@ class SearchRequest(BaseModel):
"examples": [
{
"category_name": ["手机", "电子产品"],
- "vendor_zh.keyword": "奇乐",
+ "vendor.zh.keyword": "奇乐",
"specifications": {"name": "颜色", "value": "白色"}
},
{
diff --git a/api/result_formatter.py b/api/result_formatter.py
index f3fd9f7..56e2b1e 100644
--- a/api/result_formatter.py
+++ b/api/result_formatter.py
@@ -32,13 +32,22 @@ class ResultFormatter:
lang = "en"
def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]:
- """从 *_zh / *_en 字段中按语言选择一个值,若目标语言缺失则回退到另一种。"""
- zh_val = src.get(f"{base}_zh")
- en_val = src.get(f"{base}_en")
+ """
+ 从多语言对象字段中按语言选择一个值:
+ - 新结构: {base: {"zh": "...", "en": "...", ...}}
+ - 兼容旧结构: {base_zh: "...", base_en: "..."}
+ 若目标语言缺失则回退到另一种。
+ """
+ obj = src.get(base)
+ if isinstance(obj, dict):
+ zh_val = obj.get("zh")
+ en_val = obj.get("en")
+ else:
+ zh_val = src.get(f"{base}_zh")
+ en_val = src.get(f"{base}_en")
if lang == "zh":
return zh_val or en_val
- else:
- return en_val or zh_val
+ return en_val or zh_val
for hit in es_hits:
source = hit.get('_source', {})
@@ -60,7 +69,7 @@ class ResultFormatter:
description = pick_lang_field(source, "description")
vendor = pick_lang_field(source, "vendor")
category_path = pick_lang_field(source, "category_path")
- category_name = pick_lang_field(source, "category_name")
+ category_name = pick_lang_field(source, "category_name_text") or source.get("category_name")
# Extract SKUs
skus = []
diff --git a/config/config.yaml b/config/config.yaml
index 7296ff5..25eefbe 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -15,20 +15,20 @@ es_settings:
# 只配置权重,不配置字段结构(字段结构由 mappings/search_products.json 定义)
field_boosts:
# 文本相关性字段
- title_zh: 3.0
- brief_zh: 1.5
- description_zh: 1.0
- vendor_zh: 1.5
- title_en: 3.0
- brief_en: 1.5
- description_en: 1.0
- vendor_en: 1.5
+ "title.zh": 3.0
+ "brief.zh": 1.5
+ "description.zh": 1.0
+ "vendor.zh": 1.5
+ "title.en": 3.0
+ "brief.en": 1.5
+ "description.en": 1.0
+ "vendor.en": 1.5
# 分类相关字段
- category_path_zh: 1.5
- category_name_zh: 1.5
- category_path_en: 1.5
- category_name_en: 1.5
+ "category_path.zh": 1.5
+ "category_name_text.zh": 1.5
+ "category_path.en": 1.5
+ "category_name_text.en": 1.5
# 标签和属性值字段
tags: 1.0
@@ -42,33 +42,33 @@ indexes:
- name: "default"
label: "默认搜索"
fields:
- - "title_zh"
- - "brief_zh"
- - "description_zh"
- - "vendor_zh"
+ - "title.zh"
+ - "brief.zh"
+ - "description.zh"
+ - "vendor.zh"
- "tags"
- - "category_path_zh"
- - "category_name_zh"
+ - "category_path.zh"
+ - "category_name_text.zh"
- "option1_values"
boost: 1.0
- name: "title"
label: "标题搜索"
fields:
- - "title_zh"
+ - "title.zh"
boost: 2.0
- name: "vendor"
label: "品牌搜索"
fields:
- - "vendor_zh"
+ - "vendor.zh"
boost: 1.5
- name: "category"
label: "类目搜索"
fields:
- - "category_path_zh"
- - "category_name_zh"
+ - "category_path.zh"
+ - "category_name_text.zh"
boost: 1.5
- name: "tags"
diff --git a/config/utils.py b/config/utils.py
index 0bd7ff1..b0247e0 100644
--- a/config/utils.py
+++ b/config/utils.py
@@ -17,7 +17,7 @@ def get_match_fields_for_index(config: SearchConfig, index_name: str = "default"
index_name: Name of the index domain (default: "default")
Returns:
- List of field names with boost, e.g., ["title_zh^3.0", "brief_zh^1.5"]
+ List of field names with boost, e.g., ["title.zh^3.0", "brief.zh^1.5"]
"""
# Find the index config
index_config = None
diff --git a/docs/MySQL到ES字段映射说明-业务版.md b/docs/MySQL到ES字段映射说明-业务版.md
index 33c9ed1..7ca7260 100644
--- a/docs/MySQL到ES字段映射说明-业务版.md
+++ b/docs/MySQL到ES字段映射说明-业务版.md
@@ -28,14 +28,14 @@
| ES 字段 | 数据来源表 | 表中字段 | 转换说明 |
|---------|-----------|----------|----------|
-| `title_zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 |
-| `title_en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 |
-| `brief_zh` | SPU 表 | `brief` | 同上 |
-| `brief_en` | SPU 表 | `brief` | 同上 |
-| `description_zh` | SPU 表 | `description` | 同上 |
-| `description_en` | SPU 表 | `description` | 同上 |
-| `vendor_zh` | SPU 表 | `vendor` | 同上 |
-| `vendor_en` | SPU 表 | `vendor` | 同上 |
+| `title.zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 |
+| `title.en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 |
+| `brief.zh` | SPU 表 | `brief` | 同上 |
+| `brief.en` | SPU 表 | `brief` | 同上 |
+| `description.zh` | SPU 表 | `description` | 同上 |
+| `description.en` | SPU 表 | `description` | 同上 |
+| `vendor.zh` | SPU 表 | `vendor` | 同上 |
+| `vendor.en` | SPU 表 | `vendor` | 同上 |
**翻译规则:**
- 根据租户配置的 `primary_language` 确定主语言
@@ -104,7 +104,7 @@ category_ids = ["1", "2", "3"]
| ES 字段 | 数据来源 | 转换说明 |
|---------|----------|----------|
-| `category_path_zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` |
+| `category_path.zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` |
| `category1_name` | 类目名称列表[0] | 一级类目:`"电子产品"` |
| `category2_name` | 类目名称列表[1] | 二级类目:`"手机"` |
| `category3_name` | 类目名称列表[2] | 三级类目:`"iPhone"` |
@@ -300,13 +300,13 @@ sku_weight_units = 去重后的重量单位列表
| ES 字段 | 数据来源 | 转换说明 |
|---------|----------|----------|
-| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br>优先使用 `title_en`,如果没有则使用 `title_zh` |
+| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br>优先使用 `title.en`,如果没有则使用 `title.zh` |
**生成逻辑:**
```
如果启用向量搜索:
- 文本 = title_en 或 title_zh
+ 文本 = title.en 或 title.zh
向量 = 文本编码器.encode(文本)
title_embedding = 向量(1024 维浮点数组)
```
@@ -322,17 +322,17 @@ sku_weight_units = 去重后的重量单位列表
| **基础字段** |
| `tenant_id` | SPU | `tenant_id` | 租户ID |
| `spu_id` | SPU | `id` | 商品ID |
-| `title_zh/en` | SPU | `title` | 标题(多语言) |
-| `brief_zh/en` | SPU | `brief` | 简介(多语言) |
-| `description_zh/en` | SPU | `description` | 描述(多语言) |
-| `vendor_zh/en` | SPU | `vendor` | 品牌(多语言) |
+| `title.zh/en` | SPU | `title` | 标题(多语言) |
+| `brief.zh/en` | SPU | `brief` | 简介(多语言) |
+| `description.zh/en` | SPU | `description` | 描述(多语言) |
+| `vendor.zh/en` | SPU | `vendor` | 品牌(多语言) |
| `tags` | SPU | `tags` | 标签数组 |
| `image_url` | SPU | `image_src` | 主图URL |
| `sales` | SPU | `fake_sales` | 销量 |
| `create_time` | SPU | `create_time` | 创建时间 |
| `update_time` | SPU | `update_time` | 更新时间 |
| **类别字段** |
-| `category_path_zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 |
+| `category_path.zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 |
| `category1_name` | SPU + 类目映射 | `category_path` → 类目名称[0] | 一级类目 |
| `category2_name` | SPU + 类目映射 | `category_path` → 类目名称[1] | 二级类目 |
| `category3_name` | SPU + 类目映射 | `category_path` → 类目名称[2] | 三级类目 |
@@ -451,14 +451,14 @@ GET /search_products/_search
"_source": {
"tenant_id": "162",
"spu_id": "74174",
- "title_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
- "title_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
- "brief_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
- "brief_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging",
- "description_zh": "
实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】
",
- "description_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]
实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】",
+ "description.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]
0:
@@ -358,7 +358,7 @@ if category_names:
```python
elif pd.notna(spu_row.get('category')):
category = str(spu_row['category'])
- doc['category_name_zh'] = category
+ doc['category_name_text.zh'] = category
# 尝试从 category 字段解析多级分类(如果包含 "/")
if '/' in category:
@@ -375,8 +375,8 @@ elif pd.notna(spu_row.get('category')):
| ES 字段 | 类型 | 说明 |
|---------|------|------|
-| `category_path_zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") |
-| `category_path_en` | text | 类目路径英文(暂未实现) |
+| `category_path.zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") |
+| `category_path.en` | text | 类目路径英文(暂未实现) |
| `category1_name` | keyword | 一级类目名称 |
| `category2_name` | keyword | 二级类目名称 |
| `category3_name` | keyword | 三级类目名称 |
@@ -673,7 +673,7 @@ if enable_embedding and encoder and documents:
title_texts = []
title_doc_indices = []
for i, (_, doc) in enumerate(documents):
- title_text = doc.get("title_en") or doc.get("title_zh")
+ title_text = doc.get("title.en") or doc.get("title.zh")
if title_text and str(title_text).strip():
title_texts.append(str(title_text))
title_doc_indices.append(i)
@@ -858,16 +858,16 @@ spu_df["_is_deleted"] = spu_df["deleted"].apply(_is_deleted_value)
{
"tenant_id": "1",
"spu_id": "12345",
- "title_zh": "iPhone 15 Pro Max",
- "title_en": "iPhone 15 Pro Max",
- "brief_zh": "最新款 iPhone",
- "brief_en": "Latest iPhone",
- "description_zh": "详细描述...",
- "description_en": "Detailed description...",
- "vendor_zh": "Apple",
- "vendor_en": "Apple",
+ "title.zh": "iPhone 15 Pro Max",
+ "title.en": "iPhone 15 Pro Max",
+ "brief.zh": "最新款 iPhone",
+ "brief.en": "Latest iPhone",
+ "description.zh": "详细描述...",
+ "description.en": "Detailed description...",
+ "vendor.zh": "Apple",
+ "vendor.en": "Apple",
"tags": ["手机", "智能手机", "Apple"],
- "category_path_zh": "电子产品/手机/iPhone",
+ "category_path.zh": "电子产品/手机/iPhone",
"category1_name": "电子产品",
"category2_name": "手机",
"category3_name": "iPhone",
diff --git a/docs/Search-API-Examples.md b/docs/Search-API-Examples.md
index c8efa67..2427edb 100644
--- a/docs/Search-API-Examples.md
+++ b/docs/Search-API-Examples.md
@@ -132,7 +132,7 @@ curl -X POST "http://localhost:6002/search/" \
"language": "zh",
"filters": {
"category_name": "手机",
- "vendor_zh.keyword": "奇乐"
+ "vendor.zh.keyword": "奇乐"
}
}'
```
@@ -294,7 +294,7 @@ curl -X POST "http://localhost:6002/search/" \
"language": "zh",
"filters": {
"category_name": ["手机", "电子产品"],
- "vendor_zh.keyword": "品牌A"
+ "vendor.zh.keyword": "品牌A"
},
"range_filters": {
"min_price": {
diff --git a/docs/基础配置指南.md b/docs/基础配置指南.md
index ca67edd..6e060fe 100644
--- a/docs/基础配置指南.md
+++ b/docs/基础配置指南.md
@@ -26,12 +26,12 @@
- `create_time`, `update_time` (date) - 时间字段
#### 多语言文本字段
-- `title_zh`, `title_en` (text) - 标题(中英文)
-- `brief_zh`, `brief_en` (text) - 短描述(中英文)
-- `description_zh`, `description_en` (text) - 详细描述(中英文)
-- `vendor_zh`, `vendor_en` (text) - 供应商/品牌(中英文,含keyword子字段)
-- `category_path_zh`, `category_path_en` (text) - 类目路径(中英文)
-- `category_name_zh`, `category_name_en` (text) - 类目名称(中英文)
+- `title.zh`, `title.en` (text) - 标题(中英文)
+- `brief.zh`, `brief.en` (text) - 短描述(中英文)
+- `description.zh`, `description.en` (text) - 详细描述(中英文)
+- `vendor.zh`, `vendor.en` (text) - 供应商/品牌(中英文,含keyword子字段)
+- `category_path.zh`, `category_path.en` (text) - 类目路径(中英文)
+- `category_name_text.zh`, `category_name_text.en` (text) - 类目名称(中英文)
#### 类目字段
- `category_id` (keyword) - 类目ID
@@ -64,12 +64,12 @@
### 文本召回字段
默认同时搜索以下字段(中英文都包含):
-- `title_zh^3.0`, `title_en^3.0`
-- `brief_zh^1.5`, `brief_en^1.5`
-- `description_zh^1.0`, `description_en^1.0`
-- `vendor_zh^1.5`, `vendor_en^1.5`
-- `category_path_zh^1.5`, `category_path_en^1.5`
-- `category_name_zh^1.5`, `category_name_en^1.5`
+- `title.zh^3.0`, `title.en^3.0`
+- `brief.zh^1.5`, `brief.en^1.5`
+- `description.zh^1.0`, `description.en^1.0`
+- `vendor.zh^1.5`, `vendor.en^1.5`
+- `category_path.zh^1.5`, `category_path.en^1.5`
+- `category_name_text.zh^1.5`, `category_name_text.en^1.5`
- `tags^1.0`
### 查询架构
@@ -126,12 +126,12 @@
- `language="en"`: 优先返回 `*_en` 字段,如果为空则回退到 `*_zh` 字段
映射规则:
-- `title_zh/en` → `title`
-- `brief_zh/en` → `brief`
-- `description_zh/en` → `description`
-- `vendor_zh/en` → `vendor`
-- `category_path_zh/en` → `category_path`
-- `category_name_zh/en` → `category_name`
+- `title.zh/en` → `title`
+- `brief.zh/en` → `brief`
+- `description.zh/en` → `description`
+- `vendor.zh/en` → `vendor`
+- `category_path.zh/en` → `category_path`
+- `category_name_text.zh/en` → `category_name`
## 配置修改
diff --git a/docs/常用查询 - ES.md b/docs/常用查询 - ES.md
index e2f4f23..db51ad1 100644
--- a/docs/常用查询 - ES.md
+++ b/docs/常用查询 - ES.md
@@ -22,7 +22,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
"size": 100,
- "_source": ["title_zh", "title_en"],
+ "_source": ["title"],
"query": {
"bool": {
"filter": [
@@ -34,13 +34,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/
curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
"size": 1,
- "_source": ["title_zh", "title_en"],
+ "_source": ["title"],
"query": {
"bool": {
"must": [
{
"match": {
- "title_zh": {
+ "title.zh": {
"query": "裙子"
}
}
@@ -54,7 +54,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/
}'
curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{
- "analyzer": "query_ansj",
+ "analyzer": "icu_analyzer",
"text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
}'
@@ -73,13 +73,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/
"multi_match": {
"_name": "base_query",
"fields": [
- "title_zh^3.0",
- "brief_zh^1.5",
- "description_zh",
- "vendor_zh^1.5",
+ "title.zh^3.0",
+ "brief.zh^1.5",
+ "description.zh",
+ "vendor.zh^1.5",
"tags",
- "category_path_zh^1.5",
- "category_name_zh^1.5",
+ "category_path.zh^1.5",
+ "category_name_text.zh^1.5",
"option1_values^0.5"
],
"minimum_should_match": "75%",
@@ -110,13 +110,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/
"multi_match": {
"_name": "base_query",
"fields": [
- "title_zh^3.0",
- "brief_zh^1.5",
- "description_zh",
- "vendor_zh^1.5",
+ "title.zh^3.0",
+ "brief.zh^1.5",
+ "description.zh",
+ "vendor.zh^1.5",
"tags",
- "category_path_zh^1.5",
- "category_name_zh^1.5",
+ "category_path.zh^1.5",
+ "category_name_text.zh^1.5",
"option1_values^0.5"
],
"minimum_should_match": "75%",
@@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
"size": 1,
"_source": [
"spu_id",
- "title_zh",
+ "title",
"category1_name",
"category2_name",
"category3_name",
@@ -552,7 +552,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
}
},
"size": 1,
- "_source": ["spu_id", "title_zh", "specifications"]
+ "_source": ["spu_id", "title", "specifications"]
}'
## 4. 统计查询
@@ -597,7 +597,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
}
},
"size": 10,
- "_source": ["spu_id", "title_zh", "category_name_zh", "category_path_zh"]
+ "_source": ["spu_id", "title", "category_name_text", "category_path"]
}'
### 5.2 查找有option但没有specifications的文档(数据转换问题)
@@ -614,7 +614,32 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
}
},
"size": 10,
- "_source": ["spu_id", "title_zh", "option1_name", "option2_name", "option3_name", "specifications"]
+ "_source": ["spu_id", "title", "option1_name", "option2_name", "option3_name", "specifications"]
}'
+重排序:
+GET /search_products/_search
+{
+ "query": {
+ "match": {
+ "title.en": {
+ "query": "quick brown fox",
+ "minimum_should_match": "90%"
+ }
+ }
+ },
+ "rescore": {
+ "window_size": 50,
+ "query": {
+ "rescore_query": {
+ "match_phrase": {
+ "title.en": {
+ "query": "quick brown fox",
+ "slop": 50
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md
index 21dd5dd..09f72d7 100644
--- a/docs/搜索API对接指南.md
+++ b/docs/搜索API对接指南.md
@@ -203,7 +203,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
"category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个
"category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个
"category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个
- "vendor_zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个
+ "vendor.zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个
"tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个
// specifications 嵌套过滤(特殊格式)
"specifications": {
@@ -274,7 +274,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
- `category_name`: 类目名称
- `category1_name`, `category2_name`, `category3_name`: 多级类目
- `category_id`: 类目ID
-- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段)
+- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
- `tags`: 标签(keyword类型,支持数组)
- `option1_name`, `option2_name`, `option3_name`: 选项名称
- `specifications`: 规格过滤(嵌套字段,格式见上文)
@@ -602,7 +602,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5"
{
"id": "12345",
"source": {
- "title_zh": "芭比时尚娃娃",
+ "title": {
+ "zh": "芭比时尚娃娃"
+ },
"min_price": 89.99,
"category1_name": "玩具"
}
@@ -739,7 +741,7 @@ curl "http://localhost:6002/search/12345"
| 字段 | 类型 | 说明 |
|------|------|------|
| `spu_id` | string | SPU ID |
-| `title` | string | 商品标题(根据language参数自动选择title_zh或title_en) |
+| `title` | string | 商品标题(根据language参数自动选择 `title.zh` 或 `title.en`) |
| `brief` | string | 商品短描述(根据language参数自动选择) |
| `description` | string | 商品详细描述(根据language参数自动选择) |
| `vendor` | string | 供应商/品牌(根据language参数自动选择) |
@@ -1088,7 +1090,9 @@ curl -X POST "http://localhost:6004/indexer/index" \
"document": {
"tenant_id": "162",
"spu_id": "123",
- "title_zh": "商品标题",
+ "title": {
+ "zh": "商品标题"
+ },
...
}
},
@@ -1298,7 +1302,7 @@ curl -X GET "http://localhost:6004/indexer/health"
"size": 20,
"language": "zh",
"filters": {
- "vendor_zh.keyword": ["品牌A", "品牌B"]
+ "vendor.zh.keyword": ["品牌A", "品牌B"]
},
"range_filters": {
"min_price": {
@@ -1505,12 +1509,12 @@ curl -X GET "http://localhost:6004/indexer/health"
|--------|------|------|
| `tenant_id` | keyword | 租户ID(多租户隔离) |
| `spu_id` | keyword | SPU ID |
-| `title_zh`, `title_en` | text | 商品标题(中英文) |
-| `brief_zh`, `brief_en` | text | 商品短描述(中英文) |
-| `description_zh`, `description_en` | text | 商品详细描述(中英文) |
-| `vendor_zh`, `vendor_en` | text | 供应商/品牌(中英文,含keyword子字段) |
-| `category_path_zh`, `category_path_en` | text | 类目路径(中英文,用于搜索) |
-| `category_name_zh`, `category_name_en` | text | 类目名称(中英文,用于搜索) |
+| `title.<lang>` | object/text | 商品标题(多语言对象,如 `title.zh`, `title.en`) |
+| `brief.<lang>` | object/text | 商品短描述(多语言对象,如 `brief.zh`, `brief.en`) |
+| `description.<lang>` | object/text | 商品详细描述(多语言对象,如 `description.zh`, `description.en`) |
+| `vendor.<lang>` | object/text | 供应商/品牌(多语言对象,且带 keyword 子字段,如 `vendor.zh.keyword`) |
+| `category_path.<lang>` | object/text | 类目路径(多语言对象,用于搜索,如 `category_path.zh`) |
+| `category_name_text.<lang>` | object/text | 类目名称(多语言对象,用于搜索,如 `category_name_text.zh`) |
| `category_id` | keyword | 类目ID |
| `category_name` | keyword | 类目名称(用于过滤) |
| `category_level` | integer | 类目层级 |
@@ -1552,7 +1556,7 @@ curl -X GET "http://localhost:6004/indexer/health"
- `category_name`: 类目名称
- `category1_name`, `category2_name`, `category3_name`: 多级类目
- `category_id`: 类目ID
-- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段)
+- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
- `tags`: 标签(keyword类型)
- `option1_name`, `option2_name`, `option3_name`: 选项名称
- `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解))
diff --git a/docs/搜索API速查表.md b/docs/搜索API速查表.md
index 0c313c2..e485230 100644
--- a/docs/搜索API速查表.md
+++ b/docs/搜索API速查表.md
@@ -19,7 +19,7 @@ POST /search/
"filters": {
"category_name": "手机", // 单值
"category1_name": "服装", // 一级类目
- "vendor_zh.keyword": ["奇乐", "品牌A"], // 多值(OR)
+ "vendor.zh.keyword": ["奇乐", "品牌A"], // 多值(OR)
"tags": "手机", // 标签
// specifications 嵌套过滤
"specifications": {
diff --git a/docs/相关性检索优化说明.md b/docs/相关性检索优化说明.md
index 638029a..f3eeccf 100644
--- a/docs/相关性检索优化说明.md
+++ b/docs/相关性检索优化说明.md
@@ -20,7 +20,7 @@
{
"multi_match": {
"query": "戏水动物",
- "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
+ "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
"minimum_should_match": "67%",
"tie_breaker": 0.9,
"boost": 1,
@@ -40,7 +40,7 @@
{
"multi_match": {
"_name": "base_query",
- "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
+ "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
"minimum_should_match": "75%",
"operator": "AND",
"query": "戏水动物",
@@ -51,7 +51,7 @@
"multi_match": {
"_name": "base_query_trans_en",
"boost": 0.4,
- "fields": ["title_en^3.0", ...],
+ "fields": ["title.en^3.0", ...],
"minimum_should_match": "75%",
"operator": "AND",
"query": "water sports",
@@ -61,7 +61,7 @@
{
"multi_match": {
"query": "戏水动物",
- "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
+ "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
"type": "phrase",
"slop": 2,
"boost": 1.0,
@@ -71,7 +71,7 @@
{
"multi_match": {
"query": "戏水 动物",
- "fields": ["title_zh^3.0", "brief_zh^1.5", ...],
+ "fields": ["title.zh^3.0", "brief.zh^1.5", ...],
"operator": "AND",
"tie_breaker": 0.9,
"boost": 0.1,
diff --git a/docs/系统设计文档.md b/docs/系统设计文档.md
index 3aeaeb4..1b9890d 100644
--- a/docs/系统设计文档.md
+++ b/docs/系统设计文档.md
@@ -31,18 +31,18 @@
{
"tenant_id": "1",
"spu_id": "123",
- "title_zh": "蓝牙耳机",
- "title_en": "Bluetooth Headphones",
- "brief_zh": "高品质蓝牙耳机",
- "brief_en": "High-quality Bluetooth headphones",
+ "title.zh": "蓝牙耳机",
+ "title.en": "Bluetooth Headphones",
+ "brief.zh": "高品质蓝牙耳机",
+ "brief.en": "High-quality Bluetooth headphones",
"category_name": "电子产品",
- "category_path_zh": "电子产品/音频设备/耳机",
- "category_path_en": "Electronics/Audio/Headphones",
+ "category_path.zh": "电子产品/音频设备/耳机",
+ "category_path.en": "Electronics/Audio/Headphones",
"category1_name": "电子产品",
"category2_name": "音频设备",
"category3_name": "耳机",
- "vendor_zh": "品牌A",
- "vendor_en": "Brand A",
+ "vendor.zh": "品牌A",
+ "vendor.en": "Brand A",
"min_price": 199.99,
"max_price": 299.99,
"option1_name": "color",
@@ -93,7 +93,7 @@
- **查询配置硬编码**:查询相关配置(字段 boost、查询域等)硬编码在 `search/query_config.py`
**索引结构特点**:
-1. **多语言字段**:所有文本字段支持中英文(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`)
+1. **多语言字段**:所有文本字段支持中英文(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`)
2. **嵌套字段**:
- `skus`: SKU 嵌套数组(包含价格、库存、选项值等)
- `specifications`: 规格嵌套数组(包含 name、value、sku_id)
@@ -125,12 +125,12 @@
- `create_time`, `update_time` (date): 创建和更新时间
#### 多语言文本字段
-- `title_zh/en` (text): 标题(中英文)
-- `brief_zh/en` (text): 短描述(中英文)
-- `description_zh/en` (text): 详细描述(中英文)
-- `vendor_zh/en` (text): 供应商/品牌(中英文)
-- `category_path_zh/en` (text): 类目路径(中英文)
-- `category_name_zh/en` (text): 类目名称(中英文)
+- `title.zh/en` (text): 标题(中英文)
+- `brief.zh/en` (text): 短描述(中英文)
+- `description.zh/en` (text): 详细描述(中英文)
+- `vendor.zh/en` (text): 供应商/品牌(中英文)
+- `category_path.zh/en` (text): 类目路径(中英文)
+- `category_name_text.zh/en` (text): 类目名称(中英文)
**分析器配置**:
- 中文字段:`hanlp_index`(索引时)/ `hanlp_standard`(查询时)
@@ -282,7 +282,7 @@ indexes:
2. **数据转换**(`indexer/spu_transformer.py`):
- 按`spu_id`和`tenant_id`关联SPU和SKU数据
- **多语言字段映射**:
- - MySQL的`title` → ES的`title_zh`(英文字段设为空)
+ - MySQL的`title` → ES的`title.zh`(英文字段设为空)
- 其他文本字段类似处理
- **分类字段映射**:
- 从SPU表的`category_path`解析多级类目(`category1_name`, `category2_name`, `category3_name`)
@@ -483,7 +483,7 @@ laptop AND (gaming OR professional) ANDNOT cheap
- 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系
- 使用ES的`nested`查询实现
- **text_recall**: 文本相关性召回
- - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`)
+ - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`)
- 使用 `multi_match` 查询,支持字段 boost
- 中文字段使用中文分词器,英文字段使用英文分析器
- **embedding_recall**: 向量召回(KNN)
@@ -503,8 +503,8 @@ laptop AND (gaming OR professional) ANDNOT cheap
"multi_match": {
"query": "手机",
"fields": [
- "title_zh^3.0", "title_en^3.0",
- "brief_zh^1.5", "brief_en^1.5",
+ "title.zh^3.0", "title.en^3.0",
+ "brief.zh^1.5", "brief.en^1.5",
...
]
}
diff --git a/docs/系统设计文档v1.md b/docs/系统设计文档v1.md
index 63dfd61..8b32944 100644
--- a/docs/系统设计文档v1.md
+++ b/docs/系统设计文档v1.md
@@ -424,7 +424,7 @@ laptop AND (gaming OR professional) ANDNOT cheap
- **结构**: `filters AND (text_recall OR embedding_recall)`
- **filters**: 前端传递的过滤条件(永远起作用,放在 `filter` 中)
- **text_recall**: 文本相关性召回
- - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`)
+ - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`)
- 使用 `multi_match` 查询,支持字段 boost
- **embedding_recall**: 向量召回(KNN)
- 使用 `title_embedding` 字段进行 KNN 搜索
@@ -443,8 +443,8 @@ laptop AND (gaming OR professional) ANDNOT cheap
"multi_match": {
"query": "手机",
"fields": [
- "title_zh^3.0", "title_en^3.0",
- "brief_zh^1.5", "brief_en^1.5",
+ "title.zh^3.0", "title.en^3.0",
+ "brief.zh^1.5", "brief.en^1.5",
...
]
}
diff --git a/docs/索引字段说明v1.md b/docs/索引字段说明v1.md
index 219bf41..c24cc61 100644
--- a/docs/索引字段说明v1.md
+++ b/docs/索引字段说明v1.md
@@ -539,7 +539,7 @@ title brief description seo_title seo_description seo_keywords vendor vendor_key
2. tenant - 数据灌入:
对每个tenant设置一一个语言,作为tenant的一个基本配置。
-写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title_en)
+写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title.en)
查询的时候,将query转为商家所用语言,并到对应的field去查。
diff --git a/docs/索引字段说明v2-mapping结构.md b/docs/索引字段说明v2-mapping结构.md
index 5698bbe..1c5598e 100644
--- a/docs/索引字段说明v2-mapping结构.md
+++ b/docs/索引字段说明v2-mapping结构.md
@@ -16,22 +16,22 @@
},
// 文本相关性相关字段
- "title_zh": {
+ "title.zh": {
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "brief_zh": {
+ "brief.zh": {
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "description_zh": {
+ "description.zh": {
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "vendor_zh": {
+ "vendor.zh": {
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard",
@@ -43,23 +43,23 @@
}
},
- "title_en": {
+ "title.en": {
"type": "text",
"analyzer": "english",
"search_analyzer": "english",
},
- "brief_en": {
+ "brief.en": {
"type": "text",
"analyzer": "english",
"search_analyzer": "english",
},
- "description_en": {
+ "description.en": {
"type": "text",
"analyzer": "english",
"search_analyzer": "english",
},
- "vendor_en": {
+ "vendor.en": {
"type": "text",
"analyzer": "english",
"search_analyzer": "english",
@@ -103,22 +103,22 @@
},
// 分类相关
- "category_path_zh": { // 提供模糊查询功能,辅助相关性计算
+ "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "category_path_en": { // 提供模糊查询功能,辅助相关性计算
+ "category_path.en": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
},
- "category_name_zh": { // 提供模糊查询功能,辅助相关性计算
+ "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "category_name_en": { // 提供模糊查询功能,辅助相关性计算
+ "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
diff --git a/docs/索引字段说明v2-plan.md b/docs/索引字段说明v2-plan.md
index a50c428..5cdbfa1 100644
--- a/docs/索引字段说明v2-plan.md
+++ b/docs/索引字段说明v2-plan.md
@@ -10,7 +10,7 @@
#### 1.1 多语言文本字段
-- 为文本字段添加中英文双字段支持(title_zh/title_en, brief_zh/brief_en, description_zh/description_en, vendor_zh/vendor_en)
+- 为文本字段添加中英文双字段支持(title.zh/title.en, brief.zh/brief.en, description.zh/description.en, vendor.zh/vendor.en)
- 中文字段使用 `index_ansj`/`query_ansj` 分析器(对应文档中的hanlp_index/hanlp_standard)
- 英文字段使用 `english` 分析器
- **暂时只填充中文字段,英文字段设为空**(不需要语言检测,每个tenant的语言预先知道)
@@ -28,22 +28,22 @@ category_path varchar(500)
mapping:
- "category_path_zh": { // 提供模糊查询功能,辅助相关性计算
+ "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "category_path_en": { // 提供模糊查询功能,辅助相关性计算
+ "category_path.en": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
},
- "category_name_zh": { // 提供模糊查询功能,辅助相关性计算
+ "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "category_name_en": { // 提供模糊查询功能,辅助相关性计算
+ "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
@@ -104,15 +104,15 @@ mapping:
#### 2.1 多语言文本处理
-- **简化处理**:暂时只填充中文字段(title_zh, brief_zh, description_zh, vendor_zh)
-- 英文字段(title_en, brief_en, description_en, vendor_en)设为空或None
+- **简化处理**:暂时只填充中文字段(title.zh, brief.zh, description.zh, vendor.zh)
+- 英文字段(title.en, brief.en, description.en, vendor.en)设为空或None
- 不需要语言检测逻辑
#### 2.2 分类路径解析
- 从 `category_path` 字段按 "/" 分割提取分类层级
- 分割结果赋值给 `category1_name`, `category2_name`, `category3_name`
-- 生成 `category_path_zh`(暂时填充,`category_path_en` 设为空)
+- 生成 `category_path.zh`(暂时填充,`category_path.en` 设为空)
#### 2.3 SKU字段展开计算
@@ -152,7 +152,7 @@ mapping:
**文件**: `indexer/spu_transformer.py`
-- **简化多语言处理**:只填充中文字段(title_zh, brief_zh等),英文字段设为空或None
+- **简化多语言处理**:只填充中文字段(title.zh, brief.zh等),英文字段设为空或None
- 实现分类路径的解析和展开
- 实现SKU字段的展开计算(价格、重量、库存)
- 实现选项字段的处理
diff --git a/docs/索引字段说明v2.md b/docs/索引字段说明v2.md
index 40900ec..6da30f4 100644
--- a/docs/索引字段说明v2.md
+++ b/docs/索引字段说明v2.md
@@ -28,24 +28,24 @@
| 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
|--------|--------|--------|------|----------|
-| `title_zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` |
-| `title_en` | text | english | 英文标题 | 暂为空(待翻译服务填充) |
+| `title.zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` |
+| `title.en` | text | english | 英文标题 | 暂为空(待翻译服务填充) |
#### 2.2 描述字段
| 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
|--------|--------|--------|------|----------|
-| `brief_zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` |
-| `brief_en` | text | english | 英文短描述 | 暂为空 |
-| `description_zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` |
-| `description_en` | text | english | 英文详细描述 | 暂为空 |
+| `brief.zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` |
+| `brief.en` | text | english | 英文短描述 | 暂为空 |
+| `description.zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` |
+| `description.en` | text | english | 英文详细描述 | 暂为空 |
#### 2.3 供应商/品牌字段
| 字段名 | ES类型 | 分析器 | 子字段 | 说明 | 数据来源 |
|--------|--------|--------|--------|------|----------|
-| `vendor_zh` | text | hanlp_index / hanlp_standard | `vendor_zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` |
-| `vendor_en` | text | english | `vendor_en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 |
+| `vendor.zh` | text | hanlp_index / hanlp_standard | `vendor.zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` |
+| `vendor.en` | text | english | `vendor.en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 |
**用途**:
- `text` 类型:用于全文搜索(支持模糊匹配)
@@ -65,15 +65,15 @@
| 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
|--------|--------|--------|------|----------|
-| `category_path_zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` |
-| `category_path_en` | text | english | 英文类目路径 | 暂为空 |
+| `category_path.zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` |
+| `category_path.en` | text | english | 英文类目路径 | 暂为空 |
#### 4.2 类目名称(用于搜索)
| 字段名 | ES类型 | 分析器 | 说明 | 数据来源 |
|--------|--------|--------|------|----------|
-| `category_name_zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` |
-| `category_name_en` | text | english | 英文类目名称 | 暂为空 |
+| `category_name_text.zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` |
+| `category_name_text.en` | text | english | 英文类目名称 | 暂为空 |
#### 4.3 类目标识(用于过滤和分面)
@@ -87,7 +87,7 @@
| `category3_name` | keyword | 三级类目名称 | 从 `category_path` 解析 |
**用途**:
-- `category_path_zh/en`, `category_name_zh/en`: 用于全文搜索,支持模糊匹配
+- `category_path.zh/en`, `category_name_text.zh/en`: 用于全文搜索,支持模糊匹配
- `category_id`, `category_name`, `category_level`, `category1/2/3_name`: 用于精确过滤和分面聚合
### 5. 规格字段(Specifications)
@@ -348,20 +348,20 @@
### 搜索字段(参与相关性计算)
-- `title_zh`, `title_en` (boost: 3.0)
-- `brief_zh`, `brief_en` (boost: 1.5)
-- `description_zh`, `description_en` (boost: 1.0)
-- `vendor_zh`, `vendor_en` (boost: 1.5)
+- `title.zh`, `title.en` (boost: 3.0)
+- `brief.zh`, `brief.en` (boost: 1.5)
+- `description.zh`, `description.en` (boost: 1.0)
+- `vendor.zh`, `vendor.en` (boost: 1.5)
- `tags` (boost: 1.0)
-- `category_path_zh`, `category_path_en` (boost: 1.5)
-- `category_name_zh`, `category_name_en` (boost: 1.5)
+- `category_path.zh`, `category_path.en` (boost: 1.5)
+- `category_name_text.zh`, `category_name_text.en` (boost: 1.5)
- `title_embedding` (向量召回,boost: 0.2)
### 过滤字段(精确匹配)
- `tenant_id` (必需,多租户隔离)
- `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name`
-- `vendor_zh.keyword`, `vendor_en.keyword`
+- `vendor.zh.keyword`, `vendor.en.keyword`
- `specifications` (嵌套查询)
- `min_price`, `max_price` (范围过滤)
- `sales` (范围过滤)
@@ -395,12 +395,12 @@
- `language="en"`: 优先返回 `*_en` 字段,如果为空则回退到 `*_zh` 字段
映射到前端字段:
-- `title_zh/en` → `title`
-- `brief_zh/en` → `brief`
-- `description_zh/en` → `description`
-- `vendor_zh/en` → `vendor`
-- `category_path_zh/en` → `category_path`
-- `category_name_zh/en` → `category_name`
+- `title.zh/en` → `title`
+- `brief.zh/en` → `brief`
+- `description.zh/en` → `description`
+- `vendor.zh/en` → `vendor`
+- `category_path.zh/en` → `category_path`
+- `category_name_text.zh/en` → `category_name`
### 规格数据构建
@@ -434,12 +434,12 @@ filters AND (text_recall OR embedding_recall)
### 文本召回字段
根据查询词的语言选择对应的索引字段:
-- `title_zh^3.0`, `title_en^3.0`
-- `brief_zh^1.5`, `brief_en^1.5`
-- `description_zh^1.0`, `description_en^1.0`
-- `vendor_zh^1.5`, `vendor_en^1.5`
-- `category_path_zh^1.5`, `category_path_en^1.5`
-- `category_name_zh^1.5`, `category_name_en^1.5`
+- `title.zh^3.0`, `title.en^3.0`
+- `brief.zh^1.5`, `brief.en^1.5`
+- `description.zh^1.0`, `description.en^1.0`
+- `vendor.zh^1.5`, `vendor.en^1.5`
+- `category_path.zh^1.5`, `category_path.en^1.5`
+- `category_name_text.zh^1.5`, `category_name_text.en^1.5`
- `tags^1.0`
## 注意事项
diff --git a/docs/索引数据接口文档___old.md b/docs/索引数据接口文档___old.md
index 8e968b6..22a7d5f 100644
--- a/docs/索引数据接口文档___old.md
+++ b/docs/索引数据接口文档___old.md
@@ -52,12 +52,12 @@ tenant_config:
### 配置规则
1. **主语言**:指定SKU表中 `title`、`brief`、`description`、`vendor` 等字段的语言。
- - 如果主语言是 `zh`,这些字段的值会填充到 `title_zh`、`brief_zh` 等字段
- - 如果主语言是 `en`,这些字段的值会填充到 `title_en`、`brief_en` 等字段
+ - 如果主语言是 `zh`,这些字段的值会填充到 `title.zh`、`brief.zh` 等字段
+ - 如果主语言是 `en`,这些字段的值会填充到 `title.en`、`brief.en` 等字段
2. **翻译配置**:
- - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title_en` 等字段
- - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title_zh` 等字段
+ - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title.en` 等字段
+ - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title.zh` 等字段
- **注意**:如果主语言本身就是目标语言,则不会触发翻译(例如主语言是英文,`translate_to_en: true` 不会触发翻译)
3. **默认配置**:如果租户ID不在 `tenants` 中,则使用 `default` 配置。
@@ -72,8 +72,8 @@ tenant_config:
"translate_to_zh": false
}
```
-- SKU表的 `title` 字段(中文)→ `title_zh`
-- 翻译服务将中文翻译为英文 → `title_en`
+- SKU表的 `title` 字段(中文)→ `title.zh`
+- 翻译服务将中文翻译为英文 → `title.en`
**示例2:英文主语言,需要翻译中文**
```json
@@ -83,8 +83,8 @@ tenant_config:
"translate_to_zh": true
}
```
-- SKU表的 `title` 字段(英文)→ `title_en`
-- 翻译服务将英文翻译为中文 → `title_zh`
+- SKU表的 `title` 字段(英文)→ `title.en`
+- 翻译服务将英文翻译为中文 → `title.zh`
**示例3:仅使用主语言,不翻译**
```json
@@ -94,8 +94,8 @@ tenant_config:
"translate_to_zh": false
}
```
-- SKU表的 `title` 字段(中文)→ `title_zh`
-- `title_en` 保持为 `null`
+- SKU表的 `title` 字段(中文)→ `title.zh`
+- `title.en` 保持为 `null`
### 配置更新
@@ -272,19 +272,19 @@ String json = response.body().string();
{
"tenant_id": "1",
"spu_id": "123",
- "title_zh": "商品标题",
- "title_en": null,
- "brief_zh": "商品简介",
- "brief_en": null,
- "description_zh": "商品详细描述",
- "description_en": null,
- "vendor_zh": "供应商名称",
- "vendor_en": null,
+ "title.zh": "商品标题",
+ "title.en": null,
+ "brief.zh": "商品简介",
+ "brief.en": null,
+ "description.zh": "商品详细描述",
+ "description.en": null,
+ "vendor.zh": "供应商名称",
+ "vendor.en": null,
"tags": ["标签1", "标签2"],
- "category_path_zh": "类目1/类目2/类目3",
- "category_path_en": null,
- "category_name_zh": "类目名称",
- "category_name_en": null,
+ "category_path.zh": "类目1/类目2/类目3",
+ "category_path.en": null,
+ "category_name_text.zh": "类目名称",
+ "category_name_text.en": null,
"category_id": "100",
"category_name": "类目名称",
"category_level": 3,
@@ -453,11 +453,11 @@ for (String spuId : changedSpuIds) {
|---------|--------|------|------|
| 基础标识 | `tenant_id` | keyword | 租户ID |
| 基础标识 | `spu_id` | keyword | SPU ID |
-| 文本字段 | `title_zh`, `title_en` | text | 标题(中英文) |
-| 文本字段 | `brief_zh`, `brief_en` | text | 简介(中英文) |
-| 文本字段 | `description_zh`, `description_en` | text | 描述(中英文) |
-| 文本字段 | `vendor_zh`, `vendor_en` | text | 供应商(中英文) |
-| 类目字段 | `category_path_zh`, `category_path_en` | text | 类目路径(中英文) |
+| 文本字段 | `title.zh`, `title.en` | text | 标题(中英文) |
+| 文本字段 | `brief.zh`, `brief.en` | text | 简介(中英文) |
+| 文本字段 | `description.zh`, `description.en` | text | 描述(中英文) |
+| 文本字段 | `vendor.zh`, `vendor.en` | text | 供应商(中英文) |
+| 类目字段 | `category_path.zh`, `category_path.en` | text | 类目路径(中英文) |
| 类目字段 | `category1_name`, `category2_name`, `category3_name` | keyword | 分层类目名称 |
| 价格字段 | `min_price`, `max_price` | float | 价格范围 |
| 库存字段 | `total_inventory` | long | 总库存 |
diff --git a/docs/索引方案.md b/docs/索引方案.md
index a1c5f85..4e3b546 100644
--- a/docs/索引方案.md
+++ b/docs/索引方案.md
@@ -178,12 +178,12 @@ category_path varchar(500)
方案:采用方案2
4. categoryPath索引 + Prefix 查询(categoryPath.keyword: "服装/男装")(如果满足条件的key太多的则性能较差,比如 查询的是一级类目,类目树叶子节点太多时性能较差)
5. categoryPath支撑模糊查询 和 多级cate keyword索引支撑精确查询。 索引阶段冗余,查询性能高。
- "category_path_zh": { // 提供模糊查询功能,辅助相关性计算
+ "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "hanlp_index",
"search_analyzer": "hanlp_standard"
},
- "category_path_en": { // 提供模糊查询功能,辅助相关性计算
+ "category_path.en": { // 提供模糊查询功能,辅助相关性计算
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
diff --git a/docs/翻译功能测试说明.md b/docs/翻译功能测试说明.md
index a524c8c..76bb4a9 100644
--- a/docs/翻译功能测试说明.md
+++ b/docs/翻译功能测试说明.md
@@ -22,8 +22,8 @@
```yaml
translation_prompts:
# 商品标题翻译提示词
- product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
- product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
+ product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
+ product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
# query翻译提示词
query_zh: "电商领域"
query_en: "e-commerce domain"
@@ -35,8 +35,8 @@ translation_prompts:
### 提示词使用规则
1. **商品标题翻译**:
- - 中文→英文:使用 `product_title_en`
- - 英文→中文:使用 `product_title_zh`
+ - 中文→英文:使用 `product_title_en`
+ - 英文→中文:使用 `product_title_zh`
2. **其他字段翻译**(brief, description, vendor):
- 根据目标语言选择 `default_zh` 或 `default_en`
@@ -72,7 +72,7 @@ translator = Translator(
# 测试商品标题翻译
text = "蓝牙耳机"
-prompt = config.query_config.translation_prompts.get('product_title_en')
+prompt = config.query_config.translation_prompts.get('product_title_en')
result = translator.translate(
text,
target_lang='en',
@@ -140,8 +140,8 @@ doc = transformer.transform_spu_to_doc(
options=pd.DataFrame()
)
-print(f"title_zh: {doc.get('title_zh')}")
-print(f"title_en: {doc.get('title_en')}") # 应该包含翻译结果
+print(f"title.zh: {(doc.get('title') or {}).get('zh')}")
+print(f"title.en: {(doc.get('title') or {}).get('en')}") # 应该包含翻译结果
```
### 5. 测试缓存功能
diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py
index 69a4a92..a7a87b6 100644
--- a/indexer/document_transformer.py
+++ b/indexer/document_transformer.py
@@ -184,17 +184,40 @@ class SPUDocumentTransformer:
translate_to_en = bool(self.tenant_config.get('translate_to_en'))
translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))
+ def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None):
+ """
+ Write multilingual text field as an object, e.g.:
+ doc[field_name] = {"zh": "...", "en": "..."}
+ Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh.
+ """
+ if not source_text or not str(source_text).strip():
+ return
+
+ obj: Dict[str, str] = {}
+ src = str(source_text)
+ obj[primary_lang] = src
+
+ tr = translations or {}
+ if translate_to_en and primary_lang != "en":
+ en_text = tr.get("en")
+ if en_text and str(en_text).strip():
+ obj["en"] = str(en_text)
+ if translate_to_zh and primary_lang != "zh":
+ zh_text = tr.get("zh")
+ if zh_text and str(zh_text).strip():
+ obj["zh"] = str(zh_text)
+
+ if obj:
+ doc[field_name] = obj
+
# Title
if pd.notna(spu_row.get('title')):
title_text = str(spu_row['title'])
-
- # 使用translator的translate_for_indexing方法,自动处理多语言翻译
+
+ translations: Dict[str, str] = {}
if self.translator:
- # 根据目标语言选择对应的提示词
prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh')
prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en')
-
- # 调用translate_for_indexing,自动处理翻译逻辑
translations = self.translator.translate_for_indexing(
title_text,
shop_language=primary_lang,
@@ -202,26 +225,14 @@ class SPUDocumentTransformer:
prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
translate_to_en=translate_to_en,
translate_to_zh=translate_to_zh,
- )
-
- # 填充翻译结果
- doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None)
- doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None)
- else:
- # 无翻译器,只填充主语言字段
- if primary_lang == 'zh':
- doc['title_zh'] = title_text
- doc['title_en'] = None
- else:
- doc['title_zh'] = None
- doc['title_en'] = title_text
- else:
- doc['title_zh'] = None
- doc['title_en'] = None
+ ) or {}
+
+ _set_lang_obj("title", title_text, translations)
# Brief
if pd.notna(spu_row.get('brief')):
brief_text = str(spu_row['brief'])
+ translations: Dict[str, str] = {}
if self.translator:
prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
translations = self.translator.translate_for_indexing(
@@ -231,23 +242,13 @@ class SPUDocumentTransformer:
prompt=prompt,
translate_to_en=translate_to_en,
translate_to_zh=translate_to_zh,
- )
- doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None)
- doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None)
- else:
- if primary_lang == 'zh':
- doc['brief_zh'] = brief_text
- doc['brief_en'] = None
- else:
- doc['brief_zh'] = None
- doc['brief_en'] = brief_text
- else:
- doc['brief_zh'] = None
- doc['brief_en'] = None
+ ) or {}
+ _set_lang_obj("brief", brief_text, translations)
# Description
if pd.notna(spu_row.get('description')):
desc_text = str(spu_row['description'])
+ translations: Dict[str, str] = {}
if self.translator:
prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
translations = self.translator.translate_for_indexing(
@@ -257,23 +258,13 @@ class SPUDocumentTransformer:
prompt=prompt,
translate_to_en=translate_to_en,
translate_to_zh=translate_to_zh,
- )
- doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None)
- doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None)
- else:
- if primary_lang == 'zh':
- doc['description_zh'] = desc_text
- doc['description_en'] = None
- else:
- doc['description_zh'] = None
- doc['description_en'] = desc_text
- else:
- doc['description_zh'] = None
- doc['description_en'] = None
+ ) or {}
+ _set_lang_obj("description", desc_text, translations)
# Vendor
if pd.notna(spu_row.get('vendor')):
vendor_text = str(spu_row['vendor'])
+ translations: Dict[str, str] = {}
if self.translator:
prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
translations = self.translator.translate_for_indexing(
@@ -283,19 +274,8 @@ class SPUDocumentTransformer:
prompt=prompt,
translate_to_en=translate_to_en,
translate_to_zh=translate_to_zh,
- )
- doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None)
- doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None)
- else:
- if primary_lang == 'zh':
- doc['vendor_zh'] = vendor_text
- doc['vendor_en'] = None
- else:
- doc['vendor_zh'] = None
- doc['vendor_en'] = vendor_text
- else:
- doc['vendor_zh'] = None
- doc['vendor_en'] = None
+ ) or {}
+ _set_lang_obj("vendor", vendor_text, translations)
def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series):
"""填充类目相关字段。"""
@@ -303,6 +283,8 @@ class SPUDocumentTransformer:
# - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目)
# - 仅记录错误日志,不阻塞索引流程
+ primary_lang = self.tenant_config.get('primary_language', 'zh')
+
if pd.notna(spu_row.get('category_path')):
category_path = str(spu_row['category_path'])
@@ -329,8 +311,7 @@ class SPUDocumentTransformer:
# 构建类目路径字符串(用于搜索)
if category_names:
category_path_str = '/'.join(category_names)
- doc['category_path_zh'] = category_path_str
- doc['category_path_en'] = None # 暂时设为空
+ doc['category_path'] = {primary_lang: category_path_str}
# 填充分层类目名称
if len(category_names) > 0:
@@ -342,8 +323,7 @@ class SPUDocumentTransformer:
elif pd.notna(spu_row.get('category')):
# 如果category_path为空,使用category字段作为category1_name的备选
category = str(spu_row['category'])
- doc['category_name_zh'] = category
- doc['category_name_en'] = None
+ doc['category_name_text'] = {primary_lang: category}
doc['category_name'] = category
# 尝试从category字段解析多级分类
@@ -362,10 +342,8 @@ class SPUDocumentTransformer:
if pd.notna(spu_row.get('category')):
# 确保category相关字段都被设置(如果前面没有设置)
category_name = str(spu_row['category'])
- if 'category_name_zh' not in doc:
- doc['category_name_zh'] = category_name
- if 'category_name_en' not in doc:
- doc['category_name_en'] = None
+ if 'category_name_text' not in doc:
+ doc['category_name_text'] = {primary_lang: category_name}
if 'category_name' not in doc:
doc['category_name'] = category_name
@@ -587,13 +565,22 @@ class SPUDocumentTransformer:
"""
填充标题向量化字段。
- 使用英文标题(title_en)生成embedding。如果title_en不存在,则使用title_zh。
+ 使用英文标题(title.en)生成embedding。如果title.en不存在,则使用title.zh。
Args:
doc: ES文档字典
"""
- # 优先使用英文标题,如果没有则使用中文标题
- title_text = doc.get('title_en') or doc.get('title_zh')
+ # 优先使用英文标题,如果没有则使用中文标题;再没有则取任意可用语言
+ title_obj = doc.get("title") or {}
+ if isinstance(title_obj, dict):
+ title_text = title_obj.get("en") or title_obj.get("zh")
+ if not title_text:
+ for v in title_obj.values():
+ if v and str(v).strip():
+ title_text = str(v)
+ break
+ else:
+ title_text = None
if not title_text or not title_text.strip():
logger.debug(f"No title text available for embedding, SPU: {doc.get('spu_id')}")
diff --git a/indexer/incremental_service.py b/indexer/incremental_service.py
index 1170602..cd9f49b 100644
--- a/indexer/incremental_service.py
+++ b/indexer/incremental_service.py
@@ -130,7 +130,15 @@ class IncrementalIndexerService:
# 单条场景下也可补齐 embedding(仍走缓存)
if enable_embedding and encoder:
- title_text = doc.get("title_en") or doc.get("title_zh")
+ title_obj = doc.get("title") or {}
+ title_text = None
+ if isinstance(title_obj, dict):
+ title_text = title_obj.get("en") or title_obj.get("zh")
+ if not title_text:
+ for v in title_obj.values():
+ if v and str(v).strip():
+ title_text = str(v)
+ break
if title_text and str(title_text).strip():
try:
embeddings = encoder.encode(title_text)
@@ -560,7 +568,15 @@ class IncrementalIndexerService:
title_texts: List[str] = []
title_doc_indices: List[int] = []
for i, (_, doc) in enumerate(documents):
- title_text = doc.get("title_en") or doc.get("title_zh")
+ title_obj = doc.get("title") or {}
+ title_text = None
+ if isinstance(title_obj, dict):
+ title_text = title_obj.get("en") or title_obj.get("zh")
+ if not title_text:
+ for v in title_obj.values():
+ if v and str(v).strip():
+ title_text = str(v)
+ break
if title_text and str(title_text).strip():
title_texts.append(str(title_text))
title_doc_indices.append(i)
diff --git a/indexer/test_indexing.py b/indexer/test_indexing.py
index c018054..58330d7 100755
--- a/indexer/test_indexing.py
+++ b/indexer/test_indexing.py
@@ -114,16 +114,17 @@ def test_full_indexing(tenant_id: str = "162"):
print(f"\n文档 {i+1}:")
print(f" SPU ID: {doc.get('spu_id')}")
print(f" Tenant ID: {doc.get('tenant_id')}")
- print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}")
- print(f" 标题 (英文): {doc.get('title_en', 'N/A')}")
+ title_obj = doc.get("title") or {}
+ print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
+ print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
# 检查租户162的翻译状态
if tenant_id == "162":
- # 租户162翻译应该关闭,title_en应该为None
- if doc.get('title_en') is None:
- print(f" ✓ 翻译已关闭(title_en为None)")
+ # 租户162翻译应该关闭:只写入主语言,不应出现 title.en
+ if isinstance(title_obj, dict) and title_obj.get("en") is None:
+ print(f" ✓ 翻译已关闭(title.en为空)")
else:
- print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}")
+ print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
return True
@@ -192,17 +193,18 @@ def test_incremental_indexing(tenant_id: str = "162"):
print(f"✓ SPU文档获取成功")
print(f" SPU ID: {doc.get('spu_id')}")
print(f" Tenant ID: {doc.get('tenant_id')}")
- print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}")
- print(f" 标题 (英文): {doc.get('title_en', 'N/A')}")
+ title_obj = doc.get("title") or {}
+ print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
+ print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
print(f" SKU数量: {len(doc.get('skus', []))}")
print(f" 规格数量: {len(doc.get('specifications', []))}")
# 检查租户162的翻译状态
if tenant_id == "162":
- if doc.get('title_en') is None:
- print(f" ✓ 翻译已关闭(title_en为None)")
+ if isinstance(title_obj, dict) and title_obj.get("en") is None:
+ print(f" ✓ 翻译已关闭(title.en为空)")
else:
- print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}")
+ print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
return True
@@ -291,12 +293,13 @@ def test_document_transformer():
if doc:
print(f"✓ 文档转换成功")
- print(f" title_zh: {doc.get('title_zh')}")
- print(f" title_en: {doc.get('title_en')}")
+ title_obj = doc.get("title") or {}
+ print(f" title.zh: {title_obj.get('zh') if isinstance(title_obj, dict) else None}")
+ print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
print(f" SKU数量: {len(doc.get('skus', []))}")
# 验证租户162翻译关闭
- if doc.get('title_en') is None:
+ if isinstance(title_obj, dict) and title_obj.get("en") is None:
print(f" ✓ 翻译已关闭(符合租户162配置)")
else:
print(f" ⚠ 警告:翻译应该关闭")
diff --git a/mappings/search_products.json b/mappings/search_products.json
index 9d0a5a8..acda2df 100644
--- a/mappings/search_products.json
+++ b/mappings/search_products.json
@@ -4,22 +4,12 @@
"number_of_replicas": 0,
"refresh_interval": "30s",
"analysis": {
- "analyzer": {
- "hanlp_index": {
- "type": "custom",
- "tokenizer": "standard",
- "filter": ["lowercase", "asciifolding"]
- },
- "hanlp_standard": {
- "type": "custom",
- "tokenizer": "standard",
- "filter": ["lowercase", "asciifolding"]
- }
- },
"normalizer": {
"lowercase": {
"type": "custom",
- "filter": ["lowercase"]
+ "filter": [
+ "lowercase"
+ ]
}
}
},
@@ -45,75 +35,963 @@
"update_time": {
"type": "date"
},
- "title_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
- },
- "qanchors_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
- },
- "keywords_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
- },
- "brief_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
- },
- "description_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
- },
- "vendor_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard",
- "fields": {
- "keyword": {
- "type": "keyword",
- "normalizer": "lowercase"
+ "title": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
}
}
},
- "title_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
- },
- "qanchors_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
- },
- "keywords_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
- },
- "brief_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
- },
- "description_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
- },
- "vendor_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english",
- "fields": {
- "keyword": {
- "type": "keyword",
- "normalizer": "lowercase"
+ "qanchors": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
+ }
+ }
+ },
+ "keywords": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
+ }
+ }
+ },
+ "brief": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
+ }
+ }
+ },
+ "description": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
+ }
+ }
+ },
+ "vendor": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai",
+ "fields": {
+ "keyword": {
+ "type": "keyword",
+ "normalizer": "lowercase"
+ }
+ }
}
}
},
@@ -158,25 +1036,263 @@
}
}
},
- "category_path_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
- },
- "category_path_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
- },
- "category_name_zh": {
- "type": "text",
- "analyzer": "hanlp_index",
- "search_analyzer": "hanlp_standard"
+ "category_path": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
+ }
+ }
},
- "category_name_en": {
- "type": "text",
- "analyzer": "english",
- "search_analyzer": "english"
+ "category_name_text": {
+ "type": "object",
+ "properties": {
+ "ar": {
+ "type": "text",
+ "analyzer": "arabic"
+ },
+ "hy": {
+ "type": "text",
+ "analyzer": "armenian"
+ },
+ "eu": {
+ "type": "text",
+ "analyzer": "basque"
+ },
+ "pt_br": {
+ "type": "text",
+ "analyzer": "brazilian"
+ },
+ "bg": {
+ "type": "text",
+ "analyzer": "bulgarian"
+ },
+ "ca": {
+ "type": "text",
+ "analyzer": "catalan"
+ },
+ "zh": {
+ "type": "text",
+ "analyzer": "icu_analyzer"
+ },
+ "chinese": {
+ "type": "text",
+ "analyzer": "chinese"
+ },
+ "cjk": {
+ "type": "text",
+ "analyzer": "cjk"
+ },
+ "cs": {
+ "type": "text",
+ "analyzer": "czech"
+ },
+ "da": {
+ "type": "text",
+ "analyzer": "danish"
+ },
+ "nl": {
+ "type": "text",
+ "analyzer": "dutch"
+ },
+ "en": {
+ "type": "text",
+ "analyzer": "english"
+ },
+ "fi": {
+ "type": "text",
+ "analyzer": "finnish"
+ },
+ "fr": {
+ "type": "text",
+ "analyzer": "french"
+ },
+ "gl": {
+ "type": "text",
+ "analyzer": "galician"
+ },
+ "de": {
+ "type": "text",
+ "analyzer": "german"
+ },
+ "el": {
+ "type": "text",
+ "analyzer": "greek"
+ },
+ "hi": {
+ "type": "text",
+ "analyzer": "hindi"
+ },
+ "hu": {
+ "type": "text",
+ "analyzer": "hungarian"
+ },
+ "id": {
+ "type": "text",
+ "analyzer": "indonesian"
+ },
+ "it": {
+ "type": "text",
+ "analyzer": "italian"
+ },
+ "no": {
+ "type": "text",
+ "analyzer": "norwegian"
+ },
+ "fa": {
+ "type": "text",
+ "analyzer": "persian"
+ },
+ "pt": {
+ "type": "text",
+ "analyzer": "portuguese"
+ },
+ "ro": {
+ "type": "text",
+ "analyzer": "romanian"
+ },
+ "ru": {
+ "type": "text",
+ "analyzer": "russian"
+ },
+ "es": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "sv": {
+ "type": "text",
+ "analyzer": "swedish"
+ },
+ "tr": {
+ "type": "text",
+ "analyzer": "turkish"
+ },
+ "th": {
+ "type": "text",
+ "analyzer": "thai"
+ }
+ }
},
"category_id": {
"type": "keyword"
@@ -294,4 +1410,3 @@
}
}
}
-
diff --git a/scripts/check_es_data.py b/scripts/check_es_data.py
index ef9c735..bc3e753 100755
--- a/scripts/check_es_data.py
+++ b/scripts/check_es_data.py
@@ -28,12 +28,12 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
"size": size,
"_source": [
"spu_id",
- "title_zh",
+ "title",
"category1_name",
"category2_name",
"category3_name",
"category_name",
- "category_path_zh",
+ "category_path",
"specifications",
"option1_name",
"option2_name",
@@ -51,14 +51,16 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
for i, hit in enumerate(hits, 1):
source = hit.get('_source', {})
+ title_obj = source.get("title") or {}
+ category_path_obj = source.get("category_path") or {}
print(f"文档 {i}:")
print(f" spu_id: {source.get('spu_id')}")
- print(f" title_zh: {source.get('title_zh', '')[:50]}")
+ print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}")
print(f" category1_name: {source.get('category1_name')}")
print(f" category2_name: {source.get('category2_name')}")
print(f" category3_name: {source.get('category3_name')}")
print(f" category_name: {source.get('category_name')}")
- print(f" category_path_zh: {source.get('category_path_zh')}")
+ print(f" category_path.zh: {category_path_obj.get('zh') if isinstance(category_path_obj, dict) else None}")
print(f" option1_name: {source.get('option1_name')}")
print(f" option2_name: {source.get('option2_name')}")
print(f" option3_name: {source.get('option3_name')}")
diff --git a/scripts/check_index_mapping.py b/scripts/check_index_mapping.py
index 3640633..c7b06bd 100644
--- a/scripts/check_index_mapping.py
+++ b/scripts/check_index_mapping.py
@@ -20,11 +20,19 @@ def check_field_mapping(mapping_dict, field_path):
current = mapping_dict
for part in parts:
- if isinstance(current, dict):
- current = current.get(part)
- if current is None:
- return None
- else:
+ if not isinstance(current, dict):
+ return None
+
+ # ES mapping nesting: object fields store subfields under "properties"
+ if "properties" in current and isinstance(current["properties"], dict):
+ current = current["properties"]
+
+ # multi-fields store subfields under "fields" (e.g. vendor.zh.keyword)
+ if part != parts[0] and "fields" in current and isinstance(current["fields"], dict) and part in current["fields"]:
+ current = current["fields"]
+
+ current = current.get(part)
+ if current is None:
return None
return current
@@ -70,12 +78,13 @@ def main():
# 检查关键字段
fields_to_check = [
- 'title_zh',
- 'brief_zh',
- 'description_zh',
- 'vendor_zh',
- 'category_path_zh',
- 'category_name_zh'
+ "title.zh",
+ "brief.zh",
+ "description.zh",
+ "vendor.zh",
+ "vendor.zh.keyword",
+ "category_path.zh",
+ "category_name_text.zh"
]
print("=" * 80)
@@ -83,7 +92,7 @@ def main():
print("=" * 80)
for field_name in fields_to_check:
- field_mapping = index_mapping.get(field_name)
+ field_mapping = check_field_mapping(index_mapping, field_name)
if field_mapping is None:
print(f"\n❌ {field_name}: 字段不存在")
diff --git a/search/es_query_builder.py b/search/es_query_builder.py
index 7b6bd77..eacc4fa 100644
--- a/search/es_query_builder.py
+++ b/search/es_query_builder.py
@@ -367,37 +367,37 @@ class ESQueryBuilder:
"""
if language == 'zh':
all_fields = [
- "title_zh^3.0",
- "brief_zh^1.5",
- "description_zh",
- "vendor_zh^1.5",
+ "title.zh^3.0",
+ "brief.zh^1.5",
+ "description.zh",
+ "vendor.zh^1.5",
"tags",
- "category_path_zh^1.5",
- "category_name_zh^1.5",
+ "category_path.zh^1.5",
+ "category_name_text.zh^1.5",
"option1_values^0.5"
]
core_fields = [
- "title_zh^3.0",
- "brief_zh^1.5",
- "vendor_zh^1.5",
- "category_name_zh^1.5"
+ "title.zh^3.0",
+ "brief.zh^1.5",
+ "vendor.zh^1.5",
+ "category_name_text.zh^1.5"
]
else: # en
all_fields = [
- "title_en^3.0",
- "brief_en^1.5",
- "description_en",
- "vendor_en^1.5",
+ "title.en^3.0",
+ "brief.en^1.5",
+ "description.en",
+ "vendor.en^1.5",
"tags",
- "category_path_en^1.5",
- "category_name_en^1.5",
+ "category_path.en^1.5",
+ "category_name_text.en^1.5",
"option1_values^0.5"
]
core_fields = [
- "title_en^3.0",
- "brief_en^1.5",
- "vendor_en^1.5",
- "category_name_en^1.5"
+ "title.en^3.0",
+ "brief.en^1.5",
+ "vendor.en^1.5",
+ "category_name_text.en^1.5"
]
return all_fields, core_fields
diff --git a/tests/conftest.py b/tests/conftest.py
index 573d53e..49373c6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,7 +28,7 @@ def sample_index_config() -> IndexConfig:
return IndexConfig(
name="default",
label="默认索引",
- fields=["title_zh", "brief_zh", "tags"],
+ fields=["title.zh", "brief.zh", "tags"],
boost=1.0
)
@@ -60,10 +60,10 @@ def sample_search_config(sample_index_config) -> SearchConfig:
es_index_name="test_products",
field_boosts={
"tenant_id": 1.0,
- "title_zh": 3.0,
- "brief_zh": 1.5,
+ "title.zh": 3.0,
+ "brief.zh": 1.5,
"tags": 1.0,
- "category_path_zh": 1.5,
+ "category_path.zh": 1.5,
},
indexes=[sample_index_config],
query_config=query_config,
@@ -89,8 +89,8 @@ def mock_es_client() -> Mock:
"_id": "1",
"_score": 2.5,
"_source": {
- "title_zh": "红色连衣裙",
- "vendor_zh": "测试品牌",
+ "title": {"zh": "红色连衣裙"},
+ "vendor": {"zh": "测试品牌"},
"min_price": 299.0,
"category_id": "1"
}
@@ -99,8 +99,8 @@ def mock_es_client() -> Mock:
"_id": "2",
"_score": 2.2,
"_source": {
- "title_zh": "蓝色连衣裙",
- "vendor_zh": "测试品牌",
+ "title": {"zh": "蓝色连衣裙"},
+ "vendor": {"zh": "测试品牌"},
"min_price": 399.0,
"category_id": "1"
}
@@ -142,8 +142,8 @@ def sample_search_results() -> Dict[str, Any]:
"query": "红色连衣裙",
"expected_total": 2,
"expected_products": [
- {"title_zh": "红色连衣裙", "min_price": 299.0},
- {"title_zh": "蓝色连衣裙", "min_price": 399.0}
+ {"title": "红色连衣裙", "min_price": 299.0},
+ {"title": "蓝色连衣裙", "min_price": 399.0}
]
}
@@ -157,16 +157,16 @@ def temp_config_file() -> Generator[str, None, None]:
config_data = {
"es_index_name": "test_products",
"field_boosts": {
- "title_zh": 3.0,
- "brief_zh": 1.5,
+ "title.zh": 3.0,
+ "brief.zh": 1.5,
"tags": 1.0,
- "category_path_zh": 1.5
+ "category_path.zh": 1.5
},
"indexes": [
{
"name": "default",
"label": "默认索引",
- "fields": ["title_zh", "brief_zh", "tags"],
+ "fields": ["title.zh", "brief.zh", "tags"],
"boost": 1.0
}
],
--
libgit2 0.21.2