Commit d7d48f5238c6c83665e359cda931dbc17dc748f8
1 parent
62b7972c
改动(mapping + 灌入结构)
mappings/search_products.json:把原来的 title_zh/title_en/brief_zh/... 改成以语言代码为 key 的对象结构(例如 /products/_doc/1 { "title": {"en": ...} })。
同时在这些字段下预置了全部 analyzer 语言:
arabic, armenian, basque, brazilian, bulgarian, catalan, chinese, cjk, czech, danish, dutch, english, finnish, french, galician, german, greek, hindi, hungarian, indonesian, italian, norwegian, persian, portuguese, romanian, russian, spanish, swedish, turkish, thai
实现为 type: object + properties,同时满足“按语言灌入”和“按语言 analyzer”。
索引灌入(全量/增量/transformer)已同步改完
indexer/document_transformer.py:输出从 title_zh/title_en/... 改为:
title: {<primary_lang>: 原文, en?: 翻译, zh?: 翻译}
brief/description/vendor 同理
category_path/category_name_text 也改为语言对象(避免查询侧继续依赖旧字段)
indexer/incremental_service.py:embedding 取值从 title_en/title_zh 改为从 title 对象里优先取 en,否则取 zh,否则取任一可用语言。
查询侧与配置、API/文档已同步
search/es_query_builder.py:查询字段统一改成点路径:title.zh / title.en / vendor.zh / vendor.zh.keyword / category_name_text.zh 等。
config/config.yaml:field boosts / indexes 里的字段名同步为新点路径。
API & formatter:
api/result_formatter.py 已支持新结构(并保留对旧 *_zh/*_en 字段的兼容兜底)。
api/models.py、相关 docs/examples 里的 vendor_zh.keyword 等已更新为 vendor.zh.keyword。
文档/脚本:docs/、README.md、scripts/ 里所有旧字段名引用已批量替换为新结构。
Showing 31 changed files with 1673 additions and 503 deletions
Show diff stats
CLAUDE.md
| @@ -256,12 +256,12 @@ The system uses centralized configuration through `config/config.yaml`: | @@ -256,12 +256,12 @@ The system uses centralized configuration through `config/config.yaml`: | ||
| 256 | { | 256 | { |
| 257 | "tenant_id": "keyword", // Multi-tenant isolation | 257 | "tenant_id": "keyword", // Multi-tenant isolation |
| 258 | "spu_id": "keyword", // Product identifier | 258 | "spu_id": "keyword", // Product identifier |
| 259 | - "title_zh/en": "text", // Multi-language titles | ||
| 260 | - "brief_zh/en": "text", // Short descriptions | ||
| 261 | - "description_zh/en": "text", // Detailed descriptions | ||
| 262 | - "vendor_zh/en": "text", // Supplier/brand with keyword subfield | ||
| 263 | - "category_path_zh/en": "text", // Hierarchical category paths | ||
| 264 | - "category_name_zh/en": "text", // Category names for search | 259 | + "title.zh/en": "text", // Multi-language titles |
| 260 | + "brief.zh/en": "text", // Short descriptions | ||
| 261 | + "description.zh/en": "text", // Detailed descriptions | ||
| 262 | + "vendor.zh/en": "text", // Supplier/brand with keyword subfield | ||
| 263 | + "category_path.zh/en": "text", // Hierarchical category paths | ||
| 264 | + "category_name_text.zh/en": "text", // Category names for search | ||
| 265 | "category1/2/3_name": "keyword", // Multi-level category filtering | 265 | "category1/2/3_name": "keyword", // Multi-level category filtering |
| 266 | "tags": "keyword", // Product tags | 266 | "tags": "keyword", // Product tags |
| 267 | "specifications": "nested", // Product variants (color, size, etc.) | 267 | "specifications": "nested", // Product variants (color, size, etc.) |
| @@ -318,11 +318,11 @@ The system uses centralized configuration through `config/config.yaml`: | @@ -318,11 +318,11 @@ The system uses centralized configuration through `config/config.yaml`: | ||
| 318 | **Field Boost Configuration**: | 318 | **Field Boost Configuration**: |
| 319 | ```yaml | 319 | ```yaml |
| 320 | field_boosts: | 320 | field_boosts: |
| 321 | - title_zh/en: 3.0 # Highest priority | ||
| 322 | - brief_zh/en: 1.5 # Medium priority | ||
| 323 | - description_zh/en: 1.0 # Lower priority | ||
| 324 | - vendor_zh/en: 1.5 # Brand emphasis | ||
| 325 | - category_path_zh/en: 1.5 # Category relevance | 321 | + title.zh/en: 3.0 # Highest priority |
| 322 | + brief.zh/en: 1.5 # Medium priority | ||
| 323 | + description.zh/en: 1.0 # Lower priority | ||
| 324 | + vendor.zh/en: 1.5 # Brand emphasis | ||
| 325 | + category_path.zh/en: 1.5 # Category relevance | ||
| 326 | tags: 1.0 # Tag matching | 326 | tags: 1.0 # Tag matching |
| 327 | ``` | 327 | ``` |
| 328 | 328 |
README.md
| @@ -15,8 +15,8 @@ | @@ -15,8 +15,8 @@ | ||
| 15 | "query" : { | 15 | "query" : { |
| 16 | "dis_max" : { | 16 | "dis_max" : { |
| 17 | "queries" : [ | 17 | "queries" : [ |
| 18 | - {"match" : { "title_en" : xxx }}, | ||
| 19 | - {"match" : { "title_zh" : xxx }}, | 18 | + {"match" : { "title.en" : xxx }}, |
| 19 | + {"match" : { "title.zh" : xxx }}, | ||
| 20 | {"match" : { "title_xx" : xxx }} | 20 | {"match" : { "title_xx" : xxx }} |
| 21 | ], | 21 | ], |
| 22 | "tie_breaker" : 0.8 | 22 | "tie_breaker" : 0.8
api/models.py
| @@ -86,7 +86,7 @@ class SearchRequest(BaseModel): | @@ -86,7 +86,7 @@ class SearchRequest(BaseModel): | ||
| 86 | "examples": [ | 86 | "examples": [ |
| 87 | { | 87 | { |
| 88 | "category_name": ["手机", "电子产品"], | 88 | "category_name": ["手机", "电子产品"], |
| 89 | - "vendor_zh.keyword": "奇乐", | 89 | + "vendor.zh.keyword": "奇乐", |
| 90 | "specifications": {"name": "颜色", "value": "白色"} | 90 | "specifications": {"name": "颜色", "value": "白色"} |
| 91 | }, | 91 | }, |
| 92 | { | 92 | { |
api/result_formatter.py
| @@ -32,13 +32,22 @@ class ResultFormatter: | @@ -32,13 +32,22 @@ class ResultFormatter: | ||
| 32 | lang = "en" | 32 | lang = "en" |
| 33 | 33 | ||
| 34 | def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]: | 34 | def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]: |
| 35 | - """从 *_zh / *_en 字段中按语言选择一个值,若目标语言缺失则回退到另一种。""" | ||
| 36 | - zh_val = src.get(f"{base}_zh") | ||
| 37 | - en_val = src.get(f"{base}_en") | 35 | + """ |
| 36 | + 从多语言对象字段中按语言选择一个值: | ||
| 37 | + - 新结构: {base: {"zh": "...", "en": "...", ...}} | ||
| 38 | + - 兼容旧结构: {base_zh: "...", base_en: "..."} | ||
| 39 | + 若目标语言缺失则回退到另一种。 | ||
| 40 | + """ | ||
| 41 | + obj = src.get(base) | ||
| 42 | + if isinstance(obj, dict): | ||
| 43 | + zh_val = obj.get("zh") | ||
| 44 | + en_val = obj.get("en") | ||
| 45 | + else: | ||
| 46 | + zh_val = src.get(f"{base}_zh") | ||
| 47 | + en_val = src.get(f"{base}_en") | ||
| 38 | if lang == "zh": | 48 | if lang == "zh": |
| 39 | return zh_val or en_val | 49 | return zh_val or en_val |
| 40 | - else: | ||
| 41 | - return en_val or zh_val | 50 | + return en_val or zh_val |
| 42 | 51 | ||
| 43 | for hit in es_hits: | 52 | for hit in es_hits: |
| 44 | source = hit.get('_source', {}) | 53 | source = hit.get('_source', {}) |
| @@ -60,7 +69,7 @@ class ResultFormatter: | @@ -60,7 +69,7 @@ class ResultFormatter: | ||
| 60 | description = pick_lang_field(source, "description") | 69 | description = pick_lang_field(source, "description") |
| 61 | vendor = pick_lang_field(source, "vendor") | 70 | vendor = pick_lang_field(source, "vendor") |
| 62 | category_path = pick_lang_field(source, "category_path") | 71 | category_path = pick_lang_field(source, "category_path") |
| 63 | - category_name = pick_lang_field(source, "category_name") | 72 | + category_name = pick_lang_field(source, "category_name_text") or source.get("category_name") |
| 64 | 73 | ||
| 65 | # Extract SKUs | 74 | # Extract SKUs |
| 66 | skus = [] | 75 | skus = [] |
config/config.yaml
| @@ -15,20 +15,20 @@ es_settings: | @@ -15,20 +15,20 @@ es_settings: | ||
| 15 | # 只配置权重,不配置字段结构(字段结构由 mappings/search_products.json 定义) | 15 | # 只配置权重,不配置字段结构(字段结构由 mappings/search_products.json 定义) |
| 16 | field_boosts: | 16 | field_boosts: |
| 17 | # 文本相关性字段 | 17 | # 文本相关性字段 |
| 18 | - title_zh: 3.0 | ||
| 19 | - brief_zh: 1.5 | ||
| 20 | - description_zh: 1.0 | ||
| 21 | - vendor_zh: 1.5 | ||
| 22 | - title_en: 3.0 | ||
| 23 | - brief_en: 1.5 | ||
| 24 | - description_en: 1.0 | ||
| 25 | - vendor_en: 1.5 | 18 | + "title.zh": 3.0 |
| 19 | + "brief.zh": 1.5 | ||
| 20 | + "description.zh": 1.0 | ||
| 21 | + "vendor.zh": 1.5 | ||
| 22 | + "title.en": 3.0 | ||
| 23 | + "brief.en": 1.5 | ||
| 24 | + "description.en": 1.0 | ||
| 25 | + "vendor.en": 1.5 | ||
| 26 | 26 | ||
| 27 | # 分类相关字段 | 27 | # 分类相关字段 |
| 28 | - category_path_zh: 1.5 | ||
| 29 | - category_name_zh: 1.5 | ||
| 30 | - category_path_en: 1.5 | ||
| 31 | - category_name_en: 1.5 | 28 | + "category_path.zh": 1.5 |
| 29 | + "category_name_text.zh": 1.5 | ||
| 30 | + "category_path.en": 1.5 | ||
| 31 | + "category_name_text.en": 1.5 | ||
| 32 | 32 | ||
| 33 | # 标签和属性值字段 | 33 | # 标签和属性值字段 |
| 34 | tags: 1.0 | 34 | tags: 1.0 |
| @@ -42,33 +42,33 @@ indexes: | @@ -42,33 +42,33 @@ indexes: | ||
| 42 | - name: "default" | 42 | - name: "default" |
| 43 | label: "默认搜索" | 43 | label: "默认搜索" |
| 44 | fields: | 44 | fields: |
| 45 | - - "title_zh" | ||
| 46 | - - "brief_zh" | ||
| 47 | - - "description_zh" | ||
| 48 | - - "vendor_zh" | 45 | + - "title.zh" |
| 46 | + - "brief.zh" | ||
| 47 | + - "description.zh" | ||
| 48 | + - "vendor.zh" | ||
| 49 | - "tags" | 49 | - "tags" |
| 50 | - - "category_path_zh" | ||
| 51 | - - "category_name_zh" | 50 | + - "category_path.zh" |
| 51 | + - "category_name_text.zh" | ||
| 52 | - "option1_values" | 52 | - "option1_values" |
| 53 | boost: 1.0 | 53 | boost: 1.0 |
| 54 | 54 | ||
| 55 | - name: "title" | 55 | - name: "title" |
| 56 | label: "标题搜索" | 56 | label: "标题搜索" |
| 57 | fields: | 57 | fields: |
| 58 | - - "title_zh" | 58 | + - "title.zh" |
| 59 | boost: 2.0 | 59 | boost: 2.0 |
| 60 | 60 | ||
| 61 | - name: "vendor" | 61 | - name: "vendor" |
| 62 | label: "品牌搜索" | 62 | label: "品牌搜索" |
| 63 | fields: | 63 | fields: |
| 64 | - - "vendor_zh" | 64 | + - "vendor.zh" |
| 65 | boost: 1.5 | 65 | boost: 1.5 |
| 66 | 66 | ||
| 67 | - name: "category" | 67 | - name: "category" |
| 68 | label: "类目搜索" | 68 | label: "类目搜索" |
| 69 | fields: | 69 | fields: |
| 70 | - - "category_path_zh" | ||
| 71 | - - "category_name_zh" | 70 | + - "category_path.zh" |
| 71 | + - "category_name_text.zh" | ||
| 72 | boost: 1.5 | 72 | boost: 1.5 |
| 73 | 73 | ||
| 74 | - name: "tags" | 74 | - name: "tags" |
config/utils.py
| @@ -17,7 +17,7 @@ def get_match_fields_for_index(config: SearchConfig, index_name: str = "default" | @@ -17,7 +17,7 @@ def get_match_fields_for_index(config: SearchConfig, index_name: str = "default" | ||
| 17 | index_name: Name of the index domain (default: "default") | 17 | index_name: Name of the index domain (default: "default") |
| 18 | 18 | ||
| 19 | Returns: | 19 | Returns: |
| 20 | - List of field names with boost, e.g., ["title_zh^3.0", "brief_zh^1.5"] | 20 | + List of field names with boost, e.g., ["title.zh^3.0", "brief.zh^1.5"] |
| 21 | """ | 21 | """ |
| 22 | # Find the index config | 22 | # Find the index config |
| 23 | index_config = None | 23 | index_config = None |
docs/MySQL到ES字段映射说明-业务版.md
| @@ -28,14 +28,14 @@ | @@ -28,14 +28,14 @@ | ||
| 28 | 28 | ||
| 29 | | ES 字段 | 数据来源表 | 表中字段 | 转换说明 | | 29 | | ES 字段 | 数据来源表 | 表中字段 | 转换说明 | |
| 30 | |---------|-----------|----------|----------| | 30 | |---------|-----------|----------|----------| |
| 31 | -| `title_zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 | | ||
| 32 | -| `title_en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 | | ||
| 33 | -| `brief_zh` | SPU 表 | `brief` | 同上 | | ||
| 34 | -| `brief_en` | SPU 表 | `brief` | 同上 | | ||
| 35 | -| `description_zh` | SPU 表 | `description` | 同上 | | ||
| 36 | -| `description_en` | SPU 表 | `description` | 同上 | | ||
| 37 | -| `vendor_zh` | SPU 表 | `vendor` | 同上 | | ||
| 38 | -| `vendor_en` | SPU 表 | `vendor` | 同上 | | 31 | +| `title.zh` | SPU 表 | `title` | 如果主语言是中文,直接使用;否则翻译为中文 | |
| 32 | +| `title.en` | SPU 表 | `title` | 如果主语言是英文,直接使用;否则翻译为英文 | | ||
| 33 | +| `brief.zh` | SPU 表 | `brief` | 同上 | | ||
| 34 | +| `brief.en` | SPU 表 | `brief` | 同上 | | ||
| 35 | +| `description.zh` | SPU 表 | `description` | 同上 | | ||
| 36 | +| `description.en` | SPU 表 | `description` | 同上 | | ||
| 37 | +| `vendor.zh` | SPU 表 | `vendor` | 同上 | | ||
| 38 | +| `vendor.en` | SPU 表 | `vendor` | 同上 | | ||
| 39 | 39 | ||
| 40 | **翻译规则:** | 40 | **翻译规则:** |
| 41 | - 根据租户配置的 `primary_language` 确定主语言 | 41 | - 根据租户配置的 `primary_language` 确定主语言 |
| @@ -104,7 +104,7 @@ category_ids = ["1", "2", "3"] | @@ -104,7 +104,7 @@ category_ids = ["1", "2", "3"] | ||
| 104 | 104 | ||
| 105 | | ES 字段 | 数据来源 | 转换说明 | | 105 | | ES 字段 | 数据来源 | 转换说明 | |
| 106 | |---------|----------|----------| | 106 | |---------|----------|----------| |
| 107 | -| `category_path_zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` | | 107 | +| `category_path.zh` | 类目名称列表 | 用 `/` 连接:`"电子产品/手机/iPhone"` | |
| 108 | | `category1_name` | 类目名称列表[0] | 一级类目:`"电子产品"` | | 108 | | `category1_name` | 类目名称列表[0] | 一级类目:`"电子产品"` | |
| 109 | | `category2_name` | 类目名称列表[1] | 二级类目:`"手机"` | | 109 | | `category2_name` | 类目名称列表[1] | 二级类目:`"手机"` | |
| 110 | | `category3_name` | 类目名称列表[2] | 三级类目:`"iPhone"` | | 110 | | `category3_name` | 类目名称列表[2] | 三级类目:`"iPhone"` | |
| @@ -300,13 +300,13 @@ sku_weight_units = 去重后的重量单位列表 | @@ -300,13 +300,13 @@ sku_weight_units = 去重后的重量单位列表 | ||
| 300 | 300 | ||
| 301 | | ES 字段 | 数据来源 | 转换说明 | | 301 | | ES 字段 | 数据来源 | 转换说明 | |
| 302 | |---------|----------|----------| | 302 | |---------|----------|----------| |
| 303 | -| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br/>优先使用 `title_en`,如果没有则使用 `title_zh` | | 303 | +| `title_embedding` | SPU 表 `title` | 使用文本编码器(BGE)将标题转换为 1024 维向量<br/>优先使用 `title.en`,如果没有则使用 `title.zh` | |
| 304 | 304 | ||
| 305 | **生成逻辑:** | 305 | **生成逻辑:** |
| 306 | 306 | ||
| 307 | ``` | 307 | ``` |
| 308 | 如果启用向量搜索: | 308 | 如果启用向量搜索: |
| 309 | - 文本 = title_en 或 title_zh | 309 | + 文本 = title.en 或 title.zh |
| 310 | 向量 = 文本编码器.encode(文本) | 310 | 向量 = 文本编码器.encode(文本) |
| 311 | title_embedding = 向量(1024 维浮点数组) | 311 | title_embedding = 向量(1024 维浮点数组) |
| 312 | ``` | 312 | ``` |
| @@ -322,17 +322,17 @@ sku_weight_units = 去重后的重量单位列表 | @@ -322,17 +322,17 @@ sku_weight_units = 去重后的重量单位列表 | ||
| 322 | | **基础字段** | | 322 | | **基础字段** | |
| 323 | | `tenant_id` | SPU | `tenant_id` | 租户ID | | 323 | | `tenant_id` | SPU | `tenant_id` | 租户ID | |
| 324 | | `spu_id` | SPU | `id` | 商品ID | | 324 | | `spu_id` | SPU | `id` | 商品ID | |
| 325 | -| `title_zh/en` | SPU | `title` | 标题(多语言) | | ||
| 326 | -| `brief_zh/en` | SPU | `brief` | 简介(多语言) | | ||
| 327 | -| `description_zh/en` | SPU | `description` | 描述(多语言) | | ||
| 328 | -| `vendor_zh/en` | SPU | `vendor` | 品牌(多语言) | | 325 | +| `title.zh/en` | SPU | `title` | 标题(多语言) | |
| 326 | +| `brief.zh/en` | SPU | `brief` | 简介(多语言) | | ||
| 327 | +| `description.zh/en` | SPU | `description` | 描述(多语言) | | ||
| 328 | +| `vendor.zh/en` | SPU | `vendor` | 品牌(多语言) | | ||
| 329 | | `tags` | SPU | `tags` | 标签数组 | | 329 | | `tags` | SPU | `tags` | 标签数组 | |
| 330 | | `image_url` | SPU | `image_src` | 主图URL | | 330 | | `image_url` | SPU | `image_src` | 主图URL | |
| 331 | | `sales` | SPU | `fake_sales` | 销量 | | 331 | | `sales` | SPU | `fake_sales` | 销量 | |
| 332 | | `create_time` | SPU | `create_time` | 创建时间 | | 332 | | `create_time` | SPU | `create_time` | 创建时间 | |
| 333 | | `update_time` | SPU | `update_time` | 更新时间 | | 333 | | `update_time` | SPU | `update_time` | 更新时间 | |
| 334 | | **类别字段** | | 334 | | **类别字段** | |
| 335 | -| `category_path_zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 | | 335 | +| `category_path.zh` | SPU + 类目映射 | `category_path` → 类目名称 | 类目路径 | |
| 336 | | `category1_name` | SPU + 类目映射 | `category_path` → 类目名称[0] | 一级类目 | | 336 | | `category1_name` | SPU + 类目映射 | `category_path` → 类目名称[0] | 一级类目 | |
| 337 | | `category2_name` | SPU + 类目映射 | `category_path` → 类目名称[1] | 二级类目 | | 337 | | `category2_name` | SPU + 类目映射 | `category_path` → 类目名称[1] | 二级类目 | |
| 338 | | `category3_name` | SPU + 类目映射 | `category_path` → 类目名称[2] | 三级类目 | | 338 | | `category3_name` | SPU + 类目映射 | `category_path` → 类目名称[2] | 三级类目 | |
| @@ -451,14 +451,14 @@ GET /search_products/_search | @@ -451,14 +451,14 @@ GET /search_products/_search | ||
| 451 | "_source": { | 451 | "_source": { |
| 452 | "tenant_id": "162", | 452 | "tenant_id": "162", |
| 453 | "spu_id": "74174", | 453 | "spu_id": "74174", |
| 454 | - "title_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】", | ||
| 455 | - "title_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging", | ||
| 456 | - "brief_zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】", | ||
| 457 | - "brief_en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging", | ||
| 458 | - "description_zh": "<p>实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】</p>", | ||
| 459 | - "description_en": "<p>Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]</p", | ||
| 460 | - "vendor_zh": "顺达", | ||
| 461 | - "vendor_en": "Shunda", | 454 | + "title.zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】", |
| 455 | + "title.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging", | ||
| 456 | + "brief.zh": "实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】", | ||
| 457 | + "brief.en": "Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging", | ||
| 458 | + "description.zh": "<p>实色二阶碳纤维魔方 方形 2阶 塑料【英文包装】</p>", | ||
| 459 | + "description.en": "<p>Solid Color 2nd Order Carbon Fiber Rubik's Cube Square 2 Steps Plastic [English Packaging]</p", | ||
| 460 | + "vendor.zh": "顺达", | ||
| 461 | + "vendor.en": "Shunda", | ||
| 462 | "title_embedding": [ | 462 | "title_embedding": [ |
| 463 | -0.005125104915350676, | 463 | -0.005125104915350676, |
| 464 | ... | 464 | ... |
| @@ -468,12 +468,12 @@ GET /search_products/_search | @@ -468,12 +468,12 @@ GET /search_products/_search | ||
| 468 | "魔方", | 468 | "魔方", |
| 469 | "ShunDa" | 469 | "ShunDa" |
| 470 | ], | 470 | ], |
| 471 | - "category_path_zh": "玩具/汽车", | ||
| 472 | - "category_path_en": null, | 471 | + "category_path.zh": "玩具/汽车", |
| 472 | + "category_path.en": null, | ||
| 473 | "category1_name": "玩具", | 473 | "category1_name": "玩具", |
| 474 | "category2_name": "汽车", | 474 | "category2_name": "汽车", |
| 475 | - "category_name_zh": "汽车", | ||
| 476 | - "category_name_en": null, | 475 | + "category_name_text.zh": "汽车", |
| 476 | + "category_name_text.en": null, | ||
| 477 | "category_name": "汽车", | 477 | "category_name": "汽车", |
| 478 | "category_id": "593389466647880832", | 478 | "category_id": "593389466647880832", |
| 479 | "category_level": 2, | 479 | "category_level": 2, |
| @@ -599,7 +599,7 @@ category_ids ["1", "2", "3"] | @@ -599,7 +599,7 @@ category_ids ["1", "2", "3"] | ||
| 599 | ↓ [通过映射表转换] | 599 | ↓ [通过映射表转换] |
| 600 | category_names ["电子产品", "手机", "iPhone"] | 600 | category_names ["电子产品", "手机", "iPhone"] |
| 601 | ↓ [构建字段] | 601 | ↓ [构建字段] |
| 602 | -category_path_zh: "电子产品/手机/iPhone" | 602 | +category_path.zh: "电子产品/手机/iPhone" |
| 603 | category1_name: "电子产品" | 603 | category1_name: "电子产品" |
| 604 | category2_name: "手机" | 604 | category2_name: "手机" |
| 605 | category3_name: "iPhone" | 605 | category3_name: "iPhone" |
| @@ -644,12 +644,12 @@ sku_prices: [99.99, 109.99, 129.99] | @@ -644,12 +644,12 @@ sku_prices: [99.99, 109.99, 129.99] | ||
| 644 | |--------|---------|------| | 644 | |--------|---------|------| |
| 645 | | `id` | `spu_id` | 商品ID | | 645 | | `id` | `spu_id` | 商品ID | |
| 646 | | `tenant_id` | `tenant_id` | 租户ID | | 646 | | `tenant_id` | `tenant_id` | 租户ID | |
| 647 | -| `title` | `title_zh/en` | 标题 | | ||
| 648 | -| `brief` | `brief_zh/en` | 简介 | | ||
| 649 | -| `description` | `description_zh/en` | 描述 | | ||
| 650 | -| `vendor` | `vendor_zh/en` | 品牌 | | 647 | +| `title` | `title.zh/en` | 标题 | |
| 648 | +| `brief` | `brief.zh/en` | 简介 | | ||
| 649 | +| `description` | `description.zh/en` | 描述 | | ||
| 650 | +| `vendor` | `vendor.zh/en` | 品牌 | | ||
| 651 | | `tags` | `tags` | 标签 | | 651 | | `tags` | `tags` | 标签 | |
| 652 | -| `category_path` | `category_path_zh` | 类目路径 | | 652 | +| `category_path` | `category_path.zh` | 类目路径 | |
| 653 | | `category_id` | `category_id` | 类目ID | | 653 | | `category_id` | `category_id` | 类目ID | |
| 654 | | `category_level` | `category_level` | 类目层级 | | 654 | | `category_level` | `category_level` | 类目层级 | |
| 655 | | `image_src` | `image_url` | 主图 | | 655 | | `image_src` | `image_url` | 主图 | |
docs/MySQL到ES文档映射说明.md
| @@ -217,10 +217,10 @@ WHERE deleted = 0 AND category_id IS NOT NULL | @@ -217,10 +217,10 @@ WHERE deleted = 0 AND category_id IS NOT NULL | ||
| 217 | 文本字段支持中英文双语,根据租户配置进行自动翻译。 | 217 | 文本字段支持中英文双语,根据租户配置进行自动翻译。 |
| 218 | 218 | ||
| 219 | **字段列表:** | 219 | **字段列表:** |
| 220 | -- `title_zh` / `title_en` | ||
| 221 | -- `brief_zh` / `brief_en` | ||
| 222 | -- `description_zh` / `description_en` | ||
| 223 | -- `vendor_zh` / `vendor_en` | 220 | +- `title.zh` / `title.en` |
| 221 | +- `brief.zh` / `brief.en` | ||
| 222 | +- `description.zh` / `description.en` | ||
| 223 | +- `vendor.zh` / `vendor.en` | ||
| 224 | 224 | ||
| 225 | **填充逻辑:** | 225 | **填充逻辑:** |
| 226 | 226 | ||
| @@ -244,8 +244,8 @@ WHERE deleted = 0 AND category_id IS NOT NULL | @@ -244,8 +244,8 @@ WHERE deleted = 0 AND category_id IS NOT NULL | ||
| 244 | translate_to_en=translate_to_en, | 244 | translate_to_en=translate_to_en, |
| 245 | translate_to_zh=translate_to_zh, | 245 | translate_to_zh=translate_to_zh, |
| 246 | ) | 246 | ) |
| 247 | - doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None) | ||
| 248 | - doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None) | 247 | + doc['title.zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None) |
| 248 | + doc['title.en'] = translations.get('en') or (title_text if primary_lang == 'en' else None) | ||
| 249 | ``` | 249 | ``` |
| 250 | 250 | ||
| 251 | **代码位置:** `indexer/document_transformer.py:168-298` | 251 | **代码位置:** `indexer/document_transformer.py:168-298` |
| @@ -340,7 +340,7 @@ for cid in category_ids: | @@ -340,7 +340,7 @@ for cid in category_ids: | ||
| 340 | if category_names: | 340 | if category_names: |
| 341 | # 构建类目路径字符串 | 341 | # 构建类目路径字符串 |
| 342 | category_path_str = '/'.join(category_names) | 342 | category_path_str = '/'.join(category_names) |
| 343 | - doc['category_path_zh'] = category_path_str # 例如:"电子产品/手机/iPhone" | 343 | + doc['category_path.zh'] = category_path_str # 例如:"电子产品/手机/iPhone" |
| 344 | 344 | ||
| 345 | # 填充分层类目名称 | 345 | # 填充分层类目名称 |
| 346 | if len(category_names) > 0: | 346 | if len(category_names) > 0: |
| @@ -358,7 +358,7 @@ if category_names: | @@ -358,7 +358,7 @@ if category_names: | ||
| 358 | ```python | 358 | ```python |
| 359 | elif pd.notna(spu_row.get('category')): | 359 | elif pd.notna(spu_row.get('category')): |
| 360 | category = str(spu_row['category']) | 360 | category = str(spu_row['category']) |
| 361 | - doc['category_name_zh'] = category | 361 | + doc['category_name_text.zh'] = category |
| 362 | 362 | ||
| 363 | # 尝试从 category 字段解析多级分类(如果包含 "/") | 363 | # 尝试从 category 字段解析多级分类(如果包含 "/") |
| 364 | if '/' in category: | 364 | if '/' in category: |
| @@ -375,8 +375,8 @@ elif pd.notna(spu_row.get('category')): | @@ -375,8 +375,8 @@ elif pd.notna(spu_row.get('category')): | ||
| 375 | 375 | ||
| 376 | | ES 字段 | 类型 | 说明 | | 376 | | ES 字段 | 类型 | 说明 | |
| 377 | |---------|------|------| | 377 | |---------|------|------| |
| 378 | -| `category_path_zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") | | ||
| 379 | -| `category_path_en` | text | 类目路径英文(暂未实现) | | 378 | +| `category_path.zh` | text | 类目路径字符串(如:"电子产品/手机/iPhone") | |
| 379 | +| `category_path.en` | text | 类目路径英文(暂未实现) | | ||
| 380 | | `category1_name` | keyword | 一级类目名称 | | 380 | | `category1_name` | keyword | 一级类目名称 | |
| 381 | | `category2_name` | keyword | 二级类目名称 | | 381 | | `category2_name` | keyword | 二级类目名称 | |
| 382 | | `category3_name` | keyword | 三级类目名称 | | 382 | | `category3_name` | keyword | 三级类目名称 | |
| @@ -673,7 +673,7 @@ if enable_embedding and encoder and documents: | @@ -673,7 +673,7 @@ if enable_embedding and encoder and documents: | ||
| 673 | title_texts = [] | 673 | title_texts = [] |
| 674 | title_doc_indices = [] | 674 | title_doc_indices = [] |
| 675 | for i, (_, doc) in enumerate(documents): | 675 | for i, (_, doc) in enumerate(documents): |
| 676 | - title_text = doc.get("title_en") or doc.get("title_zh") | 676 | + title_text = doc.get("title.en") or doc.get("title.zh") |
| 677 | if title_text and str(title_text).strip(): | 677 | if title_text and str(title_text).strip(): |
| 678 | title_texts.append(str(title_text)) | 678 | title_texts.append(str(title_text)) |
| 679 | title_doc_indices.append(i) | 679 | title_doc_indices.append(i) |
| @@ -858,16 +858,16 @@ spu_df["_is_deleted"] = spu_df["deleted"].apply(_is_deleted_value) | @@ -858,16 +858,16 @@ spu_df["_is_deleted"] = spu_df["deleted"].apply(_is_deleted_value) | ||
| 858 | { | 858 | { |
| 859 | "tenant_id": "1", | 859 | "tenant_id": "1", |
| 860 | "spu_id": "12345", | 860 | "spu_id": "12345", |
| 861 | - "title_zh": "iPhone 15 Pro Max", | ||
| 862 | - "title_en": "iPhone 15 Pro Max", | ||
| 863 | - "brief_zh": "最新款 iPhone", | ||
| 864 | - "brief_en": "Latest iPhone", | ||
| 865 | - "description_zh": "详细描述...", | ||
| 866 | - "description_en": "Detailed description...", | ||
| 867 | - "vendor_zh": "Apple", | ||
| 868 | - "vendor_en": "Apple", | 861 | + "title.zh": "iPhone 15 Pro Max", |
| 862 | + "title.en": "iPhone 15 Pro Max", | ||
| 863 | + "brief.zh": "最新款 iPhone", | ||
| 864 | + "brief.en": "Latest iPhone", | ||
| 865 | + "description.zh": "详细描述...", | ||
| 866 | + "description.en": "Detailed description...", | ||
| 867 | + "vendor.zh": "Apple", | ||
| 868 | + "vendor.en": "Apple", | ||
| 869 | "tags": ["手机", "智能手机", "Apple"], | 869 | "tags": ["手机", "智能手机", "Apple"], |
| 870 | - "category_path_zh": "电子产品/手机/iPhone", | 870 | + "category_path.zh": "电子产品/手机/iPhone", |
| 871 | "category1_name": "电子产品", | 871 | "category1_name": "电子产品", |
| 872 | "category2_name": "手机", | 872 | "category2_name": "手机", |
| 873 | "category3_name": "iPhone", | 873 | "category3_name": "iPhone", |
docs/Search-API-Examples.md
| @@ -132,7 +132,7 @@ curl -X POST "http://localhost:6002/search/" \ | @@ -132,7 +132,7 @@ curl -X POST "http://localhost:6002/search/" \ | ||
| 132 | "language": "zh", | 132 | "language": "zh", |
| 133 | "filters": { | 133 | "filters": { |
| 134 | "category_name": "手机", | 134 | "category_name": "手机", |
| 135 | - "vendor_zh.keyword": "奇乐" | 135 | + "vendor.zh.keyword": "奇乐" |
| 136 | } | 136 | } |
| 137 | }' | 137 | }' |
| 138 | ``` | 138 | ``` |
| @@ -294,7 +294,7 @@ curl -X POST "http://localhost:6002/search/" \ | @@ -294,7 +294,7 @@ curl -X POST "http://localhost:6002/search/" \ | ||
| 294 | "language": "zh", | 294 | "language": "zh", |
| 295 | "filters": { | 295 | "filters": { |
| 296 | "category_name": ["手机", "电子产品"], | 296 | "category_name": ["手机", "电子产品"], |
| 297 | - "vendor_zh.keyword": "品牌A" | 297 | + "vendor.zh.keyword": "品牌A" |
| 298 | }, | 298 | }, |
| 299 | "range_filters": { | 299 | "range_filters": { |
| 300 | "min_price": { | 300 | "min_price": { |
docs/基础配置指南.md
| @@ -26,12 +26,12 @@ | @@ -26,12 +26,12 @@ | ||
| 26 | - `create_time`, `update_time` (date) - 时间字段 | 26 | - `create_time`, `update_time` (date) - 时间字段 |
| 27 | 27 | ||
| 28 | #### 多语言文本字段 | 28 | #### 多语言文本字段 |
| 29 | -- `title_zh`, `title_en` (text) - 标题(中英文) | ||
| 30 | -- `brief_zh`, `brief_en` (text) - 短描述(中英文) | ||
| 31 | -- `description_zh`, `description_en` (text) - 详细描述(中英文) | ||
| 32 | -- `vendor_zh`, `vendor_en` (text) - 供应商/品牌(中英文,含keyword子字段) | ||
| 33 | -- `category_path_zh`, `category_path_en` (text) - 类目路径(中英文) | ||
| 34 | -- `category_name_zh`, `category_name_en` (text) - 类目名称(中英文) | 29 | +- `title.zh`, `title.en` (text) - 标题(中英文) |
| 30 | +- `brief.zh`, `brief.en` (text) - 短描述(中英文) | ||
| 31 | +- `description.zh`, `description.en` (text) - 详细描述(中英文) | ||
| 32 | +- `vendor.zh`, `vendor.en` (text) - 供应商/品牌(中英文,含keyword子字段) | ||
| 33 | +- `category_path.zh`, `category_path.en` (text) - 类目路径(中英文) | ||
| 34 | +- `category_name_text.zh`, `category_name_text.en` (text) - 类目名称(中英文) | ||
| 35 | 35 | ||
| 36 | #### 类目字段 | 36 | #### 类目字段 |
| 37 | - `category_id` (keyword) - 类目ID | 37 | - `category_id` (keyword) - 类目ID |
| @@ -64,12 +64,12 @@ | @@ -64,12 +64,12 @@ | ||
| 64 | ### 文本召回字段 | 64 | ### 文本召回字段 |
| 65 | 65 | ||
| 66 | 默认同时搜索以下字段(中英文都包含): | 66 | 默认同时搜索以下字段(中英文都包含): |
| 67 | -- `title_zh^3.0`, `title_en^3.0` | ||
| 68 | -- `brief_zh^1.5`, `brief_en^1.5` | ||
| 69 | -- `description_zh^1.0`, `description_en^1.0` | ||
| 70 | -- `vendor_zh^1.5`, `vendor_en^1.5` | ||
| 71 | -- `category_path_zh^1.5`, `category_path_en^1.5` | ||
| 72 | -- `category_name_zh^1.5`, `category_name_en^1.5` | 67 | +- `title.zh^3.0`, `title.en^3.0` |
| 68 | +- `brief.zh^1.5`, `brief.en^1.5` | ||
| 69 | +- `description.zh^1.0`, `description.en^1.0` | ||
| 70 | +- `vendor.zh^1.5`, `vendor.en^1.5` | ||
| 71 | +- `category_path.zh^1.5`, `category_path.en^1.5` | ||
| 72 | +- `category_name_text.zh^1.5`, `category_name_text.en^1.5` | ||
| 73 | - `tags^1.0` | 73 | - `tags^1.0` |
| 74 | 74 | ||
| 75 | ### 查询架构 | 75 | ### 查询架构 |
| @@ -126,12 +126,12 @@ | @@ -126,12 +126,12 @@ | ||
| 126 | - `language="en"`: 优先返回 `*_en` 字段,如果为空则回退到 `*_zh` 字段 | 126 | - `language="en"`: 优先返回 `*_en` 字段,如果为空则回退到 `*_zh` 字段 |
| 127 | 127 | ||
| 128 | 映射规则: | 128 | 映射规则: |
| 129 | -- `title_zh/en` → `title` | ||
| 130 | -- `brief_zh/en` → `brief` | ||
| 131 | -- `description_zh/en` → `description` | ||
| 132 | -- `vendor_zh/en` → `vendor` | ||
| 133 | -- `category_path_zh/en` → `category_path` | ||
| 134 | -- `category_name_zh/en` → `category_name` | 129 | +- `title.zh/en` → `title` |
| 130 | +- `brief.zh/en` → `brief` | ||
| 131 | +- `description.zh/en` → `description` | ||
| 132 | +- `vendor.zh/en` → `vendor` | ||
| 133 | +- `category_path.zh/en` → `category_path` | ||
| 134 | +- `category_name_text.zh/en` → `category_name` | ||
| 135 | 135 | ||
| 136 | ## 配置修改 | 136 | ## 配置修改 |
| 137 | 137 |
docs/常用查询 - ES.md
| @@ -22,7 +22,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -22,7 +22,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 22 | 22 | ||
| 23 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 23 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 24 | "size": 100, | 24 | "size": 100, |
| 25 | - "_source": ["title_zh", "title_en"], | 25 | + "_source": ["title"], |
| 26 | "query": { | 26 | "query": { |
| 27 | "bool": { | 27 | "bool": { |
| 28 | "filter": [ | 28 | "filter": [ |
| @@ -34,13 +34,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -34,13 +34,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 34 | 34 | ||
| 35 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 35 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 36 | "size": 1, | 36 | "size": 1, |
| 37 | - "_source": ["title_zh", "title_en"], | 37 | + "_source": ["title"], |
| 38 | "query": { | 38 | "query": { |
| 39 | "bool": { | 39 | "bool": { |
| 40 | "must": [ | 40 | "must": [ |
| 41 | { | 41 | { |
| 42 | "match": { | 42 | "match": { |
| 43 | - "title_zh": { | 43 | + "title.zh": { |
| 44 | "query": "裙子" | 44 | "query": "裙子" |
| 45 | } | 45 | } |
| 46 | } | 46 | } |
| @@ -54,7 +54,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -54,7 +54,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 54 | }' | 54 | }' |
| 55 | 55 | ||
| 56 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ | 56 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ |
| 57 | - "analyzer": "query_ansj", | 57 | + "analyzer": "icu_analyzer", |
| 58 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" | 58 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 59 | }' | 59 | }' |
| 60 | 60 | ||
| @@ -73,13 +73,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -73,13 +73,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 73 | "multi_match": { | 73 | "multi_match": { |
| 74 | "_name": "base_query", | 74 | "_name": "base_query", |
| 75 | "fields": [ | 75 | "fields": [ |
| 76 | - "title_zh^3.0", | ||
| 77 | - "brief_zh^1.5", | ||
| 78 | - "description_zh", | ||
| 79 | - "vendor_zh^1.5", | 76 | + "title.zh^3.0", |
| 77 | + "brief.zh^1.5", | ||
| 78 | + "description.zh", | ||
| 79 | + "vendor.zh^1.5", | ||
| 80 | "tags", | 80 | "tags", |
| 81 | - "category_path_zh^1.5", | ||
| 82 | - "category_name_zh^1.5", | 81 | + "category_path.zh^1.5", |
| 82 | + "category_name_text.zh^1.5", | ||
| 83 | "option1_values^0.5" | 83 | "option1_values^0.5" |
| 84 | ], | 84 | ], |
| 85 | "minimum_should_match": "75%", | 85 | "minimum_should_match": "75%", |
| @@ -110,13 +110,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -110,13 +110,13 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 110 | "multi_match": { | 110 | "multi_match": { |
| 111 | "_name": "base_query", | 111 | "_name": "base_query", |
| 112 | "fields": [ | 112 | "fields": [ |
| 113 | - "title_zh^3.0", | ||
| 114 | - "brief_zh^1.5", | ||
| 115 | - "description_zh", | ||
| 116 | - "vendor_zh^1.5", | 113 | + "title.zh^3.0", |
| 114 | + "brief.zh^1.5", | ||
| 115 | + "description.zh", | ||
| 116 | + "vendor.zh^1.5", | ||
| 117 | "tags", | 117 | "tags", |
| 118 | - "category_path_zh^1.5", | ||
| 119 | - "category_name_zh^1.5", | 118 | + "category_path.zh^1.5", |
| 119 | + "category_name_text.zh^1.5", | ||
| 120 | "option1_values^0.5" | 120 | "option1_values^0.5" |
| 121 | ], | 121 | ], |
| 122 | "minimum_should_match": "75%", | 122 | "minimum_should_match": "75%", |
| @@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 271 | "size": 1, | 271 | "size": 1, |
| 272 | "_source": [ | 272 | "_source": [ |
| 273 | "spu_id", | 273 | "spu_id", |
| 274 | - "title_zh", | 274 | + "title", |
| 275 | "category1_name", | 275 | "category1_name", |
| 276 | "category2_name", | 276 | "category2_name", |
| 277 | "category3_name", | 277 | "category3_name", |
| @@ -552,7 +552,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -552,7 +552,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 552 | } | 552 | } |
| 553 | }, | 553 | }, |
| 554 | "size": 1, | 554 | "size": 1, |
| 555 | - "_source": ["spu_id", "title_zh", "specifications"] | 555 | + "_source": ["spu_id", "title", "specifications"] |
| 556 | }' | 556 | }' |
| 557 | 557 | ||
| 558 | ## 4. 统计查询 | 558 | ## 4. 统计查询 |
| @@ -597,7 +597,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -597,7 +597,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 597 | } | 597 | } |
| 598 | }, | 598 | }, |
| 599 | "size": 10, | 599 | "size": 10, |
| 600 | - "_source": ["spu_id", "title_zh", "category_name_zh", "category_path_zh"] | 600 | + "_source": ["spu_id", "title", "category_name_text", "category_path"] |
| 601 | }' | 601 | }' |
| 602 | 602 | ||
| 603 | ### 5.2 查找有option但没有specifications的文档(数据转换问题) | 603 | ### 5.2 查找有option但没有specifications的文档(数据转换问题) |
| @@ -614,7 +614,32 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -614,7 +614,32 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 614 | } | 614 | } |
| 615 | }, | 615 | }, |
| 616 | "size": 10, | 616 | "size": 10, |
| 617 | - "_source": ["spu_id", "title_zh", "option1_name", "option2_name", "option3_name", "specifications"] | 617 | + "_source": ["spu_id", "title", "option1_name", "option2_name", "option3_name", "specifications"] |
| 618 | }' | 618 | }' |
| 619 | 619 | ||
| 620 | 620 | ||
| 621 | +重排序: | ||
| 622 | +GET /search_products/_search | ||
| 623 | +{ | ||
| 624 | + "query": { | ||
| 625 | + "match": { | ||
| 626 | + "title.en": { | ||
| 627 | + "query": "quick brown fox", | ||
| 628 | + "minimum_should_match": "90%" | ||
| 629 | + } | ||
| 630 | + } | ||
| 631 | + }, | ||
| 632 | + "rescore": { | ||
| 633 | + "window_size": 50, | ||
| 634 | + "query": { | ||
| 635 | + "rescore_query": { | ||
| 636 | + "match_phrase": { | ||
| 637 | + "title.en": { | ||
| 638 | + "query": "quick brown fox", | ||
| 639 | + "slop": 50 | ||
| 640 | + } | ||
| 641 | + } | ||
| 642 | + } | ||
| 643 | + } | ||
| 644 | + } | ||
| 645 | +} |
docs/搜索API对接指南.md
| @@ -203,7 +203,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -203,7 +203,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 203 | "category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个 | 203 | "category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个 |
| 204 | "category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个 | 204 | "category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个 |
| 205 | "category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个 | 205 | "category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个 |
| 206 | - "vendor_zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个 | 206 | + "vendor.zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个 |
| 207 | "tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个 | 207 | "tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个 |
| 208 | // specifications 嵌套过滤(特殊格式) | 208 | // specifications 嵌套过滤(特殊格式) |
| 209 | "specifications": { | 209 | "specifications": { |
| @@ -274,7 +274,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -274,7 +274,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 274 | - `category_name`: 类目名称 | 274 | - `category_name`: 类目名称 |
| 275 | - `category1_name`, `category2_name`, `category3_name`: 多级类目 | 275 | - `category1_name`, `category2_name`, `category3_name`: 多级类目 |
| 276 | - `category_id`: 类目ID | 276 | - `category_id`: 类目ID |
| 277 | -- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段) | 277 | +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段) |
| 278 | - `tags`: 标签(keyword类型,支持数组) | 278 | - `tags`: 标签(keyword类型,支持数组) |
| 279 | - `option1_name`, `option2_name`, `option3_name`: 选项名称 | 279 | - `option1_name`, `option2_name`, `option3_name`: 选项名称 |
| 280 | - `specifications`: 规格过滤(嵌套字段,格式见上文) | 280 | - `specifications`: 规格过滤(嵌套字段,格式见上文) |
| @@ -602,7 +602,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" | @@ -602,7 +602,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" | ||
| 602 | { | 602 | { |
| 603 | "id": "12345", | 603 | "id": "12345", |
| 604 | "source": { | 604 | "source": { |
| 605 | - "title_zh": "芭比时尚娃娃", | 605 | + "title": { |
| 606 | + "zh": "芭比时尚娃娃" | ||
| 607 | + }, | ||
| 606 | "min_price": 89.99, | 608 | "min_price": 89.99, |
| 607 | "category1_name": "玩具" | 609 | "category1_name": "玩具" |
| 608 | } | 610 | } |
| @@ -739,7 +741,7 @@ curl "http://localhost:6002/search/12345" | @@ -739,7 +741,7 @@ curl "http://localhost:6002/search/12345" | ||
| 739 | | 字段 | 类型 | 说明 | | 741 | | 字段 | 类型 | 说明 | |
| 740 | |------|------|------| | 742 | |------|------|------| |
| 741 | | `spu_id` | string | SPU ID | | 743 | | `spu_id` | string | SPU ID | |
| 742 | -| `title` | string | 商品标题(根据language参数自动选择title_zh或title_en) | | 744 | +| `title` | string | 商品标题(根据language参数自动选择 `title.zh` 或 `title.en`) | |
| 743 | | `brief` | string | 商品短描述(根据language参数自动选择) | | 745 | | `brief` | string | 商品短描述(根据language参数自动选择) | |
| 744 | | `description` | string | 商品详细描述(根据language参数自动选择) | | 746 | | `description` | string | 商品详细描述(根据language参数自动选择) | |
| 745 | | `vendor` | string | 供应商/品牌(根据language参数自动选择) | | 747 | | `vendor` | string | 供应商/品牌(根据language参数自动选择) | |
| @@ -1088,7 +1090,9 @@ curl -X POST "http://localhost:6004/indexer/index" \ | @@ -1088,7 +1090,9 @@ curl -X POST "http://localhost:6004/indexer/index" \ | ||
| 1088 | "document": { | 1090 | "document": { |
| 1089 | "tenant_id": "162", | 1091 | "tenant_id": "162", |
| 1090 | "spu_id": "123", | 1092 | "spu_id": "123", |
| 1091 | - "title_zh": "商品标题", | 1093 | + "title": { |
| 1094 | + "zh": "商品标题" | ||
| 1095 | + }, | ||
| 1092 | ... | 1096 | ... |
| 1093 | } | 1097 | } |
| 1094 | }, | 1098 | }, |
| @@ -1298,7 +1302,7 @@ curl -X GET "http://localhost:6004/indexer/health" | @@ -1298,7 +1302,7 @@ curl -X GET "http://localhost:6004/indexer/health" | ||
| 1298 | "size": 20, | 1302 | "size": 20, |
| 1299 | "language": "zh", | 1303 | "language": "zh", |
| 1300 | "filters": { | 1304 | "filters": { |
| 1301 | - "vendor_zh.keyword": ["品牌A", "品牌B"] | 1305 | + "vendor.zh.keyword": ["品牌A", "品牌B"] |
| 1302 | }, | 1306 | }, |
| 1303 | "range_filters": { | 1307 | "range_filters": { |
| 1304 | "min_price": { | 1308 | "min_price": { |
| @@ -1505,12 +1509,12 @@ curl -X GET "http://localhost:6004/indexer/health" | @@ -1505,12 +1509,12 @@ curl -X GET "http://localhost:6004/indexer/health" | ||
| 1505 | |--------|------|------| | 1509 | |--------|------|------| |
| 1506 | | `tenant_id` | keyword | 租户ID(多租户隔离) | | 1510 | | `tenant_id` | keyword | 租户ID(多租户隔离) | |
| 1507 | | `spu_id` | keyword | SPU ID | | 1511 | | `spu_id` | keyword | SPU ID | |
| 1508 | -| `title_zh`, `title_en` | text | 商品标题(中英文) | | ||
| 1509 | -| `brief_zh`, `brief_en` | text | 商品短描述(中英文) | | ||
| 1510 | -| `description_zh`, `description_en` | text | 商品详细描述(中英文) | | ||
| 1511 | -| `vendor_zh`, `vendor_en` | text | 供应商/品牌(中英文,含keyword子字段) | | ||
| 1512 | -| `category_path_zh`, `category_path_en` | text | 类目路径(中英文,用于搜索) | | ||
| 1513 | -| `category_name_zh`, `category_name_en` | text | 类目名称(中英文,用于搜索) | | 1512 | +| `title.<lang>` | object/text | 商品标题(多语言对象,如 `title.zh`, `title.en`) | |
| 1513 | +| `brief.<lang>` | object/text | 商品短描述(多语言对象,如 `brief.zh`, `brief.en`) | | ||
| 1514 | +| `description.<lang>` | object/text | 商品详细描述(多语言对象,如 `description.zh`, `description.en`) | | ||
| 1515 | +| `vendor.<lang>` | object/text | 供应商/品牌(多语言对象,且带 keyword 子字段,如 `vendor.zh.keyword`) | | ||
| 1516 | +| `category_path.<lang>` | object/text | 类目路径(多语言对象,用于搜索,如 `category_path.zh`) | | ||
| 1517 | +| `category_name_text.<lang>` | object/text | 类目名称(多语言对象,用于搜索,如 `category_name_text.zh`) | | ||
| 1514 | | `category_id` | keyword | 类目ID | | 1518 | | `category_id` | keyword | 类目ID | |
| 1515 | | `category_name` | keyword | 类目名称(用于过滤) | | 1519 | | `category_name` | keyword | 类目名称(用于过滤) | |
| 1516 | | `category_level` | integer | 类目层级 | | 1520 | | `category_level` | integer | 类目层级 | |
| @@ -1552,7 +1556,7 @@ curl -X GET "http://localhost:6004/indexer/health" | @@ -1552,7 +1556,7 @@ curl -X GET "http://localhost:6004/indexer/health" | ||
| 1552 | - `category_name`: 类目名称 | 1556 | - `category_name`: 类目名称 |
| 1553 | - `category1_name`, `category2_name`, `category3_name`: 多级类目 | 1557 | - `category1_name`, `category2_name`, `category3_name`: 多级类目 |
| 1554 | - `category_id`: 类目ID | 1558 | - `category_id`: 类目ID |
| 1555 | -- `vendor_zh.keyword`, `vendor_en.keyword`: 供应商/品牌(使用keyword子字段) | 1559 | +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段) |
| 1556 | - `tags`: 标签(keyword类型) | 1560 | - `tags`: 标签(keyword类型) |
| 1557 | - `option1_name`, `option2_name`, `option3_name`: 选项名称 | 1561 | - `option1_name`, `option2_name`, `option3_name`: 选项名称 |
| 1558 | - `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解)) | 1562 | - `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解)) |
docs/搜索API速查表.md
| @@ -19,7 +19,7 @@ POST /search/ | @@ -19,7 +19,7 @@ POST /search/ | ||
| 19 | "filters": { | 19 | "filters": { |
| 20 | "category_name": "手机", // 单值 | 20 | "category_name": "手机", // 单值 |
| 21 | "category1_name": "服装", // 一级类目 | 21 | "category1_name": "服装", // 一级类目 |
| 22 | - "vendor_zh.keyword": ["奇乐", "品牌A"], // 多值(OR) | 22 | + "vendor.zh.keyword": ["奇乐", "品牌A"], // 多值(OR) |
| 23 | "tags": "手机", // 标签 | 23 | "tags": "手机", // 标签 |
| 24 | // specifications 嵌套过滤 | 24 | // specifications 嵌套过滤 |
| 25 | "specifications": { | 25 | "specifications": { |
docs/相关性检索优化说明.md
| @@ -20,7 +20,7 @@ | @@ -20,7 +20,7 @@ | ||
| 20 | { | 20 | { |
| 21 | "multi_match": { | 21 | "multi_match": { |
| 22 | "query": "戏水动物", | 22 | "query": "戏水动物", |
| 23 | - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], | 23 | + "fields": ["title.zh^3.0", "brief.zh^1.5", ...], |
| 24 | "minimum_should_match": "67%", | 24 | "minimum_should_match": "67%", |
| 25 | "tie_breaker": 0.9, | 25 | "tie_breaker": 0.9, |
| 26 | "boost": 1, | 26 | "boost": 1, |
| @@ -40,7 +40,7 @@ | @@ -40,7 +40,7 @@ | ||
| 40 | { | 40 | { |
| 41 | "multi_match": { | 41 | "multi_match": { |
| 42 | "_name": "base_query", | 42 | "_name": "base_query", |
| 43 | - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], | 43 | + "fields": ["title.zh^3.0", "brief.zh^1.5", ...], |
| 44 | "minimum_should_match": "75%", | 44 | "minimum_should_match": "75%", |
| 45 | "operator": "AND", | 45 | "operator": "AND", |
| 46 | "query": "戏水动物", | 46 | "query": "戏水动物", |
| @@ -51,7 +51,7 @@ | @@ -51,7 +51,7 @@ | ||
| 51 | "multi_match": { | 51 | "multi_match": { |
| 52 | "_name": "base_query_trans_en", | 52 | "_name": "base_query_trans_en", |
| 53 | "boost": 0.4, | 53 | "boost": 0.4, |
| 54 | - "fields": ["title_en^3.0", ...], | 54 | + "fields": ["title.en^3.0", ...], |
| 55 | "minimum_should_match": "75%", | 55 | "minimum_should_match": "75%", |
| 56 | "operator": "AND", | 56 | "operator": "AND", |
| 57 | "query": "water sports", | 57 | "query": "water sports", |
| @@ -61,7 +61,7 @@ | @@ -61,7 +61,7 @@ | ||
| 61 | { | 61 | { |
| 62 | "multi_match": { | 62 | "multi_match": { |
| 63 | "query": "戏水动物", | 63 | "query": "戏水动物", |
| 64 | - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], | 64 | + "fields": ["title.zh^3.0", "brief.zh^1.5", ...], |
| 65 | "type": "phrase", | 65 | "type": "phrase", |
| 66 | "slop": 2, | 66 | "slop": 2, |
| 67 | "boost": 1.0, | 67 | "boost": 1.0, |
| @@ -71,7 +71,7 @@ | @@ -71,7 +71,7 @@ | ||
| 71 | { | 71 | { |
| 72 | "multi_match": { | 72 | "multi_match": { |
| 73 | "query": "戏水 动物", | 73 | "query": "戏水 动物", |
| 74 | - "fields": ["title_zh^3.0", "brief_zh^1.5", ...], | 74 | + "fields": ["title.zh^3.0", "brief.zh^1.5", ...], |
| 75 | "operator": "AND", | 75 | "operator": "AND", |
| 76 | "tie_breaker": 0.9, | 76 | "tie_breaker": 0.9, |
| 77 | "boost": 0.1, | 77 | "boost": 0.1, |
docs/系统设计文档.md
| @@ -31,18 +31,18 @@ | @@ -31,18 +31,18 @@ | ||
| 31 | { | 31 | { |
| 32 | "tenant_id": "1", | 32 | "tenant_id": "1", |
| 33 | "spu_id": "123", | 33 | "spu_id": "123", |
| 34 | - "title_zh": "蓝牙耳机", | ||
| 35 | - "title_en": "Bluetooth Headphones", | ||
| 36 | - "brief_zh": "高品质蓝牙耳机", | ||
| 37 | - "brief_en": "High-quality Bluetooth headphones", | 34 | + "title.zh": "蓝牙耳机", |
| 35 | + "title.en": "Bluetooth Headphones", | ||
| 36 | + "brief.zh": "高品质蓝牙耳机", | ||
| 37 | + "brief.en": "High-quality Bluetooth headphones", | ||
| 38 | "category_name": "电子产品", | 38 | "category_name": "电子产品", |
| 39 | - "category_path_zh": "电子产品/音频设备/耳机", | ||
| 40 | - "category_path_en": "Electronics/Audio/Headphones", | 39 | + "category_path.zh": "电子产品/音频设备/耳机", |
| 40 | + "category_path.en": "Electronics/Audio/Headphones", | ||
| 41 | "category1_name": "电子产品", | 41 | "category1_name": "电子产品", |
| 42 | "category2_name": "音频设备", | 42 | "category2_name": "音频设备", |
| 43 | "category3_name": "耳机", | 43 | "category3_name": "耳机", |
| 44 | - "vendor_zh": "品牌A", | ||
| 45 | - "vendor_en": "Brand A", | 44 | + "vendor.zh": "品牌A", |
| 45 | + "vendor.en": "Brand A", | ||
| 46 | "min_price": 199.99, | 46 | "min_price": 199.99, |
| 47 | "max_price": 299.99, | 47 | "max_price": 299.99, |
| 48 | "option1_name": "color", | 48 | "option1_name": "color", |
| @@ -93,7 +93,7 @@ | @@ -93,7 +93,7 @@ | ||
| 93 | - **查询配置硬编码**:查询相关配置(字段 boost、查询域等)硬编码在 `search/query_config.py` | 93 | - **查询配置硬编码**:查询相关配置(字段 boost、查询域等)硬编码在 `search/query_config.py` |
| 94 | 94 | ||
| 95 | **索引结构特点**: | 95 | **索引结构特点**: |
| 96 | -1. **多语言字段**:所有文本字段支持中英文(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`) | 96 | +1. **多语言字段**:所有文本字段支持中英文(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`) |
| 97 | 2. **嵌套字段**: | 97 | 2. **嵌套字段**: |
| 98 | - `skus`: SKU 嵌套数组(包含价格、库存、选项值等) | 98 | - `skus`: SKU 嵌套数组(包含价格、库存、选项值等) |
| 99 | - `specifications`: 规格嵌套数组(包含 name、value、sku_id) | 99 | - `specifications`: 规格嵌套数组(包含 name、value、sku_id) |
| @@ -125,12 +125,12 @@ | @@ -125,12 +125,12 @@ | ||
| 125 | - `create_time`, `update_time` (date): 创建和更新时间 | 125 | - `create_time`, `update_time` (date): 创建和更新时间 |
| 126 | 126 | ||
| 127 | #### 多语言文本字段 | 127 | #### 多语言文本字段 |
| 128 | -- `title_zh/en` (text): 标题(中英文) | ||
| 129 | -- `brief_zh/en` (text): 短描述(中英文) | ||
| 130 | -- `description_zh/en` (text): 详细描述(中英文) | ||
| 131 | -- `vendor_zh/en` (text): 供应商/品牌(中英文) | ||
| 132 | -- `category_path_zh/en` (text): 类目路径(中英文) | ||
| 133 | -- `category_name_zh/en` (text): 类目名称(中英文) | 128 | +- `title.zh/en` (text): 标题(中英文) |
| 129 | +- `brief.zh/en` (text): 短描述(中英文) | ||
| 130 | +- `description.zh/en` (text): 详细描述(中英文) | ||
| 131 | +- `vendor.zh/en` (text): 供应商/品牌(中英文) | ||
| 132 | +- `category_path.zh/en` (text): 类目路径(中英文) | ||
| 133 | +- `category_name_text.zh/en` (text): 类目名称(中英文) | ||
| 134 | 134 | ||
| 135 | **分析器配置**: | 135 | **分析器配置**: |
| 136 | - 中文字段:`hanlp_index`(索引时)/ `hanlp_standard`(查询时) | 136 | - 中文字段:`hanlp_index`(索引时)/ `hanlp_standard`(查询时) |
| @@ -282,7 +282,7 @@ indexes: | @@ -282,7 +282,7 @@ indexes: | ||
| 282 | 2. **数据转换**(`indexer/spu_transformer.py`): | 282 | 2. **数据转换**(`indexer/spu_transformer.py`): |
| 283 | - 按`spu_id`和`tenant_id`关联SPU和SKU数据 | 283 | - 按`spu_id`和`tenant_id`关联SPU和SKU数据 |
| 284 | - **多语言字段映射**: | 284 | - **多语言字段映射**: |
| 285 | - - MySQL的`title` → ES的`title_zh`(英文字段设为空) | 285 | + - MySQL的`title` → ES的`title.zh`(英文字段设为空) |
| 286 | - 其他文本字段类似处理 | 286 | - 其他文本字段类似处理 |
| 287 | - **分类字段映射**: | 287 | - **分类字段映射**: |
| 288 | - 从SPU表的`category_path`解析多级类目(`category1_name`, `category2_name`, `category3_name`) | 288 | - 从SPU表的`category_path`解析多级类目(`category1_name`, `category2_name`, `category3_name`) |
| @@ -483,7 +483,7 @@ laptop AND (gaming OR professional) ANDNOT cheap | @@ -483,7 +483,7 @@ laptop AND (gaming OR professional) ANDNOT cheap | ||
| 483 | - 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系 | 483 | - 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系 |
| 484 | - 使用ES的`nested`查询实现 | 484 | - 使用ES的`nested`查询实现 |
| 485 | - **text_recall**: 文本相关性召回 | 485 | - **text_recall**: 文本相关性召回 |
| 486 | - - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`) | 486 | + - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`) |
| 487 | - 使用 `multi_match` 查询,支持字段 boost | 487 | - 使用 `multi_match` 查询,支持字段 boost |
| 488 | - 中文字段使用中文分词器,英文字段使用英文分析器 | 488 | - 中文字段使用中文分词器,英文字段使用英文分析器 |
| 489 | - **embedding_recall**: 向量召回(KNN) | 489 | - **embedding_recall**: 向量召回(KNN) |
| @@ -503,8 +503,8 @@ laptop AND (gaming OR professional) ANDNOT cheap | @@ -503,8 +503,8 @@ laptop AND (gaming OR professional) ANDNOT cheap | ||
| 503 | "multi_match": { | 503 | "multi_match": { |
| 504 | "query": "手机", | 504 | "query": "手机", |
| 505 | "fields": [ | 505 | "fields": [ |
| 506 | - "title_zh^3.0", "title_en^3.0", | ||
| 507 | - "brief_zh^1.5", "brief_en^1.5", | 506 | + "title.zh^3.0", "title.en^3.0", |
| 507 | + "brief.zh^1.5", "brief.en^1.5", | ||
| 508 | ... | 508 | ... |
| 509 | ] | 509 | ] |
| 510 | } | 510 | } |
docs/系统设计文档v1.md
| @@ -424,7 +424,7 @@ laptop AND (gaming OR professional) ANDNOT cheap | @@ -424,7 +424,7 @@ laptop AND (gaming OR professional) ANDNOT cheap | ||
| 424 | - **结构**: `filters AND (text_recall OR embedding_recall)` | 424 | - **结构**: `filters AND (text_recall OR embedding_recall)` |
| 425 | - **filters**: 前端传递的过滤条件(永远起作用,放在 `filter` 中) | 425 | - **filters**: 前端传递的过滤条件(永远起作用,放在 `filter` 中) |
| 426 | - **text_recall**: 文本相关性召回 | 426 | - **text_recall**: 文本相关性召回 |
| 427 | - - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`) | 427 | + - 同时搜索中英文字段(`title.zh/en`, `brief.zh/en`, `description.zh/en`, `vendor.zh/en`, `category_path.zh/en`, `category_name_text.zh/en`, `tags`) |
| 428 | - 使用 `multi_match` 查询,支持字段 boost | 428 | - 使用 `multi_match` 查询,支持字段 boost |
| 429 | - **embedding_recall**: 向量召回(KNN) | 429 | - **embedding_recall**: 向量召回(KNN) |
| 430 | - 使用 `title_embedding` 字段进行 KNN 搜索 | 430 | - 使用 `title_embedding` 字段进行 KNN 搜索 |
| @@ -443,8 +443,8 @@ laptop AND (gaming OR professional) ANDNOT cheap | @@ -443,8 +443,8 @@ laptop AND (gaming OR professional) ANDNOT cheap | ||
| 443 | "multi_match": { | 443 | "multi_match": { |
| 444 | "query": "手机", | 444 | "query": "手机", |
| 445 | "fields": [ | 445 | "fields": [ |
| 446 | - "title_zh^3.0", "title_en^3.0", | ||
| 447 | - "brief_zh^1.5", "brief_en^1.5", | 446 | + "title.zh^3.0", "title.en^3.0", |
| 447 | + "brief.zh^1.5", "brief.en^1.5", | ||
| 448 | ... | 448 | ... |
| 449 | ] | 449 | ] |
| 450 | } | 450 | } |
docs/索引字段说明v1.md
| @@ -539,7 +539,7 @@ title brief description seo_title seo_description seo_keywords vendor vendor_key | @@ -539,7 +539,7 @@ title brief description seo_title seo_description seo_keywords vendor vendor_key | ||
| 539 | 539 | ||
| 540 | 2. tenant - 数据灌入: | 540 | 2. tenant - 数据灌入: |
| 541 | 对每个tenant设置一个语言,作为tenant的一个基本配置。 | 541 | 对每个tenant设置一个语言,作为tenant的一个基本配置。 |
| 542 | -写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title_en) | 542 | +写入索引的时候,根据语言配置将title 等文本字段写入对应的索引字段(比如 title.en) |
| 543 | 查询的时候,将query转为商家所用语言,并到对应的field去查。 | 543 | 查询的时候,将query转为商家所用语言,并到对应的field去查。 |
| 544 | 544 | ||
| 545 | 545 |
docs/索引字段说明v2-mapping结构.md
| @@ -16,22 +16,22 @@ | @@ -16,22 +16,22 @@ | ||
| 16 | }, | 16 | }, |
| 17 | 17 | ||
| 18 | // 文本相关性相关字段 | 18 | // 文本相关性相关字段 |
| 19 | - "title_zh": { | 19 | + "title.zh": { |
| 20 | "type": "text", | 20 | "type": "text", |
| 21 | "analyzer": "hanlp_index", | 21 | "analyzer": "hanlp_index", |
| 22 | "search_analyzer": "hanlp_standard" | 22 | "search_analyzer": "hanlp_standard" |
| 23 | }, | 23 | }, |
| 24 | - "brief_zh": { | 24 | + "brief.zh": { |
| 25 | "type": "text", | 25 | "type": "text", |
| 26 | "analyzer": "hanlp_index", | 26 | "analyzer": "hanlp_index", |
| 27 | "search_analyzer": "hanlp_standard" | 27 | "search_analyzer": "hanlp_standard" |
| 28 | }, | 28 | }, |
| 29 | - "description_zh": { | 29 | + "description.zh": { |
| 30 | "type": "text", | 30 | "type": "text", |
| 31 | "analyzer": "hanlp_index", | 31 | "analyzer": "hanlp_index", |
| 32 | "search_analyzer": "hanlp_standard" | 32 | "search_analyzer": "hanlp_standard" |
| 33 | }, | 33 | }, |
| 34 | - "vendor_zh": { | 34 | + "vendor.zh": { |
| 35 | "type": "text", | 35 | "type": "text", |
| 36 | "analyzer": "hanlp_index", | 36 | "analyzer": "hanlp_index", |
| 37 | "search_analyzer": "hanlp_standard", | 37 | "search_analyzer": "hanlp_standard", |
| @@ -43,23 +43,23 @@ | @@ -43,23 +43,23 @@ | ||
| 43 | } | 43 | } |
| 44 | }, | 44 | }, |
| 45 | 45 | ||
| 46 | - "title_en": { | 46 | + "title.en": { |
| 47 | "type": "text", | 47 | "type": "text", |
| 48 | "analyzer": "english", | 48 | "analyzer": "english", |
| 49 | "search_analyzer": "english" | 49 | "search_analyzer": "english" |
| 50 | }, | 50 | }, |
| 51 | - "brief_en": { | 51 | + "brief.en": { |
| 52 | "type": "text", | 52 | "type": "text", |
| 53 | "analyzer": "english", | 53 | "analyzer": "english", |
| 54 | "search_analyzer": "english" | 54 | "search_analyzer": "english" |
| 55 | 55 | ||
| 56 | }, | 56 | }, |
| 57 | - "description_en": { | 57 | + "description.en": { |
| 58 | "type": "text", | 58 | "type": "text", |
| 59 | "analyzer": "english", | 59 | "analyzer": "english", |
| 60 | "search_analyzer": "english" | 60 | "search_analyzer": "english" |
| 61 | }, | 61 | }, |
| 62 | - "vendor_en": { | 62 | + "vendor.en": { |
| 63 | "type": "text", | 63 | "type": "text", |
| 64 | "analyzer": "english", | 64 | "analyzer": "english", |
| 65 | "search_analyzer": "english", | 65 | "search_analyzer": "english", |
| @@ -103,22 +103,22 @@ | @@ -103,22 +103,22 @@ | ||
| 103 | }, | 103 | }, |
| 104 | 104 | ||
| 105 | // 分类相关 | 105 | // 分类相关 |
| 106 | - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 | 106 | + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算 |
| 107 | "type": "text", | 107 | "type": "text", |
| 108 | "analyzer": "hanlp_index", | 108 | "analyzer": "hanlp_index", |
| 109 | "search_analyzer": "hanlp_standard" | 109 | "search_analyzer": "hanlp_standard" |
| 110 | }, | 110 | }, |
| 111 | - "category_path_en": { // 提供模糊查询功能,辅助相关性计算 | 111 | + "category_path.en": { // 提供模糊查询功能,辅助相关性计算 |
| 112 | "type": "text", | 112 | "type": "text", |
| 113 | "analyzer": "english", | 113 | "analyzer": "english", |
| 114 | "search_analyzer": "english" | 114 | "search_analyzer": "english" |
| 115 | }, | 115 | }, |
| 116 | - "category_name_zh": { // 提供模糊查询功能,辅助相关性计算 | 116 | + "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算 |
| 117 | "type": "text", | 117 | "type": "text", |
| 118 | "analyzer": "hanlp_index", | 118 | "analyzer": "hanlp_index", |
| 119 | "search_analyzer": "hanlp_standard" | 119 | "search_analyzer": "hanlp_standard" |
| 120 | }, | 120 | }, |
| 121 | - "category_name_en": { // 提供模糊查询功能,辅助相关性计算 | 121 | + "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算 |
| 122 | "type": "text", | 122 | "type": "text", |
| 123 | "analyzer": "english", | 123 | "analyzer": "english", |
| 124 | "search_analyzer": "english" | 124 | "search_analyzer": "english" |
docs/索引字段说明v2-plan.md
| @@ -10,7 +10,7 @@ | @@ -10,7 +10,7 @@ | ||
| 10 | 10 | ||
| 11 | #### 1.1 多语言文本字段 | 11 | #### 1.1 多语言文本字段 |
| 12 | 12 | ||
| 13 | -- 为文本字段添加中英文双字段支持(title_zh/title_en, brief_zh/brief_en, description_zh/description_en, vendor_zh/vendor_en) | 13 | +- 为文本字段添加中英文双字段支持(title.zh/title.en, brief.zh/brief.en, description.zh/description.en, vendor.zh/vendor.en) |
| 14 | - 中文字段使用 `index_ansj`/`query_ansj` 分析器(对应文档中的hanlp_index/hanlp_standard) | 14 | - 中文字段使用 `index_ansj`/`query_ansj` 分析器(对应文档中的hanlp_index/hanlp_standard) |
| 15 | - 英文字段使用 `english` 分析器 | 15 | - 英文字段使用 `english` 分析器 |
| 16 | - **暂时只填充中文字段,英文字段设为空**(不需要语言检测,每个tenant的语言预先知道) | 16 | - **暂时只填充中文字段,英文字段设为空**(不需要语言检测,每个tenant的语言预先知道) |
| @@ -28,22 +28,22 @@ category_path varchar(500) | @@ -28,22 +28,22 @@ category_path varchar(500) | ||
| 28 | 28 | ||
| 29 | mapping: | 29 | mapping: |
| 30 | 30 | ||
| 31 | - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 | 31 | + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算 |
| 32 | "type": "text", | 32 | "type": "text", |
| 33 | "analyzer": "hanlp_index", | 33 | "analyzer": "hanlp_index", |
| 34 | "search_analyzer": "hanlp_standard" | 34 | "search_analyzer": "hanlp_standard" |
| 35 | }, | 35 | }, |
| 36 | - "category_path_en": { // 提供模糊查询功能,辅助相关性计算 | 36 | + "category_path.en": { // 提供模糊查询功能,辅助相关性计算 |
| 37 | "type": "text", | 37 | "type": "text", |
| 38 | "analyzer": "english", | 38 | "analyzer": "english", |
| 39 | "search_analyzer": "english" | 39 | "search_analyzer": "english" |
| 40 | }, | 40 | }, |
| 41 | - "category_name_zh": { // 提供模糊查询功能,辅助相关性计算 | 41 | + "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算 |
| 42 | "type": "text", | 42 | "type": "text", |
| 43 | "analyzer": "hanlp_index", | 43 | "analyzer": "hanlp_index", |
| 44 | "search_analyzer": "hanlp_standard" | 44 | "search_analyzer": "hanlp_standard" |
| 45 | }, | 45 | }, |
| 46 | - "category_name_en": { // 提供模糊查询功能,辅助相关性计算 | 46 | + "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算 |
| 47 | "type": "text", | 47 | "type": "text", |
| 48 | "analyzer": "english", | 48 | "analyzer": "english", |
| 49 | "search_analyzer": "english" | 49 | "search_analyzer": "english" |
| @@ -104,15 +104,15 @@ mapping: | @@ -104,15 +104,15 @@ mapping: | ||
| 104 | 104 | ||
| 105 | #### 2.1 多语言文本处理 | 105 | #### 2.1 多语言文本处理 |
| 106 | 106 | ||
| 107 | -- **简化处理**:暂时只填充中文字段(title_zh, brief_zh, description_zh, vendor_zh) | ||
| 108 | -- 英文字段(title_en, brief_en, description_en, vendor_en)设为空或None | 107 | +- **简化处理**:暂时只填充中文字段(title.zh, brief.zh, description.zh, vendor.zh) |
| 108 | +- 英文字段(title.en, brief.en, description.en, vendor.en)设为空或None | ||
| 109 | - 不需要语言检测逻辑 | 109 | - 不需要语言检测逻辑 |
| 110 | 110 | ||
| 111 | #### 2.2 分类路径解析 | 111 | #### 2.2 分类路径解析 |
| 112 | 112 | ||
| 113 | - 从 `category_path` 字段按 "/" 分割提取分类层级 | 113 | - 从 `category_path` 字段按 "/" 分割提取分类层级 |
| 114 | - 分割结果赋值给 `category1_name`, `category2_name`, `category3_name` | 114 | - 分割结果赋值给 `category1_name`, `category2_name`, `category3_name` |
| 115 | -- 生成 `category_path_zh`(暂时填充,`category_path_en` 设为空) | 115 | +- 生成 `category_path.zh`(暂时填充,`category_path.en` 设为空) |
| 116 | 116 | ||
| 117 | #### 2.3 SKU字段展开计算 | 117 | #### 2.3 SKU字段展开计算 |
| 118 | 118 | ||
| @@ -152,7 +152,7 @@ mapping: | @@ -152,7 +152,7 @@ mapping: | ||
| 152 | 152 | ||
| 153 | **文件**: `indexer/spu_transformer.py` | 153 | **文件**: `indexer/spu_transformer.py` |
| 154 | 154 | ||
| 155 | -- **简化多语言处理**:只填充中文字段(title_zh, brief_zh等),英文字段设为空或None | 155 | +- **简化多语言处理**:只填充中文字段(title.zh, brief.zh等),英文字段设为空或None |
| 156 | - 实现分类路径的解析和展开 | 156 | - 实现分类路径的解析和展开 |
| 157 | - 实现SKU字段的展开计算(价格、重量、库存) | 157 | - 实现SKU字段的展开计算(价格、重量、库存) |
| 158 | - 实现选项字段的处理 | 158 | - 实现选项字段的处理 |
docs/索引字段说明v2.md
| @@ -28,24 +28,24 @@ | @@ -28,24 +28,24 @@ | ||
| 28 | 28 | ||
| 29 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | | 29 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | |
| 30 | |--------|--------|--------|------|----------| | 30 | |--------|--------|--------|------|----------| |
| 31 | -| `title_zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` | | ||
| 32 | -| `title_en` | text | english | 英文标题 | 暂为空(待翻译服务填充) | | 31 | +| `title.zh` | text | hanlp_index / hanlp_standard | 中文标题 | MySQL: `shoplazza_product_spu.title` | |
| 32 | +| `title.en` | text | english | 英文标题 | 暂为空(待翻译服务填充) | | ||
| 33 | 33 | ||
| 34 | #### 2.2 描述字段 | 34 | #### 2.2 描述字段 |
| 35 | 35 | ||
| 36 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | | 36 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | |
| 37 | |--------|--------|--------|------|----------| | 37 | |--------|--------|--------|------|----------| |
| 38 | -| `brief_zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` | | ||
| 39 | -| `brief_en` | text | english | 英文短描述 | 暂为空 | | ||
| 40 | -| `description_zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` | | ||
| 41 | -| `description_en` | text | english | 英文详细描述 | 暂为空 | | 38 | +| `brief.zh` | text | hanlp_index / hanlp_standard | 中文短描述 | MySQL: `shoplazza_product_spu.brief` | |
| 39 | +| `brief.en` | text | english | 英文短描述 | 暂为空 | | ||
| 40 | +| `description.zh` | text | hanlp_index / hanlp_standard | 中文详细描述 | MySQL: `shoplazza_product_spu.description` | | ||
| 41 | +| `description.en` | text | english | 英文详细描述 | 暂为空 | | ||
| 42 | 42 | ||
| 43 | #### 2.3 供应商/品牌字段 | 43 | #### 2.3 供应商/品牌字段 |
| 44 | 44 | ||
| 45 | | 字段名 | ES类型 | 分析器 | 子字段 | 说明 | 数据来源 | | 45 | | 字段名 | ES类型 | 分析器 | 子字段 | 说明 | 数据来源 | |
| 46 | |--------|--------|--------|--------|------|----------| | 46 | |--------|--------|--------|--------|------|----------| |
| 47 | -| `vendor_zh` | text | hanlp_index / hanlp_standard | `vendor_zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` | | ||
| 48 | -| `vendor_en` | text | english | `vendor_en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 | | 47 | +| `vendor.zh` | text | hanlp_index / hanlp_standard | `vendor.zh.keyword` (keyword, normalizer: lowercase) | 中文供应商/品牌 | MySQL: `shoplazza_product_spu.vendor` | |
| 48 | +| `vendor.en` | text | english | `vendor.en.keyword` (keyword, normalizer: lowercase) | 英文供应商/品牌 | 暂为空 | | ||
| 49 | 49 | ||
| 50 | **用途**: | 50 | **用途**: |
| 51 | - `text` 类型:用于全文搜索(支持模糊匹配) | 51 | - `text` 类型:用于全文搜索(支持模糊匹配) |
| @@ -65,15 +65,15 @@ | @@ -65,15 +65,15 @@ | ||
| 65 | 65 | ||
| 66 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | | 66 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | |
| 67 | |--------|--------|--------|------|----------| | 67 | |--------|--------|--------|------|----------| |
| 68 | -| `category_path_zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` | | ||
| 69 | -| `category_path_en` | text | english | 英文类目路径 | 暂为空 | | 68 | +| `category_path.zh` | text | hanlp_index / hanlp_standard | 中文类目路径(如"服装/男装/衬衫") | MySQL: `shoplazza_product_spu.category_path` | |
| 69 | +| `category_path.en` | text | english | 英文类目路径 | 暂为空 | | ||
| 70 | 70 | ||
| 71 | #### 4.2 类目名称(用于搜索) | 71 | #### 4.2 类目名称(用于搜索) |
| 72 | 72 | ||
| 73 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | | 73 | | 字段名 | ES类型 | 分析器 | 说明 | 数据来源 | |
| 74 | |--------|--------|--------|------|----------| | 74 | |--------|--------|--------|------|----------| |
| 75 | -| `category_name_zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` | | ||
| 76 | -| `category_name_en` | text | english | 英文类目名称 | 暂为空 | | 75 | +| `category_name_text.zh` | text | hanlp_index / hanlp_standard | 中文类目名称 | MySQL: `shoplazza_product_spu.category` | |
| 76 | +| `category_name_text.en` | text | english | 英文类目名称 | 暂为空 | | ||
| 77 | 77 | ||
| 78 | #### 4.3 类目标识(用于过滤和分面) | 78 | #### 4.3 类目标识(用于过滤和分面) |
| 79 | 79 | ||
| @@ -87,7 +87,7 @@ | @@ -87,7 +87,7 @@ | ||
| 87 | | `category3_name` | keyword | 三级类目名称 | 从 `category_path` 解析 | | 87 | | `category3_name` | keyword | 三级类目名称 | 从 `category_path` 解析 | |
| 88 | 88 | ||
| 89 | **用途**: | 89 | **用途**: |
| 90 | -- `category_path_zh/en`, `category_name_zh/en`: 用于全文搜索,支持模糊匹配 | 90 | +- `category_path.zh/en`, `category_name_text.zh/en`: 用于全文搜索,支持模糊匹配 |
| 91 | - `category_id`, `category_name`, `category_level`, `category1/2/3_name`: 用于精确过滤和分面聚合 | 91 | - `category_id`, `category_name`, `category_level`, `category1/2/3_name`: 用于精确过滤和分面聚合 |
| 92 | 92 | ||
| 93 | ### 5. 规格字段(Specifications) | 93 | ### 5. 规格字段(Specifications) |
| @@ -348,20 +348,20 @@ | @@ -348,20 +348,20 @@ | ||
| 348 | 348 | ||
| 349 | ### 搜索字段(参与相关性计算) | 349 | ### 搜索字段(参与相关性计算) |
| 350 | 350 | ||
| 351 | -- `title_zh`, `title_en` (boost: 3.0) | ||
| 352 | -- `brief_zh`, `brief_en` (boost: 1.5) | ||
| 353 | -- `description_zh`, `description_en` (boost: 1.0) | ||
| 354 | -- `vendor_zh`, `vendor_en` (boost: 1.5) | 351 | +- `title.zh`, `title.en` (boost: 3.0) |
| 352 | +- `brief.zh`, `brief.en` (boost: 1.5) | ||
| 353 | +- `description.zh`, `description.en` (boost: 1.0) | ||
| 354 | +- `vendor.zh`, `vendor.en` (boost: 1.5) | ||
| 355 | - `tags` (boost: 1.0) | 355 | - `tags` (boost: 1.0) |
| 356 | -- `category_path_zh`, `category_path_en` (boost: 1.5) | ||
| 357 | -- `category_name_zh`, `category_name_en` (boost: 1.5) | 356 | +- `category_path.zh`, `category_path.en` (boost: 1.5) |
| 357 | +- `category_name_text.zh`, `category_name_text.en` (boost: 1.5) | ||
| 358 | - `title_embedding` (向量召回,boost: 0.2) | 358 | - `title_embedding` (向量召回,boost: 0.2) |
| 359 | 359 | ||
| 360 | ### 过滤字段(精确匹配) | 360 | ### 过滤字段(精确匹配) |
| 361 | 361 | ||
| 362 | - `tenant_id` (必需,多租户隔离) | 362 | - `tenant_id` (必需,多租户隔离) |
| 363 | - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name` | 363 | - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name` |
| 364 | -- `vendor_zh.keyword`, `vendor_en.keyword` | 364 | +- `vendor.zh.keyword`, `vendor.en.keyword` |
| 365 | - `specifications` (嵌套查询) | 365 | - `specifications` (嵌套查询) |
| 366 | - `min_price`, `max_price` (范围过滤) | 366 | - `min_price`, `max_price` (范围过滤) |
| 367 | - `sales` (范围过滤) | 367 | - `sales` (范围过滤) |
| @@ -395,12 +395,12 @@ | @@ -395,12 +395,12 @@ | ||
| 395 | - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段 | 395 | - `language="en"`: 优先返回 `*.en` 字段,如果为空则回退到 `*.zh` 字段 |
| 396 | 396 | ||
| 397 | 映射到前端字段: | 397 | 映射到前端字段: |
| 398 | -- `title_zh/en` → `title` | ||
| 399 | -- `brief_zh/en` → `brief` | ||
| 400 | -- `description_zh/en` → `description` | ||
| 401 | -- `vendor_zh/en` → `vendor` | ||
| 402 | -- `category_path_zh/en` → `category_path` | ||
| 403 | -- `category_name_zh/en` → `category_name` | 398 | +- `title.zh/en` → `title` |
| 399 | +- `brief.zh/en` → `brief` | ||
| 400 | +- `description.zh/en` → `description` | ||
| 401 | +- `vendor.zh/en` → `vendor` | ||
| 402 | +- `category_path.zh/en` → `category_path` | ||
| 403 | +- `category_name_text.zh/en` → `category_name` | ||
| 404 | 404 | ||
| 405 | ### 规格数据构建 | 405 | ### 规格数据构建 |
| 406 | 406 | ||
| @@ -434,12 +434,12 @@ filters AND (text_recall OR embedding_recall) | @@ -434,12 +434,12 @@ filters AND (text_recall OR embedding_recall) | ||
| 434 | ### 文本召回字段 | 434 | ### 文本召回字段 |
| 435 | 435 | ||
| 436 | 根据查询词的语言选择对应的索引字段: | 436 | 根据查询词的语言选择对应的索引字段: |
| 437 | -- `title_zh^3.0`, `title_en^3.0` | ||
| 438 | -- `brief_zh^1.5`, `brief_en^1.5` | ||
| 439 | -- `description_zh^1.0`, `description_en^1.0` | ||
| 440 | -- `vendor_zh^1.5`, `vendor_en^1.5` | ||
| 441 | -- `category_path_zh^1.5`, `category_path_en^1.5` | ||
| 442 | -- `category_name_zh^1.5`, `category_name_en^1.5` | 437 | +- `title.zh^3.0`, `title.en^3.0` |
| 438 | +- `brief.zh^1.5`, `brief.en^1.5` | ||
| 439 | +- `description.zh^1.0`, `description.en^1.0` | ||
| 440 | +- `vendor.zh^1.5`, `vendor.en^1.5` | ||
| 441 | +- `category_path.zh^1.5`, `category_path.en^1.5` | ||
| 442 | +- `category_name_text.zh^1.5`, `category_name_text.en^1.5` | ||
| 443 | - `tags^1.0` | 443 | - `tags^1.0` |
| 444 | 444 | ||
| 445 | ## 注意事项 | 445 | ## 注意事项 |
docs/索引数据接口文档___old.md
| @@ -52,12 +52,12 @@ tenant_config: | @@ -52,12 +52,12 @@ tenant_config: | ||
| 52 | ### 配置规则 | 52 | ### 配置规则 |
| 53 | 53 | ||
| 54 | 1. **主语言**:指定SKU表中 `title`、`brief`、`description`、`vendor` 等字段的语言。 | 54 | 1. **主语言**:指定SKU表中 `title`、`brief`、`description`、`vendor` 等字段的语言。 |
| 55 | - - 如果主语言是 `zh`,这些字段的值会填充到 `title_zh`、`brief_zh` 等字段 | ||
| 56 | - - 如果主语言是 `en`,这些字段的值会填充到 `title_en`、`brief_en` 等字段 | 55 | + - 如果主语言是 `zh`,这些字段的值会填充到 `title.zh`、`brief.zh` 等字段 |
| 56 | + - 如果主语言是 `en`,这些字段的值会填充到 `title.en`、`brief.en` 等字段 | ||
| 57 | 57 | ||
| 58 | 2. **翻译配置**: | 58 | 2. **翻译配置**: |
| 59 | - - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title_en` 等字段 | ||
| 60 | - - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title_zh` 等字段 | 59 | + - `translate_to_en: true`:如果主语言是中文,则会将中文内容翻译为英文,填充到 `title.en` 等字段 |
| 60 | + - `translate_to_zh: true`:如果主语言是英文,则会将英文内容翻译为中文,填充到 `title.zh` 等字段 | ||
| 61 | - **注意**:如果主语言本身就是目标语言,则不会触发翻译(例如主语言是英文,`translate_to_en: true` 不会触发翻译) | 61 | - **注意**:如果主语言本身就是目标语言,则不会触发翻译(例如主语言是英文,`translate_to_en: true` 不会触发翻译) |
| 62 | 62 | ||
| 63 | 3. **默认配置**:如果租户ID不在 `tenants` 中,则使用 `default` 配置。 | 63 | 3. **默认配置**:如果租户ID不在 `tenants` 中,则使用 `default` 配置。 |
| @@ -72,8 +72,8 @@ tenant_config: | @@ -72,8 +72,8 @@ tenant_config: | ||
| 72 | "translate_to_zh": false | 72 | "translate_to_zh": false |
| 73 | } | 73 | } |
| 74 | ``` | 74 | ``` |
| 75 | -- SKU表的 `title` 字段(中文)→ `title_zh` | ||
| 76 | -- 翻译服务将中文翻译为英文 → `title_en` | 75 | +- SKU表的 `title` 字段(中文)→ `title.zh` |
| 76 | +- 翻译服务将中文翻译为英文 → `title.en` | ||
| 77 | 77 | ||
| 78 | **示例2:英文主语言,需要翻译中文** | 78 | **示例2:英文主语言,需要翻译中文** |
| 79 | ```json | 79 | ```json |
| @@ -83,8 +83,8 @@ tenant_config: | @@ -83,8 +83,8 @@ tenant_config: | ||
| 83 | "translate_to_zh": true | 83 | "translate_to_zh": true |
| 84 | } | 84 | } |
| 85 | ``` | 85 | ``` |
| 86 | -- SKU表的 `title` 字段(英文)→ `title_en` | ||
| 87 | -- 翻译服务将英文翻译为中文 → `title_zh` | 86 | +- SKU表的 `title` 字段(英文)→ `title.en` |
| 87 | +- 翻译服务将英文翻译为中文 → `title.zh` | ||
| 88 | 88 | ||
| 89 | **示例3:仅使用主语言,不翻译** | 89 | **示例3:仅使用主语言,不翻译** |
| 90 | ```json | 90 | ```json |
| @@ -94,8 +94,8 @@ tenant_config: | @@ -94,8 +94,8 @@ tenant_config: | ||
| 94 | "translate_to_zh": false | 94 | "translate_to_zh": false |
| 95 | } | 95 | } |
| 96 | ``` | 96 | ``` |
| 97 | -- SKU表的 `title` 字段(中文)→ `title_zh` | ||
| 98 | -- `title_en` 保持为 `null` | 97 | +- SKU表的 `title` 字段(中文)→ `title.zh` |
| 98 | +- `title.en` 不写入(未开启翻译时,`title` 对象中只包含主语言键 `zh`) | ||
| 99 | 99 | ||
| 100 | ### 配置更新 | 100 | ### 配置更新 |
| 101 | 101 | ||
| @@ -272,19 +272,19 @@ String json = response.body().string(); | @@ -272,19 +272,19 @@ String json = response.body().string(); | ||
| 272 | { | 272 | { |
| 273 | "tenant_id": "1", | 273 | "tenant_id": "1", |
| 274 | "spu_id": "123", | 274 | "spu_id": "123", |
| 275 | - "title_zh": "商品标题", | ||
| 276 | - "title_en": null, | ||
| 277 | - "brief_zh": "商品简介", | ||
| 278 | - "brief_en": null, | ||
| 279 | - "description_zh": "商品详细描述", | ||
| 280 | - "description_en": null, | ||
| 281 | - "vendor_zh": "供应商名称", | ||
| 282 | - "vendor_en": null, | 275 | + "title.zh": "商品标题", |
| 276 | + "title.en": null, | ||
| 277 | + "brief.zh": "商品简介", | ||
| 278 | + "brief.en": null, | ||
| 279 | + "description.zh": "商品详细描述", | ||
| 280 | + "description.en": null, | ||
| 281 | + "vendor.zh": "供应商名称", | ||
| 282 | + "vendor.en": null, | ||
| 283 | "tags": ["标签1", "标签2"], | 283 | "tags": ["标签1", "标签2"], |
| 284 | - "category_path_zh": "类目1/类目2/类目3", | ||
| 285 | - "category_path_en": null, | ||
| 286 | - "category_name_zh": "类目名称", | ||
| 287 | - "category_name_en": null, | 284 | + "category_path.zh": "类目1/类目2/类目3", |
| 285 | + "category_path.en": null, | ||
| 286 | + "category_name_text.zh": "类目名称", | ||
| 287 | + "category_name_text.en": null, | ||
| 288 | "category_id": "100", | 288 | "category_id": "100", |
| 289 | "category_name": "类目名称", | 289 | "category_name": "类目名称", |
| 290 | "category_level": 3, | 290 | "category_level": 3, |
| @@ -453,11 +453,11 @@ for (String spuId : changedSpuIds) { | @@ -453,11 +453,11 @@ for (String spuId : changedSpuIds) { | ||
| 453 | |---------|--------|------|------| | 453 | |---------|--------|------|------| |
| 454 | | 基础标识 | `tenant_id` | keyword | 租户ID | | 454 | | 基础标识 | `tenant_id` | keyword | 租户ID | |
| 455 | | 基础标识 | `spu_id` | keyword | SPU ID | | 455 | | 基础标识 | `spu_id` | keyword | SPU ID | |
| 456 | -| 文本字段 | `title_zh`, `title_en` | text | 标题(中英文) | | ||
| 457 | -| 文本字段 | `brief_zh`, `brief_en` | text | 简介(中英文) | | ||
| 458 | -| 文本字段 | `description_zh`, `description_en` | text | 描述(中英文) | | ||
| 459 | -| 文本字段 | `vendor_zh`, `vendor_en` | text | 供应商(中英文) | | ||
| 460 | -| 类目字段 | `category_path_zh`, `category_path_en` | text | 类目路径(中英文) | | 456 | +| 文本字段 | `title.zh`, `title.en` | text | 标题(中英文) | |
| 457 | +| 文本字段 | `brief.zh`, `brief.en` | text | 简介(中英文) | | ||
| 458 | +| 文本字段 | `description.zh`, `description.en` | text | 描述(中英文) | | ||
| 459 | +| 文本字段 | `vendor.zh`, `vendor.en` | text | 供应商(中英文) | | ||
| 460 | +| 类目字段 | `category_path.zh`, `category_path.en` | text | 类目路径(中英文) | | ||
| 461 | | 类目字段 | `category1_name`, `category2_name`, `category3_name` | keyword | 分层类目名称 | | 461 | | 类目字段 | `category1_name`, `category2_name`, `category3_name` | keyword | 分层类目名称 | |
| 462 | | 价格字段 | `min_price`, `max_price` | float | 价格范围 | | 462 | | 价格字段 | `min_price`, `max_price` | float | 价格范围 | |
| 463 | | 库存字段 | `total_inventory` | long | 总库存 | | 463 | | 库存字段 | `total_inventory` | long | 总库存 | |
docs/索引方案.md
| @@ -178,12 +178,12 @@ category_path varchar(500) | @@ -178,12 +178,12 @@ category_path varchar(500) | ||
| 178 | 方案:采用方案2 | 178 | 方案:采用方案2 |
| 179 | 4. categoryPath索引 + Prefix 查询(categoryPath.keyword: "服装/男装")(如果满足条件的key太多的则性能较差,比如 查询的是一级类目,类目树叶子节点太多时性能较差) | 179 | 4. categoryPath索引 + Prefix 查询(categoryPath.keyword: "服装/男装")(如果满足条件的key太多的则性能较差,比如 查询的是一级类目,类目树叶子节点太多时性能较差) |
| 180 | 5. categoryPath支撑模糊查询 和 多级cate keyword索引支撑精确查询。 索引阶段冗余,查询性能高。 | 180 | 5. categoryPath支撑模糊查询 和 多级cate keyword索引支撑精确查询。 索引阶段冗余,查询性能高。 |
| 181 | - "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 | 181 | + "category_path.zh": { // 提供模糊查询功能,辅助相关性计算 |
| 182 | "type": "text", | 182 | "type": "text", |
| 183 | "analyzer": "hanlp_index", | 183 | "analyzer": "hanlp_index", |
| 184 | "search_analyzer": "hanlp_standard" | 184 | "search_analyzer": "hanlp_standard" |
| 185 | }, | 185 | }, |
| 186 | - "category_path_en": { // 提供模糊查询功能,辅助相关性计算 | 186 | + "category_path.en": { // 提供模糊查询功能,辅助相关性计算 |
| 187 | "type": "text", | 187 | "type": "text", |
| 188 | "analyzer": "english", | 188 | "analyzer": "english", |
| 189 | "search_analyzer": "english" | 189 | "search_analyzer": "english" |
docs/翻译功能测试说明.md
| @@ -22,8 +22,8 @@ | @@ -22,8 +22,8 @@ | ||
| 22 | ```yaml | 22 | ```yaml |
| 23 | translation_prompts: | 23 | translation_prompts: |
| 24 | # 商品标题翻译提示词 | 24 | # 商品标题翻译提示词 |
| 25 | - product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。" | ||
| 26 | - product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language." | 25 | + product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。" |
| 26 | + product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language." | ||
| 27 | # query翻译提示词 | 27 | # query翻译提示词 |
| 28 | query_zh: "电商领域" | 28 | query_zh: "电商领域" |
| 29 | query_en: "e-commerce domain" | 29 | query_en: "e-commerce domain" |
| @@ -35,8 +35,8 @@ translation_prompts: | @@ -35,8 +35,8 @@ translation_prompts: | ||
| 35 | ### 提示词使用规则 | 35 | ### 提示词使用规则 |
| 36 | 36 | ||
| 37 | 1. **商品标题翻译**: | 37 | 1. **商品标题翻译**: |
| 38 | - - 中文→英文:使用 `product_title_en` | ||
| 39 | - - 英文→中文:使用 `product_title_zh` | 38 | + - 中文→英文:使用 `product_title_en` |
| 39 | + - 英文→中文:使用 `product_title_zh` | ||
| 40 | 40 | ||
| 41 | 2. **其他字段翻译**(brief, description, vendor): | 41 | 2. **其他字段翻译**(brief, description, vendor): |
| 42 | - 根据目标语言选择 `default_zh` 或 `default_en` | 42 | - 根据目标语言选择 `default_zh` 或 `default_en` |
| @@ -72,7 +72,7 @@ translator = Translator( | @@ -72,7 +72,7 @@ translator = Translator( | ||
| 72 | 72 | ||
| 73 | # 测试商品标题翻译 | 73 | # 测试商品标题翻译 |
| 74 | text = "蓝牙耳机" | 74 | text = "蓝牙耳机" |
| 75 | -prompt = config.query_config.translation_prompts.get('product_title_en') | 75 | +prompt = config.query_config.translation_prompts.get('product_title_en') |
| 76 | result = translator.translate( | 76 | result = translator.translate( |
| 77 | text, | 77 | text, |
| 78 | target_lang='en', | 78 | target_lang='en', |
| @@ -140,8 +140,8 @@ doc = transformer.transform_spu_to_doc( | @@ -140,8 +140,8 @@ doc = transformer.transform_spu_to_doc( | ||
| 140 | options=pd.DataFrame() | 140 | options=pd.DataFrame() |
| 141 | ) | 141 | ) |
| 142 | 142 | ||
| 143 | -print(f"title_zh: {doc.get('title_zh')}") | ||
| 144 | -print(f"title_en: {doc.get('title_en')}") # 应该包含翻译结果 | 143 | +print(f"title.zh: {(doc.get('title') or {}).get('zh')}") |
| 144 | +print(f"title.en: {(doc.get('title') or {}).get('en')}") # 应该包含翻译结果 | ||
| 145 | ``` | 145 | ``` |
| 146 | 146 | ||
| 147 | ### 5. 测试缓存功能 | 147 | ### 5. 测试缓存功能 |
indexer/document_transformer.py
| @@ -184,17 +184,40 @@ class SPUDocumentTransformer: | @@ -184,17 +184,40 @@ class SPUDocumentTransformer: | ||
| 184 | translate_to_en = bool(self.tenant_config.get('translate_to_en')) | 184 | translate_to_en = bool(self.tenant_config.get('translate_to_en')) |
| 185 | translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) | 185 | translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) |
| 186 | 186 | ||
| 187 | + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None): | ||
| 188 | + """ | ||
| 189 | + Write multilingual text field as an object, e.g.: | ||
| 190 | + doc[field_name] = {"zh": "...", "en": "..."} | ||
| 191 | + Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh. | ||
| 192 | + """ | ||
| 193 | + if not source_text or not str(source_text).strip(): | ||
| 194 | + return | ||
| 195 | + | ||
| 196 | + obj: Dict[str, str] = {} | ||
| 197 | + src = str(source_text) | ||
| 198 | + obj[primary_lang] = src | ||
| 199 | + | ||
| 200 | + tr = translations or {} | ||
| 201 | + if translate_to_en and primary_lang != "en": | ||
| 202 | + en_text = tr.get("en") | ||
| 203 | + if en_text and str(en_text).strip(): | ||
| 204 | + obj["en"] = str(en_text) | ||
| 205 | + if translate_to_zh and primary_lang != "zh": | ||
| 206 | + zh_text = tr.get("zh") | ||
| 207 | + if zh_text and str(zh_text).strip(): | ||
| 208 | + obj["zh"] = str(zh_text) | ||
| 209 | + | ||
| 210 | + if obj: | ||
| 211 | + doc[field_name] = obj | ||
| 212 | + | ||
| 187 | # Title | 213 | # Title |
| 188 | if pd.notna(spu_row.get('title')): | 214 | if pd.notna(spu_row.get('title')): |
| 189 | title_text = str(spu_row['title']) | 215 | title_text = str(spu_row['title']) |
| 190 | - | ||
| 191 | - # 使用translator的translate_for_indexing方法,自动处理多语言翻译 | 216 | + |
| 217 | + translations: Dict[str, str] = {} | ||
| 192 | if self.translator: | 218 | if self.translator: |
| 193 | - # 根据目标语言选择对应的提示词 | ||
| 194 | prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') | 219 | prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') |
| 195 | prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') | 220 | prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') |
| 196 | - | ||
| 197 | - # 调用translate_for_indexing,自动处理翻译逻辑 | ||
| 198 | translations = self.translator.translate_for_indexing( | 221 | translations = self.translator.translate_for_indexing( |
| 199 | title_text, | 222 | title_text, |
| 200 | shop_language=primary_lang, | 223 | shop_language=primary_lang, |
| @@ -202,26 +225,14 @@ class SPUDocumentTransformer: | @@ -202,26 +225,14 @@ class SPUDocumentTransformer: | ||
| 202 | prompt=prompt_zh if primary_lang == 'zh' else prompt_en, | 225 | prompt=prompt_zh if primary_lang == 'zh' else prompt_en, |
| 203 | translate_to_en=translate_to_en, | 226 | translate_to_en=translate_to_en, |
| 204 | translate_to_zh=translate_to_zh, | 227 | translate_to_zh=translate_to_zh, |
| 205 | - ) | ||
| 206 | - | ||
| 207 | - # 填充翻译结果 | ||
| 208 | - doc['title_zh'] = translations.get('zh') or (title_text if primary_lang == 'zh' else None) | ||
| 209 | - doc['title_en'] = translations.get('en') or (title_text if primary_lang == 'en' else None) | ||
| 210 | - else: | ||
| 211 | - # 无翻译器,只填充主语言字段 | ||
| 212 | - if primary_lang == 'zh': | ||
| 213 | - doc['title_zh'] = title_text | ||
| 214 | - doc['title_en'] = None | ||
| 215 | - else: | ||
| 216 | - doc['title_zh'] = None | ||
| 217 | - doc['title_en'] = title_text | ||
| 218 | - else: | ||
| 219 | - doc['title_zh'] = None | ||
| 220 | - doc['title_en'] = None | 228 | + ) or {} |
| 229 | + | ||
| 230 | + _set_lang_obj("title", title_text, translations) | ||
| 221 | 231 | ||
| 222 | # Brief | 232 | # Brief |
| 223 | if pd.notna(spu_row.get('brief')): | 233 | if pd.notna(spu_row.get('brief')): |
| 224 | brief_text = str(spu_row['brief']) | 234 | brief_text = str(spu_row['brief']) |
| 235 | + translations: Dict[str, str] = {} | ||
| 225 | if self.translator: | 236 | if self.translator: |
| 226 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') | 237 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 227 | translations = self.translator.translate_for_indexing( | 238 | translations = self.translator.translate_for_indexing( |
| @@ -231,23 +242,13 @@ class SPUDocumentTransformer: | @@ -231,23 +242,13 @@ class SPUDocumentTransformer: | ||
| 231 | prompt=prompt, | 242 | prompt=prompt, |
| 232 | translate_to_en=translate_to_en, | 243 | translate_to_en=translate_to_en, |
| 233 | translate_to_zh=translate_to_zh, | 244 | translate_to_zh=translate_to_zh, |
| 234 | - ) | ||
| 235 | - doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None) | ||
| 236 | - doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None) | ||
| 237 | - else: | ||
| 238 | - if primary_lang == 'zh': | ||
| 239 | - doc['brief_zh'] = brief_text | ||
| 240 | - doc['brief_en'] = None | ||
| 241 | - else: | ||
| 242 | - doc['brief_zh'] = None | ||
| 243 | - doc['brief_en'] = brief_text | ||
| 244 | - else: | ||
| 245 | - doc['brief_zh'] = None | ||
| 246 | - doc['brief_en'] = None | 245 | + ) or {} |
| 246 | + _set_lang_obj("brief", brief_text, translations) | ||
| 247 | 247 | ||
| 248 | # Description | 248 | # Description |
| 249 | if pd.notna(spu_row.get('description')): | 249 | if pd.notna(spu_row.get('description')): |
| 250 | desc_text = str(spu_row['description']) | 250 | desc_text = str(spu_row['description']) |
| 251 | + translations: Dict[str, str] = {} | ||
| 251 | if self.translator: | 252 | if self.translator: |
| 252 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') | 253 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 253 | translations = self.translator.translate_for_indexing( | 254 | translations = self.translator.translate_for_indexing( |
| @@ -257,23 +258,13 @@ class SPUDocumentTransformer: | @@ -257,23 +258,13 @@ class SPUDocumentTransformer: | ||
| 257 | prompt=prompt, | 258 | prompt=prompt, |
| 258 | translate_to_en=translate_to_en, | 259 | translate_to_en=translate_to_en, |
| 259 | translate_to_zh=translate_to_zh, | 260 | translate_to_zh=translate_to_zh, |
| 260 | - ) | ||
| 261 | - doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None) | ||
| 262 | - doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None) | ||
| 263 | - else: | ||
| 264 | - if primary_lang == 'zh': | ||
| 265 | - doc['description_zh'] = desc_text | ||
| 266 | - doc['description_en'] = None | ||
| 267 | - else: | ||
| 268 | - doc['description_zh'] = None | ||
| 269 | - doc['description_en'] = desc_text | ||
| 270 | - else: | ||
| 271 | - doc['description_zh'] = None | ||
| 272 | - doc['description_en'] = None | 261 | + ) or {} |
| 262 | + _set_lang_obj("description", desc_text, translations) | ||
| 273 | 263 | ||
| 274 | # Vendor | 264 | # Vendor |
| 275 | if pd.notna(spu_row.get('vendor')): | 265 | if pd.notna(spu_row.get('vendor')): |
| 276 | vendor_text = str(spu_row['vendor']) | 266 | vendor_text = str(spu_row['vendor']) |
| 267 | + translations: Dict[str, str] = {} | ||
| 277 | if self.translator: | 268 | if self.translator: |
| 278 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') | 269 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 279 | translations = self.translator.translate_for_indexing( | 270 | translations = self.translator.translate_for_indexing( |
| @@ -283,19 +274,8 @@ class SPUDocumentTransformer: | @@ -283,19 +274,8 @@ class SPUDocumentTransformer: | ||
| 283 | prompt=prompt, | 274 | prompt=prompt, |
| 284 | translate_to_en=translate_to_en, | 275 | translate_to_en=translate_to_en, |
| 285 | translate_to_zh=translate_to_zh, | 276 | translate_to_zh=translate_to_zh, |
| 286 | - ) | ||
| 287 | - doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None) | ||
| 288 | - doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None) | ||
| 289 | - else: | ||
| 290 | - if primary_lang == 'zh': | ||
| 291 | - doc['vendor_zh'] = vendor_text | ||
| 292 | - doc['vendor_en'] = None | ||
| 293 | - else: | ||
| 294 | - doc['vendor_zh'] = None | ||
| 295 | - doc['vendor_en'] = vendor_text | ||
| 296 | - else: | ||
| 297 | - doc['vendor_zh'] = None | ||
| 298 | - doc['vendor_en'] = None | 277 | + ) or {} |
| 278 | + _set_lang_obj("vendor", vendor_text, translations) | ||
| 299 | 279 | ||
| 300 | def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series): | 280 | def _fill_category_fields(self, doc: Dict[str, Any], spu_row: pd.Series): |
| 301 | """填充类目相关字段。""" | 281 | """填充类目相关字段。""" |
| @@ -303,6 +283,8 @@ class SPUDocumentTransformer: | @@ -303,6 +283,8 @@ class SPUDocumentTransformer: | ||
| 303 | # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) | 283 | # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) |
| 304 | # - 仅记录错误日志,不阻塞索引流程 | 284 | # - 仅记录错误日志,不阻塞索引流程 |
| 305 | 285 | ||
| 286 | + primary_lang = self.tenant_config.get('primary_language', 'zh') | ||
| 287 | + | ||
| 306 | if pd.notna(spu_row.get('category_path')): | 288 | if pd.notna(spu_row.get('category_path')): |
| 307 | category_path = str(spu_row['category_path']) | 289 | category_path = str(spu_row['category_path']) |
| 308 | 290 | ||
| @@ -329,8 +311,7 @@ class SPUDocumentTransformer: | @@ -329,8 +311,7 @@ class SPUDocumentTransformer: | ||
| 329 | # 构建类目路径字符串(用于搜索) | 311 | # 构建类目路径字符串(用于搜索) |
| 330 | if category_names: | 312 | if category_names: |
| 331 | category_path_str = '/'.join(category_names) | 313 | category_path_str = '/'.join(category_names) |
| 332 | - doc['category_path_zh'] = category_path_str | ||
| 333 | - doc['category_path_en'] = None # 暂时设为空 | 314 | + doc['category_path'] = {primary_lang: category_path_str} |
| 334 | 315 | ||
| 335 | # 填充分层类目名称 | 316 | # 填充分层类目名称 |
| 336 | if len(category_names) > 0: | 317 | if len(category_names) > 0: |
| @@ -342,8 +323,7 @@ class SPUDocumentTransformer: | @@ -342,8 +323,7 @@ class SPUDocumentTransformer: | ||
| 342 | elif pd.notna(spu_row.get('category')): | 323 | elif pd.notna(spu_row.get('category')): |
| 343 | # 如果category_path为空,使用category字段作为category1_name的备选 | 324 | # 如果category_path为空,使用category字段作为category1_name的备选 |
| 344 | category = str(spu_row['category']) | 325 | category = str(spu_row['category']) |
| 345 | - doc['category_name_zh'] = category | ||
| 346 | - doc['category_name_en'] = None | 326 | + doc['category_name_text'] = {primary_lang: category} |
| 347 | doc['category_name'] = category | 327 | doc['category_name'] = category |
| 348 | 328 | ||
| 349 | # 尝试从category字段解析多级分类 | 329 | # 尝试从category字段解析多级分类 |
| @@ -362,10 +342,8 @@ class SPUDocumentTransformer: | @@ -362,10 +342,8 @@ class SPUDocumentTransformer: | ||
| 362 | if pd.notna(spu_row.get('category')): | 342 | if pd.notna(spu_row.get('category')): |
| 363 | # 确保category相关字段都被设置(如果前面没有设置) | 343 | # 确保category相关字段都被设置(如果前面没有设置) |
| 364 | category_name = str(spu_row['category']) | 344 | category_name = str(spu_row['category']) |
| 365 | - if 'category_name_zh' not in doc: | ||
| 366 | - doc['category_name_zh'] = category_name | ||
| 367 | - if 'category_name_en' not in doc: | ||
| 368 | - doc['category_name_en'] = None | 345 | + if 'category_name_text' not in doc: |
| 346 | + doc['category_name_text'] = {primary_lang: category_name} | ||
| 369 | if 'category_name' not in doc: | 347 | if 'category_name' not in doc: |
| 370 | doc['category_name'] = category_name | 348 | doc['category_name'] = category_name |
| 371 | 349 | ||
| @@ -587,13 +565,22 @@ class SPUDocumentTransformer: | @@ -587,13 +565,22 @@ class SPUDocumentTransformer: | ||
| 587 | """ | 565 | """ |
| 588 | 填充标题向量化字段。 | 566 | 填充标题向量化字段。 |
| 589 | 567 | ||
| 590 | - 使用英文标题(title_en)生成embedding。如果title_en不存在,则使用title_zh。 | 568 | + 使用英文标题(title.en)生成embedding。如果title.en不存在,则使用title.zh;如果两者都不存在,则使用任一可用语言的标题。 |
| 591 | 569 | ||
| 592 | Args: | 570 | Args: |
| 593 | doc: ES文档字典 | 571 | doc: ES文档字典 |
| 594 | """ | 572 | """ |
| 595 | - # 优先使用英文标题,如果没有则使用中文标题 | ||
| 596 | - title_text = doc.get('title_en') or doc.get('title_zh') | 573 | + # 优先使用英文标题,如果没有则使用中文标题;再没有则取任意可用语言 |
| 574 | + title_obj = doc.get("title") or {} | ||
| 575 | + if isinstance(title_obj, dict): | ||
| 576 | + title_text = title_obj.get("en") or title_obj.get("zh") | ||
| 577 | + if not title_text: | ||
| 578 | + for v in title_obj.values(): | ||
| 579 | + if v and str(v).strip(): | ||
| 580 | + title_text = str(v) | ||
| 581 | + break | ||
| 582 | + else: | ||
| 583 | + title_text = None | ||
| 597 | 584 | ||
| 598 | if not title_text or not title_text.strip(): | 585 | if not title_text or not title_text.strip(): |
| 599 | logger.debug(f"No title text available for embedding, SPU: {doc.get('spu_id')}") | 586 | logger.debug(f"No title text available for embedding, SPU: {doc.get('spu_id')}") |
indexer/incremental_service.py
| @@ -130,7 +130,15 @@ class IncrementalIndexerService: | @@ -130,7 +130,15 @@ class IncrementalIndexerService: | ||
| 130 | 130 | ||
| 131 | # 单条场景下也可补齐 embedding(仍走缓存) | 131 | # 单条场景下也可补齐 embedding(仍走缓存) |
| 132 | if enable_embedding and encoder: | 132 | if enable_embedding and encoder: |
| 133 | - title_text = doc.get("title_en") or doc.get("title_zh") | 133 | + title_obj = doc.get("title") or {} |
| 134 | + title_text = None | ||
| 135 | + if isinstance(title_obj, dict): | ||
| 136 | + title_text = title_obj.get("en") or title_obj.get("zh") | ||
| 137 | + if not title_text: | ||
| 138 | + for v in title_obj.values(): | ||
| 139 | + if v and str(v).strip(): | ||
| 140 | + title_text = str(v) | ||
| 141 | + break | ||
| 134 | if title_text and str(title_text).strip(): | 142 | if title_text and str(title_text).strip(): |
| 135 | try: | 143 | try: |
| 136 | embeddings = encoder.encode(title_text) | 144 | embeddings = encoder.encode(title_text) |
| @@ -560,7 +568,15 @@ class IncrementalIndexerService: | @@ -560,7 +568,15 @@ class IncrementalIndexerService: | ||
| 560 | title_texts: List[str] = [] | 568 | title_texts: List[str] = [] |
| 561 | title_doc_indices: List[int] = [] | 569 | title_doc_indices: List[int] = [] |
| 562 | for i, (_, doc) in enumerate(documents): | 570 | for i, (_, doc) in enumerate(documents): |
| 563 | - title_text = doc.get("title_en") or doc.get("title_zh") | 571 | + title_obj = doc.get("title") or {} |
| 572 | + title_text = None | ||
| 573 | + if isinstance(title_obj, dict): | ||
| 574 | + title_text = title_obj.get("en") or title_obj.get("zh") | ||
| 575 | + if not title_text: | ||
| 576 | + for v in title_obj.values(): | ||
| 577 | + if v and str(v).strip(): | ||
| 578 | + title_text = str(v) | ||
| 579 | + break | ||
| 564 | if title_text and str(title_text).strip(): | 580 | if title_text and str(title_text).strip(): |
| 565 | title_texts.append(str(title_text)) | 581 | title_texts.append(str(title_text)) |
| 566 | title_doc_indices.append(i) | 582 | title_doc_indices.append(i) |
indexer/test_indexing.py
| @@ -114,16 +114,17 @@ def test_full_indexing(tenant_id: str = "162"): | @@ -114,16 +114,17 @@ def test_full_indexing(tenant_id: str = "162"): | ||
| 114 | print(f"\n文档 {i+1}:") | 114 | print(f"\n文档 {i+1}:") |
| 115 | print(f" SPU ID: {doc.get('spu_id')}") | 115 | print(f" SPU ID: {doc.get('spu_id')}") |
| 116 | print(f" Tenant ID: {doc.get('tenant_id')}") | 116 | print(f" Tenant ID: {doc.get('tenant_id')}") |
| 117 | - print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}") | ||
| 118 | - print(f" 标题 (英文): {doc.get('title_en', 'N/A')}") | 117 | + title_obj = doc.get("title") or {} |
| 118 | + print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") | ||
| 119 | + print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") | ||
| 119 | 120 | ||
| 120 | # 检查租户162的翻译状态 | 121 | # 检查租户162的翻译状态 |
| 121 | if tenant_id == "162": | 122 | if tenant_id == "162": |
| 122 | - # 租户162翻译应该关闭,title_en应该为None | ||
| 123 | - if doc.get('title_en') is None: | ||
| 124 | - print(f" ✓ 翻译已关闭(title_en为None)") | 123 | + # 租户162翻译应该关闭:只写入主语言,不应出现 title.en |
| 124 | + if isinstance(title_obj, dict) and title_obj.get("en") is None: | ||
| 125 | + print(f" ✓ 翻译已关闭(title.en为空)") | ||
| 125 | else: | 126 | else: |
| 126 | - print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}") | 127 | + print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") |
| 127 | 128 | ||
| 128 | return True | 129 | return True |
| 129 | 130 | ||
| @@ -192,17 +193,18 @@ def test_incremental_indexing(tenant_id: str = "162"): | @@ -192,17 +193,18 @@ def test_incremental_indexing(tenant_id: str = "162"): | ||
| 192 | print(f"✓ SPU文档获取成功") | 193 | print(f"✓ SPU文档获取成功") |
| 193 | print(f" SPU ID: {doc.get('spu_id')}") | 194 | print(f" SPU ID: {doc.get('spu_id')}") |
| 194 | print(f" Tenant ID: {doc.get('tenant_id')}") | 195 | print(f" Tenant ID: {doc.get('tenant_id')}") |
| 195 | - print(f" 标题 (中文): {doc.get('title_zh', 'N/A')}") | ||
| 196 | - print(f" 标题 (英文): {doc.get('title_en', 'N/A')}") | 196 | + title_obj = doc.get("title") or {} |
| 197 | + print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") | ||
| 198 | + print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") | ||
| 197 | print(f" SKU数量: {len(doc.get('skus', []))}") | 199 | print(f" SKU数量: {len(doc.get('skus', []))}") |
| 198 | print(f" 规格数量: {len(doc.get('specifications', []))}") | 200 | print(f" 规格数量: {len(doc.get('specifications', []))}") |
| 199 | 201 | ||
| 200 | # 检查租户162的翻译状态 | 202 | # 检查租户162的翻译状态 |
| 201 | if tenant_id == "162": | 203 | if tenant_id == "162": |
| 202 | - if doc.get('title_en') is None: | ||
| 203 | - print(f" ✓ 翻译已关闭(title_en为None)") | 204 | + if isinstance(title_obj, dict) and title_obj.get("en") is None: |
| 205 | + print(f" ✓ 翻译已关闭(title.en为空)") | ||
| 204 | else: | 206 | else: |
| 205 | - print(f" ⚠ 警告:翻译应该关闭,但title_en有值: {doc.get('title_en')}") | 207 | + print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") |
| 206 | 208 | ||
| 207 | return True | 209 | return True |
| 208 | 210 | ||
| @@ -291,12 +293,13 @@ def test_document_transformer(): | @@ -291,12 +293,13 @@ def test_document_transformer(): | ||
| 291 | 293 | ||
| 292 | if doc: | 294 | if doc: |
| 293 | print(f"✓ 文档转换成功") | 295 | print(f"✓ 文档转换成功") |
| 294 | - print(f" title_zh: {doc.get('title_zh')}") | ||
| 295 | - print(f" title_en: {doc.get('title_en')}") | 296 | + title_obj = doc.get("title") or {} |
| 297 | + print(f" title.zh: {title_obj.get('zh') if isinstance(title_obj, dict) else None}") | ||
| 298 | + print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}") | ||
| 296 | print(f" SKU数量: {len(doc.get('skus', []))}") | 299 | print(f" SKU数量: {len(doc.get('skus', []))}") |
| 297 | 300 | ||
| 298 | # 验证租户162翻译关闭 | 301 | # 验证租户162翻译关闭 |
| 299 | - if doc.get('title_en') is None: | 302 | + if isinstance(title_obj, dict) and title_obj.get("en") is None: |
| 300 | print(f" ✓ 翻译已关闭(符合租户162配置)") | 303 | print(f" ✓ 翻译已关闭(符合租户162配置)") |
| 301 | else: | 304 | else: |
| 302 | print(f" ⚠ 警告:翻译应该关闭") | 305 | print(f" ⚠ 警告:翻译应该关闭") |
mappings/search_products.json
| @@ -4,22 +4,12 @@ | @@ -4,22 +4,12 @@ | ||
| 4 | "number_of_replicas": 0, | 4 | "number_of_replicas": 0, |
| 5 | "refresh_interval": "30s", | 5 | "refresh_interval": "30s", |
| 6 | "analysis": { | 6 | "analysis": { |
| 7 | - "analyzer": { | ||
| 8 | - "hanlp_index": { | ||
| 9 | - "type": "custom", | ||
| 10 | - "tokenizer": "standard", | ||
| 11 | - "filter": ["lowercase", "asciifolding"] | ||
| 12 | - }, | ||
| 13 | - "hanlp_standard": { | ||
| 14 | - "type": "custom", | ||
| 15 | - "tokenizer": "standard", | ||
| 16 | - "filter": ["lowercase", "asciifolding"] | ||
| 17 | - } | ||
| 18 | - }, | ||
| 19 | "normalizer": { | 7 | "normalizer": { |
| 20 | "lowercase": { | 8 | "lowercase": { |
| 21 | "type": "custom", | 9 | "type": "custom", |
| 22 | - "filter": ["lowercase"] | 10 | + "filter": [ |
| 11 | + "lowercase" | ||
| 12 | + ] | ||
| 23 | } | 13 | } |
| 24 | } | 14 | } |
| 25 | }, | 15 | }, |
| @@ -45,75 +35,963 @@ | @@ -45,75 +35,963 @@ | ||
| 45 | "update_time": { | 35 | "update_time": { |
| 46 | "type": "date" | 36 | "type": "date" |
| 47 | }, | 37 | }, |
| 48 | - "title_zh": { | ||
| 49 | - "type": "text", | ||
| 50 | - "analyzer": "hanlp_index", | ||
| 51 | - "search_analyzer": "hanlp_standard" | ||
| 52 | - }, | ||
| 53 | - "qanchors_zh": { | ||
| 54 | - "type": "text", | ||
| 55 | - "analyzer": "hanlp_index", | ||
| 56 | - "search_analyzer": "hanlp_standard" | ||
| 57 | - }, | ||
| 58 | - "keywords_zh": { | ||
| 59 | - "type": "text", | ||
| 60 | - "analyzer": "hanlp_index", | ||
| 61 | - "search_analyzer": "hanlp_standard" | ||
| 62 | - }, | ||
| 63 | - "brief_zh": { | ||
| 64 | - "type": "text", | ||
| 65 | - "analyzer": "hanlp_index", | ||
| 66 | - "search_analyzer": "hanlp_standard" | ||
| 67 | - }, | ||
| 68 | - "description_zh": { | ||
| 69 | - "type": "text", | ||
| 70 | - "analyzer": "hanlp_index", | ||
| 71 | - "search_analyzer": "hanlp_standard" | ||
| 72 | - }, | ||
| 73 | - "vendor_zh": { | ||
| 74 | - "type": "text", | ||
| 75 | - "analyzer": "hanlp_index", | ||
| 76 | - "search_analyzer": "hanlp_standard", | ||
| 77 | - "fields": { | ||
| 78 | - "keyword": { | ||
| 79 | - "type": "keyword", | ||
| 80 | - "normalizer": "lowercase" | 38 | + "title": { |
| 39 | + "type": "object", | ||
| 40 | + "properties": { | ||
| 41 | + "ar": { | ||
| 42 | + "type": "text", | ||
| 43 | + "analyzer": "arabic" | ||
| 44 | + }, | ||
| 45 | + "hy": { | ||
| 46 | + "type": "text", | ||
| 47 | + "analyzer": "armenian" | ||
| 48 | + }, | ||
| 49 | + "eu": { | ||
| 50 | + "type": "text", | ||
| 51 | + "analyzer": "basque" | ||
| 52 | + }, | ||
| 53 | + "pt_br": { | ||
| 54 | + "type": "text", | ||
| 55 | + "analyzer": "brazilian" | ||
| 56 | + }, | ||
| 57 | + "bg": { | ||
| 58 | + "type": "text", | ||
| 59 | + "analyzer": "bulgarian" | ||
| 60 | + }, | ||
| 61 | + "ca": { | ||
| 62 | + "type": "text", | ||
| 63 | + "analyzer": "catalan" | ||
| 64 | + }, | ||
| 65 | + "zh": { | ||
| 66 | + "type": "text", | ||
| 67 | + "analyzer": "icu_analyzer" | ||
| 68 | + }, | ||
| 69 | + "chinese": { | ||
| 70 | + "type": "text", | ||
| 71 | + "analyzer": "chinese" | ||
| 72 | + }, | ||
| 73 | + "cjk": { | ||
| 74 | + "type": "text", | ||
| 75 | + "analyzer": "cjk" | ||
| 76 | + }, | ||
| 77 | + "cs": { | ||
| 78 | + "type": "text", | ||
| 79 | + "analyzer": "czech" | ||
| 80 | + }, | ||
| 81 | + "da": { | ||
| 82 | + "type": "text", | ||
| 83 | + "analyzer": "danish" | ||
| 84 | + }, | ||
| 85 | + "nl": { | ||
| 86 | + "type": "text", | ||
| 87 | + "analyzer": "dutch" | ||
| 88 | + }, | ||
| 89 | + "en": { | ||
| 90 | + "type": "text", | ||
| 91 | + "analyzer": "english" | ||
| 92 | + }, | ||
| 93 | + "fi": { | ||
| 94 | + "type": "text", | ||
| 95 | + "analyzer": "finnish" | ||
| 96 | + }, | ||
| 97 | + "fr": { | ||
| 98 | + "type": "text", | ||
| 99 | + "analyzer": "french" | ||
| 100 | + }, | ||
| 101 | + "gl": { | ||
| 102 | + "type": "text", | ||
| 103 | + "analyzer": "galician" | ||
| 104 | + }, | ||
| 105 | + "de": { | ||
| 106 | + "type": "text", | ||
| 107 | + "analyzer": "german" | ||
| 108 | + }, | ||
| 109 | + "el": { | ||
| 110 | + "type": "text", | ||
| 111 | + "analyzer": "greek" | ||
| 112 | + }, | ||
| 113 | + "hi": { | ||
| 114 | + "type": "text", | ||
| 115 | + "analyzer": "hindi" | ||
| 116 | + }, | ||
| 117 | + "hu": { | ||
| 118 | + "type": "text", | ||
| 119 | + "analyzer": "hungarian" | ||
| 120 | + }, | ||
| 121 | + "id": { | ||
| 122 | + "type": "text", | ||
| 123 | + "analyzer": "indonesian" | ||
| 124 | + }, | ||
| 125 | + "it": { | ||
| 126 | + "type": "text", | ||
| 127 | + "analyzer": "italian" | ||
| 128 | + }, | ||
| 129 | + "no": { | ||
| 130 | + "type": "text", | ||
| 131 | + "analyzer": "norwegian" | ||
| 132 | + }, | ||
| 133 | + "fa": { | ||
| 134 | + "type": "text", | ||
| 135 | + "analyzer": "persian" | ||
| 136 | + }, | ||
| 137 | + "pt": { | ||
| 138 | + "type": "text", | ||
| 139 | + "analyzer": "portuguese" | ||
| 140 | + }, | ||
| 141 | + "ro": { | ||
| 142 | + "type": "text", | ||
| 143 | + "analyzer": "romanian" | ||
| 144 | + }, | ||
| 145 | + "ru": { | ||
| 146 | + "type": "text", | ||
| 147 | + "analyzer": "russian" | ||
| 148 | + }, | ||
| 149 | + "es": { | ||
| 150 | + "type": "text", | ||
| 151 | + "analyzer": "spanish" | ||
| 152 | + }, | ||
| 153 | + "sv": { | ||
| 154 | + "type": "text", | ||
| 155 | + "analyzer": "swedish" | ||
| 156 | + }, | ||
| 157 | + "tr": { | ||
| 158 | + "type": "text", | ||
| 159 | + "analyzer": "turkish" | ||
| 160 | + }, | ||
| 161 | + "th": { | ||
| 162 | + "type": "text", | ||
| 163 | + "analyzer": "thai" | ||
| 81 | } | 164 | } |
| 82 | } | 165 | } |
| 83 | }, | 166 | }, |
| 84 | - "title_en": { | ||
| 85 | - "type": "text", | ||
| 86 | - "analyzer": "english", | ||
| 87 | - "search_analyzer": "english" | ||
| 88 | - }, | ||
| 89 | - "qanchors_en": { | ||
| 90 | - "type": "text", | ||
| 91 | - "analyzer": "english", | ||
| 92 | - "search_analyzer": "english" | ||
| 93 | - }, | ||
| 94 | - "keywords_en": { | ||
| 95 | - "type": "text", | ||
| 96 | - "analyzer": "english", | ||
| 97 | - "search_analyzer": "english" | ||
| 98 | - }, | ||
| 99 | - "brief_en": { | ||
| 100 | - "type": "text", | ||
| 101 | - "analyzer": "english", | ||
| 102 | - "search_analyzer": "english" | ||
| 103 | - }, | ||
| 104 | - "description_en": { | ||
| 105 | - "type": "text", | ||
| 106 | - "analyzer": "english", | ||
| 107 | - "search_analyzer": "english" | ||
| 108 | - }, | ||
| 109 | - "vendor_en": { | ||
| 110 | - "type": "text", | ||
| 111 | - "analyzer": "english", | ||
| 112 | - "search_analyzer": "english", | ||
| 113 | - "fields": { | ||
| 114 | - "keyword": { | ||
| 115 | - "type": "keyword", | ||
| 116 | - "normalizer": "lowercase" | 167 | + "qanchors": { |
| 168 | + "type": "object", | ||
| 169 | + "properties": { | ||
| 170 | + "ar": { | ||
| 171 | + "type": "text", | ||
| 172 | + "analyzer": "arabic" | ||
| 173 | + }, | ||
| 174 | + "hy": { | ||
| 175 | + "type": "text", | ||
| 176 | + "analyzer": "armenian" | ||
| 177 | + }, | ||
| 178 | + "eu": { | ||
| 179 | + "type": "text", | ||
| 180 | + "analyzer": "basque" | ||
| 181 | + }, | ||
| 182 | + "pt_br": { | ||
| 183 | + "type": "text", | ||
| 184 | + "analyzer": "brazilian" | ||
| 185 | + }, | ||
| 186 | + "bg": { | ||
| 187 | + "type": "text", | ||
| 188 | + "analyzer": "bulgarian" | ||
| 189 | + }, | ||
| 190 | + "ca": { | ||
| 191 | + "type": "text", | ||
| 192 | + "analyzer": "catalan" | ||
| 193 | + }, | ||
| 194 | + "zh": { | ||
| 195 | + "type": "text", | ||
| 196 | + "analyzer": "icu_analyzer" | ||
| 197 | + }, | ||
| 198 | + "chinese": { | ||
| 199 | + "type": "text", | ||
| 200 | + "analyzer": "chinese" | ||
| 201 | + }, | ||
| 202 | + "cjk": { | ||
| 203 | + "type": "text", | ||
| 204 | + "analyzer": "cjk" | ||
| 205 | + }, | ||
| 206 | + "cs": { | ||
| 207 | + "type": "text", | ||
| 208 | + "analyzer": "czech" | ||
| 209 | + }, | ||
| 210 | + "da": { | ||
| 211 | + "type": "text", | ||
| 212 | + "analyzer": "danish" | ||
| 213 | + }, | ||
| 214 | + "nl": { | ||
| 215 | + "type": "text", | ||
| 216 | + "analyzer": "dutch" | ||
| 217 | + }, | ||
| 218 | + "en": { | ||
| 219 | + "type": "text", | ||
| 220 | + "analyzer": "english" | ||
| 221 | + }, | ||
| 222 | + "fi": { | ||
| 223 | + "type": "text", | ||
| 224 | + "analyzer": "finnish" | ||
| 225 | + }, | ||
| 226 | + "fr": { | ||
| 227 | + "type": "text", | ||
| 228 | + "analyzer": "french" | ||
| 229 | + }, | ||
| 230 | + "gl": { | ||
| 231 | + "type": "text", | ||
| 232 | + "analyzer": "galician" | ||
| 233 | + }, | ||
| 234 | + "de": { | ||
| 235 | + "type": "text", | ||
| 236 | + "analyzer": "german" | ||
| 237 | + }, | ||
| 238 | + "el": { | ||
| 239 | + "type": "text", | ||
| 240 | + "analyzer": "greek" | ||
| 241 | + }, | ||
| 242 | + "hi": { | ||
| 243 | + "type": "text", | ||
| 244 | + "analyzer": "hindi" | ||
| 245 | + }, | ||
| 246 | + "hu": { | ||
| 247 | + "type": "text", | ||
| 248 | + "analyzer": "hungarian" | ||
| 249 | + }, | ||
| 250 | + "id": { | ||
| 251 | + "type": "text", | ||
| 252 | + "analyzer": "indonesian" | ||
| 253 | + }, | ||
| 254 | + "it": { | ||
| 255 | + "type": "text", | ||
| 256 | + "analyzer": "italian" | ||
| 257 | + }, | ||
| 258 | + "no": { | ||
| 259 | + "type": "text", | ||
| 260 | + "analyzer": "norwegian" | ||
| 261 | + }, | ||
| 262 | + "fa": { | ||
| 263 | + "type": "text", | ||
| 264 | + "analyzer": "persian" | ||
| 265 | + }, | ||
| 266 | + "pt": { | ||
| 267 | + "type": "text", | ||
| 268 | + "analyzer": "portuguese" | ||
| 269 | + }, | ||
| 270 | + "ro": { | ||
| 271 | + "type": "text", | ||
| 272 | + "analyzer": "romanian" | ||
| 273 | + }, | ||
| 274 | + "ru": { | ||
| 275 | + "type": "text", | ||
| 276 | + "analyzer": "russian" | ||
| 277 | + }, | ||
| 278 | + "es": { | ||
| 279 | + "type": "text", | ||
| 280 | + "analyzer": "spanish" | ||
| 281 | + }, | ||
| 282 | + "sv": { | ||
| 283 | + "type": "text", | ||
| 284 | + "analyzer": "swedish" | ||
| 285 | + }, | ||
| 286 | + "tr": { | ||
| 287 | + "type": "text", | ||
| 288 | + "analyzer": "turkish" | ||
| 289 | + }, | ||
| 290 | + "th": { | ||
| 291 | + "type": "text", | ||
| 292 | + "analyzer": "thai" | ||
| 293 | + } | ||
| 294 | + } | ||
| 295 | + }, | ||
| 296 | + "keywords": { | ||
| 297 | + "type": "object", | ||
| 298 | + "properties": { | ||
| 299 | + "ar": { | ||
| 300 | + "type": "text", | ||
| 301 | + "analyzer": "arabic" | ||
| 302 | + }, | ||
| 303 | + "hy": { | ||
| 304 | + "type": "text", | ||
| 305 | + "analyzer": "armenian" | ||
| 306 | + }, | ||
| 307 | + "eu": { | ||
| 308 | + "type": "text", | ||
| 309 | + "analyzer": "basque" | ||
| 310 | + }, | ||
| 311 | + "pt_br": { | ||
| 312 | + "type": "text", | ||
| 313 | + "analyzer": "brazilian" | ||
| 314 | + }, | ||
| 315 | + "bg": { | ||
| 316 | + "type": "text", | ||
| 317 | + "analyzer": "bulgarian" | ||
| 318 | + }, | ||
| 319 | + "ca": { | ||
| 320 | + "type": "text", | ||
| 321 | + "analyzer": "catalan" | ||
| 322 | + }, | ||
| 323 | + "zh": { | ||
| 324 | + "type": "text", | ||
| 325 | + "analyzer": "icu_analyzer" | ||
| 326 | + }, | ||
| 327 | + "chinese": { | ||
| 328 | + "type": "text", | ||
| 329 | + "analyzer": "chinese" | ||
| 330 | + }, | ||
| 331 | + "cjk": { | ||
| 332 | + "type": "text", | ||
| 333 | + "analyzer": "cjk" | ||
| 334 | + }, | ||
| 335 | + "cs": { | ||
| 336 | + "type": "text", | ||
| 337 | + "analyzer": "czech" | ||
| 338 | + }, | ||
| 339 | + "da": { | ||
| 340 | + "type": "text", | ||
| 341 | + "analyzer": "danish" | ||
| 342 | + }, | ||
| 343 | + "nl": { | ||
| 344 | + "type": "text", | ||
| 345 | + "analyzer": "dutch" | ||
| 346 | + }, | ||
| 347 | + "en": { | ||
| 348 | + "type": "text", | ||
| 349 | + "analyzer": "english" | ||
| 350 | + }, | ||
| 351 | + "fi": { | ||
| 352 | + "type": "text", | ||
| 353 | + "analyzer": "finnish" | ||
| 354 | + }, | ||
| 355 | + "fr": { | ||
| 356 | + "type": "text", | ||
| 357 | + "analyzer": "french" | ||
| 358 | + }, | ||
| 359 | + "gl": { | ||
| 360 | + "type": "text", | ||
| 361 | + "analyzer": "galician" | ||
| 362 | + }, | ||
| 363 | + "de": { | ||
| 364 | + "type": "text", | ||
| 365 | + "analyzer": "german" | ||
| 366 | + }, | ||
| 367 | + "el": { | ||
| 368 | + "type": "text", | ||
| 369 | + "analyzer": "greek" | ||
| 370 | + }, | ||
| 371 | + "hi": { | ||
| 372 | + "type": "text", | ||
| 373 | + "analyzer": "hindi" | ||
| 374 | + }, | ||
| 375 | + "hu": { | ||
| 376 | + "type": "text", | ||
| 377 | + "analyzer": "hungarian" | ||
| 378 | + }, | ||
| 379 | + "id": { | ||
| 380 | + "type": "text", | ||
| 381 | + "analyzer": "indonesian" | ||
| 382 | + }, | ||
| 383 | + "it": { | ||
| 384 | + "type": "text", | ||
| 385 | + "analyzer": "italian" | ||
| 386 | + }, | ||
| 387 | + "no": { | ||
| 388 | + "type": "text", | ||
| 389 | + "analyzer": "norwegian" | ||
| 390 | + }, | ||
| 391 | + "fa": { | ||
| 392 | + "type": "text", | ||
| 393 | + "analyzer": "persian" | ||
| 394 | + }, | ||
| 395 | + "pt": { | ||
| 396 | + "type": "text", | ||
| 397 | + "analyzer": "portuguese" | ||
| 398 | + }, | ||
| 399 | + "ro": { | ||
| 400 | + "type": "text", | ||
| 401 | + "analyzer": "romanian" | ||
| 402 | + }, | ||
| 403 | + "ru": { | ||
| 404 | + "type": "text", | ||
| 405 | + "analyzer": "russian" | ||
| 406 | + }, | ||
| 407 | + "es": { | ||
| 408 | + "type": "text", | ||
| 409 | + "analyzer": "spanish" | ||
| 410 | + }, | ||
| 411 | + "sv": { | ||
| 412 | + "type": "text", | ||
| 413 | + "analyzer": "swedish" | ||
| 414 | + }, | ||
| 415 | + "tr": { | ||
| 416 | + "type": "text", | ||
| 417 | + "analyzer": "turkish" | ||
| 418 | + }, | ||
| 419 | + "th": { | ||
| 420 | + "type": "text", | ||
| 421 | + "analyzer": "thai" | ||
| 422 | + } | ||
| 423 | + } | ||
| 424 | + }, | ||
| 425 | + "brief": { | ||
| 426 | + "type": "object", | ||
| 427 | + "properties": { | ||
| 428 | + "ar": { | ||
| 429 | + "type": "text", | ||
| 430 | + "analyzer": "arabic" | ||
| 431 | + }, | ||
| 432 | + "hy": { | ||
| 433 | + "type": "text", | ||
| 434 | + "analyzer": "armenian" | ||
| 435 | + }, | ||
| 436 | + "eu": { | ||
| 437 | + "type": "text", | ||
| 438 | + "analyzer": "basque" | ||
| 439 | + }, | ||
| 440 | + "pt_br": { | ||
| 441 | + "type": "text", | ||
| 442 | + "analyzer": "brazilian" | ||
| 443 | + }, | ||
| 444 | + "bg": { | ||
| 445 | + "type": "text", | ||
| 446 | + "analyzer": "bulgarian" | ||
| 447 | + }, | ||
| 448 | + "ca": { | ||
| 449 | + "type": "text", | ||
| 450 | + "analyzer": "catalan" | ||
| 451 | + }, | ||
| 452 | + "zh": { | ||
| 453 | + "type": "text", | ||
| 454 | + "analyzer": "icu_analyzer" | ||
| 455 | + }, | ||
| 456 | + "chinese": { | ||
| 457 | + "type": "text", | ||
| 458 | + "analyzer": "chinese" | ||
| 459 | + }, | ||
| 460 | + "cjk": { | ||
| 461 | + "type": "text", | ||
| 462 | + "analyzer": "cjk" | ||
| 463 | + }, | ||
| 464 | + "cs": { | ||
| 465 | + "type": "text", | ||
| 466 | + "analyzer": "czech" | ||
| 467 | + }, | ||
| 468 | + "da": { | ||
| 469 | + "type": "text", | ||
| 470 | + "analyzer": "danish" | ||
| 471 | + }, | ||
| 472 | + "nl": { | ||
| 473 | + "type": "text", | ||
| 474 | + "analyzer": "dutch" | ||
| 475 | + }, | ||
| 476 | + "en": { | ||
| 477 | + "type": "text", | ||
| 478 | + "analyzer": "english" | ||
| 479 | + }, | ||
| 480 | + "fi": { | ||
| 481 | + "type": "text", | ||
| 482 | + "analyzer": "finnish" | ||
| 483 | + }, | ||
| 484 | + "fr": { | ||
| 485 | + "type": "text", | ||
| 486 | + "analyzer": "french" | ||
| 487 | + }, | ||
| 488 | + "gl": { | ||
| 489 | + "type": "text", | ||
| 490 | + "analyzer": "galician" | ||
| 491 | + }, | ||
| 492 | + "de": { | ||
| 493 | + "type": "text", | ||
| 494 | + "analyzer": "german" | ||
| 495 | + }, | ||
| 496 | + "el": { | ||
| 497 | + "type": "text", | ||
| 498 | + "analyzer": "greek" | ||
| 499 | + }, | ||
| 500 | + "hi": { | ||
| 501 | + "type": "text", | ||
| 502 | + "analyzer": "hindi" | ||
| 503 | + }, | ||
| 504 | + "hu": { | ||
| 505 | + "type": "text", | ||
| 506 | + "analyzer": "hungarian" | ||
| 507 | + }, | ||
| 508 | + "id": { | ||
| 509 | + "type": "text", | ||
| 510 | + "analyzer": "indonesian" | ||
| 511 | + }, | ||
| 512 | + "it": { | ||
| 513 | + "type": "text", | ||
| 514 | + "analyzer": "italian" | ||
| 515 | + }, | ||
| 516 | + "no": { | ||
| 517 | + "type": "text", | ||
| 518 | + "analyzer": "norwegian" | ||
| 519 | + }, | ||
| 520 | + "fa": { | ||
| 521 | + "type": "text", | ||
| 522 | + "analyzer": "persian" | ||
| 523 | + }, | ||
| 524 | + "pt": { | ||
| 525 | + "type": "text", | ||
| 526 | + "analyzer": "portuguese" | ||
| 527 | + }, | ||
| 528 | + "ro": { | ||
| 529 | + "type": "text", | ||
| 530 | + "analyzer": "romanian" | ||
| 531 | + }, | ||
| 532 | + "ru": { | ||
| 533 | + "type": "text", | ||
| 534 | + "analyzer": "russian" | ||
| 535 | + }, | ||
| 536 | + "es": { | ||
| 537 | + "type": "text", | ||
| 538 | + "analyzer": "spanish" | ||
| 539 | + }, | ||
| 540 | + "sv": { | ||
| 541 | + "type": "text", | ||
| 542 | + "analyzer": "swedish" | ||
| 543 | + }, | ||
| 544 | + "tr": { | ||
| 545 | + "type": "text", | ||
| 546 | + "analyzer": "turkish" | ||
| 547 | + }, | ||
| 548 | + "th": { | ||
| 549 | + "type": "text", | ||
| 550 | + "analyzer": "thai" | ||
| 551 | + } | ||
| 552 | + } | ||
| 553 | + }, | ||
| 554 | + "description": { | ||
| 555 | + "type": "object", | ||
| 556 | + "properties": { | ||
| 557 | + "ar": { | ||
| 558 | + "type": "text", | ||
| 559 | + "analyzer": "arabic" | ||
| 560 | + }, | ||
| 561 | + "hy": { | ||
| 562 | + "type": "text", | ||
| 563 | + "analyzer": "armenian" | ||
| 564 | + }, | ||
| 565 | + "eu": { | ||
| 566 | + "type": "text", | ||
| 567 | + "analyzer": "basque" | ||
| 568 | + }, | ||
| 569 | + "pt_br": { | ||
| 570 | + "type": "text", | ||
| 571 | + "analyzer": "brazilian" | ||
| 572 | + }, | ||
| 573 | + "bg": { | ||
| 574 | + "type": "text", | ||
| 575 | + "analyzer": "bulgarian" | ||
| 576 | + }, | ||
| 577 | + "ca": { | ||
| 578 | + "type": "text", | ||
| 579 | + "analyzer": "catalan" | ||
| 580 | + }, | ||
| 581 | + "zh": { | ||
| 582 | + "type": "text", | ||
| 583 | + "analyzer": "icu_analyzer" | ||
| 584 | + }, | ||
| 585 | + "chinese": { | ||
| 586 | + "type": "text", | ||
| 587 | + "analyzer": "chinese" | ||
| 588 | + }, | ||
| 589 | + "cjk": { | ||
| 590 | + "type": "text", | ||
| 591 | + "analyzer": "cjk" | ||
| 592 | + }, | ||
| 593 | + "cs": { | ||
| 594 | + "type": "text", | ||
| 595 | + "analyzer": "czech" | ||
| 596 | + }, | ||
| 597 | + "da": { | ||
| 598 | + "type": "text", | ||
| 599 | + "analyzer": "danish" | ||
| 600 | + }, | ||
| 601 | + "nl": { | ||
| 602 | + "type": "text", | ||
| 603 | + "analyzer": "dutch" | ||
| 604 | + }, | ||
| 605 | + "en": { | ||
| 606 | + "type": "text", | ||
| 607 | + "analyzer": "english" | ||
| 608 | + }, | ||
| 609 | + "fi": { | ||
| 610 | + "type": "text", | ||
| 611 | + "analyzer": "finnish" | ||
| 612 | + }, | ||
| 613 | + "fr": { | ||
| 614 | + "type": "text", | ||
| 615 | + "analyzer": "french" | ||
| 616 | + }, | ||
| 617 | + "gl": { | ||
| 618 | + "type": "text", | ||
| 619 | + "analyzer": "galician" | ||
| 620 | + }, | ||
| 621 | + "de": { | ||
| 622 | + "type": "text", | ||
| 623 | + "analyzer": "german" | ||
| 624 | + }, | ||
| 625 | + "el": { | ||
| 626 | + "type": "text", | ||
| 627 | + "analyzer": "greek" | ||
| 628 | + }, | ||
| 629 | + "hi": { | ||
| 630 | + "type": "text", | ||
| 631 | + "analyzer": "hindi" | ||
| 632 | + }, | ||
| 633 | + "hu": { | ||
| 634 | + "type": "text", | ||
| 635 | + "analyzer": "hungarian" | ||
| 636 | + }, | ||
| 637 | + "id": { | ||
| 638 | + "type": "text", | ||
| 639 | + "analyzer": "indonesian" | ||
| 640 | + }, | ||
| 641 | + "it": { | ||
| 642 | + "type": "text", | ||
| 643 | + "analyzer": "italian" | ||
| 644 | + }, | ||
| 645 | + "no": { | ||
| 646 | + "type": "text", | ||
| 647 | + "analyzer": "norwegian" | ||
| 648 | + }, | ||
| 649 | + "fa": { | ||
| 650 | + "type": "text", | ||
| 651 | + "analyzer": "persian" | ||
| 652 | + }, | ||
| 653 | + "pt": { | ||
| 654 | + "type": "text", | ||
| 655 | + "analyzer": "portuguese" | ||
| 656 | + }, | ||
| 657 | + "ro": { | ||
| 658 | + "type": "text", | ||
| 659 | + "analyzer": "romanian" | ||
| 660 | + }, | ||
| 661 | + "ru": { | ||
| 662 | + "type": "text", | ||
| 663 | + "analyzer": "russian" | ||
| 664 | + }, | ||
| 665 | + "es": { | ||
| 666 | + "type": "text", | ||
| 667 | + "analyzer": "spanish" | ||
| 668 | + }, | ||
| 669 | + "sv": { | ||
| 670 | + "type": "text", | ||
| 671 | + "analyzer": "swedish" | ||
| 672 | + }, | ||
| 673 | + "tr": { | ||
| 674 | + "type": "text", | ||
| 675 | + "analyzer": "turkish" | ||
| 676 | + }, | ||
| 677 | + "th": { | ||
| 678 | + "type": "text", | ||
| 679 | + "analyzer": "thai" | ||
| 680 | + } | ||
| 681 | + } | ||
| 682 | + }, | ||
| 683 | + "vendor": { | ||
| 684 | + "type": "object", | ||
| 685 | + "properties": { | ||
| 686 | + "ar": { | ||
| 687 | + "type": "text", | ||
| 688 | + "analyzer": "arabic", | ||
| 689 | + "fields": { | ||
| 690 | + "keyword": { | ||
| 691 | + "type": "keyword", | ||
| 692 | + "normalizer": "lowercase" | ||
| 693 | + } | ||
| 694 | + } | ||
| 695 | + }, | ||
| 696 | + "hy": { | ||
| 697 | + "type": "text", | ||
| 698 | + "analyzer": "armenian", | ||
| 699 | + "fields": { | ||
| 700 | + "keyword": { | ||
| 701 | + "type": "keyword", | ||
| 702 | + "normalizer": "lowercase" | ||
| 703 | + } | ||
| 704 | + } | ||
| 705 | + }, | ||
| 706 | + "eu": { | ||
| 707 | + "type": "text", | ||
| 708 | + "analyzer": "basque", | ||
| 709 | + "fields": { | ||
| 710 | + "keyword": { | ||
| 711 | + "type": "keyword", | ||
| 712 | + "normalizer": "lowercase" | ||
| 713 | + } | ||
| 714 | + } | ||
| 715 | + }, | ||
| 716 | + "pt_br": { | ||
| 717 | + "type": "text", | ||
| 718 | + "analyzer": "brazilian", | ||
| 719 | + "fields": { | ||
| 720 | + "keyword": { | ||
| 721 | + "type": "keyword", | ||
| 722 | + "normalizer": "lowercase" | ||
| 723 | + } | ||
| 724 | + } | ||
| 725 | + }, | ||
| 726 | + "bg": { | ||
| 727 | + "type": "text", | ||
| 728 | + "analyzer": "bulgarian", | ||
| 729 | + "fields": { | ||
| 730 | + "keyword": { | ||
| 731 | + "type": "keyword", | ||
| 732 | + "normalizer": "lowercase" | ||
| 733 | + } | ||
| 734 | + } | ||
| 735 | + }, | ||
| 736 | + "ca": { | ||
| 737 | + "type": "text", | ||
| 738 | + "analyzer": "catalan", | ||
| 739 | + "fields": { | ||
| 740 | + "keyword": { | ||
| 741 | + "type": "keyword", | ||
| 742 | + "normalizer": "lowercase" | ||
| 743 | + } | ||
| 744 | + } | ||
| 745 | + }, | ||
| 746 | + "zh": { | ||
| 747 | + "type": "text", | ||
| 748 | + "analyzer": "icu_analyzer", | ||
| 749 | + "fields": { | ||
| 750 | + "keyword": { | ||
| 751 | + "type": "keyword", | ||
| 752 | + "normalizer": "lowercase" | ||
| 753 | + } | ||
| 754 | + } | ||
| 755 | + }, | ||
| 756 | + "chinese": { | ||
| 757 | + "type": "text", | ||
| 758 | + "analyzer": "chinese", | ||
| 759 | + "fields": { | ||
| 760 | + "keyword": { | ||
| 761 | + "type": "keyword", | ||
| 762 | + "normalizer": "lowercase" | ||
| 763 | + } | ||
| 764 | + } | ||
| 765 | + }, | ||
| 766 | + "cjk": { | ||
| 767 | + "type": "text", | ||
| 768 | + "analyzer": "cjk", | ||
| 769 | + "fields": { | ||
| 770 | + "keyword": { | ||
| 771 | + "type": "keyword", | ||
| 772 | + "normalizer": "lowercase" | ||
| 773 | + } | ||
| 774 | + } | ||
| 775 | + }, | ||
| 776 | + "cs": { | ||
| 777 | + "type": "text", | ||
| 778 | + "analyzer": "czech", | ||
| 779 | + "fields": { | ||
| 780 | + "keyword": { | ||
| 781 | + "type": "keyword", | ||
| 782 | + "normalizer": "lowercase" | ||
| 783 | + } | ||
| 784 | + } | ||
| 785 | + }, | ||
| 786 | + "da": { | ||
| 787 | + "type": "text", | ||
| 788 | + "analyzer": "danish", | ||
| 789 | + "fields": { | ||
| 790 | + "keyword": { | ||
| 791 | + "type": "keyword", | ||
| 792 | + "normalizer": "lowercase" | ||
| 793 | + } | ||
| 794 | + } | ||
| 795 | + }, | ||
| 796 | + "nl": { | ||
| 797 | + "type": "text", | ||
| 798 | + "analyzer": "dutch", | ||
| 799 | + "fields": { | ||
| 800 | + "keyword": { | ||
| 801 | + "type": "keyword", | ||
| 802 | + "normalizer": "lowercase" | ||
| 803 | + } | ||
| 804 | + } | ||
| 805 | + }, | ||
| 806 | + "en": { | ||
| 807 | + "type": "text", | ||
| 808 | + "analyzer": "english", | ||
| 809 | + "fields": { | ||
| 810 | + "keyword": { | ||
| 811 | + "type": "keyword", | ||
| 812 | + "normalizer": "lowercase" | ||
| 813 | + } | ||
| 814 | + } | ||
| 815 | + }, | ||
| 816 | + "fi": { | ||
| 817 | + "type": "text", | ||
| 818 | + "analyzer": "finnish", | ||
| 819 | + "fields": { | ||
| 820 | + "keyword": { | ||
| 821 | + "type": "keyword", | ||
| 822 | + "normalizer": "lowercase" | ||
| 823 | + } | ||
| 824 | + } | ||
| 825 | + }, | ||
| 826 | + "fr": { | ||
| 827 | + "type": "text", | ||
| 828 | + "analyzer": "french", | ||
| 829 | + "fields": { | ||
| 830 | + "keyword": { | ||
| 831 | + "type": "keyword", | ||
| 832 | + "normalizer": "lowercase" | ||
| 833 | + } | ||
| 834 | + } | ||
| 835 | + }, | ||
| 836 | + "gl": { | ||
| 837 | + "type": "text", | ||
| 838 | + "analyzer": "galician", | ||
| 839 | + "fields": { | ||
| 840 | + "keyword": { | ||
| 841 | + "type": "keyword", | ||
| 842 | + "normalizer": "lowercase" | ||
| 843 | + } | ||
| 844 | + } | ||
| 845 | + }, | ||
| 846 | + "de": { | ||
| 847 | + "type": "text", | ||
| 848 | + "analyzer": "german", | ||
| 849 | + "fields": { | ||
| 850 | + "keyword": { | ||
| 851 | + "type": "keyword", | ||
| 852 | + "normalizer": "lowercase" | ||
| 853 | + } | ||
| 854 | + } | ||
| 855 | + }, | ||
| 856 | + "el": { | ||
| 857 | + "type": "text", | ||
| 858 | + "analyzer": "greek", | ||
| 859 | + "fields": { | ||
| 860 | + "keyword": { | ||
| 861 | + "type": "keyword", | ||
| 862 | + "normalizer": "lowercase" | ||
| 863 | + } | ||
| 864 | + } | ||
| 865 | + }, | ||
| 866 | + "hi": { | ||
| 867 | + "type": "text", | ||
| 868 | + "analyzer": "hindi", | ||
| 869 | + "fields": { | ||
| 870 | + "keyword": { | ||
| 871 | + "type": "keyword", | ||
| 872 | + "normalizer": "lowercase" | ||
| 873 | + } | ||
| 874 | + } | ||
| 875 | + }, | ||
| 876 | + "hu": { | ||
| 877 | + "type": "text", | ||
| 878 | + "analyzer": "hungarian", | ||
| 879 | + "fields": { | ||
| 880 | + "keyword": { | ||
| 881 | + "type": "keyword", | ||
| 882 | + "normalizer": "lowercase" | ||
| 883 | + } | ||
| 884 | + } | ||
| 885 | + }, | ||
| 886 | + "id": { | ||
| 887 | + "type": "text", | ||
| 888 | + "analyzer": "indonesian", | ||
| 889 | + "fields": { | ||
| 890 | + "keyword": { | ||
| 891 | + "type": "keyword", | ||
| 892 | + "normalizer": "lowercase" | ||
| 893 | + } | ||
| 894 | + } | ||
| 895 | + }, | ||
| 896 | + "it": { | ||
| 897 | + "type": "text", | ||
| 898 | + "analyzer": "italian", | ||
| 899 | + "fields": { | ||
| 900 | + "keyword": { | ||
| 901 | + "type": "keyword", | ||
| 902 | + "normalizer": "lowercase" | ||
| 903 | + } | ||
| 904 | + } | ||
| 905 | + }, | ||
| 906 | + "no": { | ||
| 907 | + "type": "text", | ||
| 908 | + "analyzer": "norwegian", | ||
| 909 | + "fields": { | ||
| 910 | + "keyword": { | ||
| 911 | + "type": "keyword", | ||
| 912 | + "normalizer": "lowercase" | ||
| 913 | + } | ||
| 914 | + } | ||
| 915 | + }, | ||
| 916 | + "fa": { | ||
| 917 | + "type": "text", | ||
| 918 | + "analyzer": "persian", | ||
| 919 | + "fields": { | ||
| 920 | + "keyword": { | ||
| 921 | + "type": "keyword", | ||
| 922 | + "normalizer": "lowercase" | ||
| 923 | + } | ||
| 924 | + } | ||
| 925 | + }, | ||
| 926 | + "pt": { | ||
| 927 | + "type": "text", | ||
| 928 | + "analyzer": "portuguese", | ||
| 929 | + "fields": { | ||
| 930 | + "keyword": { | ||
| 931 | + "type": "keyword", | ||
| 932 | + "normalizer": "lowercase" | ||
| 933 | + } | ||
| 934 | + } | ||
| 935 | + }, | ||
| 936 | + "ro": { | ||
| 937 | + "type": "text", | ||
| 938 | + "analyzer": "romanian", | ||
| 939 | + "fields": { | ||
| 940 | + "keyword": { | ||
| 941 | + "type": "keyword", | ||
| 942 | + "normalizer": "lowercase" | ||
| 943 | + } | ||
| 944 | + } | ||
| 945 | + }, | ||
| 946 | + "ru": { | ||
| 947 | + "type": "text", | ||
| 948 | + "analyzer": "russian", | ||
| 949 | + "fields": { | ||
| 950 | + "keyword": { | ||
| 951 | + "type": "keyword", | ||
| 952 | + "normalizer": "lowercase" | ||
| 953 | + } | ||
| 954 | + } | ||
| 955 | + }, | ||
| 956 | + "es": { | ||
| 957 | + "type": "text", | ||
| 958 | + "analyzer": "spanish", | ||
| 959 | + "fields": { | ||
| 960 | + "keyword": { | ||
| 961 | + "type": "keyword", | ||
| 962 | + "normalizer": "lowercase" | ||
| 963 | + } | ||
| 964 | + } | ||
| 965 | + }, | ||
| 966 | + "sv": { | ||
| 967 | + "type": "text", | ||
| 968 | + "analyzer": "swedish", | ||
| 969 | + "fields": { | ||
| 970 | + "keyword": { | ||
| 971 | + "type": "keyword", | ||
| 972 | + "normalizer": "lowercase" | ||
| 973 | + } | ||
| 974 | + } | ||
| 975 | + }, | ||
| 976 | + "tr": { | ||
| 977 | + "type": "text", | ||
| 978 | + "analyzer": "turkish", | ||
| 979 | + "fields": { | ||
| 980 | + "keyword": { | ||
| 981 | + "type": "keyword", | ||
| 982 | + "normalizer": "lowercase" | ||
| 983 | + } | ||
| 984 | + } | ||
| 985 | + }, | ||
| 986 | + "th": { | ||
| 987 | + "type": "text", | ||
| 988 | + "analyzer": "thai", | ||
| 989 | + "fields": { | ||
| 990 | + "keyword": { | ||
| 991 | + "type": "keyword", | ||
| 992 | + "normalizer": "lowercase" | ||
| 993 | + } | ||
| 994 | + } | ||
| 117 | } | 995 | } |
| 118 | } | 996 | } |
| 119 | }, | 997 | }, |
| @@ -158,25 +1036,263 @@ | @@ -158,25 +1036,263 @@ | ||
| 158 | } | 1036 | } |
| 159 | } | 1037 | } |
| 160 | }, | 1038 | }, |
| 161 | - "category_path_zh": { | ||
| 162 | - "type": "text", | ||
| 163 | - "analyzer": "hanlp_index", | ||
| 164 | - "search_analyzer": "hanlp_standard" | ||
| 165 | - }, | ||
| 166 | - "category_path_en": { | ||
| 167 | - "type": "text", | ||
| 168 | - "analyzer": "english", | ||
| 169 | - "search_analyzer": "english" | ||
| 170 | - }, | ||
| 171 | - "category_name_zh": { | ||
| 172 | - "type": "text", | ||
| 173 | - "analyzer": "hanlp_index", | ||
| 174 | - "search_analyzer": "hanlp_standard" | 1039 | + "category_path": { |
| 1040 | + "type": "object", | ||
| 1041 | + "properties": { | ||
| 1042 | + "ar": { | ||
| 1043 | + "type": "text", | ||
| 1044 | + "analyzer": "arabic" | ||
| 1045 | + }, | ||
| 1046 | + "hy": { | ||
| 1047 | + "type": "text", | ||
| 1048 | + "analyzer": "armenian" | ||
| 1049 | + }, | ||
| 1050 | + "eu": { | ||
| 1051 | + "type": "text", | ||
| 1052 | + "analyzer": "basque" | ||
| 1053 | + }, | ||
| 1054 | + "pt_br": { | ||
| 1055 | + "type": "text", | ||
| 1056 | + "analyzer": "brazilian" | ||
| 1057 | + }, | ||
| 1058 | + "bg": { | ||
| 1059 | + "type": "text", | ||
| 1060 | + "analyzer": "bulgarian" | ||
| 1061 | + }, | ||
| 1062 | + "ca": { | ||
| 1063 | + "type": "text", | ||
| 1064 | + "analyzer": "catalan" | ||
| 1065 | + }, | ||
| 1066 | + "zh": { | ||
| 1067 | + "type": "text", | ||
| 1068 | + "analyzer": "icu_analyzer" | ||
| 1069 | + }, | ||
| 1070 | + "chinese": { | ||
| 1071 | + "type": "text", | ||
| 1072 | + "analyzer": "chinese" | ||
| 1073 | + }, | ||
| 1074 | + "cjk": { | ||
| 1075 | + "type": "text", | ||
| 1076 | + "analyzer": "cjk" | ||
| 1077 | + }, | ||
| 1078 | + "cs": { | ||
| 1079 | + "type": "text", | ||
| 1080 | + "analyzer": "czech" | ||
| 1081 | + }, | ||
| 1082 | + "da": { | ||
| 1083 | + "type": "text", | ||
| 1084 | + "analyzer": "danish" | ||
| 1085 | + }, | ||
| 1086 | + "nl": { | ||
| 1087 | + "type": "text", | ||
| 1088 | + "analyzer": "dutch" | ||
| 1089 | + }, | ||
| 1090 | + "en": { | ||
| 1091 | + "type": "text", | ||
| 1092 | + "analyzer": "english" | ||
| 1093 | + }, | ||
| 1094 | + "fi": { | ||
| 1095 | + "type": "text", | ||
| 1096 | + "analyzer": "finnish" | ||
| 1097 | + }, | ||
| 1098 | + "fr": { | ||
| 1099 | + "type": "text", | ||
| 1100 | + "analyzer": "french" | ||
| 1101 | + }, | ||
| 1102 | + "gl": { | ||
| 1103 | + "type": "text", | ||
| 1104 | + "analyzer": "galician" | ||
| 1105 | + }, | ||
| 1106 | + "de": { | ||
| 1107 | + "type": "text", | ||
| 1108 | + "analyzer": "german" | ||
| 1109 | + }, | ||
| 1110 | + "el": { | ||
| 1111 | + "type": "text", | ||
| 1112 | + "analyzer": "greek" | ||
| 1113 | + }, | ||
| 1114 | + "hi": { | ||
| 1115 | + "type": "text", | ||
| 1116 | + "analyzer": "hindi" | ||
| 1117 | + }, | ||
| 1118 | + "hu": { | ||
| 1119 | + "type": "text", | ||
| 1120 | + "analyzer": "hungarian" | ||
| 1121 | + }, | ||
| 1122 | + "id": { | ||
| 1123 | + "type": "text", | ||
| 1124 | + "analyzer": "indonesian" | ||
| 1125 | + }, | ||
| 1126 | + "it": { | ||
| 1127 | + "type": "text", | ||
| 1128 | + "analyzer": "italian" | ||
| 1129 | + }, | ||
| 1130 | + "no": { | ||
| 1131 | + "type": "text", | ||
| 1132 | + "analyzer": "norwegian" | ||
| 1133 | + }, | ||
| 1134 | + "fa": { | ||
| 1135 | + "type": "text", | ||
| 1136 | + "analyzer": "persian" | ||
| 1137 | + }, | ||
| 1138 | + "pt": { | ||
| 1139 | + "type": "text", | ||
| 1140 | + "analyzer": "portuguese" | ||
| 1141 | + }, | ||
| 1142 | + "ro": { | ||
| 1143 | + "type": "text", | ||
| 1144 | + "analyzer": "romanian" | ||
| 1145 | + }, | ||
| 1146 | + "ru": { | ||
| 1147 | + "type": "text", | ||
| 1148 | + "analyzer": "russian" | ||
| 1149 | + }, | ||
| 1150 | + "es": { | ||
| 1151 | + "type": "text", | ||
| 1152 | + "analyzer": "spanish" | ||
| 1153 | + }, | ||
| 1154 | + "sv": { | ||
| 1155 | + "type": "text", | ||
| 1156 | + "analyzer": "swedish" | ||
| 1157 | + }, | ||
| 1158 | + "tr": { | ||
| 1159 | + "type": "text", | ||
| 1160 | + "analyzer": "turkish" | ||
| 1161 | + }, | ||
| 1162 | + "th": { | ||
| 1163 | + "type": "text", | ||
| 1164 | + "analyzer": "thai" | ||
| 1165 | + } | ||
| 1166 | + } | ||
| 175 | }, | 1167 | }, |
| 176 | - "category_name_en": { | ||
| 177 | - "type": "text", | ||
| 178 | - "analyzer": "english", | ||
| 179 | - "search_analyzer": "english" | 1168 | + "category_name_text": { |
| 1169 | + "type": "object", | ||
| 1170 | + "properties": { | ||
| 1171 | + "ar": { | ||
| 1172 | + "type": "text", | ||
| 1173 | + "analyzer": "arabic" | ||
| 1174 | + }, | ||
| 1175 | + "hy": { | ||
| 1176 | + "type": "text", | ||
| 1177 | + "analyzer": "armenian" | ||
| 1178 | + }, | ||
| 1179 | + "eu": { | ||
| 1180 | + "type": "text", | ||
| 1181 | + "analyzer": "basque" | ||
| 1182 | + }, | ||
| 1183 | + "pt_br": { | ||
| 1184 | + "type": "text", | ||
| 1185 | + "analyzer": "brazilian" | ||
| 1186 | + }, | ||
| 1187 | + "bg": { | ||
| 1188 | + "type": "text", | ||
| 1189 | + "analyzer": "bulgarian" | ||
| 1190 | + }, | ||
| 1191 | + "ca": { | ||
| 1192 | + "type": "text", | ||
| 1193 | + "analyzer": "catalan" | ||
| 1194 | + }, | ||
| 1195 | + "zh": { | ||
| 1196 | + "type": "text", | ||
| 1197 | + "analyzer": "icu_analyzer" | ||
| 1198 | + }, | ||
| 1199 | + "chinese": { | ||
| 1200 | + "type": "text", | ||
| 1201 | + "analyzer": "chinese" | ||
| 1202 | + }, | ||
| 1203 | + "cjk": { | ||
| 1204 | + "type": "text", | ||
| 1205 | + "analyzer": "cjk" | ||
| 1206 | + }, | ||
| 1207 | + "cs": { | ||
| 1208 | + "type": "text", | ||
| 1209 | + "analyzer": "czech" | ||
| 1210 | + }, | ||
| 1211 | + "da": { | ||
| 1212 | + "type": "text", | ||
| 1213 | + "analyzer": "danish" | ||
| 1214 | + }, | ||
| 1215 | + "nl": { | ||
| 1216 | + "type": "text", | ||
| 1217 | + "analyzer": "dutch" | ||
| 1218 | + }, | ||
| 1219 | + "en": { | ||
| 1220 | + "type": "text", | ||
| 1221 | + "analyzer": "english" | ||
| 1222 | + }, | ||
| 1223 | + "fi": { | ||
| 1224 | + "type": "text", | ||
| 1225 | + "analyzer": "finnish" | ||
| 1226 | + }, | ||
| 1227 | + "fr": { | ||
| 1228 | + "type": "text", | ||
| 1229 | + "analyzer": "french" | ||
| 1230 | + }, | ||
| 1231 | + "gl": { | ||
| 1232 | + "type": "text", | ||
| 1233 | + "analyzer": "galician" | ||
| 1234 | + }, | ||
| 1235 | + "de": { | ||
| 1236 | + "type": "text", | ||
| 1237 | + "analyzer": "german" | ||
| 1238 | + }, | ||
| 1239 | + "el": { | ||
| 1240 | + "type": "text", | ||
| 1241 | + "analyzer": "greek" | ||
| 1242 | + }, | ||
| 1243 | + "hi": { | ||
| 1244 | + "type": "text", | ||
| 1245 | + "analyzer": "hindi" | ||
| 1246 | + }, | ||
| 1247 | + "hu": { | ||
| 1248 | + "type": "text", | ||
| 1249 | + "analyzer": "hungarian" | ||
| 1250 | + }, | ||
| 1251 | + "id": { | ||
| 1252 | + "type": "text", | ||
| 1253 | + "analyzer": "indonesian" | ||
| 1254 | + }, | ||
| 1255 | + "it": { | ||
| 1256 | + "type": "text", | ||
| 1257 | + "analyzer": "italian" | ||
| 1258 | + }, | ||
| 1259 | + "no": { | ||
| 1260 | + "type": "text", | ||
| 1261 | + "analyzer": "norwegian" | ||
| 1262 | + }, | ||
| 1263 | + "fa": { | ||
| 1264 | + "type": "text", | ||
| 1265 | + "analyzer": "persian" | ||
| 1266 | + }, | ||
| 1267 | + "pt": { | ||
| 1268 | + "type": "text", | ||
| 1269 | + "analyzer": "portuguese" | ||
| 1270 | + }, | ||
| 1271 | + "ro": { | ||
| 1272 | + "type": "text", | ||
| 1273 | + "analyzer": "romanian" | ||
| 1274 | + }, | ||
| 1275 | + "ru": { | ||
| 1276 | + "type": "text", | ||
| 1277 | + "analyzer": "russian" | ||
| 1278 | + }, | ||
| 1279 | + "es": { | ||
| 1280 | + "type": "text", | ||
| 1281 | + "analyzer": "spanish" | ||
| 1282 | + }, | ||
| 1283 | + "sv": { | ||
| 1284 | + "type": "text", | ||
| 1285 | + "analyzer": "swedish" | ||
| 1286 | + }, | ||
| 1287 | + "tr": { | ||
| 1288 | + "type": "text", | ||
| 1289 | + "analyzer": "turkish" | ||
| 1290 | + }, | ||
| 1291 | + "th": { | ||
| 1292 | + "type": "text", | ||
| 1293 | + "analyzer": "thai" | ||
| 1294 | + } | ||
| 1295 | + } | ||
| 180 | }, | 1296 | }, |
| 181 | "category_id": { | 1297 | "category_id": { |
| 182 | "type": "keyword" | 1298 | "type": "keyword" |
| @@ -294,4 +1410,3 @@ | @@ -294,4 +1410,3 @@ | ||
| 294 | } | 1410 | } |
| 295 | } | 1411 | } |
| 296 | } | 1412 | } |
| 297 | - |
scripts/check_es_data.py
| @@ -28,12 +28,12 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): | @@ -28,12 +28,12 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): | ||
| 28 | "size": size, | 28 | "size": size, |
| 29 | "_source": [ | 29 | "_source": [ |
| 30 | "spu_id", | 30 | "spu_id", |
| 31 | - "title_zh", | 31 | + "title", |
| 32 | "category1_name", | 32 | "category1_name", |
| 33 | "category2_name", | 33 | "category2_name", |
| 34 | "category3_name", | 34 | "category3_name", |
| 35 | "category_name", | 35 | "category_name", |
| 36 | - "category_path_zh", | 36 | + "category_path", |
| 37 | "specifications", | 37 | "specifications", |
| 38 | "option1_name", | 38 | "option1_name", |
| 39 | "option2_name", | 39 | "option2_name", |
| @@ -51,14 +51,16 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): | @@ -51,14 +51,16 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): | ||
| 51 | 51 | ||
| 52 | for i, hit in enumerate(hits, 1): | 52 | for i, hit in enumerate(hits, 1): |
| 53 | source = hit.get('_source', {}) | 53 | source = hit.get('_source', {}) |
| 54 | + title_obj = source.get("title") or {} | ||
| 55 | + category_path_obj = source.get("category_path") or {} | ||
| 54 | print(f"文档 {i}:") | 56 | print(f"文档 {i}:") |
| 55 | print(f" spu_id: {source.get('spu_id')}") | 57 | print(f" spu_id: {source.get('spu_id')}") |
| 56 | - print(f" title_zh: {source.get('title_zh', '')[:50]}") | 58 | + print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}") |
| 57 | print(f" category1_name: {source.get('category1_name')}") | 59 | print(f" category1_name: {source.get('category1_name')}") |
| 58 | print(f" category2_name: {source.get('category2_name')}") | 60 | print(f" category2_name: {source.get('category2_name')}") |
| 59 | print(f" category3_name: {source.get('category3_name')}") | 61 | print(f" category3_name: {source.get('category3_name')}") |
| 60 | print(f" category_name: {source.get('category_name')}") | 62 | print(f" category_name: {source.get('category_name')}") |
| 61 | - print(f" category_path_zh: {source.get('category_path_zh')}") | 63 | + print(f" category_path.zh: {category_path_obj.get('zh') if isinstance(category_path_obj, dict) else None}") |
| 62 | print(f" option1_name: {source.get('option1_name')}") | 64 | print(f" option1_name: {source.get('option1_name')}") |
| 63 | print(f" option2_name: {source.get('option2_name')}") | 65 | print(f" option2_name: {source.get('option2_name')}") |
| 64 | print(f" option3_name: {source.get('option3_name')}") | 66 | print(f" option3_name: {source.get('option3_name')}") |
scripts/check_index_mapping.py
| @@ -20,11 +20,19 @@ def check_field_mapping(mapping_dict, field_path): | @@ -20,11 +20,19 @@ def check_field_mapping(mapping_dict, field_path): | ||
| 20 | current = mapping_dict | 20 | current = mapping_dict |
| 21 | 21 | ||
| 22 | for part in parts: | 22 | for part in parts: |
| 23 | - if isinstance(current, dict): | ||
| 24 | - current = current.get(part) | ||
| 25 | - if current is None: | ||
| 26 | - return None | ||
| 27 | - else: | 23 | + if not isinstance(current, dict): |
| 24 | + return None | ||
| 25 | + | ||
| 26 | + # ES mapping nesting: object fields store subfields under "properties" | ||
| 27 | + if "properties" in current and isinstance(current["properties"], dict): | ||
| 28 | + current = current["properties"] | ||
| 29 | + | ||
| 30 | + # multi-fields store subfields under "fields" (e.g. vendor.zh.keyword) | ||
| 31 | + if part != parts[0] and "fields" in current and isinstance(current["fields"], dict) and part in current["fields"]: | ||
| 32 | + current = current["fields"] | ||
| 33 | + | ||
| 34 | + current = current.get(part) | ||
| 35 | + if current is None: | ||
| 28 | return None | 36 | return None |
| 29 | return current | 37 | return current |
| 30 | 38 | ||
| @@ -70,12 +78,13 @@ def main(): | @@ -70,12 +78,13 @@ def main(): | ||
| 70 | 78 | ||
| 71 | # 检查关键字段 | 79 | # 检查关键字段 |
| 72 | fields_to_check = [ | 80 | fields_to_check = [ |
| 73 | - 'title_zh', | ||
| 74 | - 'brief_zh', | ||
| 75 | - 'description_zh', | ||
| 76 | - 'vendor_zh', | ||
| 77 | - 'category_path_zh', | ||
| 78 | - 'category_name_zh' | 81 | + "title.zh", |
| 82 | + "brief.zh", | ||
| 83 | + "description.zh", | ||
| 84 | + "vendor.zh", | ||
| 85 | + "vendor.zh.keyword", | ||
| 86 | + "category_path.zh", | ||
| 87 | + "category_name_text.zh" | ||
| 79 | ] | 88 | ] |
| 80 | 89 | ||
| 81 | print("=" * 80) | 90 | print("=" * 80) |
| @@ -83,7 +92,7 @@ def main(): | @@ -83,7 +92,7 @@ def main(): | ||
| 83 | print("=" * 80) | 92 | print("=" * 80) |
| 84 | 93 | ||
| 85 | for field_name in fields_to_check: | 94 | for field_name in fields_to_check: |
| 86 | - field_mapping = index_mapping.get(field_name) | 95 | + field_mapping = check_field_mapping(index_mapping, field_name) |
| 87 | 96 | ||
| 88 | if field_mapping is None: | 97 | if field_mapping is None: |
| 89 | print(f"\n❌ {field_name}: 字段不存在") | 98 | print(f"\n❌ {field_name}: 字段不存在") |
search/es_query_builder.py
| @@ -367,37 +367,37 @@ class ESQueryBuilder: | @@ -367,37 +367,37 @@ class ESQueryBuilder: | ||
| 367 | """ | 367 | """ |
| 368 | if language == 'zh': | 368 | if language == 'zh': |
| 369 | all_fields = [ | 369 | all_fields = [ |
| 370 | - "title_zh^3.0", | ||
| 371 | - "brief_zh^1.5", | ||
| 372 | - "description_zh", | ||
| 373 | - "vendor_zh^1.5", | 370 | + "title.zh^3.0", |
| 371 | + "brief.zh^1.5", | ||
| 372 | + "description.zh", | ||
| 373 | + "vendor.zh^1.5", | ||
| 374 | "tags", | 374 | "tags", |
| 375 | - "category_path_zh^1.5", | ||
| 376 | - "category_name_zh^1.5", | 375 | + "category_path.zh^1.5", |
| 376 | + "category_name_text.zh^1.5", | ||
| 377 | "option1_values^0.5" | 377 | "option1_values^0.5" |
| 378 | ] | 378 | ] |
| 379 | core_fields = [ | 379 | core_fields = [ |
| 380 | - "title_zh^3.0", | ||
| 381 | - "brief_zh^1.5", | ||
| 382 | - "vendor_zh^1.5", | ||
| 383 | - "category_name_zh^1.5" | 380 | + "title.zh^3.0", |
| 381 | + "brief.zh^1.5", | ||
| 382 | + "vendor.zh^1.5", | ||
| 383 | + "category_name_text.zh^1.5" | ||
| 384 | ] | 384 | ] |
| 385 | else: # en | 385 | else: # en |
| 386 | all_fields = [ | 386 | all_fields = [ |
| 387 | - "title_en^3.0", | ||
| 388 | - "brief_en^1.5", | ||
| 389 | - "description_en", | ||
| 390 | - "vendor_en^1.5", | 387 | + "title.en^3.0", |
| 388 | + "brief.en^1.5", | ||
| 389 | + "description.en", | ||
| 390 | + "vendor.en^1.5", | ||
| 391 | "tags", | 391 | "tags", |
| 392 | - "category_path_en^1.5", | ||
| 393 | - "category_name_en^1.5", | 392 | + "category_path.en^1.5", |
| 393 | + "category_name_text.en^1.5", | ||
| 394 | "option1_values^0.5" | 394 | "option1_values^0.5" |
| 395 | ] | 395 | ] |
| 396 | core_fields = [ | 396 | core_fields = [ |
| 397 | - "title_en^3.0", | ||
| 398 | - "brief_en^1.5", | ||
| 399 | - "vendor_en^1.5", | ||
| 400 | - "category_name_en^1.5" | 397 | + "title.en^3.0", |
| 398 | + "brief.en^1.5", | ||
| 399 | + "vendor.en^1.5", | ||
| 400 | + "category_name_text.en^1.5" | ||
| 401 | ] | 401 | ] |
| 402 | return all_fields, core_fields | 402 | return all_fields, core_fields |
| 403 | 403 |
tests/conftest.py
| @@ -28,7 +28,7 @@ def sample_index_config() -> IndexConfig: | @@ -28,7 +28,7 @@ def sample_index_config() -> IndexConfig: | ||
| 28 | return IndexConfig( | 28 | return IndexConfig( |
| 29 | name="default", | 29 | name="default", |
| 30 | label="默认索引", | 30 | label="默认索引", |
| 31 | - fields=["title_zh", "brief_zh", "tags"], | 31 | + fields=["title.zh", "brief.zh", "tags"], |
| 32 | boost=1.0 | 32 | boost=1.0 |
| 33 | ) | 33 | ) |
| 34 | 34 | ||
| @@ -60,10 +60,10 @@ def sample_search_config(sample_index_config) -> SearchConfig: | @@ -60,10 +60,10 @@ def sample_search_config(sample_index_config) -> SearchConfig: | ||
| 60 | es_index_name="test_products", | 60 | es_index_name="test_products", |
| 61 | field_boosts={ | 61 | field_boosts={ |
| 62 | "tenant_id": 1.0, | 62 | "tenant_id": 1.0, |
| 63 | - "title_zh": 3.0, | ||
| 64 | - "brief_zh": 1.5, | 63 | + "title.zh": 3.0, |
| 64 | + "brief.zh": 1.5, | ||
| 65 | "tags": 1.0, | 65 | "tags": 1.0, |
| 66 | - "category_path_zh": 1.5, | 66 | + "category_path.zh": 1.5, |
| 67 | }, | 67 | }, |
| 68 | indexes=[sample_index_config], | 68 | indexes=[sample_index_config], |
| 69 | query_config=query_config, | 69 | query_config=query_config, |
| @@ -89,8 +89,8 @@ def mock_es_client() -> Mock: | @@ -89,8 +89,8 @@ def mock_es_client() -> Mock: | ||
| 89 | "_id": "1", | 89 | "_id": "1", |
| 90 | "_score": 2.5, | 90 | "_score": 2.5, |
| 91 | "_source": { | 91 | "_source": { |
| 92 | - "title_zh": "红色连衣裙", | ||
| 93 | - "vendor_zh": "测试品牌", | 92 | + "title": {"zh": "红色连衣裙"}, |
| 93 | + "vendor": {"zh": "测试品牌"}, | ||
| 94 | "min_price": 299.0, | 94 | "min_price": 299.0, |
| 95 | "category_id": "1" | 95 | "category_id": "1" |
| 96 | } | 96 | } |
| @@ -99,8 +99,8 @@ def mock_es_client() -> Mock: | @@ -99,8 +99,8 @@ def mock_es_client() -> Mock: | ||
| 99 | "_id": "2", | 99 | "_id": "2", |
| 100 | "_score": 2.2, | 100 | "_score": 2.2, |
| 101 | "_source": { | 101 | "_source": { |
| 102 | - "title_zh": "蓝色连衣裙", | ||
| 103 | - "vendor_zh": "测试品牌", | 102 | + "title": {"zh": "蓝色连衣裙"}, |
| 103 | + "vendor": {"zh": "测试品牌"}, | ||
| 104 | "min_price": 399.0, | 104 | "min_price": 399.0, |
| 105 | "category_id": "1" | 105 | "category_id": "1" |
| 106 | } | 106 | } |
| @@ -142,8 +142,8 @@ def sample_search_results() -> Dict[str, Any]: | @@ -142,8 +142,8 @@ def sample_search_results() -> Dict[str, Any]: | ||
| 142 | "query": "红色连衣裙", | 142 | "query": "红色连衣裙", |
| 143 | "expected_total": 2, | 143 | "expected_total": 2, |
| 144 | "expected_products": [ | 144 | "expected_products": [ |
| 145 | - {"title_zh": "红色连衣裙", "min_price": 299.0}, | ||
| 146 | - {"title_zh": "蓝色连衣裙", "min_price": 399.0} | 145 | + {"title": "红色连衣裙", "min_price": 299.0}, |
| 146 | + {"title": "蓝色连衣裙", "min_price": 399.0} | ||
| 147 | ] | 147 | ] |
| 148 | } | 148 | } |
| 149 | 149 | ||
| @@ -157,16 +157,16 @@ def temp_config_file() -> Generator[str, None, None]: | @@ -157,16 +157,16 @@ def temp_config_file() -> Generator[str, None, None]: | ||
| 157 | config_data = { | 157 | config_data = { |
| 158 | "es_index_name": "test_products", | 158 | "es_index_name": "test_products", |
| 159 | "field_boosts": { | 159 | "field_boosts": { |
| 160 | - "title_zh": 3.0, | ||
| 161 | - "brief_zh": 1.5, | 160 | + "title.zh": 3.0, |
| 161 | + "brief.zh": 1.5, | ||
| 162 | "tags": 1.0, | 162 | "tags": 1.0, |
| 163 | - "category_path_zh": 1.5 | 163 | + "category_path.zh": 1.5 |
| 164 | }, | 164 | }, |
| 165 | "indexes": [ | 165 | "indexes": [ |
| 166 | { | 166 | { |
| 167 | "name": "default", | 167 | "name": "default", |
| 168 | "label": "默认索引", | 168 | "label": "默认索引", |
| 169 | - "fields": ["title_zh", "brief_zh", "tags"], | 169 | + "fields": ["title.zh", "brief.zh", "tags"], |
| 170 | "boost": 1.0 | 170 | "boost": 1.0 |
| 171 | } | 171 | } |
| 172 | ], | 172 | ], |