5ac64fc7
tangwang
多语言查询
|
1
2
3
4
5
6
7
8
9
10
|
#!/usr/bin/env python3
"""
检查ES索引的实际映射配置,特别是中文字段的analyzer设置
"""
import os
import sys
import json
from pathlib import Path
|
32e9b30c
tangwang
scripts/ 根目录主要保留启...
|
11
|
# Prepend the directory at parents[2] of this file's resolved path to sys.path
# (presumably the repository root, so the project-local imports that follow can
# be resolved when this file is run directly — confirm against the repo layout).
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
5ac64fc7
tangwang
多语言查询
|
12
13
|
from utils.es_client import get_es_client_from_env
|
80f87e57
tangwang
多语言索引修改 对应的 索引创建、...
|
14
|
from indexer.mapping_generator import get_tenant_index_name
|
5ac64fc7
tangwang
多语言查询
|
15
16
17
18
19
20
21
22
|
def check_field_mapping(mapping_dict, field_path):
    """Look up the mapping definition of a dotted field path.

    Walks an Elasticsearch mapping one path segment at a time. Object fields
    keep their children under ``"properties"`` and multi-fields keep theirs
    under ``"fields"`` (e.g. ``vendor.zh.keyword``); both containers are
    traversed transparently.

    Args:
        mapping_dict: Either a ``properties`` dict (field name -> mapping) or
            a mapping node that itself contains a ``"properties"`` key.
        field_path: Dotted path such as ``"title.zh"`` or
            ``"vendor.zh.keyword"``.

    Returns:
        The value stored for the last path segment (normally the field's
        mapping dict), or ``None`` if any segment cannot be resolved.
    """
    current = mapping_dict
    for depth, part in enumerate(field_path.split('.')):
        if not isinstance(current, dict):
            return None

        properties = current.get("properties")
        multi_fields = current.get("fields")
        if isinstance(properties, dict):
            # Object field (or mapping root): children live under "properties".
            current = properties
        elif depth > 0 and isinstance(multi_fields, dict) and part in multi_fields:
            # Multi-field container. The check is positional (depth > 0) rather
            # than comparing segment names, so a sub-field that shares its name
            # with the first segment (e.g. "keyword.keyword") still resolves.
            # It is skipped at depth 0 because the root is a name -> mapping
            # dict in which "fields" could be an ordinary field name.
            current = multi_fields

        current = current.get(part)
        if current is None:
            return None
    return current
def main():
    """Print the live mapping and analyzer settings of a tenant's ES index.

    Parses ``--tenant-id`` from the command line, connects to Elasticsearch,
    resolves the tenant's index name, then prints the type / analyzer /
    search_analyzer / sub-fields of a fixed list of Chinese-language fields,
    followed by the analyzers defined in the index settings.

    Returns:
        0 when the check ran to completion; 1 when the connection fails, the
        index does not exist, or its mapping cannot be fetched or parsed.
    """
    import argparse

    arg_parser = argparse.ArgumentParser(description="检查 Elasticsearch 索引实际映射配置")
    arg_parser.add_argument("--tenant-id", type=str, required=True, help="租户ID")
    cli_args = arg_parser.parse_args()

    rule = "=" * 80
    print(rule)
    print("检查 Elasticsearch 索引实际映射配置")
    print(rule)

    # Connect to Elasticsearch.
    try:
        es_client = get_es_client_from_env()
        if not es_client.ping():
            print("✗ 无法连接到 Elasticsearch")
            return 1
        print("✓ Elasticsearch 连接成功\n")
    except Exception as exc:
        print(f"✗ 连接 Elasticsearch 失败: {exc}")
        return 1

    index_name = get_tenant_index_name(cli_args.tenant_id)

    # Bail out early if the index does not exist.
    if not es_client.index_exists(index_name):
        print(f"✗ 索引 '{index_name}' 不存在")
        return 1

    # Fetch the live mapping.
    print(f"获取索引 '{index_name}' 的映射配置...\n")
    raw_mapping = es_client.get_mapping(index_name)
    if not raw_mapping:
        print("✗ 无法获取索引映射")
        return 1

    # Expected response shape: {index_name: {mappings: {properties: {...}}}}
    per_index = raw_mapping.get(index_name, {})
    mappings_node = per_index.get('mappings', {})
    index_mapping = mappings_node.get('properties', {})
    if not index_mapping:
        print("✗ 无法解析映射结构")
        return 1

    # Fields whose mapping is inspected.
    fields_to_check = (
        "title.zh",
        "brief.zh",
        "description.zh",
        "vendor.zh",
        "vendor.zh.keyword",
        "category_path.zh",
        "category_name_text.zh",
    )

    print(rule)
    print("中文字段实际映射配置")
    print(rule)

    for field_name in fields_to_check:
        field_mapping = check_field_mapping(index_mapping, field_name)
        if field_mapping is None:
            print(f"\n❌ {field_name}: 字段不存在")
            continue

        print(f"\n📋 {field_name}:")
        print(f" 类型: {field_mapping.get('type', 'N/A')}")

        # Fall back to a fixed notice when an analyzer is not set explicitly.
        index_analyzer = field_mapping.get('analyzer')
        query_analyzer = field_mapping.get('search_analyzer')
        index_analyzer_text = index_analyzer if index_analyzer else "未设置(使用默认)"
        query_analyzer_text = query_analyzer if query_analyzer else "未设置(使用analyzer或默认)"
        print(f" 索引分析器 (analyzer): {index_analyzer_text}")
        print(f" 查询分析器 (search_analyzer): {query_analyzer_text}")

        # List sub-fields (multi-fields), if any.
        if 'fields' not in field_mapping:
            continue
        print(" 子字段:")
        for sub_name, sub_mapping in field_mapping['fields'].items():
            print(f" - {sub_name}: {sub_mapping.get('type', 'N/A')}")
            if 'normalizer' in sub_mapping:
                print(f" normalizer: {sub_mapping['normalizer']}")

    # Analyzer definitions from the index settings.
    print("\n" + rule)
    print("索引 Settings 中的 Analyzer 定义")
    print(rule)
    try:
        raw_settings = es_client.client.indices.get_settings(index=index_name)
        index_settings = raw_settings.get(index_name, {}).get('settings', {}).get('index', {})
        analyzers = index_settings.get('analysis', {}).get('analyzer', {})
        if not analyzers:
            print("\n⚠ 未找到自定义 analyzer 定义")
        else:
            print("\n定义的 Analyzer:")
            for analyzer_name, analyzer_config in analyzers.items():
                print(f"\n {analyzer_name}:")
                if not isinstance(analyzer_config, dict):
                    print(f" 配置: {analyzer_config}")
                    continue
                print(f" 类型: {analyzer_config.get('type', 'N/A')}")
                for option in ('tokenizer', 'filter'):
                    if option in analyzer_config:
                        print(f" {option}: {analyzer_config[option]}")
    except Exception as exc:
        # Settings are informational only; report the failure and finish.
        print(f"\n⚠ 无法获取 settings: {exc}")

    print("\n" + rule)
    print("检查完成")
    print(rule)
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|