#!/usr/bin/env python3
"""
Inspect the live mapping of the Elasticsearch index, with a focus on the
analyzer settings of the Chinese-language fields.

The script prints, for each field of interest, its type, index analyzer,
search analyzer and sub-fields, followed by the analyzers defined in the
index settings. It exits with status 0 on success and 1 on any failure.
"""

import os
import sys
import json
from pathlib import Path

# Put the parent of this script's directory on sys.path so that the `utils`
# and `indexer` packages imported below can be resolved when the file is run
# directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from utils.es_client import get_es_client_from_env
from indexer.mapping_generator import DEFAULT_INDEX_NAME

# Horizontal rule used around every section title.
_BANNER = "=" * 80

# Chinese-language fields whose mapping is reported by main().
_FIELDS_TO_CHECK = (
    'title_zh',
    'brief_zh',
    'description_zh',
    'vendor_zh',
    'category_path_zh',
    'category_name_zh',
)


def check_field_mapping(mapping_dict, field_path):
    """Look up a dot-separated path in a nested dict.

    Each component of ``field_path`` is used as a literal dict key; no
    ``properties`` / ``fields`` levels are inserted automatically.

    Args:
        mapping_dict: The (possibly nested) dict to search.
        field_path: Dot-separated key path, e.g. ``"a.b.c"``.

    Returns:
        The value found at the path, or ``None`` if any component is missing,
        maps to ``None``, or an intermediate value is not a dict.
    """
    current = mapping_dict
    for part in field_path.split('.'):
        if not isinstance(current, dict):
            return None
        current = current.get(part)
        if current is None:
            return None
    return current


def _print_banner(title, leading_blank=False):
    """Print ``title`` framed by two 80-character rules."""
    print(("\n" if leading_blank else "") + _BANNER)
    print(title)
    print(_BANNER)


def _select_index_entry(response, index_name):
    """Return the per-index entry of a get-mapping / get-settings response.

    Responses are keyed by concrete index name. When ``index_name`` is an
    alias the key differs from the requested name, so if the requested name
    is absent and the response holds exactly one index, that entry is used.

    Returns:
        The matching entry, or an empty dict when it cannot be determined
        (no entries, or several entries none of which is ``index_name``).
    """
    entry = response.get(index_name)
    if entry is not None:
        return entry
    keys = list(response)
    if len(keys) == 1:
        return response[keys[0]]
    return {}


def _print_field_mapping(field_name, field_mapping):
    """Print type, analyzers and sub-fields of a single field mapping."""
    if field_mapping is None:
        print(f"\n❌ {field_name}: 字段不存在")
        return

    print(f"\n📋 {field_name}:")
    print(f" 类型: {field_mapping.get('type', 'N/A')}")

    analyzer = field_mapping.get('analyzer')
    search_analyzer = field_mapping.get('search_analyzer')

    if analyzer:
        print(f" 索引分析器 (analyzer): {analyzer}")
    else:
        print(" 索引分析器 (analyzer): 未设置(使用默认)")

    if search_analyzer:
        print(f" 查询分析器 (search_analyzer): {search_analyzer}")
    else:
        print(" 查询分析器 (search_analyzer): 未设置(使用analyzer或默认)")

    # Report multi-fields (sub-fields) and their normalizer, if any.
    if 'fields' in field_mapping:
        print(" 子字段:")
        for sub_field, sub_mapping in field_mapping['fields'].items():
            print(f" - {sub_field}: {sub_mapping.get('type', 'N/A')}")
            if 'normalizer' in sub_mapping:
                print(f" normalizer: {sub_mapping['normalizer']}")


def _print_analyzer_settings(es_client, index_name):
    """Print the analyzers defined under ``index.analysis`` in the settings.

    Any failure while fetching or reading the settings is reported as a
    warning rather than aborting the script, since this section is
    supplementary to the mapping report.
    """
    try:
        settings = es_client.client.indices.get_settings(index=index_name)
        index_settings = (
            _select_index_entry(settings, index_name)
            .get('settings', {})
            .get('index', {})
        )
        analyzers = index_settings.get('analysis', {}).get('analyzer', {})

        if not analyzers:
            print("\n⚠ 未找到自定义 analyzer 定义")
            return

        print("\n定义的 Analyzer:")
        for analyzer_name, analyzer_config in analyzers.items():
            print(f"\n {analyzer_name}:")
            if isinstance(analyzer_config, dict):
                print(f" 类型: {analyzer_config.get('type', 'N/A')}")
                if 'tokenizer' in analyzer_config:
                    print(f" tokenizer: {analyzer_config['tokenizer']}")
                if 'filter' in analyzer_config:
                    print(f" filter: {analyzer_config['filter']}")
            else:
                print(f" 配置: {analyzer_config}")
    except Exception as e:
        print(f"\n⚠ 无法获取 settings: {e}")


def main():
    """Report the live mapping and analyzer settings of the default index.

    Returns:
        0 when the report was produced, 1 when Elasticsearch is unreachable,
        the index does not exist, or its mapping cannot be fetched or parsed.
    """
    _print_banner("检查 Elasticsearch 索引实际映射配置")

    # Connect to Elasticsearch.
    try:
        es_client = get_es_client_from_env()
        if not es_client.ping():
            print("✗ 无法连接到 Elasticsearch")
            return 1
        print("✓ Elasticsearch 连接成功\n")
    except Exception as e:
        print(f"✗ 连接 Elasticsearch 失败: {e}")
        return 1

    index_name = DEFAULT_INDEX_NAME

    # Make sure the index exists before asking for its mapping.
    if not es_client.index_exists(index_name):
        print(f"✗ 索引 '{index_name}' 不存在")
        return 1

    # Fetch the live mapping.
    print(f"获取索引 '{index_name}' 的映射配置...\n")
    mapping = es_client.get_mapping(index_name)
    if not mapping:
        print("✗ 无法获取索引映射")
        return 1

    # Expected response shape: {index_name: {mappings: {properties: {...}}}}
    index_mapping = (
        _select_index_entry(mapping, index_name)
        .get('mappings', {})
        .get('properties', {})
    )
    if not index_mapping:
        print("✗ 无法解析映射结构")
        return 1

    # Report each Chinese-language field.
    _print_banner("中文字段实际映射配置")
    for field_name in _FIELDS_TO_CHECK:
        _print_field_mapping(field_name, index_mapping.get(field_name))

    # Report the analyzer definitions from the index settings.
    _print_banner("索引 Settings 中的 Analyzer 定义", leading_blank=True)
    _print_analyzer_settings(es_client, index_name)

    _print_banner("检查完成", leading_blank=True)
    return 0


if __name__ == '__main__':
    sys.exit(main())