diff --git a/config/env_config.py b/config/env_config.py index fb4afab..837201d 100644 --- a/config/env_config.py +++ b/config/env_config.py @@ -31,8 +31,8 @@ REDIS_CONFIG = { 'socket_timeout': int(os.getenv('REDIS_SOCKET_TIMEOUT', 1)), 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)), 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true', - 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 180)), # 6 months - 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360)), + 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 720 days (~2 years) + 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)), 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'), } diff --git a/docs/常用查询 - ES.md b/docs/常用查询 - ES.md index 6fc1486..3e20d15 100644 --- a/docs/常用查询 - ES.md +++ b/docs/常用查询 - ES.md @@ -1,24 +1,220 @@ -GET /search_products/_search -{ - "query": { - "term": { - "tenant_id": "2" - } - } -} +# ====================================== +# 租户相关 +# ====================================== + +### 1. 
根据 tenant_id / spu_id 查询 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ - "size": 5, + "size": 1, "query": { "bool": { "filter": [ - { "term": { "tenant_id": "162" } } + { "term": { "tenant_id": "170" } } ] } } }' +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ + "size": 1, + "_source": ["_id", "*"], + "query": { + "bool": { + "must": [ + { + "match": { + "title_zh": { + "query": "裙子" + } + } + } + ], + "filter": [ + { "term": { "tenant_id": "170" } } + ] + } + } +}' + +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ + "analyzer": "query_ansj", + "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" +}' + +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ + "analyzer": "hanlp_standard", + "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" +}' + +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ + "size": 100, + "from": 0, + "query": { + "bool": { + "must": [ + { + "multi_match": { + "_name": "base_query", + "fields": [ + "title_zh^3.0", + "brief_zh^1.5", + "description_zh", + "vendor_zh^1.5", + "tags", + "category_path_zh^1.5", + "category_name_zh^1.5", + "option1_values^0.5" + ], + "minimum_should_match": "75%", + "operator": "AND", + "query": "裙", + "tie_breaker": 0.9 + } + } + ], + "filter": [ + { + "term": { + "tenant_id": "170" + } + } + ] + } + } +}' + +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ + "size": 1, + "from": 0, + "query": { + "bool": { + "must": [ + { + "multi_match": { + "_name": "base_query", + "fields": [ + "title_zh^3.0", + "brief_zh^1.5", + "description_zh", + 
"vendor_zh^1.5", + "tags", + "category_path_zh^1.5", + "category_name_zh^1.5", + "option1_values^0.5" + ], + "minimum_should_match": "75%", + "operator": "AND", + "query": "裙", + "tie_breaker": 0.9 + } + } + ], + "filter": [ + { + "term": { + "tenant_id": "170" + } + } + ] + } + }, + "aggs": { + "category1_name_facet": { + "terms": { + "field": "category1_name", + "size": 15, + "order": { + "_count": "desc" + } + } + }, + "specifications_color_facet": { + "nested": { + "path": "specifications" + }, + "aggs": { + "filter_by_name": { + "filter": { + "term": { + "specifications.name": "color" + } + }, + "aggs": { + "value_counts": { + "terms": { + "field": "specifications.value", + "size": 20, + "order": { + "_count": "desc" + } + } + } + } + } + } + }, + "specifications_size_facet": { + "nested": { + "path": "specifications" + }, + "aggs": { + "filter_by_name": { + "filter": { + "term": { + "specifications.name": "size" + } + }, + "aggs": { + "value_counts": { + "terms": { + "field": "specifications.value", + "size": 15, + "order": { + "_count": "desc" + } + } + } + } + } + } + }, + "specifications_material_facet": { + "nested": { + "path": "specifications" + }, + "aggs": { + "filter_by_name": { + "filter": { + "term": { + "specifications.name": "material" + } + }, + "aggs": { + "value_counts": { + "terms": { + "field": "specifications.value", + "size": 10, + "order": { + "_count": "desc" + } + } + } + } + } + } + } + } +}' + +GET /search_products/_search +{ + "query": { + "term": { + "tenant_id": "2" + } + } +} curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ @@ -33,6 +229,16 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ }' +### 2. 
统计租户的总文档数 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ + "query": { + "term": { + "tenant_id": "162" + } + } +}' + + # ====================================== # 分面数据诊断相关查询 # ====================================== @@ -359,14 +565,6 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c } }' -### 4.3 统计租户的总文档数 -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ - "query": { - "term": { - "tenant_id": "162" - } - } -}' ## 5. 诊断问题场景 diff --git a/docs/系统设计文档.md b/docs/系统设计文档.md index a06f5d6..3aeaeb4 100644 --- a/docs/系统设计文档.md +++ b/docs/系统设计文档.md @@ -379,7 +379,6 @@ query_config: - "en" - "ru" default_language: "zh" - enable_translation: true translation_service: "deepl" translation_api_key: null # 通过环境变量设置 ``` diff --git a/docs/系统设计文档v1.md b/docs/系统设计文档v1.md index 076dddc..63dfd61 100644 --- a/docs/系统设计文档v1.md +++ b/docs/系统设计文档v1.md @@ -327,7 +327,6 @@ query_config: - "en" - "ru" default_language: "zh" - enable_translation: true translation_service: "deepl" translation_api_key: null # 通过环境变量设置 ``` diff --git a/scripts/check_index_mapping.py b/scripts/check_index_mapping.py new file mode 100644 index 0000000..3640633 --- /dev/null +++ b/scripts/check_index_mapping.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +""" +检查ES索引的实际映射配置,特别是中文字段的analyzer设置 +""" + +import os +import sys +import json +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from utils.es_client import get_es_client_from_env +from indexer.mapping_generator import DEFAULT_INDEX_NAME + + +def check_field_mapping(mapping_dict, field_path): + """递归查找字段映射""" + parts = field_path.split('.') + current = mapping_dict + + for part in parts: + if isinstance(current, dict): + current = current.get(part) + if current is None: + return None + else: + return None + return current + + 
def _unwrap_index_entry(response, index_name):
    """Return the per-index payload of an ES ``{index: {...}}`` response.

    The entry stored under ``index_name`` is preferred.  When that key is
    missing and the response holds exactly one entry, that entry is returned
    instead: Elasticsearch keys these responses by concrete index name, so a
    request made through an alias would otherwise look empty.

    Args:
        response: Dict-like response keyed by index name.
        index_name: Name that was used for the request.

    Returns:
        The payload for the index, or an empty dict if none can be resolved.
    """
    entry = response.get(index_name)
    if entry is None:
        candidates = list(response.values())
        # Only fall back when the choice is unambiguous.
        if len(candidates) == 1:
            entry = candidates[0]
    return entry or {}


def _print_field_mapping(field_name, field_mapping):
    """Print type, analyzers and sub-fields of one mapped field.

    Args:
        field_name: Name of the field being reported.
        field_mapping: Mapping dict of the field, or ``None`` if the field is
            absent from the index mapping.
    """
    if field_mapping is None:
        print(f"\n❌ {field_name}: 字段不存在")
        return

    print(f"\n📋 {field_name}:")
    print(f" 类型: {field_mapping.get('type', 'N/A')}")

    analyzer = field_mapping.get('analyzer')
    if analyzer:
        print(f" 索引分析器 (analyzer): {analyzer}")
    else:
        print(" 索引分析器 (analyzer): 未设置(使用默认)")

    search_analyzer = field_mapping.get('search_analyzer')
    if search_analyzer:
        print(f" 查询分析器 (search_analyzer): {search_analyzer}")
    else:
        print(" 查询分析器 (search_analyzer): 未设置(使用analyzer或默认)")

    # Multi-fields (e.g. a keyword sub-field) are listed with their type.
    if 'fields' in field_mapping:
        print(" 子字段:")
        for sub_field, sub_mapping in field_mapping['fields'].items():
            print(f" - {sub_field}: {sub_mapping.get('type', 'N/A')}")
            if 'normalizer' in sub_mapping:
                print(f" normalizer: {sub_mapping['normalizer']}")


def _print_analyzers(analyzers):
    """Print the custom analyzers defined in the index settings.

    Args:
        analyzers: Mapping of analyzer name to its configuration; may be
            empty when the index defines no custom analyzers.
    """
    if not analyzers:
        print("\n⚠ 未找到自定义 analyzer 定义")
        return

    print("\n定义的 Analyzer:")
    for analyzer_name, analyzer_config in analyzers.items():
        print(f"\n {analyzer_name}:")
        if isinstance(analyzer_config, dict):
            print(f" 类型: {analyzer_config.get('type', 'N/A')}")
            if 'tokenizer' in analyzer_config:
                print(f" tokenizer: {analyzer_config['tokenizer']}")
            if 'filter' in analyzer_config:
                print(f" filter: {analyzer_config['filter']}")
        else:
            print(f" 配置: {analyzer_config}")


def main():
    """Report the live mapping and analyzer settings of the default index.

    Connects to Elasticsearch, checks that ``DEFAULT_INDEX_NAME`` exists,
    prints the mapping of the Chinese text fields and then the analyzers
    defined in the index settings.

    Returns:
        0 when the check ran to completion, 1 when the cluster is
        unreachable, the index is missing, or its mapping cannot be read.
        A failure to read the settings is only reported as a warning.
    """
    banner = "=" * 80

    print(banner)
    print("检查 Elasticsearch 索引实际映射配置")
    print(banner)

    # Connect to Elasticsearch.  This is the script's top-level boundary, so
    # any connection error is reported and turned into a non-zero exit code.
    try:
        es_client = get_es_client_from_env()
        if not es_client.ping():
            print("✗ 无法连接到 Elasticsearch")
            return 1
        print("✓ Elasticsearch 连接成功\n")
    except Exception as e:
        print(f"✗ 连接 Elasticsearch 失败: {e}")
        return 1

    index_name = DEFAULT_INDEX_NAME

    # The index must exist before its mapping can be inspected.
    if not es_client.index_exists(index_name):
        print(f"✗ 索引 '{index_name}' 不存在")
        return 1

    # Fetch the live mapping.
    print(f"获取索引 '{index_name}' 的映射配置...\n")
    mapping = es_client.get_mapping(index_name)

    if not mapping:
        print("✗ 无法获取索引映射")
        return 1

    # Response shape: {index_name: {mappings: {properties: {...}}}}
    index_mapping = (
        _unwrap_index_entry(mapping, index_name)
        .get('mappings', {})
        .get('properties', {})
    )

    if not index_mapping:
        print("✗ 无法解析映射结构")
        return 1

    # Chinese text fields whose analyzers matter for search quality.
    fields_to_check = [
        'title_zh',
        'brief_zh',
        'description_zh',
        'vendor_zh',
        'category_path_zh',
        'category_name_zh',
    ]

    print(banner)
    print("中文字段实际映射配置")
    print(banner)

    for field_name in fields_to_check:
        _print_field_mapping(field_name, index_mapping.get(field_name))

    # Analyzer definitions live in the index settings, not in the mapping.
    print("\n" + banner)
    print("索引 Settings 中的 Analyzer 定义")
    print(banner)

    try:
        settings = es_client.client.indices.get_settings(index=index_name)
        index_settings = (
            _unwrap_index_entry(settings, index_name)
            .get('settings', {})
            .get('index', {})
        )
        analyzers = index_settings.get('analysis', {}).get('analyzer', {})
        _print_analyzers(analyzers)
    except Exception as e:
        # Best effort: the mapping report above is still valid without this.
        print(f"\n⚠ 无法获取 settings: {e}")

    print("\n" + banner)
    print("检查完成")
    print(banner)

    return 0


if __name__ == '__main__':
    sys.exit(main())