Commit 5ac64fc7ffdce90b1ec0533f1aac8b1e22553a39
1 parent
351a7eb5
多语言查询
Showing
5 changed files
with
371 additions
and
22 deletions
Show diff stats
config/env_config.py
| @@ -31,8 +31,8 @@ REDIS_CONFIG = { | @@ -31,8 +31,8 @@ REDIS_CONFIG = { | ||
| 31 | 'socket_timeout': int(os.getenv('REDIS_SOCKET_TIMEOUT', 1)), | 31 | 'socket_timeout': int(os.getenv('REDIS_SOCKET_TIMEOUT', 1)), |
| 32 | 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)), | 32 | 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)), |
| 33 | 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true', | 33 | 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true', |
| 34 | - 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 180)), # 6 months | ||
| 35 | - 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360)), | 34 | + 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 720 days (~2 years) |
| 35 | + 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)), | ||
| 36 | 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'), | 36 | 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'), |
| 37 | } | 37 | } |
| 38 | 38 |
docs/常用查询 - ES.md
| 1 | -GET /search_products/_search | ||
| 2 | -{ | ||
| 3 | - "query": { | ||
| 4 | - "term": { | ||
| 5 | - "tenant_id": "2" | ||
| 6 | - } | ||
| 7 | - } | ||
| 8 | -} | ||
| 9 | 1 | ||
| 10 | 2 | ||
| 3 | +# ====================================== | ||
| 4 | +# 租户相关 | ||
| 5 | +# ====================================== | ||
| 6 | + | ||
| 7 | +### 1. 根据 tenant_id / spu_id 查询 | ||
| 11 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 8 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 12 | - "size": 5, | 9 | + "size": 1, |
| 13 | "query": { | 10 | "query": { |
| 14 | "bool": { | 11 | "bool": { |
| 15 | "filter": [ | 12 | "filter": [ |
| 16 | - { "term": { "tenant_id": "162" } } | 13 | + { "term": { "tenant_id": "170" } } |
| 17 | ] | 14 | ] |
| 18 | } | 15 | } |
| 19 | } | 16 | } |
| 20 | }' | 17 | }' |
| 21 | 18 | ||
| 19 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 20 | + "size": 1, | ||
| 21 | + "_source": ["_id", "*"], | ||
| 22 | + "query": { | ||
| 23 | + "bool": { | ||
| 24 | + "must": [ | ||
| 25 | + { | ||
| 26 | + "match": { | ||
| 27 | + "title_zh": { | ||
| 28 | + "query": "裙子" | ||
| 29 | + } | ||
| 30 | + } | ||
| 31 | + } | ||
| 32 | + ], | ||
| 33 | + "filter": [ | ||
| 34 | + { "term": { "tenant_id": "170" } } | ||
| 35 | + ] | ||
| 36 | + } | ||
| 37 | + } | ||
| 38 | +}' | ||
| 39 | + | ||
| 40 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ | ||
| 41 | + "analyzer": "query_ansj", | ||
| 42 | + "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" | ||
| 43 | +}' | ||
| 44 | + | ||
| 45 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ | ||
| 46 | + "analyzer": "hanlp_standard", | ||
| 47 | + "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" | ||
| 48 | +}' | ||
| 49 | + | ||
| 50 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 51 | + "size": 100, | ||
| 52 | + "from": 0, | ||
| 53 | + "query": { | ||
| 54 | + "bool": { | ||
| 55 | + "must": [ | ||
| 56 | + { | ||
| 57 | + "multi_match": { | ||
| 58 | + "_name": "base_query", | ||
| 59 | + "fields": [ | ||
| 60 | + "title_zh^3.0", | ||
| 61 | + "brief_zh^1.5", | ||
| 62 | + "description_zh", | ||
| 63 | + "vendor_zh^1.5", | ||
| 64 | + "tags", | ||
| 65 | + "category_path_zh^1.5", | ||
| 66 | + "category_name_zh^1.5", | ||
| 67 | + "option1_values^0.5" | ||
| 68 | + ], | ||
| 69 | + "minimum_should_match": "75%", | ||
| 70 | + "operator": "AND", | ||
| 71 | + "query": "裙", | ||
| 72 | + "tie_breaker": 0.9 | ||
| 73 | + } | ||
| 74 | + } | ||
| 75 | + ], | ||
| 76 | + "filter": [ | ||
| 77 | + { | ||
| 78 | + "term": { | ||
| 79 | + "tenant_id": "170" | ||
| 80 | + } | ||
| 81 | + } | ||
| 82 | + ] | ||
| 83 | + } | ||
| 84 | + } | ||
| 85 | +}' | ||
| 86 | + | ||
| 87 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 88 | + "size": 1, | ||
| 89 | + "from": 0, | ||
| 90 | + "query": { | ||
| 91 | + "bool": { | ||
| 92 | + "must": [ | ||
| 93 | + { | ||
| 94 | + "multi_match": { | ||
| 95 | + "_name": "base_query", | ||
| 96 | + "fields": [ | ||
| 97 | + "title_zh^3.0", | ||
| 98 | + "brief_zh^1.5", | ||
| 99 | + "description_zh", | ||
| 100 | + "vendor_zh^1.5", | ||
| 101 | + "tags", | ||
| 102 | + "category_path_zh^1.5", | ||
| 103 | + "category_name_zh^1.5", | ||
| 104 | + "option1_values^0.5" | ||
| 105 | + ], | ||
| 106 | + "minimum_should_match": "75%", | ||
| 107 | + "operator": "AND", | ||
| 108 | + "query": "裙", | ||
| 109 | + "tie_breaker": 0.9 | ||
| 110 | + } | ||
| 111 | + } | ||
| 112 | + ], | ||
| 113 | + "filter": [ | ||
| 114 | + { | ||
| 115 | + "term": { | ||
| 116 | + "tenant_id": "170" | ||
| 117 | + } | ||
| 118 | + } | ||
| 119 | + ] | ||
| 120 | + } | ||
| 121 | + }, | ||
| 122 | + "aggs": { | ||
| 123 | + "category1_name_facet": { | ||
| 124 | + "terms": { | ||
| 125 | + "field": "category1_name", | ||
| 126 | + "size": 15, | ||
| 127 | + "order": { | ||
| 128 | + "_count": "desc" | ||
| 129 | + } | ||
| 130 | + } | ||
| 131 | + }, | ||
| 132 | + "specifications_color_facet": { | ||
| 133 | + "nested": { | ||
| 134 | + "path": "specifications" | ||
| 135 | + }, | ||
| 136 | + "aggs": { | ||
| 137 | + "filter_by_name": { | ||
| 138 | + "filter": { | ||
| 139 | + "term": { | ||
| 140 | + "specifications.name": "color" | ||
| 141 | + } | ||
| 142 | + }, | ||
| 143 | + "aggs": { | ||
| 144 | + "value_counts": { | ||
| 145 | + "terms": { | ||
| 146 | + "field": "specifications.value", | ||
| 147 | + "size": 20, | ||
| 148 | + "order": { | ||
| 149 | + "_count": "desc" | ||
| 150 | + } | ||
| 151 | + } | ||
| 152 | + } | ||
| 153 | + } | ||
| 154 | + } | ||
| 155 | + } | ||
| 156 | + }, | ||
| 157 | + "specifications_size_facet": { | ||
| 158 | + "nested": { | ||
| 159 | + "path": "specifications" | ||
| 160 | + }, | ||
| 161 | + "aggs": { | ||
| 162 | + "filter_by_name": { | ||
| 163 | + "filter": { | ||
| 164 | + "term": { | ||
| 165 | + "specifications.name": "size" | ||
| 166 | + } | ||
| 167 | + }, | ||
| 168 | + "aggs": { | ||
| 169 | + "value_counts": { | ||
| 170 | + "terms": { | ||
| 171 | + "field": "specifications.value", | ||
| 172 | + "size": 15, | ||
| 173 | + "order": { | ||
| 174 | + "_count": "desc" | ||
| 175 | + } | ||
| 176 | + } | ||
| 177 | + } | ||
| 178 | + } | ||
| 179 | + } | ||
| 180 | + } | ||
| 181 | + }, | ||
| 182 | + "specifications_material_facet": { | ||
| 183 | + "nested": { | ||
| 184 | + "path": "specifications" | ||
| 185 | + }, | ||
| 186 | + "aggs": { | ||
| 187 | + "filter_by_name": { | ||
| 188 | + "filter": { | ||
| 189 | + "term": { | ||
| 190 | + "specifications.name": "material" | ||
| 191 | + } | ||
| 192 | + }, | ||
| 193 | + "aggs": { | ||
| 194 | + "value_counts": { | ||
| 195 | + "terms": { | ||
| 196 | + "field": "specifications.value", | ||
| 197 | + "size": 10, | ||
| 198 | + "order": { | ||
| 199 | + "_count": "desc" | ||
| 200 | + } | ||
| 201 | + } | ||
| 202 | + } | ||
| 203 | + } | ||
| 204 | + } | ||
| 205 | + } | ||
| 206 | + } | ||
| 207 | + } | ||
| 208 | +}' | ||
| 209 | + | ||
| 210 | +GET /search_products/_search | ||
| 211 | +{ | ||
| 212 | + "query": { | ||
| 213 | + "term": { | ||
| 214 | + "tenant_id": "2" | ||
| 215 | + } | ||
| 216 | + } | ||
| 217 | +} | ||
| 22 | 218 | ||
| 23 | 219 | ||
| 24 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 220 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| @@ -33,6 +229,16 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -33,6 +229,16 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 33 | }' | 229 | }' |
| 34 | 230 | ||
| 35 | 231 | ||
| 232 | +### 2. 统计租户的总文档数 | ||
| 233 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 234 | + "query": { | ||
| 235 | + "term": { | ||
| 236 | + "tenant_id": "162" | ||
| 237 | + } | ||
| 238 | + } | ||
| 239 | +}' | ||
| 240 | + | ||
| 241 | + | ||
| 36 | # ====================================== | 242 | # ====================================== |
| 37 | # 分面数据诊断相关查询 | 243 | # 分面数据诊断相关查询 |
| 38 | # ====================================== | 244 | # ====================================== |
| @@ -359,14 +565,6 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c | @@ -359,14 +565,6 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c | ||
| 359 | } | 565 | } |
| 360 | }' | 566 | }' |
| 361 | 567 | ||
| 362 | -### 4.3 统计租户的总文档数 | ||
| 363 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ | ||
| 364 | - "query": { | ||
| 365 | - "term": { | ||
| 366 | - "tenant_id": "162" | ||
| 367 | - } | ||
| 368 | - } | ||
| 369 | -}' | ||
| 370 | 568 | ||
| 371 | ## 5. 诊断问题场景 | 569 | ## 5. 诊断问题场景 |
| 372 | 570 |
docs/系统设计文档.md
| @@ -379,7 +379,6 @@ query_config: | @@ -379,7 +379,6 @@ query_config: | ||
| 379 | - "en" | 379 | - "en" |
| 380 | - "ru" | 380 | - "ru" |
| 381 | default_language: "zh" | 381 | default_language: "zh" |
| 382 | - enable_translation: true | ||
| 383 | translation_service: "deepl" | 382 | translation_service: "deepl" |
| 384 | translation_api_key: null # 通过环境变量设置 | 383 | translation_api_key: null # 通过环境变量设置 |
| 385 | ``` | 384 | ``` |
docs/系统设计文档v1.md
| @@ -327,7 +327,6 @@ query_config: | @@ -327,7 +327,6 @@ query_config: | ||
| 327 | - "en" | 327 | - "en" |
| 328 | - "ru" | 328 | - "ru" |
| 329 | default_language: "zh" | 329 | default_language: "zh" |
| 330 | - enable_translation: true | ||
| 331 | translation_service: "deepl" | 330 | translation_service: "deepl" |
| 332 | translation_api_key: null # 通过环境变量设置 | 331 | translation_api_key: null # 通过环境变量设置 |
| 333 | ``` | 332 | ``` |
| @@ -0,0 +1,153 @@ | @@ -0,0 +1,153 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +""" | ||
| 3 | +检查ES索引的实际映射配置,特别是中文字段的analyzer设置 | ||
| 4 | +""" | ||
| 5 | + | ||
| 6 | +import os | ||
| 7 | +import sys | ||
| 8 | +import json | ||
| 9 | +from pathlib import Path | ||
| 10 | + | ||
| 11 | +sys.path.insert(0, str(Path(__file__).parent.parent)) | ||
| 12 | + | ||
| 13 | +from utils.es_client import get_es_client_from_env | ||
| 14 | +from indexer.mapping_generator import DEFAULT_INDEX_NAME | ||
| 15 | + | ||
| 16 | + | ||
| 17 | +def check_field_mapping(mapping_dict, field_path): | ||
| 18 | + """Look up a field mapping by walking the dot-separated path one level at a time.""" | ||
| 19 | + parts = field_path.split('.') | ||
| 20 | + current = mapping_dict | ||
| 21 | + | ||
| 22 | + for part in parts: | ||
| 23 | + if isinstance(current, dict): | ||
| 24 | + current = current.get(part) | ||
| 25 | + if current is None: | ||
| 26 | + return None | ||
| 27 | + else: | ||
| 28 | + return None | ||
| 29 | + return current | ||
| 30 | + | ||
| 31 | + | ||
| 32 | +def main(): | ||
| 33 | + print("=" * 80) | ||
| 34 | + print("检查 Elasticsearch 索引实际映射配置") | ||
| 35 | + print("=" * 80) | ||
| 36 | + | ||
| 37 | + # 连接ES | ||
| 38 | + try: | ||
| 39 | + es_client = get_es_client_from_env() | ||
| 40 | + if not es_client.ping(): | ||
| 41 | + print("✗ 无法连接到 Elasticsearch") | ||
| 42 | + return 1 | ||
| 43 | + print("✓ Elasticsearch 连接成功\n") | ||
| 44 | + except Exception as e: | ||
| 45 | + print(f"✗ 连接 Elasticsearch 失败: {e}") | ||
| 46 | + return 1 | ||
| 47 | + | ||
| 48 | + index_name = DEFAULT_INDEX_NAME | ||
| 49 | + | ||
| 50 | + # 检查索引是否存在 | ||
| 51 | + if not es_client.index_exists(index_name): | ||
| 52 | + print(f"✗ 索引 '{index_name}' 不存在") | ||
| 53 | + return 1 | ||
| 54 | + | ||
| 55 | + # 获取实际映射 | ||
| 56 | + print(f"获取索引 '{index_name}' 的映射配置...\n") | ||
| 57 | + mapping = es_client.get_mapping(index_name) | ||
| 58 | + | ||
| 59 | + if not mapping: | ||
| 60 | + print("✗ 无法获取索引映射") | ||
| 61 | + return 1 | ||
| 62 | + | ||
| 63 | + # 提取实际映射结构 | ||
| 64 | + # ES返回格式: {index_name: {mappings: {properties: {...}}}} | ||
| 65 | + index_mapping = mapping.get(index_name, {}).get('mappings', {}).get('properties', {}) | ||
| 66 | + | ||
| 67 | + if not index_mapping: | ||
| 68 | + print("✗ 无法解析映射结构") | ||
| 69 | + return 1 | ||
| 70 | + | ||
| 71 | + # 检查关键字段 | ||
| 72 | + fields_to_check = [ | ||
| 73 | + 'title_zh', | ||
| 74 | + 'brief_zh', | ||
| 75 | + 'description_zh', | ||
| 76 | + 'vendor_zh', | ||
| 77 | + 'category_path_zh', | ||
| 78 | + 'category_name_zh' | ||
| 79 | + ] | ||
| 80 | + | ||
| 81 | + print("=" * 80) | ||
| 82 | + print("中文字段实际映射配置") | ||
| 83 | + print("=" * 80) | ||
| 84 | + | ||
| 85 | + for field_name in fields_to_check: | ||
| 86 | + field_mapping = index_mapping.get(field_name) | ||
| 87 | + | ||
| 88 | + if field_mapping is None: | ||
| 89 | + print(f"\n❌ {field_name}: 字段不存在") | ||
| 90 | + continue | ||
| 91 | + | ||
| 92 | + print(f"\n📋 {field_name}:") | ||
| 93 | + print(f" 类型: {field_mapping.get('type', 'N/A')}") | ||
| 94 | + | ||
| 95 | + analyzer = field_mapping.get('analyzer') | ||
| 96 | + search_analyzer = field_mapping.get('search_analyzer') | ||
| 97 | + | ||
| 98 | + if analyzer: | ||
| 99 | + print(f" 索引分析器 (analyzer): {analyzer}") | ||
| 100 | + else: | ||
| 101 | + print(f" 索引分析器 (analyzer): 未设置(使用默认)") | ||
| 102 | + | ||
| 103 | + if search_analyzer: | ||
| 104 | + print(f" 查询分析器 (search_analyzer): {search_analyzer}") | ||
| 105 | + else: | ||
| 106 | + print(f" 查询分析器 (search_analyzer): 未设置(使用analyzer或默认)") | ||
| 107 | + | ||
| 108 | + # 检查是否有子字段 | ||
| 109 | + if 'fields' in field_mapping: | ||
| 110 | + print(f" 子字段:") | ||
| 111 | + for sub_field, sub_mapping in field_mapping['fields'].items(): | ||
| 112 | + print(f" - {sub_field}: {sub_mapping.get('type', 'N/A')}") | ||
| 113 | + if 'normalizer' in sub_mapping: | ||
| 114 | + print(f" normalizer: {sub_mapping['normalizer']}") | ||
| 115 | + | ||
| 116 | + # 获取settings中的analyzer定义 | ||
| 117 | + print("\n" + "=" * 80) | ||
| 118 | + print("索引 Settings 中的 Analyzer 定义") | ||
| 119 | + print("=" * 80) | ||
| 120 | + | ||
| 121 | + try: | ||
| 122 | + settings = es_client.client.indices.get_settings(index=index_name) | ||
| 123 | + index_settings = settings.get(index_name, {}).get('settings', {}).get('index', {}) | ||
| 124 | + analysis = index_settings.get('analysis', {}) | ||
| 125 | + analyzers = analysis.get('analyzer', {}) | ||
| 126 | + | ||
| 127 | + if analyzers: | ||
| 128 | + print("\n定义的 Analyzer:") | ||
| 129 | + for analyzer_name, analyzer_config in analyzers.items(): | ||
| 130 | + print(f"\n {analyzer_name}:") | ||
| 131 | + if isinstance(analyzer_config, dict): | ||
| 132 | + print(f" 类型: {analyzer_config.get('type', 'N/A')}") | ||
| 133 | + if 'tokenizer' in analyzer_config: | ||
| 134 | + print(f" tokenizer: {analyzer_config['tokenizer']}") | ||
| 135 | + if 'filter' in analyzer_config: | ||
| 136 | + print(f" filter: {analyzer_config['filter']}") | ||
| 137 | + else: | ||
| 138 | + print(f" 配置: {analyzer_config}") | ||
| 139 | + else: | ||
| 140 | + print("\n⚠ 未找到自定义 analyzer 定义") | ||
| 141 | + | ||
| 142 | + except Exception as e: | ||
| 143 | + print(f"\n⚠ 无法获取 settings: {e}") | ||
| 144 | + | ||
| 145 | + print("\n" + "=" * 80) | ||
| 146 | + print("检查完成") | ||
| 147 | + print("=" * 80) | ||
| 148 | + | ||
| 149 | + return 0 | ||
| 150 | + | ||
| 151 | + | ||
| 152 | +if __name__ == '__main__': | ||
| 153 | + sys.exit(main()) |