Commit 5ac64fc7ffdce90b1ec0533f1aac8b1e22553a39

Authored by tangwang
1 parent 351a7eb5

多语言查询

config/env_config.py
... ... @@ -31,8 +31,8 @@ REDIS_CONFIG = {
31 31 'socket_timeout': int(os.getenv('REDIS_SOCKET_TIMEOUT', 1)),
32 32 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)),
33 33 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true',
34   - 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 180)), # 6 months
35   - 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360)),
  34 + 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 720 days (~2 years)
  35 + 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)),
36 36 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'),
37 37 }
38 38  
... ...
docs/常用查询 - ES.md
1   -GET /search_products/_search
2   -{
3   - "query": {
4   - "term": {
5   - "tenant_id": "2"
6   - }
7   - }
8   -}
9 1  
10 2  
  3 +# ======================================
  4 +# 租户相关
  5 +# ======================================
  6 +
  7 +### 1. 根据 tenant_id / spu_id 查询
11 8 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
12   - "size": 5,
  9 + "size": 1,
13 10 "query": {
14 11 "bool": {
15 12 "filter": [
16   - { "term": { "tenant_id": "162" } }
  13 + { "term": { "tenant_id": "170" } }
17 14 ]
18 15 }
19 16 }
20 17 }'
21 18  
  19 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  20 + "size": 1,
  21 + "_source": ["_id", "*"],
  22 + "query": {
  23 + "bool": {
  24 + "must": [
  25 + {
  26 + "match": {
  27 + "title_zh": {
  28 + "query": "裙子"
  29 + }
  30 + }
  31 + }
  32 + ],
  33 + "filter": [
  34 + { "term": { "tenant_id": "170" } }
  35 + ]
  36 + }
  37 + }
  38 +}'
  39 +
  40 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{
  41 + "analyzer": "query_ansj",
  42 + "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
  43 +}'
  44 +
  45 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{
  46 + "analyzer": "hanlp_standard",
  47 + "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
  48 +}'
  49 +
  50 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  51 + "size": 100,
  52 + "from": 0,
  53 + "query": {
  54 + "bool": {
  55 + "must": [
  56 + {
  57 + "multi_match": {
  58 + "_name": "base_query",
  59 + "fields": [
  60 + "title_zh^3.0",
  61 + "brief_zh^1.5",
  62 + "description_zh",
  63 + "vendor_zh^1.5",
  64 + "tags",
  65 + "category_path_zh^1.5",
  66 + "category_name_zh^1.5",
  67 + "option1_values^0.5"
  68 + ],
  69 + "minimum_should_match": "75%",
  70 + "operator": "AND",
  71 + "query": "裙",
  72 + "tie_breaker": 0.9
  73 + }
  74 + }
  75 + ],
  76 + "filter": [
  77 + {
  78 + "term": {
  79 + "tenant_id": "170"
  80 + }
  81 + }
  82 + ]
  83 + }
  84 + }
  85 +}'
  86 +
  87 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  88 + "size": 1,
  89 + "from": 0,
  90 + "query": {
  91 + "bool": {
  92 + "must": [
  93 + {
  94 + "multi_match": {
  95 + "_name": "base_query",
  96 + "fields": [
  97 + "title_zh^3.0",
  98 + "brief_zh^1.5",
  99 + "description_zh",
  100 + "vendor_zh^1.5",
  101 + "tags",
  102 + "category_path_zh^1.5",
  103 + "category_name_zh^1.5",
  104 + "option1_values^0.5"
  105 + ],
  106 + "minimum_should_match": "75%",
  107 + "operator": "AND",
  108 + "query": "裙",
  109 + "tie_breaker": 0.9
  110 + }
  111 + }
  112 + ],
  113 + "filter": [
  114 + {
  115 + "term": {
  116 + "tenant_id": "170"
  117 + }
  118 + }
  119 + ]
  120 + }
  121 + },
  122 + "aggs": {
  123 + "category1_name_facet": {
  124 + "terms": {
  125 + "field": "category1_name",
  126 + "size": 15,
  127 + "order": {
  128 + "_count": "desc"
  129 + }
  130 + }
  131 + },
  132 + "specifications_color_facet": {
  133 + "nested": {
  134 + "path": "specifications"
  135 + },
  136 + "aggs": {
  137 + "filter_by_name": {
  138 + "filter": {
  139 + "term": {
  140 + "specifications.name": "color"
  141 + }
  142 + },
  143 + "aggs": {
  144 + "value_counts": {
  145 + "terms": {
  146 + "field": "specifications.value",
  147 + "size": 20,
  148 + "order": {
  149 + "_count": "desc"
  150 + }
  151 + }
  152 + }
  153 + }
  154 + }
  155 + }
  156 + },
  157 + "specifications_size_facet": {
  158 + "nested": {
  159 + "path": "specifications"
  160 + },
  161 + "aggs": {
  162 + "filter_by_name": {
  163 + "filter": {
  164 + "term": {
  165 + "specifications.name": "size"
  166 + }
  167 + },
  168 + "aggs": {
  169 + "value_counts": {
  170 + "terms": {
  171 + "field": "specifications.value",
  172 + "size": 15,
  173 + "order": {
  174 + "_count": "desc"
  175 + }
  176 + }
  177 + }
  178 + }
  179 + }
  180 + }
  181 + },
  182 + "specifications_material_facet": {
  183 + "nested": {
  184 + "path": "specifications"
  185 + },
  186 + "aggs": {
  187 + "filter_by_name": {
  188 + "filter": {
  189 + "term": {
  190 + "specifications.name": "material"
  191 + }
  192 + },
  193 + "aggs": {
  194 + "value_counts": {
  195 + "terms": {
  196 + "field": "specifications.value",
  197 + "size": 10,
  198 + "order": {
  199 + "_count": "desc"
  200 + }
  201 + }
  202 + }
  203 + }
  204 + }
  205 + }
  206 + }
  207 + }
  208 +}'
  209 +
  210 +GET /search_products/_search
  211 +{
  212 + "query": {
  213 + "term": {
  214 + "tenant_id": "2"
  215 + }
  216 + }
  217 +}
22 218  
23 219  
24 220 curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
... ... @@ -33,6 +229,16 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/
33 229 }'
34 230  
35 231  
  232 +### 2. 统计租户的总文档数
  233 +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{
  234 + "query": {
  235 + "term": {
  236 + "tenant_id": "162"
  237 + }
  238 + }
  239 +}'
  240 +
  241 +
36 242 # ======================================
37 243 # 分面数据诊断相关查询
38 244 # ======================================
... ... @@ -359,14 +565,6 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c
359 565 }
360 566 }'
361 567  
362   -### 4.3 统计租户的总文档数
363   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{
364   - "query": {
365   - "term": {
366   - "tenant_id": "162"
367   - }
368   - }
369   -}'
370 568  
371 569 ## 5. 诊断问题场景
372 570  
... ...
docs/系统设计文档.md
... ... @@ -379,7 +379,6 @@ query_config:
379 379 - "en"
380 380 - "ru"
381 381 default_language: "zh"
382   - enable_translation: true
383 382 translation_service: "deepl"
384 383 translation_api_key: null # 通过环境变量设置
385 384 ```
... ...
docs/系统设计文档v1.md
... ... @@ -327,7 +327,6 @@ query_config:
327 327 - "en"
328 328 - "ru"
329 329 default_language: "zh"
330   - enable_translation: true
331 330 translation_service: "deepl"
332 331 translation_api_key: null # 通过环境变量设置
333 332 ```
... ...
scripts/check_index_mapping.py 0 → 100644
... ... @@ -0,0 +1,153 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +检查ES索引的实际映射配置,特别是中文字段的analyzer设置
  4 +"""
  5 +
  6 +import os
  7 +import sys
  8 +import json
  9 +from pathlib import Path
  10 +
  11 +sys.path.insert(0, str(Path(__file__).parent.parent))
  12 +
  13 +from utils.es_client import get_es_client_from_env
  14 +from indexer.mapping_generator import DEFAULT_INDEX_NAME
  15 +
  16 +
def check_field_mapping(mapping_dict, field_path):
    """Look up the mapping of a (possibly dotted) field path.

    Each segment of ``field_path`` is first looked up as a direct key of the
    current dict. If that misses, the segment is looked up inside the
    current dict's ``properties`` and then ``fields`` entries, which is where
    Elasticsearch mappings keep object/nested children and multi-fields.
    This lets paths such as ``specifications.name`` or ``title_zh.keyword``
    resolve against a raw mapping.

    Args:
        mapping_dict: Mapping dictionary to search.
        field_path: Field name, with ``.`` separating nested segments.

    Returns:
        The value found at the end of the path, or ``None`` when any segment
        is missing or an intermediate value is not a dict.
    """
    current = mapping_dict

    for part in field_path.split('.'):
        if not isinstance(current, dict):
            return None

        # Direct key wins so plain nested dicts behave exactly as before.
        child = current.get(part)
        if child is None:
            # Fall back to the containers Elasticsearch uses for sub-fields.
            for container in ('properties', 'fields'):
                nested = current.get(container)
                if isinstance(nested, dict) and nested.get(part) is not None:
                    child = nested[part]
                    break

        if child is None:
            return None
        current = child

    return current
  30 +
  31 +
def _resolve_index_entry(response, index_name):
    """Return the per-index payload of an index-level Elasticsearch response.

    Get-mapping and get-settings responses are keyed by concrete index name.
    When ``index_name`` is an alias the key differs from the requested name,
    so fall back to the only entry when exactly one index was returned.
    Returns an empty dict when nothing usable is found.
    """
    entry = response.get(index_name)
    if entry is None:
        candidates = list(response.values())
        if len(candidates) == 1:
            entry = candidates[0]
    return entry or {}


def _print_field_report(field_name, field_mapping):
    """Print type, analyzers and sub-fields of one field mapping."""
    print(f"\n📋 {field_name}:")
    print(f" 类型: {field_mapping.get('type', 'N/A')}")

    analyzer = field_mapping.get('analyzer')
    search_analyzer = field_mapping.get('search_analyzer')

    if analyzer:
        print(f" 索引分析器 (analyzer): {analyzer}")
    else:
        print(" 索引分析器 (analyzer): 未设置(使用默认)")

    if search_analyzer:
        print(f" 查询分析器 (search_analyzer): {search_analyzer}")
    else:
        print(" 查询分析器 (search_analyzer): 未设置(使用analyzer或默认)")

    # Report multi-fields (sub-fields), if the field declares any.
    if 'fields' in field_mapping:
        print(" 子字段:")
        for sub_field, sub_mapping in field_mapping['fields'].items():
            print(f" - {sub_field}: {sub_mapping.get('type', 'N/A')}")
            if 'normalizer' in sub_mapping:
                print(f" normalizer: {sub_mapping['normalizer']}")


def _print_analyzer_report(analyzers):
    """Print the analyzers defined in the index settings."""
    if not analyzers:
        print("\n⚠ 未找到自定义 analyzer 定义")
        return

    print("\n定义的 Analyzer:")
    for analyzer_name, analyzer_config in analyzers.items():
        print(f"\n {analyzer_name}:")
        if isinstance(analyzer_config, dict):
            print(f" 类型: {analyzer_config.get('type', 'N/A')}")
            if 'tokenizer' in analyzer_config:
                print(f" tokenizer: {analyzer_config['tokenizer']}")
            if 'filter' in analyzer_config:
                print(f" filter: {analyzer_config['filter']}")
        else:
            print(f" 配置: {analyzer_config}")


def main():
    """Print the live mapping and analyzer settings of the default index.

    Connects through ``get_es_client_from_env()``, reads the mapping of
    ``DEFAULT_INDEX_NAME``, reports type/analyzer/search_analyzer/sub-fields
    for a fixed list of Chinese text fields, then lists the analyzers found
    in the index settings.

    Returns:
        0 when the check ran to completion; 1 when Elasticsearch cannot be
        reached, the index does not exist, or its mapping cannot be fetched
        or parsed. A failure to read the settings is only reported as a
        warning and does not change the return value.
    """
    banner = "=" * 80

    print(banner)
    print("检查 Elasticsearch 索引实际映射配置")
    print(banner)

    # Connect to Elasticsearch.
    try:
        es_client = get_es_client_from_env()
        if not es_client.ping():
            print("✗ 无法连接到 Elasticsearch")
            return 1
        print("✓ Elasticsearch 连接成功\n")
    except Exception as e:
        print(f"✗ 连接 Elasticsearch 失败: {e}")
        return 1

    index_name = DEFAULT_INDEX_NAME

    # Make sure the index exists.
    if not es_client.index_exists(index_name):
        print(f"✗ 索引 '{index_name}' 不存在")
        return 1

    # Fetch the live mapping; report a failure the same way as the other
    # Elasticsearch calls in this script instead of dumping a traceback.
    print(f"获取索引 '{index_name}' 的映射配置...\n")
    try:
        mapping = es_client.get_mapping(index_name)
    except Exception as e:
        print(f"✗ 无法获取索引映射: {e}")
        return 1

    if not mapping:
        print("✗ 无法获取索引映射")
        return 1

    # Expected response shape: {index_name: {mappings: {properties: {...}}}}
    index_entry = _resolve_index_entry(mapping, index_name)
    index_mapping = index_entry.get('mappings', {}).get('properties', {})

    if not index_mapping:
        print("✗ 无法解析映射结构")
        return 1

    # Key fields to inspect.
    fields_to_check = [
        'title_zh',
        'brief_zh',
        'description_zh',
        'vendor_zh',
        'category_path_zh',
        'category_name_zh',
    ]

    print(banner)
    print("中文字段实际映射配置")
    print(banner)

    for field_name in fields_to_check:
        field_mapping = index_mapping.get(field_name)
        if field_mapping is None:
            print(f"\n❌ {field_name}: 字段不存在")
            continue
        _print_field_report(field_name, field_mapping)

    # Analyzer definitions from the index settings.
    print("\n" + banner)
    print("索引 Settings 中的 Analyzer 定义")
    print(banner)

    try:
        settings = es_client.client.indices.get_settings(index=index_name)
        settings_entry = _resolve_index_entry(settings, index_name)
        index_settings = settings_entry.get('settings', {}).get('index', {})
        analyzers = index_settings.get('analysis', {}).get('analyzer', {})
        _print_analyzer_report(analyzers)
    except Exception as e:
        # Best effort: the mapping report above is still valid.
        print(f"\n⚠ 无法获取 settings: {e}")

    print("\n" + banner)
    print("检查完成")
    print(banner)

    return 0


if __name__ == '__main__':
    sys.exit(main())
... ...