Blame view

search/query_config.py 3.97 KB
59b0a342   tangwang   创建手写 mapping JSON
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
  """
  Query configuration constants.
  
  Since all tenants share the same ES mapping, we can hardcode field lists here.
  """
  
  import os
  from typing import Dict, List
  
  # Default index name used when none is specified
  DEFAULT_INDEX_NAME: str = "search_products"
  
  # Name of the field used for the text embedding
  TEXT_EMBEDDING_FIELD: str = "title_embedding"
  
  # Name of the field used for the image embedding
  IMAGE_EMBEDDING_FIELD: str = "image_embedding"
  
  # Default match fields for text search (with boost)
f0d020c3   tangwang   多语言查询改为只支持中英文两种,f...
20
  # Text recall: search the Chinese and English fields together; the two complement each other
59b0a342   tangwang   创建手写 mapping JSON
21
  DEFAULT_MATCH_FIELDS = [
f0d020c3   tangwang   多语言查询改为只支持中英文两种,f...
22
      # Chinese fields
59b0a342   tangwang   创建手写 mapping JSON
23
24
25
26
      "title_zh^3.0",
      "brief_zh^1.5",
      "description_zh^1.0",
      "vendor_zh^1.5",
59b0a342   tangwang   创建手写 mapping JSON
27
      "category_path_zh^1.5",
f0d020c3   tangwang   多语言查询改为只支持中英文两种,f...
28
29
30
31
32
33
34
35
36
37
      "category_name_zh^1.5",
      # English fields
      "title_en^3.0",
      "brief_en^1.5",
      "description_en^1.0",
      "vendor_en^1.5",
      "category_path_en^1.5",
      "category_name_en^1.5",
      # Language-independent fields
      "tags^1.0",
59b0a342   tangwang   创建手写 mapping JSON
38
39
40
41
42
43
44
45
46
47
48
49
  ]
  
  # Domain-specific match fields.
  # Maps a domain name to its list of "field^boost" strings.
  # "default" refers to the very same list object as DEFAULT_MATCH_FIELDS, so an
  # in-place change to either one is visible through both names.
  # NOTE(review): "title", "vendor" and "category" list only *_zh fields, while
  # DEFAULT_MATCH_FIELDS also lists the *_en counterparts; "title" also uses
  # boost 2.0 here versus 3.0 in the default list — confirm both are intended.
  DOMAIN_FIELDS: Dict[str, List[str]] = {
      "default": DEFAULT_MATCH_FIELDS,
      "title": ["title_zh^2.0"],
      "vendor": ["vendor_zh^1.5"],
      "category": ["category_path_zh^1.5", "category_name_zh^1.5"],
      "tags": ["tags^1.0"]
  }
  
  # Source fields to return in search results
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
50
51
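  # Note: the _zh / _en fields still have to be fetched from ES so the backend can pick a language,
  # but they are not passed through to the frontend as-is; they are mapped onto unified fields such as title / description / vendor.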
59b0a342   tangwang   创建手写 mapping JSON
52
  SOURCE_FIELDS = [
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
53
      # Basic identifiers
59b0a342   tangwang   创建手写 mapping JSON
54
55
      "tenant_id",
      "spu_id",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
56
57
58
59
      "create_time",
      "update_time",
  
      # Multilingual text fields (used only for backend language selection; not returned to the frontend directly)
59b0a342   tangwang   创建手写 mapping JSON
60
      "title_zh",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
61
      "title_en",
59b0a342   tangwang   创建手写 mapping JSON
62
      "brief_zh",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
63
      "brief_en",
59b0a342   tangwang   创建手写 mapping JSON
64
      "description_zh",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
65
      "description_en",
59b0a342   tangwang   创建手写 mapping JSON
66
      "vendor_zh",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
67
      "vendor_en",
59b0a342   tangwang   创建手写 mapping JSON
68
      "category_path_zh",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
69
      "category_path_en",
59b0a342   tangwang   创建手写 mapping JSON
70
      "category_name_zh",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
71
72
73
74
75
      "category_name_en",
  
      # Language-independent fields (returned to the frontend directly)
      "tags",
      "image_url",
59b0a342   tangwang   创建手写 mapping JSON
76
77
78
79
80
81
82
83
84
85
86
87
      "category_id",
      "category_name",
      "category_level",
      "category1_name",
      "category2_name",
      "category3_name",
      "option1_name",
      "option2_name",
      "option3_name",
      "min_price",
      "max_price",
      "compare_at_price",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
88
89
90
      "sku_prices",
      "sku_weights",
      "sku_weight_units",
59b0a342   tangwang   创建手写 mapping JSON
91
      "total_inventory",
59b0a342   tangwang   创建手写 mapping JSON
92
      "skus",
577ec972   tangwang   返回给前端的字段、格式适配。主要包...
93
      "specifications",
59b0a342   tangwang   创建手写 mapping JSON
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
  ]
  
  # Query processing settings
  # Both flags default to enabled. A flag is True only when its environment
  # variable, compared case-insensitively, equals "true"; any other value
  # (including "1" or "yes") turns the feature off.
  ENABLE_TRANSLATION = os.environ.get("ENABLE_TRANSLATION", "true").lower() == "true"
  ENABLE_TEXT_EMBEDDING = os.environ.get("ENABLE_TEXT_EMBEDDING", "true").lower() == "true"
  # Read from the DEEPL_API_KEY environment variable; None when it is not set.
  TRANSLATION_API_KEY = os.environ.get("DEEPL_API_KEY")
  TRANSLATION_SERVICE = "deepl"
  
  # Ranking expression (currently disabled)
  RANKING_EXPRESSION = "bm25() + 0.2*text_embedding_relevance()"
  
  # Function score config
  # No scoring functions are configured here: "functions" is an empty list.
  FUNCTION_SCORE_CONFIG = {
      "score_mode": "sum",
      "boost_mode": "multiply",
      "functions": []
  }
  
  # Load rewrite dictionary from file if exists
  def load_rewrite_dictionary() -> Dict[str, str]:
      """Load the query rewrite dictionary from ``config/query_rewrite.dict``.

      The file is looked up in the ``config`` directory one level above this
      module's directory. It is read as UTF-8 (a leading byte-order mark is
      ignored) with one entry per line in the form ``<source><TAB><target>``.
      Blank lines, lines starting with ``#`` and lines that do not consist of
      exactly two tab-separated parts are skipped. Keys and values are
      stripped of surrounding whitespace.

      Returns:
          Mapping from source term to rewritten term. Empty when the file does
          not exist. On a read or decode error a warning is printed and the
          entries parsed before the error are returned.
      """
      rewrite_file = os.path.join(
          os.path.dirname(os.path.dirname(__file__)),
          "config",
          "query_rewrite.dict"
      )

      rewrite_dict: Dict[str, str] = {}
      try:
          # 'utf-8-sig' decodes plain UTF-8 as well, but drops a leading BOM so
          # it cannot leak into the first key (or hide a leading '#' comment).
          with open(rewrite_file, 'r', encoding='utf-8-sig') as f:
              for line in f:
                  line = line.strip()
                  if not line or line.startswith('#'):
                      continue
                  parts = line.split('\t')
                  if len(parts) == 2:
                      rewrite_dict[parts[0].strip()] = parts[1].strip()
      except (FileNotFoundError, NotADirectoryError):
          # A missing dictionary is a normal, silent case. Opening directly
          # instead of checking os.path.exists() first avoids a race between
          # the check and the open.
          return {}
      except (OSError, UnicodeError) as e:
          # Best effort: this runs at import time, so report and carry on with
          # whatever was parsed rather than failing the import.
          print(f"Warning: Failed to load rewrite dictionary: {e}")

      return rewrite_dict
  
  REWRITE_DICTIONARY = load_rewrite_dictionary()
  
bf89b597   tangwang   feat(search): ada...
141
142
143
144
145
146
147
148
149
  # Default facets for faceted search
  # Category facets: use category1_name, category2_name, category3_name
  # Specifications facet: uses a nested aggregation, grouped by name and then aggregated by value
  DEFAULT_FACETS = [
      "category1_name",  # level-1 category
      "category2_name",  # level-2 category
      "category3_name",  # level-3 category
      "specifications"  # specifications facet (special handling: nested aggregation)
  ]