Blame view

search/query_config.py 3.19 KB
59b0a342   tangwang   创建手写 mapping JSON
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
  """
  Query configuration constants.
  
  Since all tenants share the same ES mapping, we can hardcode field lists here.
  """
  
  import os
  from typing import Dict, List, Optional
  
  # Index name used when no other index is specified.
  DEFAULT_INDEX_NAME: str = "search_products"

  # Names of the fields that hold the text and image embeddings, respectively.
  TEXT_EMBEDDING_FIELD: str = "title_embedding"
  IMAGE_EMBEDDING_FIELD: str = "image_embedding"
  
  # Fields matched by default in text search. Each entry is "<field>^<boost>".
  DEFAULT_MATCH_FIELDS: List[str] = [
      "title_zh^3.0",
      "brief_zh^1.5",
      "description_zh^1.0",
      "vendor_zh^1.5",
      "tags^1.0",
      "category_path_zh^1.5",
      "category_name_zh^1.5",
  ]

  # Match fields per search domain. The "default" entry is the very same list
  # object as DEFAULT_MATCH_FIELDS (an alias, not a copy).
  DOMAIN_FIELDS: Dict[str, List[str]] = {
      "default": DEFAULT_MATCH_FIELDS,
      "title": ["title_zh^2.0"],
      "vendor": ["vendor_zh^1.5"],
      "category": [
          "category_path_zh^1.5",
          "category_name_zh^1.5",
      ],
      "tags": ["tags^1.0"],
  }
  
  # Fields returned for each hit in search results. The order of the list is
  # significant only insofar as consumers rely on it; it is kept unchanged.
  SOURCE_FIELDS: List[str] = [
      # Identifiers
      "tenant_id",
      "spu_id",
      # Text content and media
      "title_zh",
      "brief_zh",
      "description_zh",
      "vendor_zh",
      "tags",
      "image_url",
      # Category information
      "category_path_zh",
      "category_name_zh",
      "category_id",
      "category_name",
      "category_level",
      "category1_name",
      "category2_name",
      "category3_name",
      # Option names
      "option1_name",
      "option2_name",
      "option3_name",
      # Pricing and inventory
      "min_price",
      "max_price",
      "compare_at_price",
      "total_inventory",
      # Timestamps
      "create_time",
      "update_time",
      # Nested structures
      "skus",
      "specifications",
  ]
  
  # Query processing settings
  def _env_flag(name: str, default: bool = True) -> bool:
      """Read a boolean feature flag from the environment variable *name*.

      Args:
          name: Name of the environment variable to read.
          default: Value returned when the variable is not set at all.

      Returns:
          ``default`` when the variable is unset. Otherwise ``True`` when the
          value, ignoring case and surrounding whitespace, is one of
          ``"true"``, ``"1"``, ``"yes"`` or ``"on"``; ``False`` for anything
          else (including the empty string).
      """
      raw = os.environ.get(name)
      if raw is None:
          return default
      # Accept the common truthy spellings so that e.g. FLAG=1 or "true "
      # does not silently disable the feature.
      return raw.strip().lower() in {"true", "1", "yes", "on"}


  # Both features are enabled unless the variable is set to a non-truthy value.
  ENABLE_TRANSLATION = _env_flag("ENABLE_TRANSLATION")
  ENABLE_TEXT_EMBEDDING = _env_flag("ENABLE_TEXT_EMBEDDING")

  # Translation API key, read from DEEPL_API_KEY; None when it is unset.
  TRANSLATION_API_KEY = os.environ.get("DEEPL_API_KEY")
  TRANSLATION_SERVICE = "deepl"
  
  # Ranking expression string (noted as currently disabled).
  RANKING_EXPRESSION: str = "bm25() + 0.2*text_embedding_relevance()"

  # Function score settings. No scoring functions are configured: the
  # "functions" list is empty.
  FUNCTION_SCORE_CONFIG = dict(
      score_mode="sum",
      boost_mode="multiply",
      functions=[],
  )
  
  # Query rewrite dictionary, loaded from a tab-separated file when present.
  def load_rewrite_dictionary(rewrite_file: Optional[str] = None) -> Dict[str, str]:
      """Load the query rewrite dictionary from a tab-separated file.

      Each useful line has the form ``source<TAB>replacement``. Blank lines,
      lines starting with ``#`` and lines that do not split into exactly two
      tab-separated columns are skipped. Keys and values are stripped of
      surrounding whitespace; a later duplicate key overrides an earlier one.

      Args:
          rewrite_file: Path of the dictionary file. When omitted, defaults
              to ``config/query_rewrite.dict`` under the parent directory of
              the directory containing this module.

      Returns:
          Mapping from source term to replacement. Empty when the file does
          not exist. If reading fails part-way, a warning is printed and the
          entries parsed up to that point are returned.
      """
      if rewrite_file is None:
          rewrite_file = os.path.join(
              os.path.dirname(os.path.dirname(__file__)),
              "config",
              "query_rewrite.dict",
          )

      rewrite_dict: Dict[str, str] = {}
      try:
          # Open directly instead of checking existence first, so there is no
          # window in which the file can vanish between the check and the open.
          with open(rewrite_file, "r", encoding="utf-8") as f:
              for line in f:
                  line = line.strip()
                  if not line or line.startswith("#"):
                      continue
                  parts = line.split("\t")
                  if len(parts) == 2:
                      rewrite_dict[parts[0].strip()] = parts[1].strip()
      except (FileNotFoundError, NotADirectoryError):
          # A missing dictionary simply means "no rewrites"; stay silent.
          return {}
      except (OSError, UnicodeError) as e:
          # Best effort: report the problem and keep whatever was parsed.
          print(f"Warning: Failed to load rewrite dictionary: {e}")

      return rewrite_dict


  # Loaded once, at import time, from the default location.
  REWRITE_DICTIONARY = load_rewrite_dictionary()
  
bf89b597   tangwang   feat(search): ada...
115
116
117
118
119
120
121
122
123
  # Facets requested by default in faceted search.
  # Category facets: category1_name, category2_name and category3_name.
  # The specifications facet uses a nested aggregation: grouped by name,
  # then aggregated by value.
  DEFAULT_FACETS: List[str] = [
      "category1_name",  # level-1 category
      "category2_name",  # level-2 category
      "category3_name",  # level-3 category
      "specifications",  # specifications facet (special handling: nested aggregation)
  ]