Blame view

search/query_config.py 2.8 KB
59b0a342   tangwang   创建手写 mapping JSON
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  """
  Query configuration constants.
  
  Since all tenants share the same ES mapping, we can hardcode field lists here.
  """
  
  import os
  from typing import Dict, List, Optional
  
  # Index and embedding-field names (hardcoded; see the module docstring).
  DEFAULT_INDEX_NAME: str = "search_products"      # default index name
  TEXT_EMBEDDING_FIELD: str = "title_embedding"    # text embedding field
  IMAGE_EMBEDDING_FIELD: str = "image_embedding"   # image embedding field
  
  # Default match fields for text search. Each entry is written as
  # "<field>^<boost>".
  DEFAULT_MATCH_FIELDS: List[str] = [
      "title_zh^3.0",
      "brief_zh^1.5", "description_zh^1.0",
      "vendor_zh^1.5",
      "tags^1.0",
      "category_path_zh^1.5", "category_name_zh^1.5",
  ]

  # Match fields per search domain. Note that "default" refers to the very
  # same list object as DEFAULT_MATCH_FIELDS (it is not a copy).
  DOMAIN_FIELDS: Dict[str, List[str]] = dict(
      default=DEFAULT_MATCH_FIELDS,
      title=["title_zh^2.0"],
      vendor=["vendor_zh^1.5"],
      category=["category_path_zh^1.5", "category_name_zh^1.5"],
      tags=["tags^1.0"],
  )
  
  # Source fields to return in search results (order is preserved from the
  # original flat list; the grouping below is only for readability).
  SOURCE_FIELDS: List[str] = [
      # identifiers
      "tenant_id", "spu_id",
      # text, vendor, tags and image
      "title_zh", "brief_zh", "description_zh", "vendor_zh", "tags", "image_url",
      # category
      "category_path_zh", "category_name_zh",
      "category_id", "category_name", "category_level",
      "category1_name", "category2_name", "category3_name",
      # option names
      "option1_name", "option2_name", "option3_name",
      # prices and inventory
      "min_price", "max_price", "compare_at_price", "total_inventory",
      # timestamps
      "create_time", "update_time",
      # nested data
      "skus", "specifications",
  ]
  
  # Query processing settings
  def _env_flag(name: str, default: bool) -> bool:
      """Read a boolean switch from the environment.

      Args:
          name: Environment variable to read.
          default: Value returned when the variable is not set at all.

      Returns:
          True when the variable, ignoring surrounding whitespace and case,
          is one of "true", "1", "yes" or "on"; False for any other value
          (including the empty string); ``default`` when it is unset.
      """
      raw = os.environ.get(name)
      if raw is None:
          return default
      # Accept the usual truthy spellings so that e.g. FOO=1 or a stray
      # trailing space does not silently turn a feature off.
      return raw.strip().lower() in ("true", "1", "yes", "on")

  # Both switches are on unless the environment says otherwise.
  ENABLE_TRANSLATION = _env_flag("ENABLE_TRANSLATION", True)
  ENABLE_TEXT_EMBEDDING = _env_flag("ENABLE_TEXT_EMBEDDING", True)
  # API key is read from DEEPL_API_KEY; None when the variable is not set.
  TRANSLATION_API_KEY = os.environ.get("DEEPL_API_KEY")
  TRANSLATION_SERVICE = "deepl"
  
  # Ranking expression (marked by the original author as currently disabled).
  RANKING_EXPRESSION: str = "bm25() + 0.2*text_embedding_relevance()"

  # Function score config: score/boost modes and an initially empty list
  # of functions.
  FUNCTION_SCORE_CONFIG = dict(
      score_mode="sum",
      boost_mode="multiply",
      functions=[],
  )
  
  # Load rewrite dictionary from file if exists
  def load_rewrite_dictionary(rewrite_file: Optional[str] = None) -> Dict[str, str]:
      """Load the query rewrite dictionary from a tab-separated file.

      A line contributes an entry only when it has exactly two
      tab-separated fields; both fields are stripped of surrounding
      whitespace and stored as ``first -> second``. Blank lines, lines
      starting with ``#`` and lines with any other number of fields are
      skipped. A later entry for the same key overwrites an earlier one.

      Args:
          rewrite_file: Path of the dictionary file. When omitted, the file
              ``config/query_rewrite.dict`` located next to this module's
              parent directory is used.

      Returns:
          The parsed mapping. It is empty when the file does not exist. If
          reading or decoding fails, a warning is printed and whatever was
          parsed before the failure is returned.
      """
      if rewrite_file is None:
          rewrite_file = os.path.join(
              os.path.dirname(os.path.dirname(__file__)),
              "config",
              "query_rewrite.dict",
          )

      rewrite_dict: Dict[str, str] = {}
      try:
          # Open directly instead of checking os.path.exists() first: the
          # check-then-open sequence is racy and needs a second filesystem hit.
          with open(rewrite_file, 'r', encoding='utf-8') as f:
              for line in f:
                  line = line.strip()
                  if not line or line.startswith('#'):
                      continue
                  parts = line.split('\t')
                  if len(parts) == 2:
                      rewrite_dict[parts[0].strip()] = parts[1].strip()
      except (FileNotFoundError, NotADirectoryError):
          # A missing dictionary file is a normal situation, not an error.
          return rewrite_dict
      except (OSError, UnicodeError) as e:
          # Best effort: loading the dictionary must never abort the caller.
          print(f"Warning: Failed to load rewrite dictionary: {e}")

      return rewrite_dict
  
  # Module-level rewrite dictionary, populated once when this module is
  # imported by calling load_rewrite_dictionary().
  REWRITE_DICTIONARY: Dict[str, str] = load_rewrite_dictionary()