# query_config.py
"""
Query configuration constants.
Since all tenants share the same ES mapping, we can hardcode field lists here.
"""
import os
from typing import Dict, List
# Name of the default index.
DEFAULT_INDEX_NAME: str = "search_products"

# Names of the fields that hold the text embedding and the image embedding.
TEXT_EMBEDDING_FIELD: str = "title_embedding"
IMAGE_EMBEDDING_FIELD: str = "image_embedding"
# Fields matched by a text search when no domain is given. The "^<number>"
# suffix on each entry is that field's boost.
DEFAULT_MATCH_FIELDS: List[str] = [
    "title_zh^3.0",
    "brief_zh^1.5",
    "description_zh^1.0",
    "vendor_zh^1.5",
    "tags^1.0",
    "category_path_zh^1.5",
    "category_name_zh^1.5",
]

# Match fields per search domain. The "default" entry is the very same list
# object as DEFAULT_MATCH_FIELDS (not a copy), so mutating one mutates both.
DOMAIN_FIELDS: Dict[str, List[str]] = {
    "default": DEFAULT_MATCH_FIELDS,
    "title": ["title_zh^2.0"],
    "vendor": ["vendor_zh^1.5"],
    "category": [
        "category_path_zh^1.5",
        "category_name_zh^1.5",
    ],
    "tags": ["tags^1.0"],
}
# Document fields returned with every search hit. Order is kept as-is; the
# group comments are only a reading aid.
SOURCE_FIELDS: List[str] = [
    # Identifiers
    "tenant_id", "spu_id",
    # Text, vendor, tags and image
    "title_zh", "brief_zh", "description_zh", "vendor_zh", "tags", "image_url",
    # Category
    "category_path_zh", "category_name_zh",
    "category_id", "category_name", "category_level",
    "category1_name", "category2_name", "category3_name",
    # Option names
    "option1_name", "option2_name", "option3_name",
    # Prices and inventory
    "min_price", "max_price", "compare_at_price", "total_inventory",
    # Timestamps
    "create_time", "update_time",
    # SKUs and specifications
    "skus", "specifications",
]
# Query processing settings.
# Both switches are read from the environment once, when this module is
# imported. A switch is on when its variable is unset or equals "true"
# (case-insensitive); any other value turns it off.
ENABLE_TRANSLATION: bool = "true" == os.getenv("ENABLE_TRANSLATION", "true").lower()
ENABLE_TEXT_EMBEDDING: bool = "true" == os.getenv("ENABLE_TEXT_EMBEDDING", "true").lower()

# Translation backend name and its API key; the key is None when
# DEEPL_API_KEY is not set in the environment.
TRANSLATION_SERVICE = "deepl"
TRANSLATION_API_KEY = os.getenv("DEEPL_API_KEY")
# Ranking expression (currently disabled).
RANKING_EXPRESSION: str = "bm25() + 0.2*text_embedding_relevance()"

# Function score settings. The "functions" list starts out empty, i.e. no
# scoring functions are configured here.
FUNCTION_SCORE_CONFIG = {"score_mode": "sum", "boost_mode": "multiply", "functions": []}
# Load rewrite dictionary from file if exists
def load_rewrite_dictionary() -> Dict[str, str]:
    """Load the query rewrite dictionary from ``config/query_rewrite.dict``.

    The file is looked up as ``<parent of this module's directory>/config/
    query_rewrite.dict``. Each entry is one line with exactly two
    TAB-separated columns: the original text and its rewrite. Blank lines,
    lines starting with ``#`` and lines with any other number of columns are
    skipped. Surrounding whitespace is stripped from both columns.

    Returns:
        Mapping from original text to rewrite. Empty when the file does not
        exist. When reading fails part-way, a warning is printed and the
        entries parsed so far are returned.
    """
    rewrite_file = os.path.join(
        os.path.dirname(os.path.dirname(__file__)),
        "config",
        "query_rewrite.dict",
    )

    rewrite_dict: Dict[str, str] = {}
    try:
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, so
        # a BOM written by an editor does not end up glued to the first key.
        with open(rewrite_file, "r", encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                parts = line.split("\t")
                if len(parts) == 2:
                    rewrite_dict[parts[0].strip()] = parts[1].strip()
    except FileNotFoundError:
        # The dictionary is optional: no file simply means no rewrites.
        return {}
    except (OSError, UnicodeError) as e:
        # Best effort: this runs at import time, so report and carry on
        # rather than make the module unimportable.
        print(f"Warning: Failed to load rewrite dictionary: {e}")
    return rewrite_dict
# Rewrite dictionary, loaded once when this module is imported.
REWRITE_DICTIONARY = load_rewrite_dictionary()