be52af70
tangwang
first commit
|
1
|
"""
|
9cb7528e
tangwang
店匠体系数据的搜索:mock da...
|
2
|
Configuration loader and validator for search engine configurations.

This module handles loading, parsing, and validating YAML configuration files
that define how search should be executed (NOT how data should be indexed).
The index structure is defined by mappings/search_products.json.
This configuration only defines search behavior: field boosts, search domains,
query strategies, and so on.
|
be52af70
tangwang
first commit
|
9
10
11
12
13
14
15
16
|
"""
import yaml
import os
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field
from pathlib import Path
|
be52af70
tangwang
first commit
|
17
18
19
20
21
22
|
@dataclass
class IndexConfig:
    """Settings for a single index (search) domain such as default, title or brand.

    Attributes:
        name: Name of the domain.
        label: Label of the domain.
        fields: Field names included in this search domain.
        boost: Boost value for the domain (1.0 when not given).
        example: Optional example string for the domain.
    """

    name: str
    label: str
    fields: List[str]
    boost: float = 1.0
    example: Optional[str] = None
|
be52af70
tangwang
first commit
|
27
28
29
30
31
|
@dataclass
class QueryConfig:
    """Settings that control how an incoming query is processed."""

    # --- Languages ---
    supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"])
    default_language: str = "en"

    # --- Feature flags ---
    enable_text_embedding: bool = True
    enable_query_rewrite: bool = True
    # Multi-language search using translations.
    enable_multilang_search: bool = True

    # --- Query rewriting ---
    # Rewrite dictionary; its content is loaded from an external file.
    rewrite_dictionary: Dict[str, str] = field(default_factory=dict)

    # --- Translation ---
    translation_service: str = "deepl"
    translation_api_key: Optional[str] = None
    translation_glossary_id: Optional[str] = None
    translation_context: str = "e-commerce product search"
    # Translation prompts for different use cases.
    translation_prompts: Dict[str, str] = field(default_factory=dict)

    # --- Embeddings ---
    # Names of the fields that hold the text / image embeddings.
    text_embedding_field: Optional[str] = "title_embedding"
    image_embedding_field: Optional[str] = None
    # Thresholds used to disable vector search for short queries.
    embedding_disable_chinese_char_limit: int = 4
    embedding_disable_english_word_limit: int = 3

    # --- Result shaping ---
    # Source fields configuration (None when not configured).
    source_fields: Optional[List[str]] = None

    # --- KNN ---
    # Boost value for KNN (embedding recall).
    knn_boost: float = 0.25
|
13377199
tangwang
接口优化
|
62
|
|
be52af70
tangwang
first commit
|
63
64
65
66
67
|
@dataclass
class SPUConfig:
    """Settings for SPU aggregation.

    Attributes:
        enabled: Whether SPU aggregation is switched on.
        spu_field: Name of the SPU field; required by validation when
            ``enabled`` is true.
        inner_hits_size: Inner hits size (3 when not given).
        searchable_option_dimensions: Option dimensions that take part in
            retrieval (both at indexing time and in online search).
    """

    enabled: bool = False
    spu_field: Optional[str] = None
    inner_hits_size: int = 3
    searchable_option_dimensions: List[str] = field(
        default_factory=lambda: ['option1', 'option2', 'option3']
    )
|
be52af70
tangwang
first commit
|
72
73
74
|
@dataclass
class FunctionScoreConfig:
    """Function score configuration (scoring rules applied at the ES layer).

    Attributes:
        score_mode: Score mode, "sum" by default.
        boost_mode: Boost mode, "multiply" by default.
        functions: List of function definitions, each a plain dictionary.
    """

    score_mode: str = "sum"
    boost_mode: str = "multiply"
    functions: List[Dict[str, Any]] = field(default_factory=list)
@dataclass
class RankingConfig:
    """Ranking expression settings.

    Attributes:
        expression: The ranking expression; "bm25()" when not given.
        description: Human-readable description of the expression.
    """

    expression: str = "bm25()"
    description: str = "Default BM25 ranking"
@dataclass
class RerankConfig:
    """Rerank configuration.

    The only implementation calls an external BGE rerank service; whether
    reranking actually runs is controlled by the request parameter
    ``enable_rerank``.
    """

    # Rerank window: when enable_rerank is set and from+size <= rerank_window,
    # the top rerank_window hits are fetched from ES, reranked, then paginated.
    rerank_window: int = 1000

    # Optional URL of the rerank service; when empty, the reranker module's
    # default port 6007 is used.
    service_url: Optional[str] = None
    timeout_sec: float = 15.0

    # NOTE(review): presumably the weights used to blend the ES score with the
    # rerank-service score - confirm against the code that consumes them.
    weight_es: float = 0.4
    weight_ai: float = 0.6

    # Templates used to assemble the search request / document fields into the
    # input sent to the rerank service:
    # - rerank_query_template supports {query}
    # - rerank_doc_template supports {title} {brief} {vendor} {description} {category_path}
    rerank_query_template: str = "{query}"
    rerank_doc_template: str = "{title}"
|
a00c3672
tangwang
feat: Function Sc...
|
104
105
106
|
@dataclass
class SearchConfig:
    """Complete search engine configuration (multi-tenant).

    Attributes:
        field_boosts: Per-field boost values used at search time.
        indexes: Index structure, i.e. the list of query domains.
        query_config: Query processing settings.
        ranking: Ranking configuration.
        function_score: Function score configuration (scoring at the ES layer).
        rerank: Rerank configuration.
        spu_config: SPU configuration.
        es_index_name: Name of the ES index.
        tenant_config: Tenant configuration; empty dict when not given.
        es_settings: ES settings; empty dict when not given.
    """

    field_boosts: Dict[str, float]
    indexes: List[IndexConfig]
    query_config: QueryConfig
    ranking: RankingConfig
    function_score: FunctionScoreConfig
    rerank: RerankConfig
    spu_config: SPUConfig
    es_index_name: str
    tenant_config: Dict[str, Any] = field(default_factory=dict)
    es_settings: Dict[str, Any] = field(default_factory=dict)
class ConfigurationError(Exception):
    """Signals that the configuration could not be loaded or failed validation."""
class ConfigLoader:
    """Loads and validates the unified search engine configuration from a YAML file."""

    def __init__(self, config_file: Optional[Path] = None):
        """
        Initialize config loader.

        Args:
            config_file: Path to the config YAML file. When omitted, the file
                ``config.yaml`` in the directory of this module is used.
        """
        if config_file is None:
            config_file = Path(__file__).parent / "config.yaml"
        self.config_file = Path(config_file)

    @staticmethod
    def _section(config_data: Dict[str, Any], key: str, expected_type: type = dict) -> Any:
        """
        Fetch an optional container section from a parsed YAML mapping.

        A key that is missing or explicitly null (``key:`` with no value in
        YAML) yields an empty ``expected_type()`` so callers can use the result
        without None checks.

        Args:
            config_data: Mapping to read from.
            key: Name of the section.
            expected_type: Container type the section must have (dict or list).

        Returns:
            The section value, or an empty container when absent/null.

        Raises:
            ConfigurationError: If the section is present but of another type.
        """
        value = config_data.get(key)
        if value is None:
            return expected_type()
        if not isinstance(value, expected_type):
            raise ConfigurationError(
                f"{key} must be a {expected_type.__name__}, got {type(value).__name__}"
            )
        return value

    def _load_rewrite_dictionary(self) -> Dict[str, str]:
        """
        Load the query rewrite dictionary from ``rewrite_dictionary.txt``.

        The file is looked up next to this module. Each useful line has the
        form ``original<TAB>replacement``; blank lines and lines starting with
        ``#`` are skipped, as are lines with an empty original or replacement.
        Columns after the second are ignored.

        Loading is best effort: a missing file gives an empty dictionary, and a
        read/decoding failure prints a warning and returns whatever was parsed
        up to that point.

        Returns:
            Mapping of original term to replacement term.
        """
        rewrite_file = Path(__file__).parent / "rewrite_dictionary.txt"
        rewrite_dict: Dict[str, str] = {}

        if not rewrite_file.exists():
            return rewrite_dict

        try:
            with open(rewrite_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue

                    parts = line.split('\t')
                    if len(parts) < 2:
                        continue

                    original = parts[0].strip()
                    replacement = parts[1].strip()
                    if original and replacement:
                        rewrite_dict[original] = replacement
        except (OSError, UnicodeError) as e:
            # Only I/O and decoding problems are expected here; the rewrite
            # dictionary is optional, so they must not abort config loading.
            print(f"Warning: Failed to load rewrite dictionary: {e}")

        return rewrite_dict

    def load_config(self, validate: bool = True) -> "SearchConfig":
        """
        Load unified configuration from YAML file.

        Args:
            validate: Whether to validate configuration after loading

        Returns:
            SearchConfig object

        Raises:
            ConfigurationError: If config file not found, invalid, or validation fails
        """
        if not self.config_file.exists():
            raise ConfigurationError(f"Configuration file not found: {self.config_file}")

        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            raise ConfigurationError(f"Invalid YAML in {self.config_file}: {e}") from e

        # safe_load returns None for an empty document and may return a scalar
        # or a list; everything below needs a mapping, so fail with a clear
        # ConfigurationError instead of an AttributeError.
        if not isinstance(config_data, dict):
            raise ConfigurationError(
                f"Configuration file {self.config_file} must contain a YAML mapping "
                f"at the top level, got {type(config_data).__name__}"
            )

        config = self._parse_config(config_data)

        # Auto-validate configuration
        if validate:
            errors = self.validate_config(config)
            if errors:
                error_msg = "Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors)
                raise ConfigurationError(error_msg)

        return config

    def _parse_config(self, config_data: Dict[str, Any]) -> "SearchConfig":
        """
        Parse configuration dictionary into SearchConfig object.

        Sections that are missing or null are treated as empty, so the defaults
        below apply.

        Args:
            config_data: Top-level mapping read from the YAML file.

        Returns:
            The populated SearchConfig (not yet validated).

        Raises:
            ConfigurationError: If a section has the wrong type, an index entry
                is malformed, or a rerank number cannot be converted.
        """
        # Parse field_boosts
        field_boosts = config_data.get("field_boosts", {})
        if not isinstance(field_boosts, dict):
            raise ConfigurationError("field_boosts must be a dictionary")

        # Parse indexes
        indexes = [
            self._parse_index_config(index_data)
            for index_data in self._section(config_data, "indexes", list)
        ]

        # Parse query config
        query_config_data = self._section(config_data, "query_config")

        # Load rewrite dictionary from external file
        rewrite_dictionary = self._load_rewrite_dictionary()

        # Parse embedding disable thresholds
        embedding_thresholds = self._section(query_config_data, "embedding_disable_thresholds")

        query_config = QueryConfig(
            supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
            default_language=query_config_data.get("default_language") or "en",
            enable_text_embedding=query_config_data.get("enable_text_embedding", True),
            enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
            # Previously never read from the file, so the flag could not be
            # turned off through configuration.
            enable_multilang_search=query_config_data.get("enable_multilang_search", True),
            rewrite_dictionary=rewrite_dictionary,
            translation_api_key=query_config_data.get("translation_api_key"),
            translation_service=query_config_data.get("translation_service") or "deepl",
            translation_glossary_id=query_config_data.get("translation_glossary_id"),
            translation_context=query_config_data.get("translation_context") or "e-commerce product search",
            translation_prompts=query_config_data.get("translation_prompts") or {},
            text_embedding_field=query_config_data.get("text_embedding_field"),
            image_embedding_field=query_config_data.get("image_embedding_field"),
            embedding_disable_chinese_char_limit=embedding_thresholds.get("chinese_char_limit", 4),
            embedding_disable_english_word_limit=embedding_thresholds.get("english_word_limit", 3),
            source_fields=query_config_data.get("source_fields"),
            knn_boost=query_config_data.get("knn_boost", 0.25)
        )

        # Parse ranking config
        ranking_data = self._section(config_data, "ranking")
        ranking = RankingConfig(
            expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()",
            description=ranking_data.get("description") or "Default BM25 + text embedding ranking"
        )

        # Parse Function Score configuration
        fs_data = self._section(config_data, "function_score")
        function_score = FunctionScoreConfig(
            score_mode=fs_data.get("score_mode") or "sum",
            boost_mode=fs_data.get("boost_mode") or "multiply",
            functions=fs_data.get("functions") or []
        )

        # Parse Rerank configuration (only implementation: external rerank
        # service, controlled by enable_rerank)
        rerank_data = self._section(config_data, "rerank")
        try:
            rerank = RerankConfig(
                rerank_window=int(rerank_data.get("rerank_window", 1000)),
                service_url=rerank_data.get("service_url") or None,
                timeout_sec=float(rerank_data.get("timeout_sec", 15.0)),
                weight_es=float(rerank_data.get("weight_es", 0.4)),
                weight_ai=float(rerank_data.get("weight_ai", 0.6)),
                rerank_query_template=str(rerank_data.get("rerank_query_template") or "{query}"),
                rerank_doc_template=str(rerank_data.get("rerank_doc_template") or "{title}"),
            )
        except (TypeError, ValueError) as e:
            # int()/float() reject null or non-numeric values.
            raise ConfigurationError(f"Invalid rerank configuration: {e}") from e

        # Parse SPU config
        spu_data = self._section(config_data, "spu_config")
        spu_config = SPUConfig(
            enabled=spu_data.get("enabled", False),
            spu_field=spu_data.get("spu_field"),
            inner_hits_size=spu_data.get("inner_hits_size", 3),
            searchable_option_dimensions=spu_data.get("searchable_option_dimensions", ['option1', 'option2', 'option3'])
        )

        # Parse tenant config
        tenant_config_data = config_data.get("tenant_config") or {}

        return SearchConfig(
            field_boosts=field_boosts,
            indexes=indexes,
            query_config=query_config,
            ranking=ranking,
            function_score=function_score,
            rerank=rerank,
            spu_config=spu_config,
            tenant_config=tenant_config_data,
            es_index_name=config_data.get("es_index_name", "search_products"),
            es_settings=config_data.get("es_settings") or {}
        )

    def _parse_index_config(self, index_data: Dict[str, Any]) -> "IndexConfig":
        """
        Parse index configuration from dictionary.

        Args:
            index_data: One entry of the ``indexes`` list. ``name`` is
                required; ``label`` falls back to the name, ``fields`` to an
                empty list and ``boost`` to 1.0.

        Returns:
            The corresponding IndexConfig.

        Raises:
            ConfigurationError: If the entry is not a mapping or has no ``name``.
        """
        if not isinstance(index_data, dict):
            raise ConfigurationError(
                f"Each entry in indexes must be a mapping, got {type(index_data).__name__}"
            )
        if "name" not in index_data:
            raise ConfigurationError("Index definition is missing required key 'name'")

        name = index_data["name"]
        return IndexConfig(
            name=name,
            label=index_data.get("label", name),
            fields=index_data.get("fields", []),
            boost=index_data.get("boost", 1.0),
            example=index_data.get("example")
        )

    def validate_config(self, config: "SearchConfig") -> List[str]:
        """
        Validate configuration for common errors.

        Checks the ES index name, the field boosts (present, numeric,
        non-negative), the index domains (at least one, unique names, non-empty
        field lists), the SPU settings and the language settings.

        Args:
            config: SearchConfig to validate

        Returns:
            List of error messages (empty if valid)
        """
        errors: List[str] = []

        # Validate es_index_name
        if not config.es_index_name:
            errors.append("es_index_name is required")

        # Validate field_boosts
        if not config.field_boosts:
            errors.append("field_boosts is empty")
        for field_name, boost in config.field_boosts.items():
            if not isinstance(boost, (int, float)):
                errors.append(f"field_boosts['{field_name}']: boost must be a number, got {type(boost).__name__}")
            elif boost < 0:
                errors.append(f"field_boosts['{field_name}']: boost must be non-negative")

        # Validate indexes
        if not config.indexes:
            errors.append("At least one index domain must be defined")

        index_names = set()
        for index in config.indexes:
            # Check for duplicate index names
            if index.name in index_names:
                errors.append(f"Duplicate index name: {index.name}")
            index_names.add(index.name)

            # Validate fields in index
            if not index.fields:
                errors.append(f"Index '{index.name}': fields list is empty")

        # Validate SPU config
        if config.spu_config.enabled and not config.spu_config.spu_field:
            errors.append("SPU aggregation enabled but no spu_field specified")

        # Validate query config
        query_config = config.query_config
        if not query_config.supported_languages:
            errors.append("At least one supported language must be specified")
        if query_config.default_language not in query_config.supported_languages:
            errors.append(
                f"Default language '{query_config.default_language}' "
                f"not in supported languages: {query_config.supported_languages}"
            )

        return errors

    def to_dict(self, config: "SearchConfig") -> Dict[str, Any]:
        """
        Convert SearchConfig to dictionary representation.

        Note that this is not a full dump: fields such as the rewrite
        dictionary, the translation API key, ``knn_boost`` and
        ``tenant_config`` are not part of the result.

        Args:
            config: SearchConfig to convert.

        Returns:
            Nested dictionary built from the configuration.
        """
        query_config = config.query_config

        # Build query_config dict
        query_config_dict = {
            "supported_languages": query_config.supported_languages,
            "default_language": query_config.default_language,
            "enable_text_embedding": query_config.enable_text_embedding,
            "enable_query_rewrite": query_config.enable_query_rewrite,
            "translation_service": query_config.translation_service,
            "text_embedding_field": query_config.text_embedding_field,
            "image_embedding_field": query_config.image_embedding_field,
            "embedding_disable_thresholds": {
                "chinese_char_limit": query_config.embedding_disable_chinese_char_limit,
                "english_word_limit": query_config.embedding_disable_english_word_limit
            },
            "source_fields": query_config.source_fields
        }

        return {
            "es_index_name": config.es_index_name,
            "es_settings": config.es_settings,
            "field_boosts": config.field_boosts,
            "indexes": [self._index_to_dict(index) for index in config.indexes],
            "query_config": query_config_dict,
            "ranking": {
                "expression": config.ranking.expression,
                "description": config.ranking.description
            },
            "function_score": {
                "score_mode": config.function_score.score_mode,
                "boost_mode": config.function_score.boost_mode,
                "functions": config.function_score.functions
            },
            "rerank": {
                "rerank_window": config.rerank.rerank_window,
                "service_url": config.rerank.service_url,
                "timeout_sec": config.rerank.timeout_sec,
                "weight_es": config.rerank.weight_es,
                "weight_ai": config.rerank.weight_ai,
                "rerank_query_template": config.rerank.rerank_query_template,
                "rerank_doc_template": config.rerank.rerank_doc_template,
            },
            "spu_config": {
                "enabled": config.spu_config.enabled,
                "spu_field": config.spu_config.spu_field,
                "inner_hits_size": config.spu_config.inner_hits_size,
                "searchable_option_dimensions": config.spu_config.searchable_option_dimensions
            }
        }

    def _index_to_dict(self, index: "IndexConfig") -> Dict[str, Any]:
        """
        Convert IndexConfig to dictionary.

        The ``example`` key is only included when the index has a non-empty
        example.
        """
        result = {
            "name": index.name,
            "label": index.label,
            "fields": index.fields,
            "boost": index.boost
        }

        if index.example:
            result["example"] = index.example

        return result
|
b926f678
tangwang
多语言查询
|
445
|
|
33839b37
tangwang
属性值参与搜索:
|
446
447
448
449
450
451
452
453
454
455
456
457
|
def load_tenant_config(tenant_id: Optional[str] = None) -> SearchConfig:
    """
    Backward-compatibility wrapper that loads the unified configuration.

    Args:
        tenant_id: Not used; accepted only so existing callers keep working.

    Returns:
        The SearchConfig produced by a default ConfigLoader (default config
        file, validation enabled).
    """
    return ConfigLoader().load_config()
|