be52af70
tangwang
first commit
|
1
|
"""
|
9cb7528e
tangwang
店匠体系数据的搜索:mock da...
|
2
|
Configuration loader and validator for search engine configurations.
|
be52af70
tangwang
first commit
|
3
4
|
This module handles loading, parsing, and validating YAML configuration files
|
33839b37
tangwang
属性值参与搜索:
|
5
6
7
8
|
that define how search should be executed (NOT how data should be indexed).
索引结构由 mappings/search_products.json 定义。
此配置只定义搜索行为:字段权重、搜索域、查询策略等。
|
be52af70
tangwang
first commit
|
9
10
11
|
"""
import yaml
|
be52af70
tangwang
first commit
|
12
13
14
15
|
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field
from pathlib import Path
|
be52af70
tangwang
first commit
|
16
17
18
19
20
21
|
@dataclass
class IndexConfig:
    """A single search domain (e.g., default, title, brand) and its settings."""

    # Identifier of the domain and the label shown for it.
    name: str
    label: str

    # Names of the fields that belong to this search domain.
    fields: List[str]

    # Boost value attached to the domain.
    boost: float = 1.0

    # Optional example string for the domain.
    example: Optional[str] = None
|
be52af70
tangwang
first commit
|
26
27
28
29
30
|
@dataclass
class QueryConfig:
    """Settings that control how an incoming query is processed."""

    # --- Languages -------------------------------------------------------
    supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"])
    default_language: str = "en"

    # --- Feature flags ---------------------------------------------------
    enable_text_embedding: bool = True
    enable_query_rewrite: bool = True
    # Multi-language search that relies on translations.
    enable_multilang_search: bool = True

    # --- Query rewrite ---------------------------------------------------
    # Rewrite dictionary; its content is loaded from an external file.
    rewrite_dictionary: Dict[str, str] = field(default_factory=dict)

    # --- Translation (provider/URL are configured in services.translation)
    translation_service: str = "deepl"
    translation_api_key: Optional[str] = None
    translation_glossary_id: Optional[str] = None
    translation_context: str = "e-commerce product search"
    translation_prompts: Dict[str, str] = field(default_factory=dict)

    # --- Embedding field names -------------------------------------------
    text_embedding_field: Optional[str] = "title_embedding"
    image_embedding_field: Optional[str] = None

    # --- Thresholds for disabling vector search on short queries ---------
    embedding_disable_chinese_char_limit: int = 4
    embedding_disable_english_word_limit: int = 3

    # --- Source fields configuration -------------------------------------
    source_fields: Optional[List[str]] = None

    # --- KNN (embedding recall) boost value ------------------------------
    knn_boost: float = 0.25
|
13377199
tangwang
接口优化
|
61
|
|
be52af70
tangwang
first commit
|
62
63
64
65
66
|
@dataclass
class SPUConfig:
    """Settings for SPU aggregation."""

    # Whether SPU aggregation is switched on, and the field it uses.
    enabled: bool = False
    spu_field: Optional[str] = None

    # Size used for inner hits.
    inner_hits_size: int = 3

    # Which option dimensions take part in retrieval (both when indexing
    # and in online search).
    searchable_option_dimensions: List[str] = field(
        default_factory=lambda: ['option1', 'option2', 'option3']
    )
|
be52af70
tangwang
first commit
|
71
72
73
|
@dataclass
class FunctionScoreConfig:
    """Function score configuration (scoring rules applied at the ES layer)."""

    # How the scores of the individual functions are combined.
    score_mode: str = "sum"
    # How the combined function score is merged with the query score.
    boost_mode: str = "multiply"
    # Function definitions, kept as plain dictionaries.
    functions: List[Dict[str, Any]] = field(default_factory=list)
@dataclass
class RankingConfig:
    """Ranking expression together with a description of it."""

    # The ranking expression itself.
    expression: str = "bm25()"
    # Free-text description of the expression.
    description: str = "Default BM25 ranking"
@dataclass
class RerankConfig:
    """Rerank configuration (the provider/URL are set in services.rerank)."""

    # Rerank window size.
    rerank_window: int = 1000
    # Timeout, in seconds.
    timeout_sec: float = 15.0

    # Weights for the ES part and the AI part.
    weight_es: float = 0.4
    weight_ai: float = 0.6

    # Templates for the query text and the document text.
    rerank_query_template: str = "{query}"
    rerank_doc_template: str = "{title}"
|
a00c3672
tangwang
feat: Function Sc...
|
97
98
99
|
@dataclass
class SearchConfig:
    """Complete configuration for search engine (multi-tenant)."""

    # Field boost configuration (used at search time).
    field_boosts: Dict[str, float]

    # Index structure: the list of query domains.
    indexes: List[IndexConfig]

    # Query processing settings.
    query_config: QueryConfig

    # Ranking settings.
    ranking: RankingConfig

    # Function score settings (scoring at the ES layer).
    function_score: FunctionScoreConfig

    # Rerank settings (local reranking).
    rerank: RerankConfig

    # SPU settings.
    spu_config: SPUConfig

    # Name of the ES index.
    es_index_name: str

    # Tenant configuration.
    tenant_config: Dict[str, Any] = field(default_factory=dict)

    # ES settings.
    es_settings: Dict[str, Any] = field(default_factory=dict)

    # Extensible service/provider registry (translation/embedding/rerank/...).
    services: Dict[str, Any] = field(default_factory=dict)
|
be52af70
tangwang
first commit
|
134
135
136
137
138
139
140
141
|
class ConfigurationError(Exception):
    """Signals that a configuration could not be loaded or failed validation."""
class ConfigLoader:
    """Loads and validates unified search engine configuration from YAML file."""

    def __init__(self, config_file: Optional[Path] = None):
        """
        Initialize config loader.

        Args:
            config_file: Path to config YAML file. When omitted, the
                ``config.yaml`` located next to this module is used.
        """
        if config_file is None:
            config_file = Path(__file__).parent / "config.yaml"
        self.config_file = Path(config_file)

    @staticmethod
    def _section(data: Dict[str, Any], key: str, path: Optional[str] = None) -> Dict[str, Any]:
        """
        Return the mapping stored under ``key``, treating a missing or null
        value as an empty mapping.

        Args:
            data: Parent mapping to read from.
            key: Key of the section inside ``data``.
            path: Name used in the error message (defaults to ``key``).

        Raises:
            ConfigurationError: If the value is present but is not a mapping.
        """
        value = data.get(key)
        if value is None:
            # A key written as "section:" with no body parses to None in YAML.
            return {}
        if not isinstance(value, dict):
            raise ConfigurationError(f"{path or key} must be a dictionary")
        return value

    def _load_rewrite_dictionary(self) -> Dict[str, str]:
        """
        Load query rewrite dictionary from external file.

        The file ``rewrite_dictionary.txt`` next to this module is read as
        tab-separated ``original<TAB>replacement`` lines. Blank lines and
        lines starting with ``#`` are skipped, as are lines with fewer than
        two columns or with an empty original/replacement.

        Returns:
            Mapping of original term to replacement. Empty when the file does
            not exist; on a read failure a warning is printed and whatever
            was collected before the failure is returned (best effort).
        """
        rewrite_file = Path(__file__).parent / "rewrite_dictionary.txt"
        rewrite_dict = {}

        if not rewrite_file.exists():
            return rewrite_dict

        try:
            with open(rewrite_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue

                    parts = line.split('\t')
                    if len(parts) >= 2:
                        original = parts[0].strip()
                        replacement = parts[1].strip()
                        if original and replacement:
                            rewrite_dict[original] = replacement
        except Exception as e:
            # Deliberately best-effort: a broken dictionary must not prevent
            # the configuration from loading.
            print(f"Warning: Failed to load rewrite dictionary: {e}")

        return rewrite_dict

    def load_config(self, validate: bool = True) -> "SearchConfig":
        """
        Load unified configuration from YAML file.

        Args:
            validate: Whether to validate configuration after loading

        Returns:
            SearchConfig object

        Raises:
            ConfigurationError: If config file not found, invalid, or validation fails
        """
        if not self.config_file.exists():
            raise ConfigurationError(f"Configuration file not found: {self.config_file}")

        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            # Chain the parser error so the original location info survives.
            raise ConfigurationError(f"Invalid YAML in {self.config_file}: {e}") from e

        config = self._parse_config(config_data)

        # Auto-validate configuration
        if validate:
            errors = self.validate_config(config)
            if errors:
                error_msg = "Configuration validation failed:\n" + "\n".join(f"  - {err}" for err in errors)
                raise ConfigurationError(error_msg)

        return config

    def _parse_config(self, config_data: Dict[str, Any]) -> "SearchConfig":
        """
        Parse configuration dictionary into SearchConfig object.

        Args:
            config_data: Top-level mapping read from the YAML file. ``None``
                (an empty YAML document) is treated as an empty mapping.

        Returns:
            SearchConfig built from ``config_data`` with defaults filled in.

        Raises:
            ConfigurationError: If the top level or a section has the wrong
                type, an index entry is malformed, or a rerank number cannot
                be converted.
        """
        # yaml.safe_load returns None for an empty document and may return a
        # list/scalar for other inputs; neither supports the lookups below.
        if config_data is None:
            config_data = {}
        if not isinstance(config_data, dict):
            raise ConfigurationError(
                f"Top-level configuration must be a dictionary, got {type(config_data).__name__}"
            )

        # Parse field_boosts
        field_boosts = config_data.get("field_boosts", {})
        if not isinstance(field_boosts, dict):
            raise ConfigurationError("field_boosts must be a dictionary")

        # Parse indexes
        indexes_data = config_data.get("indexes")
        if indexes_data is None:
            indexes_data = []
        if not isinstance(indexes_data, list):
            raise ConfigurationError("indexes must be a list")
        indexes = [self._parse_index_config(index_data) for index_data in indexes_data]

        # Parse query config
        query_config_data = self._section(config_data, "query_config")
        # A non-mapping "services" value is ignored rather than rejected.
        services_data = config_data.get("services", {}) if isinstance(config_data.get("services", {}), dict) else {}
        rewrite_dictionary = self._load_rewrite_dictionary()
        embedding_thresholds = self._section(
            query_config_data,
            "embedding_disable_thresholds",
            "query_config.embedding_disable_thresholds",
        )

        query_config = QueryConfig(
            supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
            default_language=query_config_data.get("default_language") or "en",
            enable_text_embedding=query_config_data.get("enable_text_embedding", True),
            enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
            # Previously never read, so the flag could not be disabled.
            enable_multilang_search=query_config_data.get("enable_multilang_search", True),
            rewrite_dictionary=rewrite_dictionary,
            translation_api_key=query_config_data.get("translation_api_key"),
            translation_service=query_config_data.get("translation_service") or "deepl",
            translation_glossary_id=query_config_data.get("translation_glossary_id"),
            translation_context=query_config_data.get("translation_context") or "e-commerce product search",
            translation_prompts=query_config_data.get("translation_prompts") or {},
            text_embedding_field=query_config_data.get("text_embedding_field"),
            image_embedding_field=query_config_data.get("image_embedding_field"),
            embedding_disable_chinese_char_limit=embedding_thresholds.get("chinese_char_limit", 4),
            embedding_disable_english_word_limit=embedding_thresholds.get("english_word_limit", 3),
            source_fields=query_config_data.get("source_fields"),
            knn_boost=query_config_data.get("knn_boost", 0.25)
        )

        # Parse ranking config
        ranking_data = self._section(config_data, "ranking")
        ranking = RankingConfig(
            expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()",
            description=ranking_data.get("description") or "Default BM25 + text embedding ranking"
        )

        # Parse Function Score configuration
        fs_data = self._section(config_data, "function_score")
        function_score = FunctionScoreConfig(
            score_mode=fs_data.get("score_mode") or "sum",
            boost_mode=fs_data.get("boost_mode") or "multiply",
            functions=fs_data.get("functions") or []
        )

        # Parse Rerank (provider/URL in services.rerank)
        rerank_data = self._section(config_data, "rerank")
        try:
            rerank = RerankConfig(
                rerank_window=int(rerank_data.get("rerank_window", 1000)),
                timeout_sec=float(rerank_data.get("timeout_sec", 15.0)),
                weight_es=float(rerank_data.get("weight_es", 0.4)),
                weight_ai=float(rerank_data.get("weight_ai", 0.6)),
                rerank_query_template=str(rerank_data.get("rerank_query_template") or "{query}"),
                rerank_doc_template=str(rerank_data.get("rerank_doc_template") or "{title}"),
            )
        except (TypeError, ValueError) as e:
            # int()/float() failures are configuration mistakes, not bugs.
            raise ConfigurationError(f"Invalid rerank configuration: {e}") from e

        # Parse SPU config
        spu_data = self._section(config_data, "spu_config")
        spu_config = SPUConfig(
            enabled=spu_data.get("enabled", False),
            spu_field=spu_data.get("spu_field"),
            inner_hits_size=spu_data.get("inner_hits_size", 3),
            searchable_option_dimensions=spu_data.get("searchable_option_dimensions", ['option1', 'option2', 'option3'])
        )

        # Parse tenant config (its shape is checked in validate_config)
        tenant_config_data = config_data.get("tenant_config", {})

        return SearchConfig(
            field_boosts=field_boosts,
            indexes=indexes,
            query_config=query_config,
            ranking=ranking,
            function_score=function_score,
            rerank=rerank,
            spu_config=spu_config,
            tenant_config=tenant_config_data,
            es_index_name=config_data.get("es_index_name", "search_products"),
            es_settings=config_data.get("es_settings", {}),
            services=services_data
        )

    def _parse_index_config(self, index_data: Dict[str, Any]) -> "IndexConfig":
        """
        Parse index configuration from dictionary.

        Args:
            index_data: One entry of the ``indexes`` list.

        Returns:
            IndexConfig; ``label`` falls back to ``name`` when not given.

        Raises:
            ConfigurationError: If the entry is not a mapping or has no ``name``.
        """
        if not isinstance(index_data, dict) or "name" not in index_data:
            raise ConfigurationError(
                f"Each index entry must be a dictionary with a 'name' key, got: {index_data!r}"
            )

        name = index_data["name"]
        return IndexConfig(
            name=name,
            label=index_data.get("label", name),
            fields=index_data.get("fields", []),
            boost=index_data.get("boost", 1.0),
            example=index_data.get("example")
        )

    def validate_config(self, config: "SearchConfig") -> List[str]:
        """
        Validate configuration for common errors.

        Args:
            config: SearchConfig to validate

        Returns:
            List of error messages (empty if valid)
        """
        errors = []

        # Validate es_index_name
        if not config.es_index_name:
            errors.append("es_index_name is required")

        # Validate field_boosts
        if not config.field_boosts:
            errors.append("field_boosts is empty")

        for field_name, boost in config.field_boosts.items():
            # bool is a subclass of int, so it must be excluded explicitly;
            # otherwise "title: true" would pass as a numeric boost.
            if isinstance(boost, bool) or not isinstance(boost, (int, float)):
                errors.append(f"field_boosts['{field_name}']: boost must be a number, got {type(boost).__name__}")
            elif boost < 0:
                errors.append(f"field_boosts['{field_name}']: boost must be non-negative")

        # Validate indexes
        if not config.indexes:
            errors.append("At least one index domain must be defined")

        index_names = set()
        for index in config.indexes:
            # Check for duplicate index names
            if index.name in index_names:
                errors.append(f"Duplicate index name: {index.name}")
            index_names.add(index.name)

            # Validate fields in index
            if not index.fields:
                errors.append(f"Index '{index.name}': fields list is empty")

        # Validate SPU config
        if config.spu_config.enabled:
            if not config.spu_config.spu_field:
                errors.append("SPU aggregation enabled but no spu_field specified")

        # Validate query config
        if not config.query_config.supported_languages:
            errors.append("At least one supported language must be specified")

        if config.query_config.default_language not in config.query_config.supported_languages:
            errors.append(
                f"Default language '{config.query_config.default_language}' "
                f"not in supported languages: {config.query_config.supported_languages}"
            )

        # Validate source_fields tri-state semantics
        source_fields = config.query_config.source_fields
        if source_fields is not None:
            if not isinstance(source_fields, list):
                errors.append("query_config.source_fields must be null or list[str]")
            else:
                for idx, field_name in enumerate(source_fields):
                    if not isinstance(field_name, str) or not field_name.strip():
                        errors.append(
                            f"query_config.source_fields[{idx}] must be a non-empty string"
                        )

        # Validate tenant config shape (default must exist in config)
        tenant_cfg = config.tenant_config
        if not isinstance(tenant_cfg, dict):
            errors.append("tenant_config must be an object")
        else:
            default_cfg = tenant_cfg.get("default")
            if not isinstance(default_cfg, dict):
                errors.append("tenant_config.default must be configured")
            else:
                index_languages = default_cfg.get("index_languages")
                if not isinstance(index_languages, list) or len(index_languages) == 0:
                    errors.append("tenant_config.default.index_languages must be a non-empty list")

        return errors

    def to_dict(self, config: "SearchConfig") -> Dict[str, Any]:
        """
        Convert SearchConfig to dictionary representation.

        Only the keys built below are emitted; the containers are the same
        objects held by ``config`` (no copies are made).
        """
        # Build query_config dict
        query_config_dict = {
            "supported_languages": config.query_config.supported_languages,
            "default_language": config.query_config.default_language,
            "enable_text_embedding": config.query_config.enable_text_embedding,
            "enable_query_rewrite": config.query_config.enable_query_rewrite,
            "translation_service": config.query_config.translation_service,
            "text_embedding_field": config.query_config.text_embedding_field,
            "image_embedding_field": config.query_config.image_embedding_field,
            "embedding_disable_thresholds": {
                "chinese_char_limit": config.query_config.embedding_disable_chinese_char_limit,
                "english_word_limit": config.query_config.embedding_disable_english_word_limit
            },
            "source_fields": config.query_config.source_fields
        }

        return {
            "es_index_name": config.es_index_name,
            "es_settings": config.es_settings,
            "field_boosts": config.field_boosts,
            "indexes": [self._index_to_dict(index) for index in config.indexes],
            "query_config": query_config_dict,
            "ranking": {
                "expression": config.ranking.expression,
                "description": config.ranking.description
            },
            "function_score": {
                "score_mode": config.function_score.score_mode,
                "boost_mode": config.function_score.boost_mode,
                "functions": config.function_score.functions
            },
            "rerank": {
                "rerank_window": config.rerank.rerank_window,
                "timeout_sec": config.rerank.timeout_sec,
                "weight_es": config.rerank.weight_es,
                "weight_ai": config.rerank.weight_ai,
                "rerank_query_template": config.rerank.rerank_query_template,
                "rerank_doc_template": config.rerank.rerank_doc_template,
            },
            "spu_config": {
                "enabled": config.spu_config.enabled,
                "spu_field": config.spu_config.spu_field,
                "inner_hits_size": config.spu_config.inner_hits_size,
                "searchable_option_dimensions": config.spu_config.searchable_option_dimensions
            },
            "services": config.services,
        }

    def _index_to_dict(self, index: "IndexConfig") -> Dict[str, Any]:
        """Convert IndexConfig to dictionary; ``example`` is included only when truthy."""
        result = {
            "name": index.name,
            "label": index.label,
            "fields": index.fields,
            "boost": index.boost
        }

        if index.example:
            result["example"] = index.example

        return result
|