be52af70
tangwang
first commit
|
1
|
"""
|
9cb7528e
tangwang
店匠体系数据的搜索:mock da...
|
2
|
Configuration loader and validator for search engine configurations.
|
be52af70
tangwang
first commit
|
3
4
|
This module handles loading, parsing, and validating YAML configuration files
|
33839b37
tangwang
属性值参与搜索:
|
5
6
7
8
|
that define how search should be executed (NOT how data should be indexed).
索引结构由 mappings/search_products.json 定义。
此配置只定义搜索行为:字段权重、搜索域、查询策略等。
|
be52af70
tangwang
first commit
|
9
10
11
|
"""
import yaml
|
be52af70
tangwang
first commit
|
12
13
14
15
|
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field
from pathlib import Path
|
be52af70
tangwang
first commit
|
16
17
18
19
20
21
|
@dataclass
class IndexConfig:
    """Configuration for an index domain (e.g., default, title, brand).

    The first three fields are required; ``boost`` and ``example`` are
    optional and fall back to the defaults below.
    """

    # Identifier of the search domain.
    name: str
    # Display label for the domain.
    label: str
    # List of field names to include in this search domain.
    fields: List[str]
    # Weight applied to the domain.
    boost: float = 1.0
    # Optional example value for the domain.
    example: Optional[str] = None
|
be52af70
tangwang
first commit
|
26
27
28
29
30
|
@dataclass
class QueryConfig:
    """Configuration for query processing.

    Every field has a default, so ``QueryConfig()`` is a usable baseline.
    List and dict defaults are built per instance through
    ``default_factory`` and are therefore never shared between instances.
    """

    # Languages
    supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"])
    default_language: str = "en"

    # Feature flags
    enable_text_embedding: bool = True
    enable_query_rewrite: bool = True

    # Query rewrite dictionary (loaded from external file)
    rewrite_dictionary: Dict[str, str] = field(default_factory=dict)

    # Translation settings (provider/URL in services.translation)
    translation_service: str = "deepl"
    translation_api_key: Optional[str] = None
    translation_glossary_id: Optional[str] = None
    translation_context: str = "e-commerce product search"

    # Embedding field names
    text_embedding_field: Optional[str] = "title_embedding"
    image_embedding_field: Optional[str] = None

    # Source fields configuration
    source_fields: Optional[List[str]] = None

    # KNN boost configuration
    knn_boost: float = 0.25  # Boost value for KNN (embedding recall)

    # Dynamic text fields for multi-language retrieval
    multilingual_fields: List[str] = field(
        default_factory=lambda: [
            "title",
            "brief",
            "description",
            "vendor",
            "category_path",
            "category_name_text",
        ]
    )
    shared_fields: List[str] = field(
        default_factory=lambda: ["tags", "option1_values", "option2_values", "option3_values"]
    )
    core_multilingual_fields: List[str] = field(
        default_factory=lambda: ["title", "brief", "vendor", "category_name_text"]
    )

    # Unified text strategy tuning
    base_minimum_should_match: str = "75%"
    translation_minimum_should_match: str = "75%"
    translation_boost: float = 0.4
    translation_boost_when_source_missing: float = 1.0
    source_boost_when_missing: float = 0.6
    original_query_fallback_boost_when_translation_missing: float = 0.2
    keywords_boost: float = 0.1
    enable_phrase_query: bool = True
    tie_breaker_base_query: float = 0.9
    tie_breaker_keywords: float = 0.9
|
13377199
tangwang
接口优化
|
78
|
|
be52af70
tangwang
first commit
|
79
80
81
82
83
|
@dataclass
class SPUConfig:
    """Configuration for SPU aggregation."""

    # Whether SPU aggregation is switched on.
    enabled: bool = False
    # Field used for SPU aggregation; the loader's validation requires it
    # whenever ``enabled`` is true.
    spu_field: Optional[str] = None
    inner_hits_size: int = 3
    # Which option dimensions take part in retrieval (both at index time
    # and in online search).
    searchable_option_dimensions: List[str] = field(
        default_factory=lambda: ['option1', 'option2', 'option3']
    )
|
be52af70
tangwang
first commit
|
88
89
90
|
@dataclass
class FunctionScoreConfig:
    """Function score configuration (scoring rules applied at the ES layer)."""

    score_mode: str = "sum"
    boost_mode: str = "multiply"
    # Function definitions, kept as plain dictionaries exactly as configured.
    functions: List[Dict[str, Any]] = field(default_factory=list)
@dataclass
class RerankConfig:
    """Rerank configuration (provider/URL live in services.rerank)."""

    enabled: bool = True
    rerank_window: int = 384
    timeout_sec: float = 15.0
    # Weights for the ES score and the AI (rerank) score.
    # NOTE(review): presumably blended linearly by the caller -- confirm.
    weight_es: float = 0.4
    weight_ai: float = 0.6
    # Templates containing ``{query}`` / ``{title}`` placeholders.
    rerank_query_template: str = "{query}"
    rerank_doc_template: str = "{title}"
|
a00c3672
tangwang
feat: Function Sc...
|
108
109
110
|
@dataclass
class SearchConfig:
    """Complete configuration for search engine (multi-tenant).

    The first seven fields are required; the trailing dictionaries default
    to fresh empty dicts per instance.
    """

    # Field weight configuration (used for search)
    field_boosts: Dict[str, float]

    # Legacy index domains (deprecated; kept for compatibility)
    indexes: List[IndexConfig]

    # Query processing
    query_config: QueryConfig

    # Function Score configuration (scoring at the ES layer)
    function_score: FunctionScoreConfig

    # Rerank configuration (local reranking)
    rerank: RerankConfig

    # SPU configuration
    spu_config: SPUConfig

    # ES index settings
    es_index_name: str

    # Tenant configuration
    tenant_config: Dict[str, Any] = field(default_factory=dict)

    # ES settings
    es_settings: Dict[str, Any] = field(default_factory=dict)

    # Extensible service/provider registry (translation/embedding/rerank/...)
    services: Dict[str, Any] = field(default_factory=dict)
|
be52af70
tangwang
first commit
|
142
143
144
145
146
147
148
149
|
class ConfigurationError(Exception):
    """Raised when configuration validation fails.

    The loader in this module also raises it when the configuration file
    is missing or contains invalid YAML.
    """
class ConfigLoader:
    """Loads and validates unified search engine configuration from YAML file.

    ``load_config`` is the entry point: it reads the YAML file, converts it
    into a ``SearchConfig`` via ``_parse_config`` and, unless disabled, runs
    ``validate_config`` and raises ``ConfigurationError`` on any finding.
    """

    def __init__(self, config_file: Optional[Path] = None):
        """
        Initialize config loader.

        Args:
            config_file: Path to config YAML file. When omitted, the
                ``config.yaml`` located next to this module is used.
        """
        if config_file is None:
            config_file = Path(__file__).parent / "config.yaml"
        self.config_file = Path(config_file)

    @staticmethod
    def _section(data: Dict[str, Any], key: str, path: Optional[str] = None) -> Dict[str, Any]:
        """Return the mapping stored under ``key``; ``{}`` when absent or null.

        Raises:
            ConfigurationError: If the value is present but not a mapping.
        """
        value = data.get(key)
        if value is None:
            # A bare ``key:`` in YAML parses to None; treat it like a missing key.
            return {}
        if not isinstance(value, dict):
            raise ConfigurationError(
                f"{path or key} must be a mapping, got {type(value).__name__}"
            )
        return value

    @staticmethod
    def _number(section: Dict[str, Any], key: str, default: Any, path: str, cast: Any = float) -> Any:
        """Read ``section[key]`` (or ``default``) and convert it with ``cast``.

        Raises:
            ConfigurationError: If the value cannot be converted.
        """
        raw = section.get(key, default)
        try:
            return cast(raw)
        except (TypeError, ValueError) as exc:
            raise ConfigurationError(f"{path}.{key} must be a number, got {raw!r}") from exc

    def _load_rewrite_dictionary(self) -> Dict[str, str]:
        """Load query rewrite dictionary from external file.

        The file ``rewrite_dictionary.txt`` is looked up next to this module.
        Each useful line is ``original<TAB>replacement``; blank lines, lines
        starting with ``#``, lines without a tab and lines with an empty side
        are ignored. Columns after the second are ignored.

        Returns:
            Mapping of original term to replacement. Empty when the file does
            not exist. When reading fails part-way, a warning is printed and
            the entries collected so far are returned.
        """
        rewrite_file = Path(__file__).parent / "rewrite_dictionary.txt"
        rewrite_dict: Dict[str, str] = {}

        if not rewrite_file.exists():
            return rewrite_dict

        try:
            with open(rewrite_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    parts = line.split('\t')
                    if len(parts) < 2:
                        continue
                    original = parts[0].strip()
                    replacement = parts[1].strip()
                    if original and replacement:
                        rewrite_dict[original] = replacement
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: a broken dictionary must not prevent config loading.
            print(f"Warning: Failed to load rewrite dictionary: {e}")

        return rewrite_dict

    def load_config(self, validate: bool = True) -> SearchConfig:
        """
        Load unified configuration from YAML file.

        Args:
            validate: Whether to validate configuration after loading

        Returns:
            SearchConfig object

        Raises:
            ConfigurationError: If config file not found, invalid, or validation fails
        """
        if not self.config_file.exists():
            raise ConfigurationError(f"Configuration file not found: {self.config_file}")

        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            raise ConfigurationError(f"Invalid YAML in {self.config_file}: {e}") from e

        # An empty document loads as None and a top-level list/scalar is not a
        # mapping either; report both as configuration errors rather than
        # failing later with an AttributeError.
        if not isinstance(config_data, dict):
            raise ConfigurationError(
                f"Invalid configuration in {self.config_file}: "
                f"top level must be a mapping, got {type(config_data).__name__}"
            )

        config = self._parse_config(config_data)

        # Auto-validate configuration
        if validate:
            errors = self.validate_config(config)
            if errors:
                error_msg = "Configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors)
                raise ConfigurationError(error_msg)

        return config

    def _parse_config(self, config_data: Dict[str, Any]) -> SearchConfig:
        """Parse configuration dictionary into SearchConfig object.

        Missing or null sections fall back to the defaults spelled out below.

        Raises:
            ConfigurationError: If ``field_boosts`` or a nested section is not
                a mapping, an index entry is malformed, or a numeric setting
                cannot be converted.
        """
        # Parse field_boosts
        field_boosts = config_data.get("field_boosts", {})
        if not isinstance(field_boosts, dict):
            raise ConfigurationError("field_boosts must be a dictionary")

        # Parse indexes (deprecated; compatibility only)
        indexes = [
            self._parse_index_config(index_data)
            for index_data in (config_data.get("indexes") or [])
        ]

        # Parse query config
        query_config_data = self._section(config_data, "query_config")
        # A non-mapping ``services`` value is silently replaced by an empty registry.
        services_data = config_data.get("services", {})
        if not isinstance(services_data, dict):
            services_data = {}
        rewrite_dictionary = self._load_rewrite_dictionary()
        search_fields_cfg = self._section(
            query_config_data, "search_fields", "query_config.search_fields"
        )
        strategy_path = "query_config.text_query_strategy"
        text_strategy_cfg = self._section(query_config_data, "text_query_strategy", strategy_path)

        def strategy_float(key: str, default: float) -> float:
            return self._number(text_strategy_cfg, key, default, strategy_path)

        query_config = QueryConfig(
            supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
            default_language=query_config_data.get("default_language") or "en",
            enable_text_embedding=query_config_data.get("enable_text_embedding", True),
            enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
            rewrite_dictionary=rewrite_dictionary,
            translation_api_key=query_config_data.get("translation_api_key"),
            translation_service=query_config_data.get("translation_service") or "deepl",
            translation_glossary_id=query_config_data.get("translation_glossary_id"),
            translation_context=query_config_data.get("translation_context") or "e-commerce product search",
            text_embedding_field=query_config_data.get("text_embedding_field"),
            image_embedding_field=query_config_data.get("image_embedding_field"),
            source_fields=query_config_data.get("source_fields"),
            knn_boost=query_config_data.get("knn_boost", 0.25),
            multilingual_fields=search_fields_cfg.get(
                "multilingual_fields",
                ["title", "brief", "description", "vendor", "category_path", "category_name_text"],
            ),
            shared_fields=search_fields_cfg.get(
                "shared_fields",
                ["tags", "option1_values", "option2_values", "option3_values"],
            ),
            core_multilingual_fields=search_fields_cfg.get(
                "core_multilingual_fields",
                ["title", "brief", "vendor", "category_name_text"],
            ),
            base_minimum_should_match=str(text_strategy_cfg.get("base_minimum_should_match", "75%")),
            translation_minimum_should_match=str(
                text_strategy_cfg.get("translation_minimum_should_match", "75%")
            ),
            translation_boost=strategy_float("translation_boost", 0.4),
            translation_boost_when_source_missing=strategy_float(
                "translation_boost_when_source_missing", 1.0
            ),
            source_boost_when_missing=strategy_float("source_boost_when_missing", 0.6),
            original_query_fallback_boost_when_translation_missing=strategy_float(
                "original_query_fallback_boost_when_translation_missing", 0.2
            ),
            keywords_boost=strategy_float("keywords_boost", 0.1),
            enable_phrase_query=bool(text_strategy_cfg.get("enable_phrase_query", True)),
            tie_breaker_base_query=strategy_float("tie_breaker_base_query", 0.9),
            tie_breaker_keywords=strategy_float("tie_breaker_keywords", 0.9),
        )

        # Parse Function Score configuration
        fs_data = self._section(config_data, "function_score")
        function_score = FunctionScoreConfig(
            score_mode=fs_data.get("score_mode") or "sum",
            boost_mode=fs_data.get("boost_mode") or "multiply",
            functions=fs_data.get("functions") or [],
        )

        # Parse Rerank (provider/URL in services.rerank)
        rerank_data = self._section(config_data, "rerank")
        rerank = RerankConfig(
            enabled=bool(rerank_data.get("enabled", True)),
            rerank_window=self._number(rerank_data, "rerank_window", 384, "rerank", int),
            timeout_sec=self._number(rerank_data, "timeout_sec", 15.0, "rerank"),
            weight_es=self._number(rerank_data, "weight_es", 0.4, "rerank"),
            weight_ai=self._number(rerank_data, "weight_ai", 0.6, "rerank"),
            rerank_query_template=str(rerank_data.get("rerank_query_template") or "{query}"),
            rerank_doc_template=str(rerank_data.get("rerank_doc_template") or "{title}"),
        )

        # Parse SPU config
        spu_data = self._section(config_data, "spu_config")
        spu_config = SPUConfig(
            enabled=spu_data.get("enabled", False),
            spu_field=spu_data.get("spu_field"),
            inner_hits_size=spu_data.get("inner_hits_size", 3),
            searchable_option_dimensions=spu_data.get(
                "searchable_option_dimensions", ['option1', 'option2', 'option3']
            ),
        )

        # Parse tenant config (its shape is checked by validate_config)
        tenant_config_data = config_data.get("tenant_config", {})

        return SearchConfig(
            field_boosts=field_boosts,
            indexes=indexes,
            query_config=query_config,
            function_score=function_score,
            rerank=rerank,
            spu_config=spu_config,
            tenant_config=tenant_config_data,
            es_index_name=config_data.get("es_index_name", "search_products"),
            es_settings=config_data.get("es_settings", {}),
            services=services_data,
        )

    def _parse_index_config(self, index_data: Dict[str, Any]) -> IndexConfig:
        """Parse index configuration from dictionary.

        Raises:
            ConfigurationError: If the entry is not a mapping or has no ``name``.
        """
        if not isinstance(index_data, dict) or "name" not in index_data:
            raise ConfigurationError(
                f"Each indexes entry must be a mapping with a 'name' key, got {index_data!r}"
            )
        name = index_data["name"]
        return IndexConfig(
            name=name,
            label=index_data.get("label", name),
            fields=index_data.get("fields", []),
            boost=index_data.get("boost", 1.0),
            example=index_data.get("example"),
        )

    def validate_config(self, config: SearchConfig) -> List[str]:
        """
        Validate configuration for common errors.

        All findings are collected; the method does not stop at the first one.

        Args:
            config: SearchConfig to validate

        Returns:
            List of error messages (empty if valid)
        """
        errors: List[str] = []
        query_cfg = config.query_config

        # Validate es_index_name
        if not config.es_index_name:
            errors.append("es_index_name is required")

        # Validate field_boosts
        if not config.field_boosts:
            errors.append("field_boosts is empty")
        for field_name, boost in config.field_boosts.items():
            if not isinstance(boost, (int, float)):
                errors.append(f"field_boosts['{field_name}']: boost must be a number, got {type(boost).__name__}")
            elif boost < 0:
                errors.append(f"field_boosts['{field_name}']: boost must be non-negative")

        # Validate indexes (deprecated, optional)
        index_names = set()
        for index in config.indexes:
            # Check for duplicate index names
            if index.name in index_names:
                errors.append(f"Duplicate index name: {index.name}")
            index_names.add(index.name)

            # Validate fields in index
            if not index.fields:
                errors.append(f"Index '{index.name}': fields list is empty")

        # Validate SPU config
        if config.spu_config.enabled and not config.spu_config.spu_field:
            errors.append("SPU aggregation enabled but no spu_field specified")

        # Validate query config
        if not query_cfg.supported_languages:
            errors.append("At least one supported language must be specified")
        if query_cfg.default_language not in query_cfg.supported_languages:
            errors.append(
                f"Default language '{query_cfg.default_language}' "
                f"not in supported languages: {query_cfg.supported_languages}"
            )

        # Validate dynamic search fields
        def _validate_str_list(name: str, values: List[str]) -> None:
            if not isinstance(values, list) or not values:
                errors.append(f"query_config.{name} must be a non-empty list[str]")
                return
            for i, val in enumerate(values):
                if not isinstance(val, str) or not val.strip():
                    errors.append(f"query_config.{name}[{i}] must be a non-empty string")

        multilingual = query_cfg.multilingual_fields
        core = query_cfg.core_multilingual_fields
        _validate_str_list("multilingual_fields", multilingual)
        _validate_str_list("shared_fields", query_cfg.shared_fields)
        _validate_str_list("core_multilingual_fields", core)

        # Only compare when both values are lists, and only their string
        # items: building a set from None or from unhashable entries would
        # raise TypeError and hide the errors already recorded above.
        if isinstance(multilingual, list) and isinstance(core, list):
            core_set = {val for val in core if isinstance(val, str)}
            multi_set = {val for val in multilingual if isinstance(val, str)}
            if not core_set.issubset(multi_set):
                errors.append("query_config.core_multilingual_fields must be subset of multilingual_fields")

        # Validate text query strategy numbers
        for name in (
            "translation_boost",
            "translation_boost_when_source_missing",
            "source_boost_when_missing",
            "original_query_fallback_boost_when_translation_missing",
            "keywords_boost",
            "tie_breaker_base_query",
            "tie_breaker_keywords",
        ):
            value = getattr(query_cfg, name, None)
            if not isinstance(value, (int, float)):
                errors.append(f"query_config.{name} must be a number")
            elif value < 0:
                errors.append(f"query_config.{name} must be non-negative")

        # Validate source_fields tri-state semantics: None, [] and a list of
        # names are all accepted; anything else is rejected.
        source_fields = query_cfg.source_fields
        if source_fields is not None:
            if not isinstance(source_fields, list):
                errors.append("query_config.source_fields must be null or list[str]")
            else:
                for idx, field_name in enumerate(source_fields):
                    if not isinstance(field_name, str) or not field_name.strip():
                        errors.append(
                            f"query_config.source_fields[{idx}] must be a non-empty string"
                        )

        # Validate tenant config shape (default must exist in config)
        tenant_cfg = config.tenant_config
        if not isinstance(tenant_cfg, dict):
            errors.append("tenant_config must be an object")
        else:
            default_cfg = tenant_cfg.get("default")
            if not isinstance(default_cfg, dict):
                errors.append("tenant_config.default must be configured")
            else:
                index_languages = default_cfg.get("index_languages")
                if not isinstance(index_languages, list) or len(index_languages) == 0:
                    errors.append("tenant_config.default.index_languages must be a non-empty list")

        return errors

    def to_dict(self, config: SearchConfig) -> Dict[str, Any]:
        """Convert SearchConfig to dictionary representation.

        The nested containers are referenced, not copied.
        """
        query_cfg = config.query_config

        # Build query_config dict
        # NOTE(review): knn_boost, rewrite_dictionary and the translation
        # api key / glossary id / context are not serialized here, and
        # tenant_config is absent from the result below -- confirm this is
        # intentional.
        query_config_dict = {
            "supported_languages": query_cfg.supported_languages,
            "default_language": query_cfg.default_language,
            "enable_text_embedding": query_cfg.enable_text_embedding,
            "enable_query_rewrite": query_cfg.enable_query_rewrite,
            "translation_service": query_cfg.translation_service,
            "text_embedding_field": query_cfg.text_embedding_field,
            "image_embedding_field": query_cfg.image_embedding_field,
            "source_fields": query_cfg.source_fields,
            "search_fields": {
                "multilingual_fields": query_cfg.multilingual_fields,
                "shared_fields": query_cfg.shared_fields,
                "core_multilingual_fields": query_cfg.core_multilingual_fields,
            },
            "text_query_strategy": {
                "base_minimum_should_match": query_cfg.base_minimum_should_match,
                "translation_minimum_should_match": query_cfg.translation_minimum_should_match,
                "translation_boost": query_cfg.translation_boost,
                "translation_boost_when_source_missing": query_cfg.translation_boost_when_source_missing,
                "source_boost_when_missing": query_cfg.source_boost_when_missing,
                "original_query_fallback_boost_when_translation_missing": (
                    query_cfg.original_query_fallback_boost_when_translation_missing
                ),
                "keywords_boost": query_cfg.keywords_boost,
                "enable_phrase_query": query_cfg.enable_phrase_query,
                "tie_breaker_base_query": query_cfg.tie_breaker_base_query,
                "tie_breaker_keywords": query_cfg.tie_breaker_keywords,
            },
        }

        return {
            "es_index_name": config.es_index_name,
            "es_settings": config.es_settings,
            "field_boosts": config.field_boosts,
            "indexes": [self._index_to_dict(index) for index in config.indexes],
            "query_config": query_config_dict,
            "function_score": {
                "score_mode": config.function_score.score_mode,
                "boost_mode": config.function_score.boost_mode,
                "functions": config.function_score.functions,
            },
            "rerank": {
                "enabled": config.rerank.enabled,
                "rerank_window": config.rerank.rerank_window,
                "timeout_sec": config.rerank.timeout_sec,
                "weight_es": config.rerank.weight_es,
                "weight_ai": config.rerank.weight_ai,
                "rerank_query_template": config.rerank.rerank_query_template,
                "rerank_doc_template": config.rerank.rerank_doc_template,
            },
            "spu_config": {
                "enabled": config.spu_config.enabled,
                "spu_field": config.spu_config.spu_field,
                "inner_hits_size": config.spu_config.inner_hits_size,
                "searchable_option_dimensions": config.spu_config.searchable_option_dimensions,
            },
            "services": config.services,
        }

    def _index_to_dict(self, index: IndexConfig) -> Dict[str, Any]:
        """Convert IndexConfig to dictionary.

        ``example`` is only emitted when it is truthy.
        """
        result = {
            "name": index.name,
            "label": index.label,
            "fields": index.fields,
            "boost": index.boost,
        }

        if index.example:
            result["example"] = index.example

        return result
|