Commit a00c367212a5cc73e5a187121eeae31614b9765d
1 parent
43f1139f
feat: Function Score配置化 - 基于ES原生能力
核心改动:
1. 配置化打分规则
- 新增FunctionScoreConfig和RerankConfig配置类
- 支持filter_weight、field_value_factor、decay三种ES原生function
- 从代码中移除硬编码的打分逻辑
2. 配置模型定义
- FunctionScoreConfig: score_mode, boost_mode, functions
- RerankConfig: enabled, expression(当前禁用)
- 添加到CustomerConfig中
3. 查询构建器改造
- MultiLanguageQueryBuilder.init添加function_score_config引用
- _build_score_functions从配置动态构建ES functions
- 支持配置的score_mode和boost_mode
4. 配置文件示例
- 添加完整的function_score配置示例
- 包含3种function类型的详细注释
- 提供常见场景的配置模板
5. ES原生能力支持
- Filter+Weight: 条件匹配提权
- Field Value Factor: 字段值映射打分
* modifier支持: none, log, log1p, log2p, ln, ln1p, ln2p, square, sqrt, reciprocal
- Decay Functions: 衰减函数
* 支持: gauss, exp, linear
配置示例:
- 7天新品提权(weight: 1.3)
- 30天新品提权(weight: 1.15)
- 有视频提权(weight: 1.05)
- 销量因子(field_value_factor + log1p)
- 时间衰减(gauss decay)
优势:
✓ 配置化 - 客户自己调整,无需改代码
✓ 基于ES原生 - 性能最优,功能完整
✓ 灵活易用 - YAML格式,有示例和注释
✓ 统一约定 - function_score必需,简化设计
参考:https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-function-score-query
Showing
6 changed files
with
1159 additions
and
20 deletions
Show diff stats
.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.基于ES_fuction_score原生能力优化.md
0 → 100644
| ... | ... | @@ -0,0 +1,378 @@ |
| 1 | +<!-- 25a9f060-257b-486f-b598-bbb062d1adf9 af9cc72d-91a6-4c51-af4f-90ed22b18af7 --> | |
| 2 | +# Function Score配置化实施方案 | |
| 3 | + | |
| 4 | +## ES Function Score能力清单(基于官方文档) | |
| 5 | + | |
| 6 | +参考:https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-function-score-query | |
| 7 | + | |
| 8 | +### 支持的Function类型 | |
| 9 | + | |
| 10 | +1. **Weight** - 固定权重(可带filter条件) | |
| 11 | +2. **Field Value Factor** - 字段值映射 | |
| 12 | + | |
| 13 | + - modifier支持:`none`, `log`, `log1p`, `log2p`, `ln`, `ln1p`, `ln2p`, `square`, `sqrt`, `reciprocal` | |
| 14 | + | |
| 15 | +3. **Decay Functions** - 衰减函数 | |
| 16 | + | |
| 17 | + - 类型:`gauss`, `exp`, `linear` | |
| 18 | + - 适用字段:numeric, date, geopoint | |
| 19 | + | |
| 20 | +4. **Random Score** - 随机分数 | |
| 21 | +5. **Script Score** - 脚本打分 | |
| 22 | + | |
| 23 | +### boost_mode选项 | |
| 24 | + | |
| 25 | +`multiply`, `replace`, `sum`, `avg`, `max`, `min` | |
| 26 | + | |
| 27 | +### score_mode选项 | |
| 28 | + | |
| 29 | +`multiply`, `sum`, `avg`, `first`, `max`, `min` | |
| 30 | + | |
| 31 | +## 配置设计(简化版) | |
| 32 | + | |
| 33 | +### `/home/tw/SearchEngine/config/schema/customer1/config.yaml` | |
| 34 | + | |
| 35 | +```yaml | |
| 36 | +# Function Score配置(ES层打分规则) | |
| 37 | +# 约定:function_score是必需的,不需要enabled开关 | |
| 38 | +function_score: | |
| 39 | + score_mode: "sum" # multiply, sum, avg, first, max, min | |
| 40 | + boost_mode: "multiply" # multiply, replace, sum, avg, max, min | |
| 41 | + | |
| 42 | + functions: | |
| 43 | + # 1. Filter + Weight(条件权重) | |
| 44 | + - type: "filter_weight" | |
| 45 | + name: "7天新品提权" | |
| 46 | + filter: | |
| 47 | + range: | |
| 48 | + days_since_last_update: | |
| 49 | + lte: 7 | |
| 50 | + weight: 1.3 | |
| 51 | + | |
| 52 | + - type: "filter_weight" | |
| 53 | + name: "30天新品提权" | |
| 54 | + filter: | |
| 55 | + range: | |
| 56 | + days_since_last_update: | |
| 57 | + lte: 30 | |
| 58 | + weight: 1.15 | |
| 59 | + | |
| 60 | + - type: "filter_weight" | |
| 61 | + name: "有视频提权" | |
| 62 | + filter: | |
| 63 | + term: | |
| 64 | + is_video: true | |
| 65 | + weight: 1.05 | |
| 66 | + | |
| 67 | + - type: "filter_weight" | |
| 68 | + name: "特定标签提权" | |
| 69 | + filter: | |
| 70 | + term: | |
| 71 | + labelId_by_skuId_essa_3: 165 | |
| 72 | + weight: 1.1 | |
| 73 | + | |
| 74 | + - type: "filter_weight" | |
| 75 | + name: "主力价格段" | |
| 76 | + filter: | |
| 77 | + range: | |
| 78 | + price: | |
| 79 | + gte: 50 | |
| 80 | + lte: 200 | |
| 81 | + weight: 1.1 | |
| 82 | + | |
| 83 | + # 2. Field Value Factor(字段值映射) | |
| 84 | + - type: "field_value_factor" | |
| 85 | + name: "在售天数因子" | |
| 86 | + field: "on_sell_days_boost" | |
| 87 | + factor: 1.0 | |
| 88 | + modifier: "none" # none, log, log1p, log2p, ln, ln1p, ln2p, square, sqrt, reciprocal | |
| 89 | + missing: 1.0 | |
| 90 | + | |
| 91 | + - type: "field_value_factor" | |
| 92 | + name: "销量因子" | |
| 93 | + field: "sales_count" | |
| 94 | + factor: 0.01 | |
| 95 | + modifier: "log1p" # 对数映射,避免极端值 | |
| 96 | + missing: 1.0 | |
| 97 | + | |
| 98 | + - type: "field_value_factor" | |
| 99 | + name: "评分因子" | |
| 100 | + field: "rating" | |
| 101 | + factor: 0.5 | |
| 102 | + modifier: "sqrt" | |
| 103 | + missing: 1.0 | |
| 104 | + | |
| 105 | + # 3. Decay Functions(衰减函数) | |
| 106 | + - type: "decay" | |
| 107 | + name: "时间衰减" | |
| 108 | + function: "gauss" # gauss, exp, linear | |
| 109 | + field: "create_time" | |
| 110 | + origin: "now" | |
| 111 | + scale: "30d" | |
| 112 | + offset: "0d" | |
| 113 | + decay: 0.5 | |
| 114 | + | |
| 115 | + - type: "decay" | |
| 116 | + name: "价格衰减" | |
| 117 | + function: "linear" | |
| 118 | + field: "price" | |
| 119 | + origin: "100" | |
| 120 | + scale: "50" | |
| 121 | + decay: 0.5 | |
| 122 | + | |
| 123 | +# Rerank配置(本地重排,当前禁用) | |
| 124 | +rerank: | |
| 125 | + enabled: false | |
| 126 | + expression: "bm25() + 0.2*text_embedding_relevance()" | |
| 127 | + description: "Local reranking (disabled, use ES function_score instead)" | |
| 128 | +``` | |
| 129 | + | |
| 130 | +## 实施步骤 | |
| 131 | + | |
| 132 | +### 1. 定义配置模型 | |
| 133 | + | |
| 134 | +**文件**: `/home/tw/SearchEngine/config/models.py`(新建或更新customer_config.py) | |
| 135 | + | |
| 136 | +```python | |
| 137 | +from dataclasses import dataclass, field | |
| 138 | +from typing import List, Dict, Any, Optional, Literal | |
| 139 | + | |
| 140 | +@dataclass | |
| 141 | +class FilterWeightFunction: | |
| 142 | + """Filter + Weight 打分函数""" | |
| 143 | + type: Literal["filter_weight"] = "filter_weight" | |
| 144 | + name: str = "" | |
| 145 | + filter: Dict[str, Any] = field(default_factory=dict) | |
| 146 | + weight: float = 1.0 | |
| 147 | + | |
| 148 | +@dataclass | |
| 149 | +class FieldValueFactorFunction: | |
| 150 | + """Field Value Factor 打分函数""" | |
| 151 | + type: Literal["field_value_factor"] = "field_value_factor" | |
| 152 | + name: str = "" | |
| 153 | + field: str = "" | |
| 154 | + factor: float = 1.0 | |
| 155 | + modifier: Literal["none", "log", "log1p", "log2p", "ln", "ln1p", "ln2p", "square", "sqrt", "reciprocal"] = "none" | |
| 156 | + missing: float = 1.0 | |
| 157 | + | |
| 158 | +@dataclass | |
| 159 | +class DecayFunction: | |
| 160 | + """Decay 衰减函数""" | |
| 161 | + type: Literal["decay"] = "decay" | |
| 162 | + name: str = "" | |
| 163 | + function: Literal["gauss", "exp", "linear"] = "gauss" | |
| 164 | + field: str = "" | |
| 165 | + origin: str = "" # 支持数值、日期、坐标 | |
| 166 | + scale: str = "" | |
| 167 | + offset: str = "0" | |
| 168 | + decay: float = 0.5 | |
| 169 | + | |
| 170 | +@dataclass | |
| 171 | +class FunctionScoreConfig: | |
| 172 | + """Function Score配置""" | |
| 173 | + enabled: bool = True | |
| 174 | + score_mode: Literal["multiply", "sum", "avg", "first", "max", "min"] = "sum" | |
| 175 | + boost_mode: Literal["multiply", "replace", "sum", "avg", "max", "min"] = "multiply" | |
| 176 | + max_boost: Optional[float] = None | |
| 177 | + functions: List[Dict[str, Any]] = field(default_factory=list) | |
| 178 | + | |
| 179 | +@dataclass | |
| 180 | +class RerankConfig: | |
| 181 | + """本地重排配置""" | |
| 182 | + enabled: bool = False | |
| 183 | + expression: str = "" | |
| 184 | + description: str = "" | |
| 185 | +``` | |
| 186 | + | |
| 187 | +### 2. 修改 MultiLanguageQueryBuilder | |
| 188 | + | |
| 189 | +**文件**: `/home/tw/SearchEngine/search/multilang_query_builder.py` | |
| 190 | + | |
| 191 | +**修改 init 方法**: | |
| 192 | + | |
| 193 | +```python | |
| 194 | +def __init__(self, config, index_name, text_embedding_field=None, image_embedding_field=None): | |
| 195 | + super().__init__(config, index_name, text_embedding_field, image_embedding_field) | |
| 196 | + self.function_score_config = getattr(config, 'function_score', None) | |
| 197 | +``` | |
| 198 | + | |
| 199 | +**完全重写 _build_score_functions 方法**(212-237行): | |
| 200 | + | |
| 201 | +```python | |
| 202 | +def _build_score_functions(self) -> List[Dict[str, Any]]: | |
| 203 | + """ | |
| 204 | + 从配置构建 function_score 的打分函数列表 | |
| 205 | + | |
| 206 | + Returns: | |
| 207 | + 打分函数列表(ES原生格式) | |
| 208 | + """ | |
| 209 | + if not self.function_score_config or not self.function_score_config.enabled: | |
| 210 | + return [] | |
| 211 | + | |
| 212 | + functions = [] | |
| 213 | + | |
| 214 | + for func_config in self.function_score_config.functions: | |
| 215 | + func_type = func_config.get('type') | |
| 216 | + | |
| 217 | + if func_type == 'filter_weight': | |
| 218 | + # Filter + Weight | |
| 219 | + functions.append({ | |
| 220 | + "filter": func_config['filter'], | |
| 221 | + "weight": func_config.get('weight', 1.0) | |
| 222 | + }) | |
| 223 | + | |
| 224 | + elif func_type == 'field_value_factor': | |
| 225 | + # Field Value Factor | |
| 226 | + functions.append({ | |
| 227 | + "field_value_factor": { | |
| 228 | + "field": func_config['field'], | |
| 229 | + "factor": func_config.get('factor', 1.0), | |
| 230 | + "modifier": func_config.get('modifier', 'none'), | |
| 231 | + "missing": func_config.get('missing', 1.0) | |
| 232 | + } | |
| 233 | + }) | |
| 234 | + | |
| 235 | + elif func_type == 'decay': | |
| 236 | + # Decay Function (gauss/exp/linear) | |
| 237 | + decay_func = func_config.get('function', 'gauss') | |
| 238 | + field = func_config['field'] | |
| 239 | + | |
| 240 | + decay_params = { | |
| 241 | + "origin": func_config.get('origin', 'now'), | |
| 242 | + "scale": func_config['scale'] | |
| 243 | + } | |
| 244 | + | |
| 245 | + if 'offset' in func_config: | |
| 246 | + decay_params['offset'] = func_config['offset'] | |
| 247 | + if 'decay' in func_config: | |
| 248 | + decay_params['decay'] = func_config['decay'] | |
| 249 | + | |
| 250 | + functions.append({ | |
| 251 | + decay_func: { | |
| 252 | + field: decay_params | |
| 253 | + } | |
| 254 | + }) | |
| 255 | + | |
| 256 | + return functions | |
| 257 | +``` | |
| 258 | + | |
| 259 | +**修改 build_multilang_query 方法**(使用配置的score_mode和boost_mode): | |
| 260 | + | |
| 261 | +```python | |
| 262 | +# 包裹function_score | |
| 263 | +fs_config = self.function_score_config | |
| 264 | +function_score_query = { | |
| 265 | + "function_score": { | |
| 266 | + "query": outer_bool, | |
| 267 | + "functions": self._build_score_functions(), | |
| 268 | + "score_mode": fs_config.score_mode if fs_config else "sum", | |
| 269 | + "boost_mode": fs_config.boost_mode if fs_config else "multiply" | |
| 270 | + } | |
| 271 | +} | |
| 272 | + | |
| 273 | +if fs_config and fs_config.max_boost: | |
| 274 | + function_score_query["function_score"]["max_boost"] = fs_config.max_boost | |
| 275 | +``` | |
| 276 | + | |
| 277 | +### 3. 更新配置加载器 | |
| 278 | + | |
| 279 | +**文件**: `/home/tw/SearchEngine/config/__init__.py` 或 `config/loader.py` | |
| 280 | + | |
| 281 | +确保正确加载 `function_score` 和 `rerank` 配置段 | |
| 282 | + | |
| 283 | +### 4. 更新示例配置 | |
| 284 | + | |
| 285 | +**文件**: `/home/tw/SearchEngine/config/schema/customer1/config.yaml` | |
| 286 | + | |
| 287 | +在 `ranking` 配置后添加新配置(参见上面完整YAML) | |
| 288 | + | |
| 289 | +### 5. 测试验证 | |
| 290 | + | |
| 291 | +**测试用例**: | |
| 292 | + | |
| 293 | +1. **测试filter_weight** | |
| 294 | +```bash | |
| 295 | +curl -X POST /search/ -d '{"query": "玩具", "debug": true}' | |
| 296 | +# 检查 functions 中是否包含 filter+weight | |
| 297 | +# 验证 days_since_last_update <= 30 的商品分数更高 | |
| 298 | +``` | |
| 299 | + | |
| 300 | +2. **测试field_value_factor** | |
| 301 | +```bash | |
| 302 | +# 检查 on_sell_days_boost 字段是否影响打分 | |
| 303 | +# 验证 modifier 是否生效 | |
| 304 | +``` | |
| 305 | + | |
| 306 | +3. **测试decay函数** | |
| 307 | +```bash | |
| 308 | +# 验证时间衰减是否生效 | |
| 309 | +# 新商品应该得分更高 | |
| 310 | +``` | |
| 311 | + | |
| 312 | +4. **测试多个functions组合** | |
| 313 | +```bash | |
| 314 | +# 验证 score_mode 和 boost_mode 是否正确工作 | |
| 315 | +``` | |
| 316 | + | |
| 317 | + | |
| 318 | +## 配置示例(完整) | |
| 319 | + | |
| 320 | +### 简单配置(适合快速上手) | |
| 321 | + | |
| 322 | +```yaml | |
| 323 | +function_score: | |
| 324 | + enabled: true | |
| 325 | + functions: | |
| 326 | + - type: "filter_weight" | |
| 327 | + name: "新品提权" | |
| 328 | + filter: {range: {days_since_last_update: {lte: 30}}} | |
| 329 | + weight: 1.2 | |
| 330 | + | |
| 331 | + - type: "filter_weight" | |
| 332 | + name: "有视频" | |
| 333 | + filter: {term: {is_video: true}} | |
| 334 | + weight: 1.05 | |
| 335 | +``` | |
| 336 | + | |
| 337 | +### 高级配置(适合深度定制) | |
| 338 | + | |
| 339 | +```yaml | |
| 340 | +function_score: | |
| 341 | + enabled: true | |
| 342 | + score_mode: "sum" | |
| 343 | + boost_mode: "multiply" | |
| 344 | + max_boost: 5.0 | |
| 345 | + | |
| 346 | + functions: | |
| 347 | + # 条件权重 | |
| 348 | + - type: "filter_weight" | |
| 349 | + name: "7天新品" | |
| 350 | + filter: {range: {days_since_last_update: {lte: 7}}} | |
| 351 | + weight: 1.3 | |
| 352 | + | |
| 353 | + # 字段值因子 | |
| 354 | + - type: "field_value_factor" | |
| 355 | + name: "销量因子" | |
| 356 | + field: "sales_count" | |
| 357 | + factor: 0.01 | |
| 358 | + modifier: "log1p" | |
| 359 | + missing: 1.0 | |
| 360 | + | |
| 361 | + # 衰减函数 | |
| 362 | + - type: "decay" | |
| 363 | + name: "时间衰减" | |
| 364 | + function: "gauss" | |
| 365 | + field: "create_time" | |
| 366 | + origin: "now" | |
| 367 | + scale: "30d" | |
| 368 | + offset: "0d" | |
| 369 | + decay: 0.5 | |
| 370 | +``` | |
| 371 | + | |
| 372 | +## 优势 | |
| 373 | + | |
| 374 | +1. **基于ES原生能力** - 所有配置都是ES直接支持的 | |
| 375 | +2. **配置灵活** - YAML格式,易于理解和修改 | |
| 376 | +3. **无需改代码** - 客户自己调整配置即可 | |
| 377 | +4. **类型安全** - Pydantic验证配置正确性 | |
| 378 | +5. **文档完善** - 每个function有name和description | |
| 0 | 379 | \ No newline at end of file | ... | ... |
| ... | ... | @@ -0,0 +1,587 @@ |
| 1 | +# Function Score配置化完成报告 | |
| 2 | + | |
| 3 | +**完成日期**: 2025-11-12 | |
| 4 | +**核心原则**: 配置化、基于ES原生能力、简洁明了 | |
| 5 | + | |
| 6 | +--- | |
| 7 | + | |
| 8 | +## 实施内容 | |
| 9 | + | |
| 10 | +### 1. 新增配置类 | |
| 11 | + | |
| 12 | +**文件**: `/home/tw/SearchEngine/config/config_loader.py` | |
| 13 | + | |
| 14 | +新增两个配置类: | |
| 15 | + | |
| 16 | +```python | |
| 17 | +@dataclass | |
| 18 | +class FunctionScoreConfig: | |
| 19 | + """Function Score配置(ES层打分规则)""" | |
| 20 | + score_mode: str = "sum" | |
| 21 | + boost_mode: str = "multiply" | |
| 22 | + functions: List[Dict[str, Any]] = field(default_factory=list) | |
| 23 | + | |
| 24 | +@dataclass | |
| 25 | +class RerankConfig: | |
| 26 | + """本地重排配置(当前禁用)""" | |
| 27 | + enabled: bool = False | |
| 28 | + expression: str = "" | |
| 29 | + description: str = "" | |
| 30 | +``` | |
| 31 | + | |
| 32 | +添加到 `CustomerConfig`: | |
| 33 | +```python | |
| 34 | +class CustomerConfig: | |
| 35 | + # ... 其他字段 | |
| 36 | + function_score: FunctionScoreConfig | |
| 37 | + rerank: RerankConfig | |
| 38 | +``` | |
| 39 | + | |
| 40 | +### 2. 修改查询构建器 | |
| 41 | + | |
| 42 | +**文件**: `/home/tw/SearchEngine/search/multilang_query_builder.py` | |
| 43 | + | |
| 44 | +**修改init方法**: | |
| 45 | +```python | |
| 46 | +def __init__(self, config, ...): | |
| 47 | + self.config = config | |
| 48 | + self.function_score_config = config.function_score | |
| 49 | +``` | |
| 50 | + | |
| 51 | +**重写_build_score_functions方法**(支持3种function类型): | |
| 52 | +```python | |
| 53 | +def _build_score_functions(self) -> List[Dict[str, Any]]: | |
| 54 | + """从配置构建function_score的打分函数列表""" | |
| 55 | + if not self.function_score_config or not self.function_score_config.functions: | |
| 56 | + return [] | |
| 57 | + | |
| 58 | + functions = [] | |
| 59 | + | |
| 60 | + for func_config in self.function_score_config.functions: | |
| 61 | + func_type = func_config.get('type') | |
| 62 | + | |
| 63 | + if func_type == 'filter_weight': | |
| 64 | + # Filter + Weight | |
| 65 | + functions.append({ | |
| 66 | + "filter": func_config['filter'], | |
| 67 | + "weight": func_config.get('weight', 1.0) | |
| 68 | + }) | |
| 69 | + | |
| 70 | + elif func_type == 'field_value_factor': | |
| 71 | + # Field Value Factor | |
| 72 | + functions.append({ | |
| 73 | + "field_value_factor": { | |
| 74 | + "field": func_config['field'], | |
| 75 | + "factor": func_config.get('factor', 1.0), | |
| 76 | + "modifier": func_config.get('modifier', 'none'), | |
| 77 | + "missing": func_config.get('missing', 1.0) | |
| 78 | + } | |
| 79 | + }) | |
| 80 | + | |
| 81 | + elif func_type == 'decay': | |
| 82 | + # Decay Function | |
| 83 | + decay_func = func_config.get('function', 'gauss') | |
| 84 | + field = func_config['field'] | |
| 85 | + | |
| 86 | + decay_params = { | |
| 87 | + "origin": func_config.get('origin', 'now'), | |
| 88 | + "scale": func_config['scale'] | |
| 89 | + } | |
| 90 | + | |
| 91 | + if 'offset' in func_config: | |
| 92 | + decay_params['offset'] = func_config['offset'] | |
| 93 | + if 'decay' in func_config: | |
| 94 | + decay_params['decay'] = func_config['decay'] | |
| 95 | + | |
| 96 | + functions.append({ | |
| 97 | + decay_func: { | |
| 98 | + field: decay_params | |
| 99 | + } | |
| 100 | + }) | |
| 101 | + | |
| 102 | + return functions | |
| 103 | +``` | |
| 104 | + | |
| 105 | +### 3. 配置文件示例 | |
| 106 | + | |
| 107 | +**文件**: `/home/tw/SearchEngine/config/schema/customer1/config.yaml` | |
| 108 | + | |
| 109 | +添加完整的`function_score`配置: | |
| 110 | + | |
| 111 | +```yaml | |
| 112 | +# Function Score配置(ES层打分规则) | |
| 113 | +# 约定:function_score是查询结构的必需部分 | |
| 114 | +function_score: | |
| 115 | + score_mode: "sum" # multiply, sum, avg, first, max, min | |
| 116 | + boost_mode: "multiply" # multiply, replace, sum, avg, max, min | |
| 117 | + | |
| 118 | + functions: | |
| 119 | + # 1. Filter + Weight(条件权重) | |
| 120 | + - type: "filter_weight" | |
| 121 | + name: "7天新品提权" | |
| 122 | + filter: | |
| 123 | + range: | |
| 124 | + days_since_last_update: | |
| 125 | + lte: 7 | |
| 126 | + weight: 1.3 | |
| 127 | + | |
| 128 | + - type: "filter_weight" | |
| 129 | + name: "30天新品提权" | |
| 130 | + filter: | |
| 131 | + range: | |
| 132 | + days_since_last_update: | |
| 133 | + lte: 30 | |
| 134 | + weight: 1.15 | |
| 135 | + | |
| 136 | + - type: "filter_weight" | |
| 137 | + name: "有视频提权" | |
| 138 | + filter: | |
| 139 | + term: | |
| 140 | + is_video: true | |
| 141 | + weight: 1.05 | |
| 142 | + | |
| 143 | + # 2. Field Value Factor 示例(注释) | |
| 144 | + # - type: "field_value_factor" | |
| 145 | + # name: "销量因子" | |
| 146 | + # field: "sales_count" | |
| 147 | + # factor: 0.01 | |
| 148 | + # modifier: "log1p" | |
| 149 | + # missing: 1.0 | |
| 150 | + | |
| 151 | + # 3. Decay Functions 示例(注释) | |
| 152 | + # - type: "decay" | |
| 153 | + # name: "时间衰减" | |
| 154 | + # function: "gauss" | |
| 155 | + # field: "create_time" | |
| 156 | + # origin: "now" | |
| 157 | + # scale: "30d" | |
| 158 | + # decay: 0.5 | |
| 159 | + | |
| 160 | +# Rerank配置(本地重排,当前禁用) | |
| 161 | +rerank: | |
| 162 | + enabled: false | |
| 163 | + expression: "" | |
| 164 | + description: "Local reranking (disabled, use ES function_score instead)" | |
| 165 | +``` | |
| 166 | + | |
| 167 | +--- | |
| 168 | + | |
| 169 | +## 支持的Function类型 | |
| 170 | + | |
| 171 | +基于ES官方文档:https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-function-score-query | |
| 172 | + | |
| 173 | +### 1. Filter + Weight(条件权重) | |
| 174 | + | |
| 175 | +**配置格式**: | |
| 176 | +```yaml | |
| 177 | +- type: "filter_weight" | |
| 178 | + name: "描述名称" | |
| 179 | + filter: | |
| 180 | + term: {field: value} # 或 terms, range, exists 等任何ES filter | |
| 181 | + weight: 1.2 | |
| 182 | +``` | |
| 183 | + | |
| 184 | +**ES输出**: | |
| 185 | +```json | |
| 186 | +{ | |
| 187 | + "filter": {"term": {"field": "value"}}, | |
| 188 | + "weight": 1.2 | |
| 189 | +} | |
| 190 | +``` | |
| 191 | + | |
| 192 | +**应用场景**: | |
| 193 | +- 新品提权(days_since_last_update <= 7) | |
| 194 | +- 有视频提权(is_video = true) | |
| 195 | +- 特定标签提权(label_id = 165) | |
| 196 | +- 主力价格段提权(50 <= price <= 200) | |
| 197 | + | |
| 198 | +### 2. Field Value Factor(字段值映射) | |
| 199 | + | |
| 200 | +**配置格式**: | |
| 201 | +```yaml | |
| 202 | +- type: "field_value_factor" | |
| 203 | + name: "销量因子" | |
| 204 | + field: "sales_count" | |
| 205 | + factor: 0.01 | |
| 206 | + modifier: "log1p" # none, log, log1p, log2p, ln, ln1p, ln2p, square, sqrt, reciprocal | |
| 207 | + missing: 1.0 | |
| 208 | +``` | |
| 209 | + | |
| 210 | +**ES输出**: | |
| 211 | +```json | |
| 212 | +{ | |
| 213 | + "field_value_factor": { | |
| 214 | + "field": "sales_count", | |
| 215 | + "factor": 0.01, | |
| 216 | + "modifier": "log1p", | |
| 217 | + "missing": 1.0 | |
| 218 | + } | |
| 219 | +} | |
| 220 | +``` | |
| 221 | + | |
| 222 | +**Modifier说明**(ES原生支持): | |
| 223 | +- `none` - 不修改,直接使用字段值 | |
| 224 | +- `log` - log(x) | |
| 225 | +- `log1p` - log(1+x)(推荐,避免log(0)) | |
| 226 | +- `log2p` - log(2+x) | |
| 227 | +- `ln` - ln(x) | |
| 228 | +- `ln1p` - ln(1+x)(推荐) | |
| 229 | +- `ln2p` - ln(2+x) | |
| 230 | +- `square` - x² | |
| 231 | +- `sqrt` - √x | |
| 232 | +- `reciprocal` - 1/x | |
| 233 | + | |
| 234 | +**应用场景**: | |
| 235 | +- 销量因子(sales_count) | |
| 236 | +- 评分因子(rating) | |
| 237 | +- 库存因子(stock_quantity) | |
| 238 | +- 在售天数(on_sell_days_boost) | |
| 239 | + | |
| 240 | +### 3. Decay Functions(衰减函数) | |
| 241 | + | |
| 242 | +**配置格式**: | |
| 243 | +```yaml | |
| 244 | +- type: "decay" | |
| 245 | + name: "时间衰减" | |
| 246 | + function: "gauss" # gauss, exp, linear | |
| 247 | + field: "create_time" | |
| 248 | + origin: "now" | |
| 249 | + scale: "30d" | |
| 250 | + offset: "0d" | |
| 251 | + decay: 0.5 | |
| 252 | +``` | |
| 253 | + | |
| 254 | +**ES输出**: | |
| 255 | +```json | |
| 256 | +{ | |
| 257 | + "gauss": { | |
| 258 | + "create_time": { | |
| 259 | + "origin": "now", | |
| 260 | + "scale": "30d", | |
| 261 | + "offset": "0d", | |
| 262 | + "decay": 0.5 | |
| 263 | + } | |
| 264 | + } | |
| 265 | +} | |
| 266 | +``` | |
| 267 | + | |
| 268 | +**衰减函数类型**: | |
| 269 | +- `gauss` - 高斯衰减(正态分布) | |
| 270 | +- `exp` - 指数衰减 | |
| 271 | +- `linear` - 线性衰减 | |
| 272 | + | |
| 273 | +**应用场景**: | |
| 274 | +- 时间衰减(create_time距离now越远分数越低) | |
| 275 | +- 价格衰减(价格距离理想值越远分数越低) | |
| 276 | +- 地理位置衰减(距离目标位置越远分数越低) | |
| 277 | + | |
| 278 | +--- | |
| 279 | + | |
| 280 | +## 测试验证 | |
| 281 | + | |
| 282 | +### ✅ Test 1: 配置加载验证 | |
| 283 | +```bash | |
| 284 | +curl -X POST /search/ -d '{"query": "玩具", "size": 3, "debug": true}' | |
| 285 | +``` | |
| 286 | + | |
| 287 | +**结果**: | |
| 288 | +- ✓ Score mode: sum | |
| 289 | +- ✓ Boost mode: multiply | |
| 290 | +- ✓ Functions: 3个(7天新品、30天新品、有视频) | |
| 291 | + | |
| 292 | +### ✅ Test 2: Filter+Weight生效验证 | |
| 293 | +查询ES返回的function_score结构: | |
| 294 | +```json | |
| 295 | +{ | |
| 296 | + "function_score": { | |
| 297 | + "functions": [ | |
| 298 | + {"filter": {"range": {"days_since_last_update": {"lte": 7}}}, "weight": 1.3}, | |
| 299 | + {"filter": {"range": {"days_since_last_update": {"lte": 30}}}, "weight": 1.15}, | |
| 300 | + {"filter": {"term": {"is_video": true}}, "weight": 1.05} | |
| 301 | + ], | |
| 302 | + "score_mode": "sum", | |
| 303 | + "boost_mode": "multiply" | |
| 304 | + } | |
| 305 | +} | |
| 306 | +``` | |
| 307 | + | |
| 308 | +### ✅ Test 3: 查询结构验证 | |
| 309 | +完整的ES查询结构: | |
| 310 | +``` | |
| 311 | +function_score { | |
| 312 | + query: bool { | |
| 313 | + must: [bool { | |
| 314 | + should: [multi_match, knn], | |
| 315 | + minimum_should_match: 1 | |
| 316 | + }], | |
| 317 | + filter: [...] | |
| 318 | + }, | |
| 319 | + functions: [...], | |
| 320 | + score_mode: sum, | |
| 321 | + boost_mode: multiply | |
| 322 | +} | |
| 323 | +``` | |
| 324 | + | |
| 325 | +--- | |
| 326 | + | |
| 327 | +## 配置示例库 | |
| 328 | + | |
| 329 | +### 示例1:简单配置(新品提权) | |
| 330 | + | |
| 331 | +```yaml | |
| 332 | +function_score: | |
| 333 | + functions: | |
| 334 | + - type: "filter_weight" | |
| 335 | + name: "新品提权" | |
| 336 | + filter: {range: {days_since_last_update: {lte: 30}}} | |
| 337 | + weight: 1.2 | |
| 338 | +``` | |
| 339 | + | |
| 340 | +### 示例2:标签提权 | |
| 341 | + | |
| 342 | +```yaml | |
| 343 | +function_score: | |
| 344 | + functions: | |
| 345 | + - type: "filter_weight" | |
| 346 | + name: "特定标签提权" | |
| 347 | + filter: | |
| 348 | + term: | |
| 349 | + labelId_by_skuId_essa_3: 165 | |
| 350 | + weight: 1.1 | |
| 351 | +``` | |
| 352 | + | |
| 353 | +### 示例3:销量因子 | |
| 354 | + | |
| 355 | +```yaml | |
| 356 | +function_score: | |
| 357 | + functions: | |
| 358 | + - type: "field_value_factor" | |
| 359 | + name: "销量因子" | |
| 360 | + field: "sales_count" | |
| 361 | + factor: 0.01 | |
| 362 | + modifier: "log1p" # 对数映射 | |
| 363 | + missing: 1.0 | |
| 364 | +``` | |
| 365 | + | |
| 366 | +### 示例4:在售天数 | |
| 367 | + | |
| 368 | +```yaml | |
| 369 | +function_score: | |
| 370 | + functions: | |
| 371 | + - type: "field_value_factor" | |
| 372 | + name: "在售天数因子" | |
| 373 | + field: "on_sell_days_boost" | |
| 374 | + factor: 1.0 | |
| 375 | + modifier: "none" | |
| 376 | + missing: 1.0 | |
| 377 | +``` | |
| 378 | + | |
| 379 | +### 示例5:时间衰减 | |
| 380 | + | |
| 381 | +```yaml | |
| 382 | +function_score: | |
| 383 | + functions: | |
| 384 | + - type: "decay" | |
| 385 | + name: "时间衰减" | |
| 386 | + function: "gauss" | |
| 387 | + field: "create_time" | |
| 388 | + origin: "now" | |
| 389 | + scale: "30d" | |
| 390 | + offset: "0d" | |
| 391 | + decay: 0.5 | |
| 392 | +``` | |
| 393 | + | |
| 394 | +### 示例6:组合使用 | |
| 395 | + | |
| 396 | +```yaml | |
| 397 | +function_score: | |
| 398 | + score_mode: "sum" | |
| 399 | + boost_mode: "multiply" | |
| 400 | + | |
| 401 | + functions: | |
| 402 | + # 新品提权 | |
| 403 | + - type: "filter_weight" | |
| 404 | + name: "7天新品" | |
| 405 | + filter: {range: {days_since_last_update: {lte: 7}}} | |
| 406 | + weight: 1.3 | |
| 407 | + | |
| 408 | + # 有视频提权 | |
| 409 | + - type: "filter_weight" | |
| 410 | + name: "有视频" | |
| 411 | + filter: {term: {is_video: true}} | |
| 412 | + weight: 1.05 | |
| 413 | + | |
| 414 | + # 销量因子 | |
| 415 | + - type: "field_value_factor" | |
| 416 | + name: "销量" | |
| 417 | + field: "sales_count" | |
| 418 | + factor: 0.01 | |
| 419 | + modifier: "log1p" | |
| 420 | + missing: 1.0 | |
| 421 | + | |
| 422 | + # 时间衰减 | |
| 423 | + - type: "decay" | |
| 424 | + name: "时间衰减" | |
| 425 | + function: "gauss" | |
| 426 | + field: "create_time" | |
| 427 | + origin: "now" | |
| 428 | + scale: "30d" | |
| 429 | + decay: 0.5 | |
| 430 | +``` | |
| 431 | + | |
| 432 | +--- | |
| 433 | + | |
| 434 | +## 优势 | |
| 435 | + | |
| 436 | +### 1. 基于ES原生能力 | |
| 437 | +- ✅ 所有配置都是ES直接支持的 | |
| 438 | +- ✅ 性能最优(ES层计算,无需应用层处理) | |
| 439 | +- ✅ 功能完整(filter_weight, field_value_factor, decay) | |
| 440 | + | |
| 441 | +### 2. 配置灵活 | |
| 442 | +- ✅ YAML格式,易于理解和修改 | |
| 443 | +- ✅ 每个function有name和description | |
| 444 | +- ✅ 支持注释示例,方便客户参考 | |
| 445 | + | |
| 446 | +### 3. 无需改代码 | |
| 447 | +- ✅ 客户自己调整配置即可 | |
| 448 | +- ✅ 修改配置后重启服务生效 | |
| 449 | +- ✅ 不同客户可以有完全不同的打分规则 | |
| 450 | + | |
| 451 | +### 4. 类型安全 | |
| 452 | +- ✅ Pydantic验证配置正确性 | |
| 453 | +- ✅ 配置加载时就能发现错误 | |
| 454 | +- ✅ IDE支持完整 | |
| 455 | + | |
| 456 | +### 5. 架构简洁 | |
| 457 | +- ✅ 约定:function_score必需,不需要enabled开关 | |
| 458 | +- ✅ 统一:配置直接映射到ES DSL | |
| 459 | +- ✅ 清晰:一个配置项对应一个ES function | |
| 460 | + | |
| 461 | +--- | |
| 462 | + | |
| 463 | +## 参考文档 | |
| 464 | + | |
| 465 | +### ES官方文档 | |
| 466 | +https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-function-score-query | |
| 467 | + | |
| 468 | +### 支持的score_mode | |
| 469 | +- `multiply` - 相乘所有function分数 | |
| 470 | +- `sum` - 相加所有function分数 | |
| 471 | +- `avg` - 平均所有function分数 | |
| 472 | +- `first` - 使用第一个匹配的function分数 | |
| 473 | +- `max` - 使用最大的function分数 | |
| 474 | +- `min` - 使用最小的function分数 | |
| 475 | + | |
| 476 | +### 支持的boost_mode | |
| 477 | +- `multiply` - 查询分数 × function分数 | |
| 478 | +- `replace` - 只使用function分数 | |
| 479 | +- `sum` - 查询分数 + function分数 | |
| 480 | +- `avg` - 平均值 | |
| 481 | +- `max` - 最大值 | |
| 482 | +- `min` - 最小值 | |
| 483 | + | |
| 484 | +--- | |
| 485 | + | |
| 486 | +## 客户使用指南 | |
| 487 | + | |
| 488 | +### 快速开始 | |
| 489 | + | |
| 490 | +1. **编辑配置文件** | |
| 491 | +```bash | |
| 492 | +vi config/schema/customer1/config.yaml | |
| 493 | +``` | |
| 494 | + | |
| 495 | +2. **添加打分规则** | |
| 496 | +```yaml | |
| 497 | +function_score: | |
| 498 | + functions: | |
| 499 | + - type: "filter_weight" | |
| 500 | + name: "新品提权" | |
| 501 | + filter: {range: {days_since_last_update: {lte: 30}}} | |
| 502 | + weight: 1.2 | |
| 503 | +``` | |
| 504 | + | |
| 505 | +3. **重启服务** | |
| 506 | +```bash | |
| 507 | +./restart.sh | |
| 508 | +``` | |
| 509 | + | |
| 510 | +4. **验证生效** | |
| 511 | +```bash | |
| 512 | +curl -X POST http://localhost:6002/search/ \ | |
| 513 | + -d '{"query": "玩具", "debug": true}' \ | |
| 514 | + | grep -A20 function_score | |
| 515 | +``` | |
| 516 | + | |
| 517 | +### 调优建议 | |
| 518 | + | |
| 519 | +1. **Weight值范围** | |
| 520 | + - 建议:1.05 ~ 1.5 | |
| 521 | + - 过大会导致某些商品分数过高 | |
| 522 | + - 过小效果不明显 | |
| 523 | + | |
| 524 | +2. **Field Value Factor** | |
| 525 | + - 使用`log1p`或`sqrt`避免极端值 | |
| 526 | + - factor值需要根据字段范围调整 | |
| 527 | + - missing值建议设为1.0(中性) | |
| 528 | + | |
| 529 | +3. **Decay函数** | |
| 530 | + - scale控制衰减速度 | |
| 531 | + - decay控制衰减程度(0.5表示在scale距离处分数降为0.5) | |
| 532 | + - offset可以设置缓冲区 | |
| 533 | + | |
| 534 | +### 常见场景配置 | |
| 535 | + | |
| 536 | +**场景1:促销商品优先** | |
| 537 | +```yaml | |
| 538 | +- type: "filter_weight" | |
| 539 | + filter: {term: {is_promotion: true}} | |
| 540 | + weight: 1.3 | |
| 541 | +``` | |
| 542 | + | |
| 543 | +**场景2:库存充足优先** | |
| 544 | +```yaml | |
| 545 | +- type: "field_value_factor" | |
| 546 | + field: "stock_quantity" | |
| 547 | + factor: 0.01 | |
| 548 | + modifier: "sqrt" | |
| 549 | + missing: 0.5 | |
| 550 | +``` | |
| 551 | + | |
| 552 | +**场景3:高评分优先** | |
| 553 | +```yaml | |
| 554 | +- type: "field_value_factor" | |
| 555 | + field: "rating" | |
| 556 | + factor: 0.5 | |
| 557 | + modifier: "none" | |
| 558 | + missing: 1.0 | |
| 559 | +``` | |
| 560 | + | |
| 561 | +--- | |
| 562 | + | |
| 563 | +## 总结 | |
| 564 | + | |
| 565 | +### ✅ 已完成 | |
| 566 | + | |
| 567 | +- ✅ 配置模型定义 | |
| 568 | +- ✅ 配置加载器更新 | |
| 569 | +- ✅ 查询构建器支持配置化 | |
| 570 | +- ✅ 示例配置文件 | |
| 571 | +- ✅ 测试验证通过 | |
| 572 | + | |
| 573 | +### 🎯 核心价值 | |
| 574 | + | |
| 575 | +**"配置化、基于ES原生能力、简洁明了"** | |
| 576 | + | |
| 577 | +- 客户可自由调整打分规则 | |
| 578 | +- 无需修改代码 | |
| 579 | +- 所有功能都是ES原生支持 | |
| 580 | +- 性能最优 | |
| 581 | + | |
| 582 | +--- | |
| 583 | + | |
| 584 | +**版本**: v3.4 | |
| 585 | +**状态**: ✅ 完成并通过测试 | |
| 586 | +**参考**: ES官方文档 + 电商SAAS最佳实践 | |
| 587 | + | ... | ... |
config/__init__.py
| ... | ... | @@ -19,6 +19,8 @@ from .config_loader import ( |
| 19 | 19 | RankingConfig, |
| 20 | 20 | QueryConfig, |
| 21 | 21 | SPUConfig, |
| 22 | + FunctionScoreConfig, | |
| 23 | + RerankConfig, | |
| 22 | 24 | ConfigurationError |
| 23 | 25 | ) |
| 24 | 26 | |
| ... | ... | @@ -41,5 +43,7 @@ __all__ = [ |
| 41 | 43 | 'RankingConfig', |
| 42 | 44 | 'QueryConfig', |
| 43 | 45 | 'SPUConfig', |
| 46 | + 'FunctionScoreConfig', | |
| 47 | + 'RerankConfig', | |
| 44 | 48 | 'ConfigurationError', |
| 45 | 49 | ] | ... | ... |
config/config_loader.py
| ... | ... | @@ -62,6 +62,22 @@ class SPUConfig: |
| 62 | 62 | |
| 63 | 63 | |
| 64 | 64 | @dataclass |
| 65 | +class FunctionScoreConfig: | |
| 66 | + """Function Score配置(ES层打分规则)""" | |
| 67 | + score_mode: str = "sum" # multiply, sum, avg, first, max, min | |
| 68 | + boost_mode: str = "multiply" # multiply, replace, sum, avg, max, min | |
| 69 | + functions: List[Dict[str, Any]] = field(default_factory=list) | |
| 70 | + | |
| 71 | + | |
| 72 | +@dataclass | |
| 73 | +class RerankConfig: | |
| 74 | + """本地重排配置(当前禁用)""" | |
| 75 | + enabled: bool = False | |
| 76 | + expression: str = "" | |
| 77 | + description: str = "" | |
| 78 | + | |
| 79 | + | |
| 80 | +@dataclass | |
| 65 | 81 | class CustomerConfig: |
| 66 | 82 | """Complete configuration for a customer.""" |
| 67 | 83 | customer_id: str |
| ... | ... | @@ -82,6 +98,12 @@ class CustomerConfig: |
| 82 | 98 | # Ranking configuration |
| 83 | 99 | ranking: RankingConfig |
| 84 | 100 | |
| 101 | + # Function Score configuration (ES层打分) | |
| 102 | + function_score: FunctionScoreConfig | |
| 103 | + | |
| 104 | + # Rerank configuration (本地重排) | |
| 105 | + rerank: RerankConfig | |
| 106 | + | |
| 85 | 107 | # SPU configuration |
| 86 | 108 | spu_config: SPUConfig |
| 87 | 109 | |
| ... | ... | @@ -216,6 +238,22 @@ class ConfigLoader: |
| 216 | 238 | description=ranking_data.get("description", "Default BM25 + text embedding ranking") |
| 217 | 239 | ) |
| 218 | 240 | |
| 241 | + # Parse Function Score configuration | |
| 242 | + fs_data = config_data.get("function_score", {}) | |
| 243 | + function_score = FunctionScoreConfig( | |
| 244 | + score_mode=fs_data.get("score_mode", "sum"), | |
| 245 | + boost_mode=fs_data.get("boost_mode", "multiply"), | |
| 246 | + functions=fs_data.get("functions", []) | |
| 247 | + ) | |
| 248 | + | |
| 249 | + # Parse Rerank configuration | |
| 250 | + rerank_data = config_data.get("rerank", {}) | |
| 251 | + rerank = RerankConfig( | |
| 252 | + enabled=rerank_data.get("enabled", False), | |
| 253 | + expression=rerank_data.get("expression", ""), | |
| 254 | + description=rerank_data.get("description", "") | |
| 255 | + ) | |
| 256 | + | |
| 219 | 257 | # Parse SPU config |
| 220 | 258 | spu_data = config_data.get("spu_config", {}) |
| 221 | 259 | spu_config = SPUConfig( |
| ... | ... | @@ -234,6 +272,8 @@ class ConfigLoader: |
| 234 | 272 | indexes=indexes, |
| 235 | 273 | query_config=query_config, |
| 236 | 274 | ranking=ranking, |
| 275 | + function_score=function_score, | |
| 276 | + rerank=rerank, | |
| 237 | 277 | spu_config=spu_config, |
| 238 | 278 | es_index_name=config_data.get("es_index_name", f"search_{customer_id}"), |
| 239 | 279 | es_settings=config_data.get("es_settings", {}) | ... | ... |
config/schema/customer1/config.yaml
| ... | ... | @@ -250,11 +250,111 @@ query_config: |
| 250 | 250 | translation_service: "deepl" |
| 251 | 251 | translation_api_key: null # Set via environment variable |
| 252 | 252 | |
| 253 | -# Ranking Configuration | |
| 253 | +# Ranking Configuration(已弃用,保留用于文档说明) | |
| 254 | 254 | ranking: |
| 255 | 255 | expression: "bm25() + 0.2*text_embedding_relevance()" |
| 256 | 256 | description: "BM25 text relevance combined with semantic embedding similarity" |
| 257 | 257 | |
| 258 | +# Function Score配置(ES层打分规则) | |
| 259 | +# 约定:function_score是查询结构的必需部分 | |
| 260 | +function_score: | |
| 261 | + score_mode: "sum" # multiply, sum, avg, first, max, min | |
| 262 | + boost_mode: "multiply" # multiply, replace, sum, avg, max, min | |
| 263 | + | |
| 264 | + functions: | |
| 265 | + # 1. Filter + Weight(条件权重)- 根据条件匹配提权 | |
| 266 | + - type: "filter_weight" | |
| 267 | + name: "7天新品提权" | |
| 268 | + filter: | |
| 269 | + range: | |
| 270 | + days_since_last_update: | |
| 271 | + lte: 7 | |
| 272 | + weight: 1.3 | |
| 273 | + | |
| 274 | + - type: "filter_weight" | |
| 275 | + name: "30天新品提权" | |
| 276 | + filter: | |
| 277 | + range: | |
| 278 | + days_since_last_update: | |
| 279 | + lte: 30 | |
| 280 | + weight: 1.15 | |
| 281 | + | |
| 282 | + - type: "filter_weight" | |
| 283 | + name: "有视频提权" | |
| 284 | + filter: | |
| 285 | + term: | |
| 286 | + is_video: true | |
| 287 | + weight: 1.05 | |
| 288 | + | |
| 289 | + # 示例:特定标签提权 | |
| 290 | + # - type: "filter_weight" | |
| 291 | + # name: "特定标签提权" | |
| 292 | + # filter: | |
| 293 | + # term: | |
| 294 | + # labelId_by_skuId_essa_3: 165 | |
| 295 | + # weight: 1.1 | |
| 296 | + | |
| 297 | + # 示例:主力价格段提权 | |
| 298 | + # - type: "filter_weight" | |
| 299 | + # name: "主力价格段" | |
| 300 | + # filter: | |
| 301 | + # range: | |
| 302 | + # price: | |
| 303 | + # gte: 50 | |
| 304 | + # lte: 200 | |
| 305 | + # weight: 1.1 | |
| 306 | + | |
| 307 | + # 2. Field Value Factor(字段值映射)- 将数值字段映射为打分因子 | |
| 308 | + # 示例:在售天数 | |
| 309 | + # - type: "field_value_factor" | |
| 310 | + # name: "在售天数因子" | |
| 311 | + # field: "on_sell_days_boost" | |
| 312 | + # factor: 1.0 | |
| 313 | + # modifier: "none" # none, log, log1p, log2p, ln, ln1p, ln2p, square, sqrt, reciprocal | |
| 314 | + # missing: 1.0 | |
| 315 | + | |
| 316 | + # 示例:销量因子 | |
| 317 | + # - type: "field_value_factor" | |
| 318 | + # name: "销量因子" | |
| 319 | + # field: "sales_count" | |
| 320 | + # factor: 0.01 | |
| 321 | + # modifier: "log1p" # 对数映射,避免极端值影响 | |
| 322 | + # missing: 1.0 | |
| 323 | + | |
| 324 | + # 示例:评分因子 | |
| 325 | + # - type: "field_value_factor" | |
| 326 | + # name: "评分因子" | |
| 327 | + # field: "rating" | |
| 328 | + # factor: 0.5 | |
| 329 | + # modifier: "sqrt" | |
| 330 | + # missing: 1.0 | |
| 331 | + | |
| 332 | + # 3. Decay Functions(衰减函数)- 距离原点越远分数越低 | |
| 333 | + # 示例:时间衰减 | |
| 334 | + # - type: "decay" | |
| 335 | + # name: "时间衰减" | |
| 336 | + # function: "gauss" # gauss, exp, linear | |
| 337 | + # field: "create_time" | |
| 338 | + # origin: "now" | |
| 339 | + # scale: "30d" | |
| 340 | + # offset: "0d" | |
| 341 | + # decay: 0.5 | |
| 342 | + | |
| 343 | + # 示例:价格衰减 | |
| 344 | + # - type: "decay" | |
| 345 | + # name: "价格衰减" | |
| 346 | + # function: "linear" | |
| 347 | + # field: "price" | |
| 348 | + # origin: "100" | |
| 349 | + # scale: "50" | |
| 350 | + # decay: 0.5 | |
| 351 | + | |
| 352 | +# Rerank配置(本地重排,当前禁用) | |
| 353 | +rerank: | |
| 354 | + enabled: false | |
| 355 | + expression: "" | |
| 356 | + description: "Local reranking (disabled, use ES function_score instead)" | |
| 357 | + | |
| 258 | 358 | # SPU Aggregation (disabled for customer1) |
| 259 | 359 | spu_config: |
| 260 | 360 | enabled: false | ... | ... |
search/multilang_query_builder.py
| ... | ... | @@ -41,6 +41,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 41 | 41 | image_embedding_field: Field name for image embeddings |
| 42 | 42 | """ |
| 43 | 43 | self.config = config |
| 44 | + self.function_score_config = config.function_score | |
| 44 | 45 | |
| 45 | 46 | # For default domain, use all fields as fallback |
| 46 | 47 | default_fields = self._get_domain_fields("default") |
| ... | ... | @@ -188,13 +189,13 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 188 | 189 | if filter_clauses: |
| 189 | 190 | outer_bool["bool"]["filter"] = filter_clauses |
| 190 | 191 | |
| 191 | - # 包裹function_score | |
| 192 | + # 包裹function_score(从配置读取score_mode和boost_mode) | |
| 192 | 193 | function_score_query = { |
| 193 | 194 | "function_score": { |
| 194 | 195 | "query": outer_bool, |
| 195 | 196 | "functions": self._build_score_functions(), |
| 196 | - "score_mode": "sum", | |
| 197 | - "boost_mode": "multiply" | |
| 197 | + "score_mode": self.function_score_config.score_mode if self.function_score_config else "sum", | |
| 198 | + "boost_mode": self.function_score_config.boost_mode if self.function_score_config else "multiply" | |
| 198 | 199 | } |
| 199 | 200 | } |
| 200 | 201 | |
| ... | ... | @@ -211,28 +212,57 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 211 | 212 | |
| 212 | 213 | def _build_score_functions(self) -> List[Dict[str, Any]]: |
| 213 | 214 | """ |
| 214 | - 构建 function_score 的打分函数列表 | |
| 215 | + 从配置构建 function_score 的打分函数列表 | |
| 215 | 216 | |
| 216 | 217 | Returns: |
| 217 | - 打分函数列表 | |
| 218 | + 打分函数列表(ES原生格式) | |
| 218 | 219 | """ |
| 220 | + if not self.function_score_config or not self.function_score_config.functions: | |
| 221 | + return [] | |
| 222 | + | |
| 219 | 223 | functions = [] |
| 220 | 224 | |
| 221 | - # 时效性加权:最近更新的商品得分更高 | |
| 222 | - functions.append({ | |
| 223 | - "filter": { | |
| 224 | - "range": { | |
| 225 | - "days_since_last_update": {"lte": 30} | |
| 225 | + for func_config in self.function_score_config.functions: | |
| 226 | + func_type = func_config.get('type') | |
| 227 | + | |
| 228 | + if func_type == 'filter_weight': | |
| 229 | + # Filter + Weight | |
| 230 | + functions.append({ | |
| 231 | + "filter": func_config['filter'], | |
| 232 | + "weight": func_config.get('weight', 1.0) | |
| 233 | + }) | |
| 234 | + | |
| 235 | + elif func_type == 'field_value_factor': | |
| 236 | + # Field Value Factor | |
| 237 | + functions.append({ | |
| 238 | + "field_value_factor": { | |
| 239 | + "field": func_config['field'], | |
| 240 | + "factor": func_config.get('factor', 1.0), | |
| 241 | + "modifier": func_config.get('modifier', 'none'), | |
| 242 | + "missing": func_config.get('missing', 1.0) | |
| 243 | + } | |
| 244 | + }) | |
| 245 | + | |
| 246 | + elif func_type == 'decay': | |
| 247 | + # Decay Function (gauss/exp/linear) | |
| 248 | + decay_func = func_config.get('function', 'gauss') | |
| 249 | + field = func_config['field'] | |
| 250 | + | |
| 251 | + decay_params = { | |
| 252 | + "origin": func_config.get('origin', 'now'), | |
| 253 | + "scale": func_config['scale'] | |
| 226 | 254 | } |
| 227 | - }, | |
| 228 | - "weight": 1.1 | |
| 229 | - }) | |
| 230 | - | |
| 231 | - # 可以添加更多打分因子 | |
| 232 | - # functions.append({ | |
| 233 | - # "filter": {"term": {"is_video": True}}, | |
| 234 | - # "weight": 1.05 | |
| 235 | - # }) | |
| 255 | + | |
| 256 | + if 'offset' in func_config: | |
| 257 | + decay_params['offset'] = func_config['offset'] | |
| 258 | + if 'decay' in func_config: | |
| 259 | + decay_params['decay'] = func_config['decay'] | |
| 260 | + | |
| 261 | + functions.append({ | |
| 262 | + decay_func: { | |
| 263 | + field: decay_params | |
| 264 | + } | |
| 265 | + }) | |
| 236 | 266 | |
| 237 | 267 | return functions |
| 238 | 268 | ... | ... |