<!-- 25a9f060-257b-486f-b598-bbb062d1adf9 af9cc72d-91a6-4c51-af4f-90ed22b18af7 -->
Function Score配置化实施方案
ES Function Score能力清单(基于官方文档)
参考:https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-function-score-query
支持的Function类型
- Weight - 固定权重(可带filter条件)
- Field Value Factor - 字段值映射
  - modifier支持:`none`, `log`, `log1p`, `log2p`, `ln`, `ln1p`, `ln2p`, `square`, `sqrt`, `reciprocal`
- Decay Functions - 衰减函数
  - 类型:`gauss`, `exp`, `linear`
  - 适用字段:numeric, date, geo_point
- Random Score - 随机分数
- Script Score - 脚本打分
boost_mode选项
multiply, replace, sum, avg, max, min
score_mode选项
multiply, sum, avg, first, max, min
配置设计(简化版)
/home/tw/SearchEngine/config/schema/customer1/config.yaml
# Function Score configuration (scoring rules applied inside Elasticsearch)
# Convention: the function_score section is expected to be present. `enabled`
# may be omitted (the config model defaults it to true); set `enabled: false`
# only to switch function scoring off.
function_score:
  score_mode: "sum"        # multiply, sum, avg, first, max, min
  boost_mode: "multiply"   # multiply, replace, sum, avg, max, min
  functions:
    # 1. Filter + Weight (conditional weight)
    # NOTE: a document updated within 7 days also matches the 30-day rule,
    # so with score_mode "sum" both weights are added together.
    - type: "filter_weight"
      name: "7天新品提权"
      filter:
        range:
          days_since_last_update:
            lte: 7
      weight: 1.3
    - type: "filter_weight"
      name: "30天新品提权"
      filter:
        range:
          days_since_last_update:
            lte: 30
      weight: 1.15
    - type: "filter_weight"
      name: "有视频提权"
      filter:
        term:
          is_video: true
      weight: 1.05
    - type: "filter_weight"
      name: "特定标签提权"
      filter:
        term:
          labelId_by_skuId_essa_3: 165
      weight: 1.1
    - type: "filter_weight"
      name: "主力价格段"
      filter:
        range:
          price:
            gte: 50
            lte: 200
      weight: 1.1
    # 2. Field Value Factor (score derived from a field value)
    - type: "field_value_factor"
      name: "在售天数因子"
      field: "on_sell_days_boost"
      factor: 1.0
      modifier: "none"   # none, log, log1p, log2p, ln, ln1p, ln2p, square, sqrt, reciprocal
      missing: 1.0
    - type: "field_value_factor"
      name: "销量因子"
      field: "sales_count"
      factor: 0.01
      modifier: "log1p"  # logarithmic mapping to dampen extreme values
      missing: 1.0
    - type: "field_value_factor"
      name: "评分因子"
      field: "rating"
      factor: 0.5
      modifier: "sqrt"
      missing: 1.0
    # 3. Decay Functions
    - type: "decay"
      name: "时间衰减"
      function: "gauss"  # gauss, exp, linear
      field: "create_time"
      origin: "now"
      scale: "30d"
      offset: "0d"
      decay: 0.5
    - type: "decay"
      name: "价格衰减"
      function: "linear"
      field: "price"
      origin: "100"
      scale: "50"
      decay: 0.5
# Rerank configuration (local re-ranking, currently disabled)
rerank:
  enabled: false
  expression: "bm25() + 0.2*text_embedding_relevance()"
  description: "Local reranking (disabled, use ES function_score instead)"
实施步骤
1. 定义配置模型
文件: /home/tw/SearchEngine/config/models.py(新建或更新customer_config.py)
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Literal
@dataclass
class FilterWeightFunction:
    """Scoring function that applies a fixed weight to documents matching a filter.

    Attributes:
        type: Discriminator for this function kind; always "filter_weight".
        name: Human-readable label for the rule.
        filter: Elasticsearch query DSL clause selecting the documents to weight.
        weight: Weight applied to documents that match ``filter``.
    """

    type: Literal["filter_weight"] = "filter_weight"
    name: str = ""
    # default_factory gives every instance its own dict instead of a shared one.
    filter: Dict[str, Any] = field(default_factory=dict)
    weight: float = 1.0
@dataclass
class FieldValueFactorFunction:
    """Scoring function that derives a score from a document field value.

    Attributes:
        type: Discriminator for this function kind; always "field_value_factor".
        name: Human-readable label for the rule.
        field: Name of the document field whose value is used.
        factor: Multiplier applied to the field value.
        modifier: Mathematical modifier applied to the field value.
        missing: Value used when the document has no value for ``field``.

    Raises:
        ValueError: If ``modifier`` is not one of the supported modifiers.
    """

    type: Literal["field_value_factor"] = "field_value_factor"
    name: str = ""
    # NOTE: from here on `field` refers to this attribute inside the class body,
    # shadowing dataclasses.field; do not call field(...) below this line.
    field: str = ""
    factor: float = 1.0
    modifier: Literal["none", "log", "log1p", "log2p", "ln", "ln1p", "ln2p", "square", "sqrt", "reciprocal"] = "none"
    missing: float = 1.0

    def __post_init__(self) -> None:
        # Literal annotations are not enforced at runtime, so check explicitly
        # and fail at construction time instead of at query time.
        allowed = (
            "none", "log", "log1p", "log2p", "ln", "ln1p", "ln2p",
            "square", "sqrt", "reciprocal",
        )
        if self.modifier not in allowed:
            raise ValueError(
                f"Unsupported modifier {self.modifier!r}; expected one of {allowed}"
            )
@dataclass
class DecayFunction:
    """Scoring function that decays the score with distance from an origin.

    Attributes:
        type: Discriminator for this function kind; always "decay".
        name: Human-readable label for the rule.
        function: Decay curve: "gauss", "exp" or "linear".
        field: Name of the document field the distance is computed on.
        origin: Point from which distance is measured (number, date or
            coordinates, given as a string).
        scale: Distance from ``origin`` at which the score equals ``decay``.
        offset: Distance from ``origin`` within which no decay is applied.
        decay: Score assigned at distance ``scale``.

    Raises:
        ValueError: If ``function`` is not one of the supported decay curves.
    """

    type: Literal["decay"] = "decay"
    name: str = ""
    function: Literal["gauss", "exp", "linear"] = "gauss"
    # NOTE: from here on `field` refers to this attribute inside the class body,
    # shadowing dataclasses.field; do not call field(...) below this line.
    field: str = ""
    origin: str = ""  # accepts numbers, dates and coordinates
    scale: str = ""
    offset: str = "0"
    decay: float = 0.5

    def __post_init__(self) -> None:
        # Literal annotations are not enforced at runtime, so check explicitly.
        allowed = ("gauss", "exp", "linear")
        if self.function not in allowed:
            raise ValueError(
                f"Unsupported decay function {self.function!r}; expected one of {allowed}"
            )
@dataclass
class FunctionScoreConfig:
    """Top-level configuration of the function_score query.

    Attributes:
        enabled: Whether scoring functions are applied at all.
        score_mode: How the scores of the individual functions are combined.
        boost_mode: How the combined function score is merged with the
            query score.
        max_boost: Optional upper bound for the function score; ``None``
            means no explicit bound is configured.
        functions: Raw function definitions as dictionaries, each carrying a
            ``type`` key ("filter_weight", "field_value_factor" or "decay").

    Raises:
        ValueError: If ``score_mode`` or ``boost_mode`` is not a supported value.
    """

    enabled: bool = True
    score_mode: Literal["multiply", "sum", "avg", "first", "max", "min"] = "sum"
    boost_mode: Literal["multiply", "replace", "sum", "avg", "max", "min"] = "multiply"
    max_boost: Optional[float] = None
    # default_factory gives every instance its own list instead of a shared one.
    functions: List[Dict[str, Any]] = field(default_factory=list)

    def __post_init__(self) -> None:
        # Literal annotations are not enforced at runtime; the two modes accept
        # different value sets ("first" vs "replace"), so check both explicitly.
        score_modes = ("multiply", "sum", "avg", "first", "max", "min")
        boost_modes = ("multiply", "replace", "sum", "avg", "max", "min")
        if self.score_mode not in score_modes:
            raise ValueError(
                f"Unsupported score_mode {self.score_mode!r}; expected one of {score_modes}"
            )
        if self.boost_mode not in boost_modes:
            raise ValueError(
                f"Unsupported boost_mode {self.boost_mode!r}; expected one of {boost_modes}"
            )
@dataclass
class RerankConfig:
    """Configuration of the local re-ranking step.

    Attributes:
        enabled: Whether local re-ranking is applied; off by default.
        expression: Re-ranking expression string.
        description: Free-text description of the rule.
    """

    enabled: bool = False
    expression: str = ""
    description: str = ""
2. 修改 MultiLanguageQueryBuilder
文件: /home/tw/SearchEngine/search/multilang_query_builder.py
修改 `__init__` 方法:
def __init__(self, config, index_name, text_embedding_field=None, image_embedding_field=None):
    """Initialise the builder and capture the function_score configuration.

    All arguments are forwarded unchanged to the parent initialiser.

    Args:
        config: Configuration object; its ``function_score`` attribute is
            read if present.
        index_name: Passed through to the parent initialiser.
        text_embedding_field: Passed through to the parent initialiser.
        image_embedding_field: Passed through to the parent initialiser.
    """
    super().__init__(config, index_name, text_embedding_field, image_embedding_field)
    # None when the config object has no `function_score` attribute.
    self.function_score_config = getattr(config, 'function_score', None)
完全重写 _build_score_functions 方法(212-237行):
def _build_score_functions(self) -> List[Dict[str, Any]]:
    """Build the ``functions`` list of the function_score query from configuration.

    Each configured entry is a dictionary with a ``type`` key and is
    translated into the Elasticsearch function format:

    * ``filter_weight``      -> ``{"filter": ..., "weight": ...}``
    * ``field_value_factor`` -> ``{"field_value_factor": {...}}``
    * ``decay``              -> ``{<gauss|exp|linear>: {<field>: {...}}}``

    Entries with any other ``type`` are skipped and a warning is logged.

    Returns:
        The scoring functions in Elasticsearch format; an empty list when
        no function_score configuration exists or it is disabled.

    Raises:
        KeyError: If an entry lacks a required key (``filter`` for
            filter_weight, ``field`` for field_value_factor, ``field`` or
            ``scale`` for decay).
    """
    # Local import: the file's top-level imports are not visible from this snippet.
    import logging

    fs_config = self.function_score_config
    if not fs_config or not fs_config.enabled:
        return []

    functions = []
    for func_config in fs_config.functions:
        func_type = func_config.get('type')

        if func_type == 'filter_weight':
            # Filter + Weight
            functions.append({
                "filter": func_config['filter'],
                "weight": func_config.get('weight', 1.0)
            })

        elif func_type == 'field_value_factor':
            # Field Value Factor
            functions.append({
                "field_value_factor": {
                    "field": func_config['field'],
                    "factor": func_config.get('factor', 1.0),
                    "modifier": func_config.get('modifier', 'none'),
                    "missing": func_config.get('missing', 1.0)
                }
            })

        elif func_type == 'decay':
            # Decay function (gauss/exp/linear)
            decay_func = func_config.get('function', 'gauss')
            field_name = func_config['field']
            decay_params = {
                # NOTE(review): the 'now' fallback only makes sense for date
                # fields; numeric/geo entries should configure origin explicitly.
                "origin": func_config.get('origin', 'now'),
                "scale": func_config['scale']
            }
            # offset and decay are optional; send them only when configured.
            if 'offset' in func_config:
                decay_params['offset'] = func_config['offset']
            if 'decay' in func_config:
                decay_params['decay'] = func_config['decay']
            functions.append({
                decay_func: {
                    field_name: decay_params
                }
            })

        else:
            # Previously dropped without any signal, which hid typos in `type`.
            logging.getLogger(__name__).warning(
                "Ignoring function_score entry %r with unsupported type %r",
                func_config.get('name'), func_type
            )

    return functions
修改 build_multilang_query 方法(使用配置的score_mode和boost_mode):
# Wrap the query in function_score, using the configured score_mode/boost_mode
# and falling back to "sum"/"multiply" when no configuration is present.
fs_config = self.function_score_config
function_score_query = {
    "function_score": {
        "query": outer_bool,
        "functions": self._build_score_functions(),
        "score_mode": fs_config.score_mode if fs_config else "sum",
        "boost_mode": fs_config.boost_mode if fs_config else "multiply"
    }
}
# Compare against None rather than relying on truthiness, so that an
# explicitly configured max_boost of 0 is still sent.
if fs_config and fs_config.max_boost is not None:
    function_score_query["function_score"]["max_boost"] = fs_config.max_boost
3. 更新配置加载器
文件: /home/tw/SearchEngine/config/__init__.py 或 config/loader.py
确保正确加载 function_score 和 rerank 配置段
4. 更新示例配置
文件: /home/tw/SearchEngine/config/schema/customer1/config.yaml
在 ranking 配置后添加新配置(参见上面完整YAML)
5. 测试验证
测试用例:
测试filter_weight
curl -X POST /search/ -d '{"query": "玩具", "debug": true}'
# 检查 functions 中是否包含 filter+weight
# 验证 days_since_last_update <= 30 的商品分数更高
测试field_value_factor
# 检查 on_sell_days_boost 字段是否影响打分
# 验证 modifier 是否生效
测试decay函数
# 验证时间衰减是否生效
# 新商品应该得分更高
测试多个functions组合
# 验证 score_mode 和 boost_mode 是否正确工作
配置示例(完整)
简单配置(适合快速上手)
function_score:
  enabled: true
  functions:
    - type: "filter_weight"
      name: "新品提权"
      filter: {range: {days_since_last_update: {lte: 30}}}
      weight: 1.2
    - type: "filter_weight"
      name: "有视频"
      filter: {term: {is_video: true}}
      weight: 1.05
高级配置(适合深度定制)
function_score:
  enabled: true
  score_mode: "sum"
  boost_mode: "multiply"
  max_boost: 5.0
  functions:
    # Conditional weight
    - type: "filter_weight"
      name: "7天新品"
      filter: {range: {days_since_last_update: {lte: 7}}}
      weight: 1.3
    # Field value factor
    - type: "field_value_factor"
      name: "销量因子"
      field: "sales_count"
      factor: 0.01
      modifier: "log1p"
      missing: 1.0
    # Decay function
    - type: "decay"
      name: "时间衰减"
      function: "gauss"
      field: "create_time"
      origin: "now"
      scale: "30d"
      offset: "0d"
      decay: 0.5
优势
- 基于ES原生能力 - 所有配置都是ES直接支持的
- 配置灵活 - YAML格式,易于理解和修改
- 无需改代码 - 客户自己调整配置即可
- 类型安全 - 配置模型使用 dataclass + Literal 类型注解约束取值(注意:dataclass 本身不做运行时校验,如需强校验应在配置加载时校验,或改用 Pydantic)
- 文档完善 - 每个function都有name说明用途(如需更详细的说明,可增加description字段)