Commit 9df421edc66838b08a6693b98830c4c974583f14
1 parent
80f1e036
基于eval框架开始调参
Showing
10 changed files
with
543 additions
and
46 deletions
Show diff stats
config/config.yaml
| @@ -285,6 +285,8 @@ coarse_rank: | @@ -285,6 +285,8 @@ coarse_rank: | ||
| 285 | input_window: 700 | 285 | input_window: 700 |
| 286 | output_window: 240 | 286 | output_window: 240 |
| 287 | fusion: | 287 | fusion: |
| 288 | + es_bias: 0.1 | ||
| 289 | + es_exponent: 0.05 | ||
| 288 | text_bias: 0.1 | 290 | text_bias: 0.1 |
| 289 | text_exponent: 0.35 | 291 | text_exponent: 0.35 |
| 290 | # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) | 292 | # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) |
| @@ -294,7 +296,7 @@ coarse_rank: | @@ -294,7 +296,7 @@ coarse_rank: | ||
| 294 | knn_image_weight: 1.0 | 296 | knn_image_weight: 1.0 |
| 295 | knn_tie_breaker: 0.1 | 297 | knn_tie_breaker: 0.1 |
| 296 | knn_bias: 0.6 | 298 | knn_bias: 0.6 |
| 297 | - knn_exponent: 0.0 | 299 | + knn_exponent: 0.2 |
| 298 | 300 | ||
| 299 | # 精排配置(轻量 reranker) | 301 | # 精排配置(轻量 reranker) |
| 300 | fine_rank: | 302 | fine_rank: |
| @@ -317,11 +319,13 @@ rerank: | @@ -317,11 +319,13 @@ rerank: | ||
| 317 | rerank_doc_template: '{title}' | 319 | rerank_doc_template: '{title}' |
| 318 | service_profile: default | 320 | service_profile: default |
| 319 | 321 | ||
| 320 | - # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项) | 322 | + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn) |
| 321 | # 其中 knn_score 先做一层 dis_max: | 323 | # 其中 knn_score 先做一层 dis_max: |
| 322 | # max(knn_text_weight * text_knn, knn_image_weight * image_knn) | 324 | # max(knn_text_weight * text_knn, knn_image_weight * image_knn) |
| 323 | # + knn_tie_breaker * 另一侧较弱信号 | 325 | # + knn_tie_breaker * 另一侧较弱信号 |
| 324 | fusion: | 326 | fusion: |
| 327 | + es_bias: 0.1 | ||
| 328 | + es_exponent: 0.05 | ||
| 325 | rerank_bias: 1.0e-05 | 329 | rerank_bias: 1.0e-05 |
| 326 | rerank_exponent: 1.15 | 330 | rerank_exponent: 1.15 |
| 327 | fine_bias: 1.0e-05 | 331 | fine_bias: 1.0e-05 |
| @@ -334,7 +338,7 @@ rerank: | @@ -334,7 +338,7 @@ rerank: | ||
| 334 | knn_image_weight: 1.0 | 338 | knn_image_weight: 1.0 |
| 335 | knn_tie_breaker: 0.1 | 339 | knn_tie_breaker: 0.1 |
| 336 | knn_bias: 0.6 | 340 | knn_bias: 0.6 |
| 337 | - knn_exponent: 0.0 | 341 | + knn_exponent: 0.2 |
| 338 | 342 | ||
| 339 | # 可扩展服务/provider 注册表(单一配置源) | 343 | # 可扩展服务/provider 注册表(单一配置源) |
| 340 | services: | 344 | services: |
config/loader.py
| @@ -578,6 +578,8 @@ class AppConfigLoader: | @@ -578,6 +578,8 @@ class AppConfigLoader: | ||
| 578 | input_window=int(coarse_rank_cfg.get("input_window", 700)), | 578 | input_window=int(coarse_rank_cfg.get("input_window", 700)), |
| 579 | output_window=int(coarse_rank_cfg.get("output_window", 240)), | 579 | output_window=int(coarse_rank_cfg.get("output_window", 240)), |
| 580 | fusion=CoarseRankFusionConfig( | 580 | fusion=CoarseRankFusionConfig( |
| 581 | + es_bias=float(coarse_fusion_raw.get("es_bias", 0.1)), | ||
| 582 | + es_exponent=float(coarse_fusion_raw.get("es_exponent", 0.0)), | ||
| 581 | text_bias=float(coarse_fusion_raw.get("text_bias", 0.1)), | 583 | text_bias=float(coarse_fusion_raw.get("text_bias", 0.1)), |
| 582 | text_exponent=float(coarse_fusion_raw.get("text_exponent", 0.35)), | 584 | text_exponent=float(coarse_fusion_raw.get("text_exponent", 0.35)), |
| 583 | knn_text_weight=float(coarse_fusion_raw.get("knn_text_weight", 1.0)), | 585 | knn_text_weight=float(coarse_fusion_raw.get("knn_text_weight", 1.0)), |
| @@ -617,6 +619,8 @@ class AppConfigLoader: | @@ -617,6 +619,8 @@ class AppConfigLoader: | ||
| 617 | else None | 619 | else None |
| 618 | ), | 620 | ), |
| 619 | fusion=RerankFusionConfig( | 621 | fusion=RerankFusionConfig( |
| 622 | + es_bias=float(fusion_raw.get("es_bias", 0.1)), | ||
| 623 | + es_exponent=float(fusion_raw.get("es_exponent", 0.0)), | ||
| 620 | rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)), | 624 | rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)), |
| 621 | rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)), | 625 | rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)), |
| 622 | text_bias=float(fusion_raw.get("text_bias", 0.1)), | 626 | text_bias=float(fusion_raw.get("text_bias", 0.1)), |
config/schema.py
| @@ -105,9 +105,11 @@ class FunctionScoreConfig: | @@ -105,9 +105,11 @@ class FunctionScoreConfig: | ||
| 105 | class RerankFusionConfig: | 105 | class RerankFusionConfig: |
| 106 | """ | 106 | """ |
| 107 | Multiplicative fusion: fused = Π (max(score_i, 0) + bias_i) ** exponent_i | 107 | Multiplicative fusion: fused = Π (max(score_i, 0) + bias_i) ** exponent_i |
| 108 | - for rerank / text / knn terms respectively. | 108 | + for es / rerank / fine / text / knn terms respectively. |
| 109 | """ | 109 | """ |
| 110 | 110 | ||
| 111 | + es_bias: float = 0.1 | ||
| 112 | + es_exponent: float = 0.0 | ||
| 111 | rerank_bias: float = 0.00001 | 113 | rerank_bias: float = 0.00001 |
| 112 | rerank_exponent: float = 1.0 | 114 | rerank_exponent: float = 1.0 |
| 113 | text_bias: float = 0.1 | 115 | text_bias: float = 0.1 |
| @@ -127,10 +129,13 @@ class RerankFusionConfig: | @@ -127,10 +129,13 @@ class RerankFusionConfig: | ||
| 127 | class CoarseRankFusionConfig: | 129 | class CoarseRankFusionConfig: |
| 128 | """ | 130 | """ |
| 129 | Multiplicative fusion without model score: | 131 | Multiplicative fusion without model score: |
| 130 | - fused = (max(text, 0) + text_bias) ** text_exponent | 132 | + fused = (max(es, 0) + es_bias) ** es_exponent |
| 133 | + * (max(text, 0) + text_bias) ** text_exponent | ||
| 131 | * (max(knn, 0) + knn_bias) ** knn_exponent | 134 | * (max(knn, 0) + knn_bias) ** knn_exponent |
| 132 | """ | 135 | """ |
| 133 | 136 | ||
| 137 | + es_bias: float = 0.1 | ||
| 138 | + es_exponent: float = 0.0 | ||
| 134 | text_bias: float = 0.1 | 139 | text_bias: float = 0.1 |
| 135 | text_exponent: float = 0.35 | 140 | text_exponent: float = 0.35 |
| 136 | knn_text_weight: float = 1.0 | 141 | knn_text_weight: float = 1.0 |
docs/常用查询 - ES.md
| @@ -651,6 +651,9 @@ GET /search_products_tenant_170/_search | @@ -651,6 +651,9 @@ GET /search_products_tenant_170/_search | ||
| 651 | 651 | ||
| 652 | ## 检查字段是否存在 | 652 | ## 检查字段是否存在 |
| 653 | 653 | ||
| 654 | +- `GET search_products_tenant_163/_mapping` | ||
| 655 | +- `GET search_products_tenant_163/_field_caps?fields=*` | ||
| 656 | + | ||
| 654 | ```bash | 657 | ```bash |
| 655 | curl -u 'saas:<ES_PASSWORD>' -X POST \ | 658 | curl -u 'saas:<ES_PASSWORD>' -X POST \ |
| 656 | 'http://localhost:9200/search_products_tenant_163/_count' \ | 659 | 'http://localhost:9200/search_products_tenant_163/_count' \ |
docs/相关性检索优化说明.md
| @@ -155,7 +155,7 @@ | @@ -155,7 +155,7 @@ | ||
| 155 | 155 | ||
| 156 | 这种分层让 parser 不再返回 ES 专用的“语言计划字段”,职责边界更清晰。 | 156 | 这种分层让 parser 不再返回 ES 专用的“语言计划字段”,职责边界更清晰。 |
| 157 | 157 | ||
| 158 | -## 8. 融合打分(Rerank + Text + KNN) | 158 | +## 8. 融合打分(ES + Text + KNN + Model) |
| 159 | 159 | ||
| 160 | 当前融合逻辑位于 `search/rerank_client.py`。 | 160 | 当前融合逻辑位于 `search/rerank_client.py`。 |
| 161 | 161 | ||
| @@ -180,27 +180,83 @@ | @@ -180,27 +180,83 @@ | ||
| 180 | 180 | ||
| 181 | 如果以上子分都缺失,则回退到 ES `_score` 作为 `text_score`,避免纯文本召回被误打成 0。 | 181 | 如果以上子分都缺失,则回退到 ES `_score` 作为 `text_score`,避免纯文本召回被误打成 0。 |
| 182 | 182 | ||
| 183 | -### 8.2 最终融合公式 | 183 | +### 8.2 向量相关性大分 |
| 184 | + | ||
| 185 | +向量不是两路分别进入最终公式,而是**先融合成一个统一的 `knn_score`**。 | ||
| 186 | + | ||
| 187 | +当前实现位于 `search/rerank_client.py` 的 `_collect_knn_score_components()`: | ||
| 188 | + | ||
| 189 | +1. `text_knn_score = matched_queries["knn_query"]` | ||
| 190 | +2. `image_knn_score = matched_queries["image_knn_query"]` | ||
| 191 | +3. 分别乘权重: | ||
| 192 | + - `weighted_text_knn_score = knn_text_weight * text_knn_score` | ||
| 193 | + - `weighted_image_knn_score = knn_image_weight * image_knn_score` | ||
| 194 | +4. 再做一层 dismax 融合: | ||
| 195 | + - `primary_knn_score = max(weighted_text_knn_score, weighted_image_knn_score)` | ||
| 196 | + - `support_knn_score = 另一侧较弱信号` | ||
| 197 | + - `knn_score = primary_knn_score + knn_tie_breaker * support_knn_score` | ||
| 198 | + | ||
| 199 | +当前默认配置在 [config.yaml](/data/saas-search/config/config.yaml) 中是: | ||
| 200 | + | ||
| 201 | +- `knn_text_weight = 1.0` | ||
| 202 | +- `knn_image_weight = 1.0` | ||
| 203 | +- `knn_tie_breaker = 0.1` | ||
| 204 | + | ||
| 205 | +也就是说: | ||
| 206 | + | ||
| 207 | +- 现在确实是“文本 KNN + 图片 KNN 先融合成一项 `knn_score`” | ||
| 208 | +- 但**图片权重目前并没有略高于文本权重** | ||
| 209 | +- 当前两路权重是相等的,只是通过 dismax 机制保留“主路 + 辅助路” | ||
| 210 | + | ||
| 211 | +如果业务上希望 image 语义更主导,可以把 `knn_image_weight` 调成略高于 `knn_text_weight`,例如 `1.1 ~ 1.3` 这一类小幅领先值,再观察 query 分布与 bad case。 | ||
| 212 | + | ||
| 213 | +### 8.3 各阶段融合公式 | ||
| 184 | 214 | ||
| 185 | ```python | 215 | ```python |
| 186 | -fused_score = ( | ||
| 187 | - (rerank_score + 0.00001) * | ||
| 188 | - (text_score + 0.1) ** 0.35 * | ||
| 189 | - (knn_score + 0.6) ** 0.2 | 216 | +coarse_score = ( |
| 217 | + (es_score + es_bias) ** es_exponent | ||
| 218 | + * (text_score + text_bias) ** text_exponent | ||
| 219 | + * (knn_score + knn_bias) ** knn_exponent | ||
| 220 | +) | ||
| 221 | + | ||
| 222 | +fine_stage_score = ( | ||
| 223 | + (es_score + es_bias) ** es_exponent | ||
| 224 | + * (fine_score + fine_bias) ** fine_exponent | ||
| 225 | + * (text_score + text_bias) ** text_exponent | ||
| 226 | + * (knn_score + knn_bias) ** knn_exponent | ||
| 227 | + * style_boost | ||
| 228 | +) | ||
| 229 | + | ||
| 230 | +final_score = ( | ||
| 231 | + (es_score + es_bias) ** es_exponent | ||
| 232 | + * (rerank_score + rerank_bias) ** rerank_exponent | ||
| 233 | + * (fine_score + fine_bias) ** fine_exponent # 仅当 fine rank 打开且有分数时参与 | ||
| 234 | + * (text_score + text_bias) ** text_exponent | ||
| 235 | + * (knn_score + knn_bias) ** knn_exponent | ||
| 236 | + * style_boost | ||
| 190 | ) | 237 | ) |
| 191 | ``` | 238 | ``` |
| 192 | 239 | ||
| 193 | -设计意图: | 240 | +当前默认配置下: |
| 241 | + | ||
| 242 | +- `coarse`: `es_exponent=0.05`, `text_exponent=0.35`, `knn_exponent=0.2` | ||
| 243 | +- `fine/final`: `es_exponent=0.05`, `text_exponent=0.25`, `knn_exponent=0.2` | ||
| 244 | +- `final`: 额外有 `rerank_exponent=1.15` | ||
| 245 | + | ||
| 246 | +设计意图可以概括成: | ||
| 194 | 247 | ||
| 195 | -- `rerank_score` 是主导信号 | ||
| 196 | -- `text_score` 保留乘法增益,但通过较低指数避免词法高分过度放大 | ||
| 197 | -- `knn_score` 保持弱参与,只作为语义召回补充 | 248 | +- `es_score` 不再只做 debug,而是作为全阶段都保留的弱先验 |
| 249 | +- `text_score` 是稳定主干信号 | ||
| 250 | +- `knn_score` 是统一的语义信号入口 | ||
| 251 | +- `fine_score` / `rerank_score` 是越往后越贵、越强的模型因子 | ||
| 252 | +- `style_boost` 只在命中已选 SKU 时乘上去 | ||
| 198 | 253 | ||
| 199 | -### 8.3 调试字段 | 254 | +### 8.4 调试字段 |
| 200 | 255 | ||
| 201 | 开启 `debug=true` 后,`debug_info.per_result` 会暴露: | 256 | 开启 `debug=true` 后,`debug_info.per_result` 会暴露: |
| 202 | 257 | ||
| 203 | - `es_score` | 258 | - `es_score` |
| 259 | +- `es_factor` | ||
| 204 | - `rerank_score` | 260 | - `rerank_score` |
| 205 | - `text_score` | 261 | - `text_score` |
| 206 | - `text_source_score` | 262 | - `text_source_score` |
| @@ -261,10 +317,10 @@ sleep 3 | @@ -261,10 +317,10 @@ sleep 3 | ||
| 261 | 317 | ||
| 262 | 1. Query 解析 | 318 | 1. Query 解析 |
| 263 | 2. ES 召回 | 319 | 2. ES 召回 |
| 264 | -3. 粗排:只用 ES 内部文本/KNN 信号 | 320 | +3. 粗排:ES 原始总分 + 文本大分 + 统一 KNN 大分 |
| 265 | 4. 款式 SKU 选择 + title suffix | 321 | 4. 款式 SKU 选择 + title suffix |
| 266 | -5. 精排:轻量 reranker + 文本/KNN 融合 | ||
| 267 | -6. 最终 rerank:重 reranker + fine score + 文本/KNN 融合 | 322 | +5. 精排:轻量 reranker + ES/text/KNN 融合 |
| 323 | +6. 最终 rerank:重 reranker + fine score + ES/text/KNN 融合 | ||
| 268 | 7. 分页、补全字段、格式化返回 | 324 | 7. 分页、补全字段、格式化返回 |
| 269 | 325 | ||
| 270 | 主控代码在 [searcher.py](/data/saas-search/search/searcher.py),打分与 rerank 细节在 [rerank_client.py](/data/saas-search/search/rerank_client.py),配置定义在 [schema.py](/data/saas-search/config/schema.py) 和 [config.yaml](/data/saas-search/config/config.yaml)。 | 326 | 主控代码在 [searcher.py](/data/saas-search/search/searcher.py),打分与 rerank 细节在 [rerank_client.py](/data/saas-search/search/rerank_client.py),配置定义在 [schema.py](/data/saas-search/config/schema.py) 和 [config.yaml](/data/saas-search/config/config.yaml)。 |
| @@ -339,7 +395,8 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | @@ -339,7 +395,8 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | ||
| 339 | **Step 4:粗排** | 395 | **Step 4:粗排** |
| 340 | 粗排入口在 [searcher.py:638](/data/saas-search/search/searcher.py#L638),真正的打分在 [rerank_client.py:348](/data/saas-search/search/rerank_client.py#L348) 的 `coarse_resort_hits()`。 | 396 | 粗排入口在 [searcher.py:638](/data/saas-search/search/searcher.py#L638),真正的打分在 [rerank_client.py:348](/data/saas-search/search/rerank_client.py#L348) 的 `coarse_resort_hits()`。 |
| 341 | 397 | ||
| 342 | -粗排只看两类信号: | 398 | +粗排现在看三类信号: |
| 399 | +- `es_score` | ||
| 343 | - `text_score` | 400 | - `text_score` |
| 344 | - `knn_score` | 401 | - `knn_score` |
| 345 | 402 | ||
| @@ -362,9 +419,13 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | @@ -362,9 +419,13 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | ||
| 362 | - 分别乘自己的 weight | 419 | - 分别乘自己的 weight |
| 363 | - 取强的一路做主路 | 420 | - 取强的一路做主路 |
| 364 | - 弱的一路按 `knn_tie_breaker` 做辅助 | 421 | - 弱的一路按 `knn_tie_breaker` 做辅助 |
| 422 | +- 产出一个统一的 `knn_score` | ||
| 365 | 423 | ||
| 366 | -然后粗排融合公式在 [rerank_client.py:334](/data/saas-search/search/rerank_client.py#L334): | ||
| 367 | -- `coarse_score = (text_score + text_bias)^text_exponent * (knn_score + knn_bias)^knn_exponent` | 424 | +然后粗排融合公式在 [rerank_client.py:346](/data/saas-search/search/rerank_client.py#L346): |
| 425 | +- `coarse_score = es_factor * text_factor * knn_factor` | ||
| 426 | +- `es_factor = (max(es_score, 0) + es_bias)^es_exponent` | ||
| 427 | +- `text_factor = (max(text_score, 0) + text_bias)^text_exponent` | ||
| 428 | +- `knn_factor = (max(knn_score, 0) + knn_bias)^knn_exponent` | ||
| 368 | 429 | ||
| 369 | 配置定义在 [schema.py:124](/data/saas-search/config/schema.py#L124) 和 [config.yaml:231](/data/saas-search/config/config.yaml#L231)。 | 430 | 配置定义在 [schema.py:124](/data/saas-search/config/schema.py#L124) 和 [config.yaml:231](/data/saas-search/config/config.yaml#L231)。 |
| 370 | 431 | ||
| @@ -398,9 +459,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | @@ -398,9 +459,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | ||
| 398 | 3. 不再只按 `fine_score` 排,而是按融合后的 `_fine_fused_score` 排 | 459 | 3. 不再只按 `fine_score` 排,而是按融合后的 `_fine_fused_score` 排 |
| 399 | 460 | ||
| 400 | 精排融合公式现在是: | 461 | 精排融合公式现在是: |
| 401 | -- `fine_stage_score = fine_factor * text_factor * knn_factor * style_boost` | 462 | +- `fine_stage_score = es_factor * fine_factor * text_factor * knn_factor * style_boost` |
| 402 | 463 | ||
| 403 | 具体公共计算在 [rerank_client.py:286](/data/saas-search/search/rerank_client.py#L286) 的 `_compute_multiplicative_fusion()`: | 464 | 具体公共计算在 [rerank_client.py:286](/data/saas-search/search/rerank_client.py#L286) 的 `_compute_multiplicative_fusion()`: |
| 465 | +- `es_factor = (max(es_score, 0) + es_bias)^es_exponent` | ||
| 404 | - `fine_factor = (max(fine_score, 0) + fine_bias)^fine_exponent` | 466 | - `fine_factor = (max(fine_score, 0) + fine_bias)^fine_exponent` |
| 405 | - `text_factor = (max(text_score, 0) + text_bias)^text_exponent` | 467 | - `text_factor = (max(text_score, 0) + text_bias)^text_exponent` |
| 406 | - `knn_factor = (max(knn_score, 0) + knn_bias)^knn_exponent` | 468 | - `knn_factor = (max(knn_score, 0) + knn_bias)^knn_exponent` |
| @@ -423,9 +485,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | @@ -423,9 +485,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | ||
| 423 | 它和 fine rank 很像,但多了一个更重的模型分 `rerank_score`。 | 485 | 它和 fine rank 很像,但多了一个更重的模型分 `rerank_score`。 |
| 424 | 最终公式是: | 486 | 最终公式是: |
| 425 | 487 | ||
| 426 | -- `final_score = rerank_factor * fine_factor * text_factor * knn_factor * style_boost` | 488 | +- `final_score = es_factor * rerank_factor * fine_factor * text_factor * knn_factor * style_boost` |
| 427 | 489 | ||
| 428 | 也就是: | 490 | 也就是: |
| 491 | +- ES 原始总分也会继续保留到最终阶段 | ||
| 429 | - fine rank 产生的 `fine_score` 不会丢 | 492 | - fine rank 产生的 `fine_score` 不会丢 |
| 430 | - 到最终 rerank 时,它会继续作为一个乘法项参与最终融合 | 493 | - 到最终 rerank 时,它会继续作为一个乘法项参与最终融合 |
| 431 | 494 | ||
| @@ -468,9 +531,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | @@ -468,9 +531,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | ||
| 468 | - `final_page` | 531 | - `final_page` |
| 469 | 532 | ||
| 470 | 其中: | 533 | 其中: |
| 471 | -- coarse stage 主要保留 text/translation/knn 的拆分信号 | 534 | +- coarse stage 保留 es/text/translation/knn 的拆分信号 |
| 472 | - fine/rerank stage 现在都保留 `fusion_inputs`、`fusion_factors`、`fusion_summary` | 535 | - fine/rerank stage 现在都保留 `fusion_inputs`、`fusion_factors`、`fusion_summary` |
| 473 | - `fusion_summary` 来自真实计算过程本身,见 [rerank_client.py:265](/data/saas-search/search/rerank_client.py#L265) | 536 | - `fusion_summary` 来自真实计算过程本身,见 [rerank_client.py:265](/data/saas-search/search/rerank_client.py#L265) |
| 537 | +- 当 `fine_rank` 关闭时,`rerank.rank_change` 会继承 `coarse_rank` 作为上游阶段,不会错误地全部显示为 0 | ||
| 474 | 538 | ||
| 475 | 这点很重要,因为现在“实际排序逻辑”和“debug 展示逻辑”是同源的,不是两套各写一份。 | 539 | 这点很重要,因为现在“实际排序逻辑”和“debug 展示逻辑”是同源的,不是两套各写一份。 |
| 476 | 540 | ||
| @@ -486,6 +550,238 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | @@ -486,6 +550,238 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde | ||
| 486 | 550 | ||
| 487 | 后续可补充一个具体 query 的真实流转样例:例如假设用户搜 `black dress`,从 `parsed_query`、ES named queries 到 coarse/fine/final 各阶段的每个分数如何得出,完整手推一遍。 | 551 | 后续可补充一个具体 query 的真实流转样例:例如假设用户搜 `black dress`,从 `parsed_query`、ES named queries 到 coarse/fine/final 各阶段的每个分数如何得出,完整手推一遍。 |
| 488 | 552 | ||
| 553 | +## 12. 值得优先探索的相关性实验方向 | ||
| 554 | + | ||
| 555 | +下面这些方向按我对当前 rank 体系的判断,优先级大致是“先做低风险高收益,再做结构性升级”。 | ||
| 556 | + | ||
| 557 | +### 12.1 Query 分桶,而不是所有 query 共用一套融合参数 | ||
| 558 | + | ||
| 559 | +当前问题: | ||
| 560 | + | ||
| 561 | +- 所有 query 基本共用同一套 exponent / bias | ||
| 562 | +- 但“强词法 query”、“泛类目 query”、“风格词 query”、“图搜触发 query”、“中英混输 query”的最优信号配比通常不同 | ||
| 563 | + | ||
| 564 | +建议实验: | ||
| 565 | + | ||
| 566 | +- 先做轻量 query 分桶: | ||
| 567 | + - 精准实体词 | ||
| 568 | + - 泛类目词 | ||
| 569 | + - 风格/属性词 | ||
| 570 | + - 中英混输 | ||
| 571 | + - 带强图片语义的 query | ||
| 572 | +- 每个桶单独调: | ||
| 573 | + - `text_translation_weight` | ||
| 574 | + - `knn_text_weight / knn_image_weight` | ||
| 575 | + - `es_exponent / text_exponent / knn_exponent` | ||
| 576 | + | ||
| 577 | +为什么值得先做: | ||
| 578 | + | ||
| 579 | +- 不改主架构 | ||
| 580 | +- 容易上线灰度 | ||
| 581 | +- 往往比“全局调一个 exponent”稳定得多 | ||
| 582 | + | ||
| 583 | +### 12.2 把 image KNN 设成略高于 text KNN,但只在合适 query 上生效 | ||
| 584 | + | ||
| 585 | +当前问题: | ||
| 586 | + | ||
| 587 | +- 现在 `knn_text_weight = 1.0`,`knn_image_weight = 1.0` | ||
| 588 | +- 对鞋、服饰款式、图案、轮廓类 query,image embedding 往往比 text embedding 更接近用户真实意图 | ||
| 589 | +- 但不是所有 query 都适合直接全局抬高 image 权重 | ||
| 590 | + | ||
| 591 | +建议实验: | ||
| 592 | + | ||
| 593 | +- 离线先试: | ||
| 594 | + - `knn_image_weight = 1.1 / 1.2 / 1.3` | ||
| 595 | + - `knn_text_weight = 1.0` | ||
| 596 | +- 再进一步试 query gating: | ||
| 597 | + - 若 query 命中款式词、形状词、鞋包词、图案词,则抬高 image weight | ||
| 598 | + - 若 query 是明确品类词或强属性词,则维持中性 | ||
| 599 | + | ||
| 600 | +为什么我不建议一上来全局大幅抬高: | ||
| 601 | + | ||
| 602 | +- 会把一些“文本很明确,但图像泛相似”的结果抬上来 | ||
| 603 | +- 容易让高视觉相似、低语义准确的商品误冲前排 | ||
| 604 | + | ||
| 605 | +### 12.3 不只融合“分数”,还要融合“排名证据” | ||
| 606 | + | ||
| 607 | +当前问题: | ||
| 608 | + | ||
| 609 | +- 现在所有阶段都高度依赖 score 级别的乘法融合 | ||
| 610 | +- 不同信号源的 score 标度未必天然可比 | ||
| 611 | +- reranker 分数、ES score、named query score、KNN score 的数值空间差异很大 | ||
| 612 | + | ||
| 613 | +建议实验: | ||
| 614 | + | ||
| 615 | +- 增加 rank-based 特征: | ||
| 616 | + - `es_rank` | ||
| 617 | + - `text_rank` | ||
| 618 | + - `knn_rank` | ||
| 619 | + - `rerank_rank` | ||
| 620 | +- 试两类简单方法: | ||
| 621 | + - RRF(Reciprocal Rank Fusion) | ||
| 622 | + - score-rank 混合:先做 rank 融合,再乘少量 score 因子 | ||
| 623 | + | ||
| 624 | +为什么值得做: | ||
| 625 | + | ||
| 626 | +- 对异常 score 分布更稳 | ||
| 627 | +- 对模型偶发极端分更鲁棒 | ||
| 628 | +- 很适合拿来做基线对照 | ||
| 629 | + | ||
| 630 | +### 12.4 将 `base_query` 和 `translation_query` 从“单点 max”升级为“更完整的 lexical 证据” | ||
| 631 | + | ||
| 632 | +当前问题: | ||
| 633 | + | ||
| 634 | +- 文本大分现在只抓: | ||
| 635 | + - `base_query` | ||
| 636 | + - `max(base_query_trans_*)` | ||
| 637 | +- 这很干净,但可能过于压缩文本证据 | ||
| 638 | +- phrase 命中、best_fields 命中、多语言字段命中、字段质量差异,没有更细粒度地进入后续 rank | ||
| 639 | + | ||
| 640 | +建议实验: | ||
| 641 | + | ||
| 642 | +- 把 lexical 证据拆得更细: | ||
| 643 | + - exact / phrase | ||
| 644 | + - best_fields | ||
| 645 | + - title 命中 | ||
| 646 | + - category 命中 | ||
| 647 | + - brand/vendor 命中 | ||
| 648 | +- 后续不一定都入主公式,但可以先做 debug / feature log | ||
| 649 | + | ||
| 650 | +这样做的收益: | ||
| 651 | + | ||
| 652 | +- 更容易解释“为什么这条词法上明明更准却没排上来” | ||
| 653 | +- 为后续 learning-to-rank 或规则门控准备特征 | ||
| 654 | + | ||
| 655 | +### 12.5 增加“类目先验”和“商品类型约束” | ||
| 656 | + | ||
| 657 | +当前问题: | ||
| 658 | + | ||
| 659 | +- 现在体系更偏“文本/向量相似度驱动” | ||
| 660 | +- 对“牛仔裤 vs 连裤袜”这种 bad case,问题常常不只是分数融合,而是**商品类型约束太弱** | ||
| 661 | + | ||
| 662 | +建议实验: | ||
| 663 | + | ||
| 664 | +- query 侧先做轻量商品类型识别: | ||
| 665 | + - 裙子 | ||
| 666 | + - 裤子 | ||
| 667 | + - 上衣 | ||
| 668 | + - 鞋 | ||
| 669 | +- doc 侧取: | ||
| 670 | + - category_path | ||
| 671 | + - taxonomy leaf | ||
| 672 | + - 类目 embedding / one-hot | ||
| 673 | +- 然后试: | ||
| 674 | + - 作为 hard filter 候选约束 | ||
| 675 | + - 作为 coarse/final 的 boost 因子 | ||
| 676 | + - 作为 rerank 输入字段增强 | ||
| 677 | + | ||
| 678 | +这是我认为对明显 bad case 最有价值的一类结构性修复。 | ||
| 679 | + | ||
| 680 | +### 12.6 把“负证据”纳入体系,而不只是累加正证据 | ||
| 681 | + | ||
| 682 | +当前问题: | ||
| 683 | + | ||
| 684 | +- 当前乘法体系主要是在积累正向因子 | ||
| 685 | +- 但很多错误结果不是“正向不够强”,而是“存在明显负证据” | ||
| 686 | +- 例如 query 是“半身裙”,doc 却强命中“上衣”“打底衫”“连裤袜” | ||
| 687 | + | ||
| 688 | +建议实验: | ||
| 689 | + | ||
| 690 | +- 抽取轻量负词特征: | ||
| 691 | + - 商品类型冲突词 | ||
| 692 | + - 性别/人群冲突词 | ||
| 693 | + - 长度/版型冲突词 | ||
| 694 | +- 方式可以先很简单: | ||
| 695 | + - penalty factor | ||
| 696 | + - blacklist term penalty | ||
| 697 | + - query-doc type mismatch penalty | ||
| 698 | + | ||
| 699 | +这是当前体系里非常缺的一块。 | ||
| 700 | + | ||
| 701 | +### 12.7 把 KNN 从“单一总分”升级为“多语义子通道” | ||
| 702 | + | ||
| 703 | +当前问题: | ||
| 704 | + | ||
| 705 | +- 现在 KNN 最终会被压成一个 `knn_score` | ||
| 706 | +- 这对工程简单很好,但损失了“这条向量信号到底为什么相似”的信息 | ||
| 707 | + | ||
| 708 | +建议实验: | ||
| 709 | + | ||
| 710 | +- 分通道记录和使用: | ||
| 711 | + - text semantic similarity | ||
| 712 | + - image appearance similarity | ||
| 713 | + - category-aware similarity | ||
| 714 | + - style-aware similarity | ||
| 715 | +- 即使最终仍合成一个总分,也建议先保留分通道特征 | ||
| 716 | + | ||
| 717 | +这样未来才能回答: | ||
| 718 | + | ||
| 719 | +- 这条结果是“外观像” | ||
| 720 | +- 还是“描述语义像” | ||
| 721 | +- 还是“类目像但款式不对” | ||
| 722 | + | ||
| 723 | +### 12.8 从纯手工公式,逐步过渡到轻量 LTR | ||
| 724 | + | ||
| 725 | +当前问题: | ||
| 726 | + | ||
| 727 | +- 目前公式已经比较清晰,但本质还是手工 feature engineering + 手工 exponent | ||
| 728 | +- 一旦信号变多,靠手调很难长期维护 | ||
| 729 | + | ||
| 730 | +建议实验: | ||
| 731 | + | ||
| 732 | +- 先不引入复杂在线模型 | ||
| 733 | +- 先做离线 LTR baseline: | ||
| 734 | + - LambdaMART / XGBoost ranker | ||
| 735 | + - 输入现成特征: | ||
| 736 | + - es_score | ||
| 737 | + - text_score | ||
| 738 | + - text_source_score | ||
| 739 | + - translation_score | ||
| 740 | + - text_knn_score | ||
| 741 | + - image_knn_score | ||
| 742 | + - coarse_rank | ||
| 743 | + - rerank_score | ||
| 744 | + - category match | ||
| 745 | + - style intent match | ||
| 746 | + | ||
| 747 | +为什么这一步值得准备: | ||
| 748 | + | ||
| 749 | +- 你们现在的 debug 字段已经很接近 feature log 了 | ||
| 750 | +- 其实已经具备往 LTR 过渡的土壤 | ||
| 751 | + | ||
| 752 | +### 12.9 先把评估体系补齐,再谈大改 | ||
| 753 | + | ||
| 754 | +当前问题: | ||
| 755 | + | ||
| 756 | +- 很多相关性讨论容易停留在个例 | ||
| 757 | +- 但融合改动经常存在 query 分布层面的 tradeoff | ||
| 758 | + | ||
| 759 | +建议实验配套: | ||
| 760 | + | ||
| 761 | +- 建立 query slice 指标: | ||
| 762 | + - 鞋靴 | ||
| 763 | + - 裙装 | ||
| 764 | + - 裤装 | ||
| 765 | + - 中英混输 | ||
| 766 | + - 图像语义强 query | ||
| 767 | + - 属性词强 query | ||
| 768 | +- 每次实验至少看: | ||
| 769 | + - overall | ||
| 770 | + - top 1 | ||
| 771 | + - top 3 | ||
| 772 | + - slice breakdown | ||
| 773 | + - bad case 回归集 | ||
| 774 | + | ||
| 775 | +### 12.10 我对当前体系的几个核心判断 | ||
| 776 | + | ||
| 777 | +1. 当前体系最大的优点不是公式本身,而是已经把信号拆成了可解释的层级,这非常适合继续做实验。 | ||
| 778 | +2. 当前体系最大的短板不是“knn exponent 还不够准”,而是缺少 query 分桶、类目先验和负证据。 | ||
| 779 | +3. 只调融合公式还能继续拿到一部分收益,但中期最值得投入的是: | ||
| 780 | + - query-aware 参数 | ||
| 781 | + - 类型/类目约束 | ||
| 782 | + - score + rank 混合融合 | ||
| 783 | + - 为 LTR 做特征沉淀 | ||
| 784 | + | ||
| 489 | 785 | ||
| 490 | 786 | ||
| 491 | ## reranker方面: | 787 | ## reranker方面: |
scripts/evaluation/eval_framework/framework.py
| @@ -272,6 +272,40 @@ class SearchEvaluationFramework: | @@ -272,6 +272,40 @@ class SearchEvaluationFramework: | ||
| 272 | ranked.sort(key=lambda item: item["score"], reverse=True) | 272 | ranked.sort(key=lambda item: item["score"], reverse=True) |
| 273 | return ranked | 273 | return ranked |
| 274 | 274 | ||
| 275 | + def _assign_fixed_rerank_scores( | ||
| 276 | + self, | ||
| 277 | + query: str, | ||
| 278 | + spu_ids: Sequence[str], | ||
| 279 | + *, | ||
| 280 | + score: float, | ||
| 281 | + force_refresh: bool = False, | ||
| 282 | + ) -> Dict[str, float]: | ||
| 283 | + """Persist a fixed rerank score for a deduplicated ``spu_id`` list.""" | ||
| 284 | + normalized_ids: List[str] = [] | ||
| 285 | + seen: set[str] = set() | ||
| 286 | + for spu_id in spu_ids: | ||
| 287 | + sid = str(spu_id or "").strip() | ||
| 288 | + if not sid or sid in seen: | ||
| 289 | + continue | ||
| 290 | + seen.add(sid) | ||
| 291 | + normalized_ids.append(sid) | ||
| 292 | + if not normalized_ids: | ||
| 293 | + return {} | ||
| 294 | + | ||
| 295 | + cached = {} if force_refresh else self.store.get_rerank_scores(self.tenant_id, query) | ||
| 296 | + to_store: Dict[str, float] = {} | ||
| 297 | + for sid in normalized_ids: | ||
| 298 | + if force_refresh or sid not in cached or float(cached[sid]) != float(score): | ||
| 299 | + to_store[sid] = float(score) | ||
| 300 | + if to_store: | ||
| 301 | + self.store.upsert_rerank_scores( | ||
| 302 | + self.tenant_id, | ||
| 303 | + query, | ||
| 304 | + to_store, | ||
| 305 | + model_name="search_recall_pool_fixed", | ||
| 306 | + ) | ||
| 307 | + return {sid: float(score) for sid in normalized_ids} | ||
| 308 | + | ||
| 275 | def _rerank_batch_with_retry(self, query: str, docs: Sequence[Dict[str, Any]]) -> List[float]: | 309 | def _rerank_batch_with_retry(self, query: str, docs: Sequence[Dict[str, Any]]) -> List[float]: |
| 276 | if not docs: | 310 | if not docs: |
| 277 | return [] | 311 | return [] |
| @@ -631,12 +665,25 @@ class SearchEvaluationFramework: | @@ -631,12 +665,25 @@ class SearchEvaluationFramework: | ||
| 631 | search_size = max(int(search_depth), int(search_recall_top_k)) | 665 | search_size = max(int(search_depth), int(search_recall_top_k)) |
| 632 | search_payload = self.search_client.search(query=query, size=search_size, from_=0, language=language) | 666 | search_payload = self.search_client.search(query=query, size=search_size, from_=0, language=language) |
| 633 | search_results = list(search_payload.get("results") or []) | 667 | search_results = list(search_payload.get("results") or []) |
| 634 | - recall_n = min(int(search_recall_top_k), len(search_results)) | ||
| 635 | - pool_search_docs = search_results[:recall_n] | ||
| 636 | - pool_spu_ids = {str(d.get("spu_id")) for d in pool_search_docs if str(d.get("spu_id") or "").strip()} | 668 | + search_result_spu_ids = [str(doc.get("spu_id") or "").strip() for doc in search_results] |
| 669 | + recall_spu_ids: List[str] = [] | ||
| 670 | + seen_recall_spu_ids: set[str] = set() | ||
| 671 | + for spu_id in search_result_spu_ids[: int(search_recall_top_k)]: | ||
| 672 | + if not spu_id or spu_id in seen_recall_spu_ids: | ||
| 673 | + continue | ||
| 674 | + seen_recall_spu_ids.add(spu_id) | ||
| 675 | + recall_spu_ids.append(spu_id) | ||
| 676 | + recall_n = len(recall_spu_ids) | ||
| 677 | + pool_spu_ids = set(recall_spu_ids) | ||
| 637 | 678 | ||
| 638 | corpus = self.corpus_docs(refresh=False) | 679 | corpus = self.corpus_docs(refresh=False) |
| 639 | corpus_by_id = {str(d.get("spu_id")): d for d in corpus if str(d.get("spu_id") or "").strip()} | 680 | corpus_by_id = {str(d.get("spu_id")): d for d in corpus if str(d.get("spu_id") or "").strip()} |
| 681 | + self._assign_fixed_rerank_scores( | ||
| 682 | + query=query, | ||
| 683 | + spu_ids=recall_spu_ids, | ||
| 684 | + score=1.0, | ||
| 685 | + force_refresh=force_refresh_rerank, | ||
| 686 | + ) | ||
| 640 | 687 | ||
| 641 | rerank_pending_n = sum( | 688 | rerank_pending_n = sum( |
| 642 | 1 | 689 | 1 |
| @@ -697,12 +744,13 @@ class SearchEvaluationFramework: | @@ -697,12 +744,13 @@ class SearchEvaluationFramework: | ||
| 697 | else: | 744 | else: |
| 698 | ordered_docs: List[Dict[str, Any]] = [] | 745 | ordered_docs: List[Dict[str, Any]] = [] |
| 699 | seen_ordered: set[str] = set() | 746 | seen_ordered: set[str] = set() |
| 700 | - for doc in pool_search_docs: | ||
| 701 | - sid = str(doc.get("spu_id") or "") | 747 | + for sid in recall_spu_ids: |
| 702 | if not sid or sid in seen_ordered: | 748 | if not sid or sid in seen_ordered: |
| 703 | continue | 749 | continue |
| 704 | seen_ordered.add(sid) | 750 | seen_ordered.add(sid) |
| 705 | - ordered_docs.append(corpus_by_id.get(sid, doc)) | 751 | + doc = corpus_by_id.get(sid) |
| 752 | + if doc is not None: | ||
| 753 | + ordered_docs.append(doc) | ||
| 706 | for item in ranked_outside: | 754 | for item in ranked_outside: |
| 707 | sid = str(item["spu_id"]) | 755 | sid = str(item["spu_id"]) |
| 708 | if sid in seen_ordered: | 756 | if sid in seen_ordered: |
| @@ -730,9 +778,10 @@ class SearchEvaluationFramework: | @@ -730,9 +778,10 @@ class SearchEvaluationFramework: | ||
| 730 | 778 | ||
| 731 | rerank_depth_effective = min(int(rerank_depth), len(ranked_outside)) | 779 | rerank_depth_effective = min(int(rerank_depth), len(ranked_outside)) |
| 732 | search_labeled_results: List[Dict[str, Any]] = [] | 780 | search_labeled_results: List[Dict[str, Any]] = [] |
| 733 | - for rank, doc in enumerate(search_results, start=1): | ||
| 734 | - spu_id = str(doc.get("spu_id")) | ||
| 735 | - in_pool = rank <= recall_n | 781 | + for rank, search_doc in enumerate(search_results, start=1): |
| 782 | + spu_id = str(search_doc.get("spu_id") or "") | ||
| 783 | + doc = corpus_by_id.get(spu_id, search_doc) | ||
| 784 | + in_pool = spu_id in pool_spu_ids | ||
| 736 | search_labeled_results.append( | 785 | search_labeled_results.append( |
| 737 | { | 786 | { |
| 738 | "rank": rank, | 787 | "rank": rank, |
| @@ -998,4 +1047,3 @@ class SearchEvaluationFramework: | @@ -998,4 +1047,3 @@ class SearchEvaluationFramework: | ||
| 998 | output_json_path, | 1047 | output_json_path, |
| 999 | ) | 1048 | ) |
| 1000 | return payload | 1049 | return payload |
| 1001 | - |
search/rerank_client.py
| @@ -252,17 +252,18 @@ def _build_hit_signal_bundle( | @@ -252,17 +252,18 @@ def _build_hit_signal_bundle( | ||
| 252 | hit: Dict[str, Any], | 252 | hit: Dict[str, Any], |
| 253 | fusion: CoarseRankFusionConfig | RerankFusionConfig, | 253 | fusion: CoarseRankFusionConfig | RerankFusionConfig, |
| 254 | ) -> Dict[str, Any]: | 254 | ) -> Dict[str, Any]: |
| 255 | - es_score = _to_score(hit.get("_score")) | 255 | + raw_es_score = _to_score(hit.get("_raw_es_score", hit.get("_original_score", hit.get("_score")))) |
| 256 | + hit["_raw_es_score"] = raw_es_score | ||
| 256 | matched_queries = hit.get("matched_queries") | 257 | matched_queries = hit.get("matched_queries") |
| 257 | text_components = _collect_text_score_components( | 258 | text_components = _collect_text_score_components( |
| 258 | matched_queries, | 259 | matched_queries, |
| 259 | - es_score, | 260 | + raw_es_score, |
| 260 | translation_weight=fusion.text_translation_weight, | 261 | translation_weight=fusion.text_translation_weight, |
| 261 | ) | 262 | ) |
| 262 | knn_components = _collect_knn_score_components(matched_queries, fusion) | 263 | knn_components = _collect_knn_score_components(matched_queries, fusion) |
| 263 | return { | 264 | return { |
| 264 | "doc_id": hit.get("_id"), | 265 | "doc_id": hit.get("_id"), |
| 265 | - "es_score": es_score, | 266 | + "es_score": raw_es_score, |
| 266 | "matched_queries": matched_queries, | 267 | "matched_queries": matched_queries, |
| 267 | "text_components": text_components, | 268 | "text_components": text_components, |
| 268 | "knn_components": knn_components, | 269 | "knn_components": knn_components, |
| @@ -294,6 +295,7 @@ def _build_formula_summary( | @@ -294,6 +295,7 @@ def _build_formula_summary( | ||
| 294 | 295 | ||
| 295 | def _compute_multiplicative_fusion( | 296 | def _compute_multiplicative_fusion( |
| 296 | *, | 297 | *, |
| 298 | + es_score: float, | ||
| 297 | text_score: float, | 299 | text_score: float, |
| 298 | knn_score: float, | 300 | knn_score: float, |
| 299 | fusion: RerankFusionConfig, | 301 | fusion: RerankFusionConfig, |
| @@ -317,6 +319,7 @@ def _compute_multiplicative_fusion( | @@ -317,6 +319,7 @@ def _compute_multiplicative_fusion( | ||
| 317 | } | 319 | } |
| 318 | ) | 320 | ) |
| 319 | 321 | ||
| 322 | + _add_term("es_score", es_score, fusion.es_bias, fusion.es_exponent) | ||
| 320 | _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) | 323 | _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) |
| 321 | _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) | 324 | _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) |
| 322 | _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) | 325 | _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) |
| @@ -341,13 +344,15 @@ def _compute_multiplicative_fusion( | @@ -341,13 +344,15 @@ def _compute_multiplicative_fusion( | ||
| 341 | 344 | ||
| 342 | 345 | ||
| 343 | def _multiply_coarse_fusion_factors( | 346 | def _multiply_coarse_fusion_factors( |
| 347 | + es_score: float, | ||
| 344 | text_score: float, | 348 | text_score: float, |
| 345 | knn_score: float, | 349 | knn_score: float, |
| 346 | fusion: CoarseRankFusionConfig, | 350 | fusion: CoarseRankFusionConfig, |
| 347 | -) -> Tuple[float, float, float]: | 351 | +) -> Tuple[float, float, float, float]: |
| 352 | + es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent | ||
| 348 | text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent | 353 | text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent |
| 349 | knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent | 354 | knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent |
| 350 | - return text_factor, knn_factor, text_factor * knn_factor | 355 | + return es_factor, text_factor, knn_factor, es_factor * text_factor * knn_factor |
| 351 | 356 | ||
| 352 | 357 | ||
| 353 | def _has_selected_sku(hit: Dict[str, Any]) -> bool: | 358 | def _has_selected_sku(hit: Dict[str, Any]) -> bool: |
| @@ -359,7 +364,7 @@ def coarse_resort_hits( | @@ -359,7 +364,7 @@ def coarse_resort_hits( | ||
| 359 | fusion: Optional[CoarseRankFusionConfig] = None, | 364 | fusion: Optional[CoarseRankFusionConfig] = None, |
| 360 | debug: bool = False, | 365 | debug: bool = False, |
| 361 | ) -> List[Dict[str, Any]]: | 366 | ) -> List[Dict[str, Any]]: |
| 362 | - """Coarse rank with text/knn fusion only.""" | 367 | + """Coarse rank with es/text/knn multiplicative fusion.""" |
| 363 | if not es_hits: | 368 | if not es_hits: |
| 364 | return [] | 369 | return [] |
| 365 | 370 | ||
| @@ -373,7 +378,8 @@ def coarse_resort_hits( | @@ -373,7 +378,8 @@ def coarse_resort_hits( | ||
| 373 | knn_components = signal_bundle["knn_components"] | 378 | knn_components = signal_bundle["knn_components"] |
| 374 | text_score = signal_bundle["text_score"] | 379 | text_score = signal_bundle["text_score"] |
| 375 | knn_score = signal_bundle["knn_score"] | 380 | knn_score = signal_bundle["knn_score"] |
| 376 | - text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors( | 381 | + es_factor, text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors( |
| 382 | + es_score=es_score, | ||
| 377 | text_score=text_score, | 383 | text_score=text_score, |
| 378 | knn_score=knn_score, | 384 | knn_score=knn_score, |
| 379 | fusion=f, | 385 | fusion=f, |
| @@ -409,6 +415,7 @@ def coarse_resort_hits( | @@ -409,6 +415,7 @@ def coarse_resort_hits( | ||
| 409 | "knn_primary_score": knn_components["primary_knn_score"], | 415 | "knn_primary_score": knn_components["primary_knn_score"], |
| 410 | "knn_support_score": knn_components["support_knn_score"], | 416 | "knn_support_score": knn_components["support_knn_score"], |
| 411 | "knn_score": knn_score, | 417 | "knn_score": knn_score, |
| 418 | + "coarse_es_factor": es_factor, | ||
| 412 | "coarse_text_factor": text_factor, | 419 | "coarse_text_factor": text_factor, |
| 413 | "coarse_knn_factor": knn_factor, | 420 | "coarse_knn_factor": knn_factor, |
| 414 | "coarse_score": coarse_score, | 421 | "coarse_score": coarse_score, |
| @@ -435,13 +442,19 @@ def fuse_scores_and_resort( | @@ -435,13 +442,19 @@ def fuse_scores_and_resort( | ||
| 435 | 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 | 442 | 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 |
| 436 | 443 | ||
| 437 | 融合形式(由 ``fusion`` 配置 bias / exponent):: | 444 | 融合形式(由 ``fusion`` 配置 bias / exponent):: |
| 438 | - fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k * sku_boost | 445 | + fused = (max(es,0)+b_es)^e_es |
| 446 | + * (max(rerank,0)+b_r)^e_r | ||
| 447 | + * (max(fine,0)+b_f)^e_f | ||
| 448 | + * (max(text,0)+b_t)^e_t | ||
| 449 | + * (max(knn,0)+b_k)^e_k | ||
| 450 | + * sku_boost | ||
| 439 | 451 | ||
| 440 | 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过 | 452 | 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过 |
| 441 | ``query.style_intent.selected_sku_boost`` 配置。 | 453 | ``query.style_intent.selected_sku_boost`` 配置。 |
| 442 | 454 | ||
| 443 | 对每条 hit 会写入: | 455 | 对每条 hit 会写入: |
| 444 | - _original_score: 原始 ES 分数 | 456 | - _original_score: 原始 ES 分数 |
| 457 | + - _raw_es_score: ES 原始总分(后续阶段始终复用,不依赖可能被改写的 `_score`) | ||
| 445 | - _rerank_score: 重排服务返回的分数 | 458 | - _rerank_score: 重排服务返回的分数 |
| 446 | - _fused_score: 融合分数 | 459 | - _fused_score: 融合分数 |
| 447 | - _text_score: 文本相关性分数(优先取 named queries 的 base_query 分数) | 460 | - _text_score: 文本相关性分数(优先取 named queries 的 base_query 分数) |
| @@ -475,6 +488,7 @@ def fuse_scores_and_resort( | @@ -475,6 +488,7 @@ def fuse_scores_and_resort( | ||
| 475 | sku_selected = _has_selected_sku(hit) | 488 | sku_selected = _has_selected_sku(hit) |
| 476 | style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 | 489 | style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 |
| 477 | fusion_result = _compute_multiplicative_fusion( | 490 | fusion_result = _compute_multiplicative_fusion( |
| 491 | + es_score=signal_bundle["es_score"], | ||
| 478 | rerank_score=rerank_score, | 492 | rerank_score=rerank_score, |
| 479 | fine_score=fine_score, | 493 | fine_score=fine_score, |
| 480 | text_score=text_score, | 494 | text_score=text_score, |
| @@ -526,6 +540,7 @@ def fuse_scores_and_resort( | @@ -526,6 +540,7 @@ def fuse_scores_and_resort( | ||
| 526 | ), | 540 | ), |
| 527 | "rerank_factor": fusion_result["factors"].get("rerank_score"), | 541 | "rerank_factor": fusion_result["factors"].get("rerank_score"), |
| 528 | "fine_factor": fusion_result["factors"].get("fine_score"), | 542 | "fine_factor": fusion_result["factors"].get("fine_score"), |
| 543 | + "es_factor": fusion_result["factors"].get("es_score"), | ||
| 529 | "text_factor": fusion_result["factors"].get("text_score"), | 544 | "text_factor": fusion_result["factors"].get("text_score"), |
| 530 | "knn_factor": fusion_result["factors"].get("knn_score"), | 545 | "knn_factor": fusion_result["factors"].get("knn_score"), |
| 531 | "style_intent_selected_sku": sku_selected, | 546 | "style_intent_selected_sku": sku_selected, |
| @@ -654,6 +669,7 @@ def run_lightweight_rerank( | @@ -654,6 +669,7 @@ def run_lightweight_rerank( | ||
| 654 | sku_selected = _has_selected_sku(hit) | 669 | sku_selected = _has_selected_sku(hit) |
| 655 | style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 | 670 | style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 |
| 656 | fusion_result = _compute_multiplicative_fusion( | 671 | fusion_result = _compute_multiplicative_fusion( |
| 672 | + es_score=signal_bundle["es_score"], | ||
| 657 | fine_score=fine_score, | 673 | fine_score=fine_score, |
| 658 | text_score=text_score, | 674 | text_score=text_score, |
| 659 | knn_score=knn_score, | 675 | knn_score=knn_score, |
| @@ -679,7 +695,9 @@ def run_lightweight_rerank( | @@ -679,7 +695,9 @@ def run_lightweight_rerank( | ||
| 679 | "fusion_inputs": fusion_result["inputs"], | 695 | "fusion_inputs": fusion_result["inputs"], |
| 680 | "fusion_factors": fusion_result["factors"], | 696 | "fusion_factors": fusion_result["factors"], |
| 681 | "fusion_summary": fusion_result["summary"], | 697 | "fusion_summary": fusion_result["summary"], |
| 698 | + "es_score": signal_bundle["es_score"], | ||
| 682 | "fine_factor": fusion_result["factors"].get("fine_score"), | 699 | "fine_factor": fusion_result["factors"].get("fine_score"), |
| 700 | + "es_factor": fusion_result["factors"].get("es_score"), | ||
| 683 | "text_factor": fusion_result["factors"].get("text_score"), | 701 | "text_factor": fusion_result["factors"].get("text_score"), |
| 684 | "knn_factor": fusion_result["factors"].get("knn_score"), | 702 | "knn_factor": fusion_result["factors"].get("knn_score"), |
| 685 | "style_intent_selected_sku": sku_selected, | 703 | "style_intent_selected_sku": sku_selected, |
search/searcher.py
| @@ -994,7 +994,7 @@ class Searcher: | @@ -994,7 +994,7 @@ class Searcher: | ||
| 994 | if decision is not None: | 994 | if decision is not None: |
| 995 | style_intent_debug = decision.to_dict() | 995 | style_intent_debug = decision.to_dict() |
| 996 | 996 | ||
| 997 | - raw_score = hit.get("_score") | 997 | + raw_score = hit.get("_raw_es_score", hit.get("_original_score", hit.get("_score"))) |
| 998 | try: | 998 | try: |
| 999 | es_score = float(raw_score) if raw_score is not None else 0.0 | 999 | es_score = float(raw_score) if raw_score is not None else 0.0 |
| 1000 | except (TypeError, ValueError): | 1000 | except (TypeError, ValueError): |
| @@ -1024,6 +1024,7 @@ class Searcher: | @@ -1024,6 +1024,7 @@ class Searcher: | ||
| 1024 | 1024 | ||
| 1025 | if coarse_debug: | 1025 | if coarse_debug: |
| 1026 | debug_entry["coarse_score"] = coarse_debug.get("coarse_score") | 1026 | debug_entry["coarse_score"] = coarse_debug.get("coarse_score") |
| 1027 | + debug_entry["coarse_es_factor"] = coarse_debug.get("coarse_es_factor") | ||
| 1027 | debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor") | 1028 | debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor") |
| 1028 | debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor") | 1029 | debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor") |
| 1029 | 1030 | ||
| @@ -1033,6 +1034,7 @@ class Searcher: | @@ -1033,6 +1034,7 @@ class Searcher: | ||
| 1033 | debug_entry["score"] = rerank_debug.get("score") | 1034 | debug_entry["score"] = rerank_debug.get("score") |
| 1034 | debug_entry["rerank_score"] = rerank_debug.get("rerank_score") | 1035 | debug_entry["rerank_score"] = rerank_debug.get("rerank_score") |
| 1035 | debug_entry["fine_score"] = rerank_debug.get("fine_score") | 1036 | debug_entry["fine_score"] = rerank_debug.get("fine_score") |
| 1037 | + debug_entry["es_score"] = rerank_debug.get("es_score", es_score) | ||
| 1036 | debug_entry["text_score"] = rerank_debug.get("text_score") | 1038 | debug_entry["text_score"] = rerank_debug.get("text_score") |
| 1037 | debug_entry["knn_score"] = rerank_debug.get("knn_score") | 1039 | debug_entry["knn_score"] = rerank_debug.get("knn_score") |
| 1038 | debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs") | 1040 | debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs") |
| @@ -1040,6 +1042,7 @@ class Searcher: | @@ -1040,6 +1042,7 @@ class Searcher: | ||
| 1040 | debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary") | 1042 | debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary") |
| 1041 | debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") | 1043 | debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") |
| 1042 | debug_entry["fine_factor"] = rerank_debug.get("fine_factor") | 1044 | debug_entry["fine_factor"] = rerank_debug.get("fine_factor") |
| 1045 | + debug_entry["es_factor"] = rerank_debug.get("es_factor") | ||
| 1043 | debug_entry["text_factor"] = rerank_debug.get("text_factor") | 1046 | debug_entry["text_factor"] = rerank_debug.get("text_factor") |
| 1044 | debug_entry["knn_factor"] = rerank_debug.get("knn_factor") | 1047 | debug_entry["knn_factor"] = rerank_debug.get("knn_factor") |
| 1045 | debug_entry["fused_score"] = rerank_debug.get("fused_score") | 1048 | debug_entry["fused_score"] = rerank_debug.get("fused_score") |
| @@ -1049,11 +1052,13 @@ class Searcher: | @@ -1049,11 +1052,13 @@ class Searcher: | ||
| 1049 | debug_entry["doc_id"] = fine_debug.get("doc_id") | 1052 | debug_entry["doc_id"] = fine_debug.get("doc_id") |
| 1050 | debug_entry["score"] = fine_debug.get("score") | 1053 | debug_entry["score"] = fine_debug.get("score") |
| 1051 | debug_entry["fine_score"] = fine_debug.get("fine_score") | 1054 | debug_entry["fine_score"] = fine_debug.get("fine_score") |
| 1055 | + debug_entry["es_score"] = fine_debug.get("es_score", es_score) | ||
| 1052 | debug_entry["text_score"] = fine_debug.get("text_score") | 1056 | debug_entry["text_score"] = fine_debug.get("text_score") |
| 1053 | debug_entry["knn_score"] = fine_debug.get("knn_score") | 1057 | debug_entry["knn_score"] = fine_debug.get("knn_score") |
| 1054 | debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs") | 1058 | debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs") |
| 1055 | debug_entry["fusion_factors"] = fine_debug.get("fusion_factors") | 1059 | debug_entry["fusion_factors"] = fine_debug.get("fusion_factors") |
| 1056 | debug_entry["fusion_summary"] = fine_debug.get("fusion_summary") | 1060 | debug_entry["fusion_summary"] = fine_debug.get("fusion_summary") |
| 1061 | + debug_entry["es_factor"] = fine_debug.get("es_factor") | ||
| 1057 | debug_entry["rerank_input"] = fine_debug.get("rerank_input") | 1062 | debug_entry["rerank_input"] = fine_debug.get("rerank_input") |
| 1058 | 1063 | ||
| 1059 | initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1064 | initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| @@ -1061,6 +1066,14 @@ class Searcher: | @@ -1061,6 +1066,14 @@ class Searcher: | ||
| 1061 | fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1066 | fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| 1062 | rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1067 | rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| 1063 | final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1068 | final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| 1069 | + rerank_previous_rank = fine_rank if fine_rank is not None else coarse_rank | ||
| 1070 | + final_previous_rank = rerank_rank | ||
| 1071 | + if final_previous_rank is None: | ||
| 1072 | + final_previous_rank = fine_rank | ||
| 1073 | + if final_previous_rank is None: | ||
| 1074 | + final_previous_rank = coarse_rank | ||
| 1075 | + if final_previous_rank is None: | ||
| 1076 | + final_previous_rank = initial_rank | ||
| 1064 | 1077 | ||
| 1065 | def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: | 1078 | def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: |
| 1066 | if previous_rank is None or current_rank is None: | 1079 | if previous_rank is None or current_rank is None: |
| @@ -1078,8 +1091,10 @@ class Searcher: | @@ -1078,8 +1091,10 @@ class Searcher: | ||
| 1078 | "rank": coarse_rank, | 1091 | "rank": coarse_rank, |
| 1079 | "rank_change": _rank_change(initial_rank, coarse_rank), | 1092 | "rank_change": _rank_change(initial_rank, coarse_rank), |
| 1080 | "score": coarse_debug.get("coarse_score") if coarse_debug else None, | 1093 | "score": coarse_debug.get("coarse_score") if coarse_debug else None, |
| 1094 | + "es_score": coarse_debug.get("es_score") if coarse_debug else es_score, | ||
| 1081 | "text_score": coarse_debug.get("text_score") if coarse_debug else None, | 1095 | "text_score": coarse_debug.get("text_score") if coarse_debug else None, |
| 1082 | "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, | 1096 | "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, |
| 1097 | + "es_factor": coarse_debug.get("coarse_es_factor") if coarse_debug else None, | ||
| 1083 | "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None, | 1098 | "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None, |
| 1084 | "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None, | 1099 | "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None, |
| 1085 | "signals": coarse_debug, | 1100 | "signals": coarse_debug, |
| @@ -1093,8 +1108,10 @@ class Searcher: | @@ -1093,8 +1108,10 @@ class Searcher: | ||
| 1093 | else hit.get("_fine_fused_score", hit.get("_fine_score")) | 1108 | else hit.get("_fine_fused_score", hit.get("_fine_score")) |
| 1094 | ), | 1109 | ), |
| 1095 | "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), | 1110 | "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), |
| 1111 | + "es_score": fine_debug.get("es_score") if fine_debug else es_score, | ||
| 1096 | "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), | 1112 | "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), |
| 1097 | "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), | 1113 | "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), |
| 1114 | + "es_factor": fine_debug.get("es_factor") if fine_debug else None, | ||
| 1098 | "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None, | 1115 | "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None, |
| 1099 | "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None, | 1116 | "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None, |
| 1100 | "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None, | 1117 | "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None, |
| @@ -1103,8 +1120,9 @@ class Searcher: | @@ -1103,8 +1120,9 @@ class Searcher: | ||
| 1103 | }, | 1120 | }, |
| 1104 | "rerank": { | 1121 | "rerank": { |
| 1105 | "rank": rerank_rank, | 1122 | "rank": rerank_rank, |
| 1106 | - "rank_change": _rank_change(fine_rank, rerank_rank), | 1123 | + "rank_change": _rank_change(rerank_previous_rank, rerank_rank), |
| 1107 | "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), | 1124 | "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), |
| 1125 | + "es_score": rerank_debug.get("es_score") if rerank_debug else es_score, | ||
| 1108 | "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), | 1126 | "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), |
| 1109 | "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), | 1127 | "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), |
| 1110 | "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), | 1128 | "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), |
| @@ -1115,13 +1133,14 @@ class Searcher: | @@ -1115,13 +1133,14 @@ class Searcher: | ||
| 1115 | "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None, | 1133 | "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None, |
| 1116 | "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, | 1134 | "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, |
| 1117 | "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, | 1135 | "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, |
| 1136 | + "es_factor": rerank_debug.get("es_factor") if rerank_debug else None, | ||
| 1118 | "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, | 1137 | "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, |
| 1119 | "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None, | 1138 | "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None, |
| 1120 | "signals": rerank_debug, | 1139 | "signals": rerank_debug, |
| 1121 | }, | 1140 | }, |
| 1122 | "final_page": { | 1141 | "final_page": { |
| 1123 | "rank": final_rank, | 1142 | "rank": final_rank, |
| 1124 | - "rank_change": _rank_change(rerank_rank, final_rank), | 1143 | + "rank_change": _rank_change(final_previous_rank, final_rank), |
| 1125 | }, | 1144 | }, |
| 1126 | } | 1145 | } |
| 1127 | 1146 |
tests/test_rerank_client.py
| @@ -258,3 +258,35 @@ def test_fuse_scores_and_resort_uses_hit_level_fine_score_when_not_passed_separa | @@ -258,3 +258,35 @@ def test_fuse_scores_and_resort_uses_hit_level_fine_score_when_not_passed_separa | ||
| 258 | assert isclose(debug[0]["fine_factor"], (0.7 + 0.00001), rel_tol=1e-9) | 258 | assert isclose(debug[0]["fine_factor"], (0.7 + 0.00001), rel_tol=1e-9) |
| 259 | assert debug[0]["fusion_inputs"]["fine_score"] == 0.7 | 259 | assert debug[0]["fusion_inputs"]["fine_score"] == 0.7 |
| 260 | assert "fine_score=" in debug[0]["fusion_summary"] | 260 | assert "fine_score=" in debug[0]["fusion_summary"] |
| 261 | + | ||
| 262 | + | ||
| 263 | +def test_fuse_scores_and_resort_can_include_raw_es_score_as_factor(): | ||
| 264 | + hits = [ | ||
| 265 | + { | ||
| 266 | + "_id": "es-strong", | ||
| 267 | + "_score": 100.0, | ||
| 268 | + "matched_queries": {"base_query": 1.0, "knn_query": 0.0}, | ||
| 269 | + }, | ||
| 270 | + { | ||
| 271 | + "_id": "es-weak", | ||
| 272 | + "_score": 1.0, | ||
| 273 | + "matched_queries": {"base_query": 1.0, "knn_query": 0.0}, | ||
| 274 | + }, | ||
| 275 | + ] | ||
| 276 | + fusion = RerankFusionConfig( | ||
| 277 | + es_bias=0.0, | ||
| 278 | + es_exponent=1.0, | ||
| 279 | + rerank_bias=0.0, | ||
| 280 | + rerank_exponent=1.0, | ||
| 281 | + text_bias=0.0, | ||
| 282 | + text_exponent=0.0, | ||
| 283 | + knn_bias=1.0, | ||
| 284 | + knn_exponent=0.0, | ||
| 285 | + ) | ||
| 286 | + | ||
| 287 | + debug = fuse_scores_and_resort(hits, [1.0, 1.0], fusion=fusion, debug=True) | ||
| 288 | + | ||
| 289 | + assert [hit["_id"] for hit in hits] == ["es-strong", "es-weak"] | ||
| 290 | + assert isclose(hits[0]["_raw_es_score"], 100.0, rel_tol=1e-9) | ||
| 291 | + assert isclose(debug[0]["es_factor"], 100.0, rel_tol=1e-9) | ||
| 292 | + assert debug[0]["fusion_inputs"]["es_score"] == 100.0 |
tests/test_search_rerank_window.py
| @@ -10,6 +10,7 @@ import yaml | @@ -10,6 +10,7 @@ import yaml | ||
| 10 | 10 | ||
| 11 | from config import ( | 11 | from config import ( |
| 12 | ConfigLoader, | 12 | ConfigLoader, |
| 13 | + FineRankConfig, | ||
| 13 | FunctionScoreConfig, | 14 | FunctionScoreConfig, |
| 14 | IndexConfig, | 15 | IndexConfig, |
| 15 | QueryConfig, | 16 | QueryConfig, |
| @@ -944,3 +945,70 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp | @@ -944,3 +945,70 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp | ||
| 944 | assert result.debug_info["per_result"][0]["final_rank"] == 1 | 945 | assert result.debug_info["per_result"][0]["final_rank"] == 1 |
| 945 | assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0 | 946 | assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0 |
| 946 | assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0 | 947 | assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0 |
| 948 | + | ||
| 949 | + | ||
| 950 | +def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disabled(monkeypatch): | ||
| 951 | + es_client = _FakeESClient(total_hits=5) | ||
| 952 | + config = _build_search_config(rerank_enabled=True, rerank_window=5) | ||
| 953 | + config = SearchConfig( | ||
| 954 | + field_boosts=config.field_boosts, | ||
| 955 | + indexes=config.indexes, | ||
| 956 | + query_config=config.query_config, | ||
| 957 | + function_score=config.function_score, | ||
| 958 | + coarse_rank=config.coarse_rank, | ||
| 959 | + fine_rank=FineRankConfig(enabled=False, input_window=5, output_window=5), | ||
| 960 | + rerank=config.rerank, | ||
| 961 | + spu_config=config.spu_config, | ||
| 962 | + es_index_name=config.es_index_name, | ||
| 963 | + es_settings=config.es_settings, | ||
| 964 | + ) | ||
| 965 | + searcher = _build_searcher(config, es_client) | ||
| 966 | + context = create_request_context(reqid="rank-fallback", uid="u-rank-fallback") | ||
| 967 | + | ||
| 968 | + monkeypatch.setattr( | ||
| 969 | + "search.searcher.get_tenant_config_loader", | ||
| 970 | + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), | ||
| 971 | + ) | ||
| 972 | + | ||
| 973 | + def _fake_run_rerank(**kwargs): | ||
| 974 | + hits = kwargs["es_response"]["hits"]["hits"] | ||
| 975 | + hits.reverse() | ||
| 976 | + fused_debug = [] | ||
| 977 | + for idx, hit in enumerate(hits): | ||
| 978 | + hit["_fused_score"] = 100.0 - idx | ||
| 979 | + hit["_rerank_score"] = 1.0 - 0.1 * idx | ||
| 980 | + fused_debug.append( | ||
| 981 | + { | ||
| 982 | + "doc_id": hit["_id"], | ||
| 983 | + "score": hit["_fused_score"], | ||
| 984 | + "es_score": hit.get("_raw_es_score", hit.get("_score")), | ||
| 985 | + "rerank_score": hit["_rerank_score"], | ||
| 986 | + "text_score": hit.get("_text_score", hit.get("_score")), | ||
| 987 | + "knn_score": hit.get("_knn_score", 0.0), | ||
| 988 | + "es_factor": 1.0, | ||
| 989 | + "rerank_factor": 1.0, | ||
| 990 | + "text_factor": 1.0, | ||
| 991 | + "knn_factor": 1.0, | ||
| 992 | + "fused_score": hit["_fused_score"], | ||
| 993 | + } | ||
| 994 | + ) | ||
| 995 | + return kwargs["es_response"], {"model": "final-reranker"}, fused_debug | ||
| 996 | + | ||
| 997 | + monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank) | ||
| 998 | + | ||
| 999 | + result = searcher.search( | ||
| 1000 | + query="toy", | ||
| 1001 | + tenant_id="162", | ||
| 1002 | + from_=0, | ||
| 1003 | + size=5, | ||
| 1004 | + context=context, | ||
| 1005 | + enable_rerank=True, | ||
| 1006 | + debug=True, | ||
| 1007 | + ) | ||
| 1008 | + | ||
| 1009 | + per_result = {row["spu_id"]: row for row in result.debug_info["per_result"]} | ||
| 1010 | + moved = per_result["4"]["ranking_funnel"] | ||
| 1011 | + assert moved["fine_rank"]["rank"] is None | ||
| 1012 | + assert moved["rerank"]["rank"] == 1 | ||
| 1013 | + assert moved["rerank"]["rank_change"] == 4 | ||
| 1014 | + assert moved["final_page"]["rank_change"] == 0 |