Commit 9df421edc66838b08a6693b98830c4c974583f14

Authored by tangwang
1 parent 80f1e036

基于eval框架开始调参

config/config.yaml
@@ -285,6 +285,8 @@ coarse_rank: @@ -285,6 +285,8 @@ coarse_rank:
285 input_window: 700 285 input_window: 700
286 output_window: 240 286 output_window: 240
287 fusion: 287 fusion:
  288 + es_bias: 0.1
  289 + es_exponent: 0.05
288 text_bias: 0.1 290 text_bias: 0.1
289 text_exponent: 0.35 291 text_exponent: 0.35
290 # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) 292 # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
@@ -294,7 +296,7 @@ coarse_rank: @@ -294,7 +296,7 @@ coarse_rank:
294 knn_image_weight: 1.0 296 knn_image_weight: 1.0
295 knn_tie_breaker: 0.1 297 knn_tie_breaker: 0.1
296 knn_bias: 0.6 298 knn_bias: 0.6
297 - knn_exponent: 0.0 299 + knn_exponent: 0.2
298 300
299 # 精排配置(轻量 reranker) 301 # 精排配置(轻量 reranker)
300 fine_rank: 302 fine_rank:
@@ -317,11 +319,13 @@ rerank: @@ -317,11 +319,13 @@ rerank:
317 rerank_doc_template: '{title}' 319 rerank_doc_template: '{title}'
318 service_profile: default 320 service_profile: default
319 321
320 - # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项) 322 + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn 五项)
321 # 其中 knn_score 先做一层 dis_max: 323 # 其中 knn_score 先做一层 dis_max:
322 # max(knn_text_weight * text_knn, knn_image_weight * image_knn) 324 # max(knn_text_weight * text_knn, knn_image_weight * image_knn)
323 # + knn_tie_breaker * 另一侧较弱信号 325 # + knn_tie_breaker * 另一侧较弱信号
324 fusion: 326 fusion:
  327 + es_bias: 0.1
  328 + es_exponent: 0.05
325 rerank_bias: 1.0e-05 329 rerank_bias: 1.0e-05
326 rerank_exponent: 1.15 330 rerank_exponent: 1.15
327 fine_bias: 1.0e-05 331 fine_bias: 1.0e-05
@@ -334,7 +338,7 @@ rerank: @@ -334,7 +338,7 @@ rerank:
334 knn_image_weight: 1.0 338 knn_image_weight: 1.0
335 knn_tie_breaker: 0.1 339 knn_tie_breaker: 0.1
336 knn_bias: 0.6 340 knn_bias: 0.6
337 - knn_exponent: 0.0 341 + knn_exponent: 0.2
338 342
339 # 可扩展服务/provider 注册表(单一配置源) 343 # 可扩展服务/provider 注册表(单一配置源)
340 services: 344 services:
@@ -578,6 +578,8 @@ class AppConfigLoader: @@ -578,6 +578,8 @@ class AppConfigLoader:
578 input_window=int(coarse_rank_cfg.get("input_window", 700)), 578 input_window=int(coarse_rank_cfg.get("input_window", 700)),
579 output_window=int(coarse_rank_cfg.get("output_window", 240)), 579 output_window=int(coarse_rank_cfg.get("output_window", 240)),
580 fusion=CoarseRankFusionConfig( 580 fusion=CoarseRankFusionConfig(
  581 + es_bias=float(coarse_fusion_raw.get("es_bias", 0.1)),
  582 + es_exponent=float(coarse_fusion_raw.get("es_exponent", 0.0)),
581 text_bias=float(coarse_fusion_raw.get("text_bias", 0.1)), 583 text_bias=float(coarse_fusion_raw.get("text_bias", 0.1)),
582 text_exponent=float(coarse_fusion_raw.get("text_exponent", 0.35)), 584 text_exponent=float(coarse_fusion_raw.get("text_exponent", 0.35)),
583 knn_text_weight=float(coarse_fusion_raw.get("knn_text_weight", 1.0)), 585 knn_text_weight=float(coarse_fusion_raw.get("knn_text_weight", 1.0)),
@@ -617,6 +619,8 @@ class AppConfigLoader: @@ -617,6 +619,8 @@ class AppConfigLoader:
617 else None 619 else None
618 ), 620 ),
619 fusion=RerankFusionConfig( 621 fusion=RerankFusionConfig(
  622 + es_bias=float(fusion_raw.get("es_bias", 0.1)),
  623 + es_exponent=float(fusion_raw.get("es_exponent", 0.0)),
620 rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)), 624 rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)),
621 rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)), 625 rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)),
622 text_bias=float(fusion_raw.get("text_bias", 0.1)), 626 text_bias=float(fusion_raw.get("text_bias", 0.1)),
@@ -105,9 +105,11 @@ class FunctionScoreConfig: @@ -105,9 +105,11 @@ class FunctionScoreConfig:
105 class RerankFusionConfig: 105 class RerankFusionConfig:
106 """ 106 """
107 Multiplicative fusion: fused = Π (max(score_i, 0) + bias_i) ** exponent_i 107 Multiplicative fusion: fused = Π (max(score_i, 0) + bias_i) ** exponent_i
108 - for rerank / text / knn terms respectively. 108 + for es / rerank / fine / text / knn terms respectively.
109 """ 109 """
110 110
  111 + es_bias: float = 0.1
  112 + es_exponent: float = 0.0
111 rerank_bias: float = 0.00001 113 rerank_bias: float = 0.00001
112 rerank_exponent: float = 1.0 114 rerank_exponent: float = 1.0
113 text_bias: float = 0.1 115 text_bias: float = 0.1
@@ -127,10 +129,13 @@ class RerankFusionConfig: @@ -127,10 +129,13 @@ class RerankFusionConfig:
127 class CoarseRankFusionConfig: 129 class CoarseRankFusionConfig:
128 """ 130 """
129 Multiplicative fusion without model score: 131 Multiplicative fusion without model score:
130 - fused = (max(text, 0) + text_bias) ** text_exponent 132 + fused = (max(es, 0) + es_bias) ** es_exponent
  133 + * (max(text, 0) + text_bias) ** text_exponent
131 * (max(knn, 0) + knn_bias) ** knn_exponent 134 * (max(knn, 0) + knn_bias) ** knn_exponent
132 """ 135 """
133 136
  137 + es_bias: float = 0.1
  138 + es_exponent: float = 0.0
134 text_bias: float = 0.1 139 text_bias: float = 0.1
135 text_exponent: float = 0.35 140 text_exponent: float = 0.35
136 knn_text_weight: float = 1.0 141 knn_text_weight: float = 1.0
docs/常用查询 - ES.md
@@ -651,6 +651,9 @@ GET /search_products_tenant_170/_search @@ -651,6 +651,9 @@ GET /search_products_tenant_170/_search
651 651
652 ## 检查字段是否存在 652 ## 检查字段是否存在
653 653
  654 +GET search_products_tenant_163/_mapping
  655 +GET search_products_tenant_163/_field_caps?fields=*
  656 +
654 ```bash 657 ```bash
655 curl -u '<ES_USER>:<ES_PASSWORD>' -X POST \ 658 curl -u '<ES_USER>:<ES_PASSWORD>' -X POST \
656 'http://localhost:9200/search_products_tenant_163/_count' \ 659 'http://localhost:9200/search_products_tenant_163/_count' \
docs/相关性检索优化说明.md
@@ -155,7 +155,7 @@ @@ -155,7 +155,7 @@
155 155
156 这种分层让 parser 不再返回 ES 专用的“语言计划字段”,职责边界更清晰。 156 这种分层让 parser 不再返回 ES 专用的“语言计划字段”,职责边界更清晰。
157 157
158 -## 8. 融合打分(Rerank + Text + KNN) 158 +## 8. 融合打分(ES + Text + KNN + Model)
159 159
160 当前融合逻辑位于 `search/rerank_client.py`。 160 当前融合逻辑位于 `search/rerank_client.py`。
161 161
@@ -180,27 +180,83 @@ @@ -180,27 +180,83 @@
180 180
181 如果以上子分都缺失,则回退到 ES `_score` 作为 `text_score`,避免纯文本召回被误打成 0。 181 如果以上子分都缺失,则回退到 ES `_score` 作为 `text_score`,避免纯文本召回被误打成 0。
182 182
183 -### 8.2 最终融合公式 183 +### 8.2 向量相关性大分
  184 +
  185 +向量不是两路分别进入最终公式,而是**先融合成一个统一的 `knn_score`**。
  186 +
  187 +当前实现位于 `search/rerank_client.py` 的 `_collect_knn_score_components()`:
  188 +
  189 +1. `text_knn_score = matched_queries["knn_query"]`
  190 +2. `image_knn_score = matched_queries["image_knn_query"]`
  191 +3. 分别乘权重:
  192 + - `weighted_text_knn_score = knn_text_weight * text_knn_score`
  193 + - `weighted_image_knn_score = knn_image_weight * image_knn_score`
  194 +4. 再做一层 dismax 融合:
  195 + - `primary_knn_score = max(weighted_text_knn_score, weighted_image_knn_score)`
  196 + - `support_knn_score = 另一侧较弱信号`
  197 + - `knn_score = primary_knn_score + knn_tie_breaker * support_knn_score`
  198 +
  199 +当前默认配置在 [config.yaml](/data/saas-search/config/config.yaml) 中是:
  200 +
  201 +- `knn_text_weight = 1.0`
  202 +- `knn_image_weight = 1.0`
  203 +- `knn_tie_breaker = 0.1`
  204 +
  205 +也就是说:
  206 +
  207 +- 现在确实是“文本 KNN + 图片 KNN 先融合成一项 `knn_score`”
  208 +- 但**图片权重目前并没有略高于文本权重**
  209 +- 当前两路权重是相等的,只是通过 dismax 机制保留“主路 + 辅助路”
  210 +
  211 +如果业务上希望 image 语义更主导,可以把 `knn_image_weight` 调成略高于 `knn_text_weight`,例如 `1.1 ~ 1.3` 这一类小幅领先值,再观察 query 分布与 bad case。
  212 +
  213 +### 8.3 各阶段融合公式
184 214
185 ```python 215 ```python
186 -fused_score = (  
187 - (rerank_score + 0.00001) *  
188 - (text_score + 0.1) ** 0.35 *  
189 - (knn_score + 0.6) ** 0.2 216 +coarse_score = (
  217 + (es_score + es_bias) ** es_exponent
  218 + * (text_score + text_bias) ** text_exponent
  219 + * (knn_score + knn_bias) ** knn_exponent
  220 +)
  221 +
  222 +fine_stage_score = (
  223 + (es_score + es_bias) ** es_exponent
  224 + * (fine_score + fine_bias) ** fine_exponent
  225 + * (text_score + text_bias) ** text_exponent
  226 + * (knn_score + knn_bias) ** knn_exponent
  227 + * style_boost
  228 +)
  229 +
  230 +final_score = (
  231 + (es_score + es_bias) ** es_exponent
  232 + * (rerank_score + rerank_bias) ** rerank_exponent
  233 + * (fine_score + fine_bias) ** fine_exponent # 仅当 fine rank 打开且有分数时参与
  234 + * (text_score + text_bias) ** text_exponent
  235 + * (knn_score + knn_bias) ** knn_exponent
  236 + * style_boost
190 ) 237 )
191 ``` 238 ```
192 239
193 -设计意图: 240 +当前默认配置下:
  241 +
  242 +- `coarse`: `es_exponent=0.05`, `text_exponent=0.35`, `knn_exponent=0.2`
  243 +- `fine/final`: `es_exponent=0.05`, `text_exponent=0.25`, `knn_exponent=0.2`
  244 +- `final`: 额外有 `rerank_exponent=1.15`
  245 +
  246 +设计意图可以概括成:
194 247
195 -- `rerank_score` 是主导信号  
196 -- `text_score` 保留乘法增益,但通过较低指数避免词法高分过度放大  
197 -- `knn_score` 保持弱参与,只作为语义召回补充 248 +- `es_score` 不再只做 debug,而是作为全阶段都保留的弱先验
  249 +- `text_score` 是稳定主干信号
  250 +- `knn_score` 是统一的语义信号入口
  251 +- `fine_score` / `rerank_score` 是越往后越贵、越强的模型因子
  252 +- `style_boost` 只在命中已选 SKU 时乘上去
198 253
199 -### 8.3 调试字段 254 +### 8.4 调试字段
200 255
201 开启 `debug=true` 后,`debug_info.per_result` 会暴露: 256 开启 `debug=true` 后,`debug_info.per_result` 会暴露:
202 257
203 - `es_score` 258 - `es_score`
  259 +- `es_factor`
204 - `rerank_score` 260 - `rerank_score`
205 - `text_score` 261 - `text_score`
206 - `text_source_score` 262 - `text_source_score`
@@ -261,10 +317,10 @@ sleep 3 @@ -261,10 +317,10 @@ sleep 3
261 317
262 1. Query 解析 318 1. Query 解析
263 2. ES 召回 319 2. ES 召回
264 -3. 粗排:只用 ES 内部文本/KNN 信号 320 +3. 粗排:ES 原始总分 + 文本大分 + 统一 KNN 大分
265 4. 款式 SKU 选择 + title suffix 321 4. 款式 SKU 选择 + title suffix
266 -5. 精排:轻量 reranker + 文本/KNN 融合  
267 -6. 最终 rerank:重 reranker + fine score + 文本/KNN 融合 322 +5. 精排:轻量 reranker + ES/text/KNN 融合
  323 +6. 最终 rerank:重 reranker + fine score + ES/text/KNN 融合
268 7. 分页、补全字段、格式化返回 324 7. 分页、补全字段、格式化返回
269 325
270 主控代码在 [searcher.py](/data/saas-search/search/searcher.py),打分与 rerank 细节在 [rerank_client.py](/data/saas-search/search/rerank_client.py),配置定义在 [schema.py](/data/saas-search/config/schema.py) 和 [config.yaml](/data/saas-search/config/config.yaml)。 326 主控代码在 [searcher.py](/data/saas-search/search/searcher.py),打分与 rerank 细节在 [rerank_client.py](/data/saas-search/search/rerank_client.py),配置定义在 [schema.py](/data/saas-search/config/schema.py) 和 [config.yaml](/data/saas-search/config/config.yaml)。
@@ -339,7 +395,8 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde @@ -339,7 +395,8 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde
339 **Step 4:粗排** 395 **Step 4:粗排**
340 粗排入口在 [searcher.py:638](/data/saas-search/search/searcher.py#L638),真正的打分在 [rerank_client.py:348](/data/saas-search/search/rerank_client.py#L348) 的 `coarse_resort_hits()`。 396 粗排入口在 [searcher.py:638](/data/saas-search/search/searcher.py#L638),真正的打分在 [rerank_client.py:348](/data/saas-search/search/rerank_client.py#L348) 的 `coarse_resort_hits()`。
341 397
342 -粗排只看两类信号: 398 +粗排现在看三类信号:
  399 +- `es_score`
343 - `text_score` 400 - `text_score`
344 - `knn_score` 401 - `knn_score`
345 402
@@ -362,9 +419,13 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde @@ -362,9 +419,13 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde
362 - 分别乘自己的 weight 419 - 分别乘自己的 weight
363 - 取强的一路做主路 420 - 取强的一路做主路
364 - 弱的一路按 `knn_tie_breaker` 做辅助 421 - 弱的一路按 `knn_tie_breaker` 做辅助
  422 +- 产出一个统一的 `knn_score`
365 423
366 -然后粗排融合公式在 [rerank_client.py:334](/data/saas-search/search/rerank_client.py#L334):  
367 -- `coarse_score = (text_score + text_bias)^text_exponent * (knn_score + knn_bias)^knn_exponent` 424 +然后粗排融合公式在 [rerank_client.py:346](/data/saas-search/search/rerank_client.py#L346):
  425 +- `coarse_score = es_factor * text_factor * knn_factor`
  426 +- `es_factor = (es_score + es_bias)^es_exponent`
  427 +- `text_factor = (text_score + text_bias)^text_exponent`
  428 +- `knn_factor = (knn_score + knn_bias)^knn_exponent`
368 429
369 配置定义在 [schema.py:124](/data/saas-search/config/schema.py#L124) 和 [config.yaml:231](/data/saas-search/config/config.yaml#L231)。 430 配置定义在 [schema.py:124](/data/saas-search/config/schema.py#L124) 和 [config.yaml:231](/data/saas-search/config/config.yaml#L231)。
370 431
@@ -398,9 +459,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde @@ -398,9 +459,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde
398 3. 不再只按 `fine_score` 排,而是按融合后的 `_fine_fused_score` 排 459 3. 不再只按 `fine_score` 排,而是按融合后的 `_fine_fused_score` 排
399 460
400 精排融合公式现在是: 461 精排融合公式现在是:
401 -- `fine_stage_score = fine_factor * text_factor * knn_factor * style_boost` 462 +- `fine_stage_score = es_factor * fine_factor * text_factor * knn_factor * style_boost`
402 463
403 具体公共计算在 [rerank_client.py:286](/data/saas-search/search/rerank_client.py#L286) 的 `_compute_multiplicative_fusion()`: 464 具体公共计算在 [rerank_client.py:286](/data/saas-search/search/rerank_client.py#L286) 的 `_compute_multiplicative_fusion()`:
  465 +- `es_factor = (es_score + es_bias)^es_exponent`
404 - `fine_factor = (fine_score + fine_bias)^fine_exponent` 466 - `fine_factor = (fine_score + fine_bias)^fine_exponent`
405 - `text_factor = (text_score + text_bias)^text_exponent` 467 - `text_factor = (text_score + text_bias)^text_exponent`
406 - `knn_factor = (knn_score + knn_bias)^knn_exponent` 468 - `knn_factor = (knn_score + knn_bias)^knn_exponent`
@@ -423,9 +485,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde @@ -423,9 +485,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde
423 它和 fine rank 很像,但多了一个更重的模型分 `rerank_score`。 485 它和 fine rank 很像,但多了一个更重的模型分 `rerank_score`。
424 最终公式是: 486 最终公式是:
425 487
426 -- `final_score = rerank_factor * fine_factor * text_factor * knn_factor * style_boost` 488 +- `final_score = es_factor * rerank_factor * fine_factor * text_factor * knn_factor * style_boost`
427 489
428 也就是: 490 也就是:
  491 +- ES 原始总分也会继续保留到最终阶段
429 - fine rank 产生的 `fine_score` 不会丢 492 - fine rank 产生的 `fine_score` 不会丢
430 - 到最终 rerank 时,它会继续作为一个乘法项参与最终融合 493 - 到最终 rerank 时,它会继续作为一个乘法项参与最终融合
431 494
@@ -468,9 +531,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde @@ -468,9 +531,10 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde
468 - `final_page` 531 - `final_page`
469 532
470 其中: 533 其中:
471 -- coarse stage 主要保留 text/translation/knn 的拆分信号 534 +- coarse stage 保留 es/text/translation/knn 的拆分信号
472 - fine/rerank stage 现在都保留 `fusion_inputs`、`fusion_factors`、`fusion_summary` 535 - fine/rerank stage 现在都保留 `fusion_inputs`、`fusion_factors`、`fusion_summary`
473 - `fusion_summary` 来自真实计算过程本身,见 [rerank_client.py:265](/data/saas-search/search/rerank_client.py#L265) 536 - `fusion_summary` 来自真实计算过程本身,见 [rerank_client.py:265](/data/saas-search/search/rerank_client.py#L265)
  537 +- 当 `fine_rank` 关闭时,`rerank.rank_change` 会继承 `coarse_rank` 作为上游阶段,不会错误地全部显示为 0
474 538
475 这点很重要,因为现在“实际排序逻辑”和“debug 展示逻辑”是同源的,不是两套各写一份。 539 这点很重要,因为现在“实际排序逻辑”和“debug 展示逻辑”是同源的,不是两套各写一份。
476 540
@@ -486,6 +550,238 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde @@ -486,6 +550,238 @@ KNN 部分在 [es_query_builder.py:250](/data/saas-search/search/es_query_builde
486 550
487 后续可以补充“一个具体 query 的真实流转样例”:例如假设用户搜 `black dress`,把它从 `parsed_query`、ES named queries 到 coarse/fine/final 的每个分数如何得出,完整手推一遍。 551 后续可以补充“一个具体 query 的真实流转样例”:例如假设用户搜 `black dress`,把它从 `parsed_query`、ES named queries 到 coarse/fine/final 的每个分数如何得出,完整手推一遍。
488 552
  553 +## 12. 值得优先探索的相关性实验方向
  554 +
  555 +下面这些方向按我对当前 rank 体系的判断,优先级大致是“先做低风险高收益,再做结构性升级”。
  556 +
  557 +### 12.1 Query 分桶,而不是所有 query 共用一套融合参数
  558 +
  559 +当前问题:
  560 +
  561 +- 所有 query 基本共用同一套 exponent / bias
  562 +- 但“强词法 query”、“泛类目 query”、“风格词 query”、“图搜触发 query”、“中英混输 query”的最优信号配比通常不同
  563 +
  564 +建议实验:
  565 +
  566 +- 先做轻量 query 分桶:
  567 + - 精准实体词
  568 + - 泛类目词
  569 + - 风格/属性词
  570 + - 中英混输
  571 + - 带强图片语义的 query
  572 +- 每个桶单独调:
  573 + - `text_translation_weight`
  574 + - `knn_text_weight / knn_image_weight`
  575 + - `es_exponent / text_exponent / knn_exponent`
  576 +
  577 +为什么值得先做:
  578 +
  579 +- 不改主架构
  580 +- 容易上线灰度
  581 +- 往往比“全局调一个 exponent”稳定得多
  582 +
  583 +### 12.2 把 image KNN 设成略高于 text KNN,但只在合适 query 上生效
  584 +
  585 +当前问题:
  586 +
  587 +- 现在 `knn_text_weight = 1.0`,`knn_image_weight = 1.0`
  588 +- 对鞋、服饰款式、图案、轮廓类 query,image embedding 往往比 text embedding 更接近用户真实意图
  589 +- 但不是所有 query 都适合直接全局抬高 image 权重
  590 +
  591 +建议实验:
  592 +
  593 +- 离线先试:
  594 + - `knn_image_weight = 1.1 / 1.2 / 1.3`
  595 + - `knn_text_weight = 1.0`
  596 +- 再进一步试 query gating:
  597 + - 若 query 命中款式词、形状词、鞋包词、图案词,则抬高 image weight
  598 + - 若 query 是明确品类词或强属性词,则维持中性
  599 +
  600 +为什么我不建议一上来全局大幅抬高:
  601 +
  602 +- 会把一些“文本很明确,但图像泛相似”的结果抬上来
  603 +- 容易让高视觉相似、低语义准确的商品误冲前排
  604 +
  605 +### 12.3 不只融合“分数”,还要融合“排名证据”
  606 +
  607 +当前问题:
  608 +
  609 +- 现在所有阶段都高度依赖 score 级别的乘法融合
  610 +- 不同信号源的 score 标度未必天然可比
  611 +- reranker 分数、ES score、named query score、KNN score 的数值空间差异很大
  612 +
  613 +建议实验:
  614 +
  615 +- 增加 rank-based 特征:
  616 + - `es_rank`
  617 + - `text_rank`
  618 + - `knn_rank`
  619 + - `rerank_rank`
  620 +- 试两类简单方法:
  621 + - RRF(Reciprocal Rank Fusion)
  622 + - score-rank 混合:先做 rank 融合,再乘少量 score 因子
  623 +
  624 +为什么值得做:
  625 +
  626 +- 对异常 score 分布更稳
  627 +- 对模型偶发极端分更鲁棒
  628 +- 很适合拿来做基线对照
  629 +
  630 +### 12.4 将 `base_query` 和 `translation_query` 从“单点 max”升级为“更完整的 lexical 证据”
  631 +
  632 +当前问题:
  633 +
  634 +- 文本大分现在只抓:
  635 + - `base_query`
  636 + - `max(base_query_trans_*)`
  637 +- 这很干净,但可能过于压缩文本证据
  638 +- phrase 命中、best_fields 命中、多语言字段命中、字段质量差异,没有更细粒度地进入后续 rank
  639 +
  640 +建议实验:
  641 +
  642 +- 把 lexical 证据拆得更细:
  643 + - exact / phrase
  644 + - best_fields
  645 + - title 命中
  646 + - category 命中
  647 + - brand/vendor 命中
  648 +- 后续不一定都入主公式,但可以先做 debug / feature log
  649 +
  650 +这样做的收益:
  651 +
  652 +- 更容易解释“为什么这条词法上明明更准却没排上来”
  653 +- 为后续 learning-to-rank 或规则门控准备特征
  654 +
  655 +### 12.5 增加“类目先验”和“商品类型约束”
  656 +
  657 +当前问题:
  658 +
  659 +- 现在体系更偏“文本/向量相似度驱动”
  660 +- 对“牛仔裤 vs 连裤袜”这种 bad case,问题常常不只是分数融合,而是**商品类型约束太弱**
  661 +
  662 +建议实验:
  663 +
  664 +- query 侧先做轻量商品类型识别:
  665 + - 裙子
  666 + - 裤子
  667 + - 上衣
  668 + - 鞋
  669 +- doc 侧取:
  670 + - category_path
  671 + - taxonomy leaf
  672 + - 类目 embedding / one-hot
  673 +- 然后试:
  674 + - 作为 hard filter 候选约束
  675 + - 作为 coarse/final 的 boost 因子
  676 + - 作为 rerank 输入字段增强
  677 +
  678 +这是我认为对明显 bad case 最有价值的一类结构性修复。
  679 +
  680 +### 12.6 把“负证据”纳入体系,而不只是累加正证据
  681 +
  682 +当前问题:
  683 +
  684 +- 当前乘法体系主要是在积累正向因子
  685 +- 但很多错误结果不是“正向不够强”,而是“存在明显负证据”
  686 +- 例如 query 是“半身裙”,doc 却强命中“上衣”“打底衫”“连裤袜”
  687 +
  688 +建议实验:
  689 +
  690 +- 抽取轻量负词特征:
  691 + - 商品类型冲突词
  692 + - 性别/人群冲突词
  693 + - 长度/版型冲突词
  694 +- 方式可以先很简单:
  695 + - penalty factor
  696 + - blacklist term penalty
  697 + - query-doc type mismatch penalty
  698 +
  699 +这是当前体系里非常缺的一块。
  700 +
  701 +### 12.7 把 KNN 从“单一总分”升级为“多语义子通道”
  702 +
  703 +当前问题:
  704 +
  705 +- 现在 KNN 最终会被压成一个 `knn_score`
  706 +- 这对工程简单很好,但损失了“这条向量信号到底为什么相似”的信息
  707 +
  708 +建议实验:
  709 +
  710 +- 分通道记录和使用:
  711 + - text semantic similarity
  712 + - image appearance similarity
  713 + - category-aware similarity
  714 + - style-aware similarity
  715 +- 即使最终仍合成一个总分,也建议先保留分通道特征
  716 +
  717 +这样未来才能回答:
  718 +
  719 +- 这条结果是“外观像”
  720 +- 还是“描述语义像”
  721 +- 还是“类目像但款式不对”
  722 +
  723 +### 12.8 从纯手工公式,逐步过渡到轻量 LTR
  724 +
  725 +当前问题:
  726 +
  727 +- 目前公式已经比较清晰,但本质还是手工 feature engineering + 手工 exponent
  728 +- 一旦信号变多,靠手调很难长期维护
  729 +
  730 +建议实验:
  731 +
  732 +- 先不引入复杂在线模型
  733 +- 先做离线 LTR baseline:
  734 + - LambdaMART / XGBoost ranker
  735 + - 输入现成特征:
  736 + - es_score
  737 + - text_score
  738 + - text_source_score
  739 + - translation_score
  740 + - text_knn_score
  741 + - image_knn_score
  742 + - coarse_rank
  743 + - rerank_score
  744 + - category match
  745 + - style intent match
  746 +
  747 +为什么这一步值得准备:
  748 +
  749 +- 你们现在的 debug 字段已经很接近 feature log 了
  750 +- 其实已经具备往 LTR 过渡的土壤
  751 +
  752 +### 12.9 先把评估体系补齐,再谈大改
  753 +
  754 +当前问题:
  755 +
  756 +- 很多相关性讨论容易停留在个例
  757 +- 但融合改动经常存在 query 分布层面的 tradeoff
  758 +
  759 +建议实验配套:
  760 +
  761 +- 建立 query slice 指标:
  762 + - 鞋靴
  763 + - 裙装
  764 + - 裤装
  765 + - 中英混输
  766 + - 图像语义强 query
  767 + - 属性词强 query
  768 +- 每次实验至少看:
  769 + - overall
  770 + - top 1
  771 + - top 3
  772 + - slice breakdown
  773 + - bad case 回归集
  774 +
  775 +### 12.10 我对当前体系的几个核心判断
  776 +
  777 +1. 当前体系最大的优点不是公式本身,而是已经把信号拆成了可解释的层级,这非常适合继续做实验。
  778 +2. 当前体系最大的短板不是“knn exponent 还不够准”,而是缺少 query 分桶、类目先验和负证据。
  779 +3. 只调融合公式还能继续拿到一部分收益,但中期最值得投入的是:
  780 + - query-aware 参数
  781 + - 类型/类目约束
  782 + - score + rank 混合融合
  783 + - 为 LTR 做特征沉淀
  784 +
489 785
490 786
491 ## reranker方面: 787 ## reranker方面:
scripts/evaluation/eval_framework/framework.py
@@ -272,6 +272,40 @@ class SearchEvaluationFramework: @@ -272,6 +272,40 @@ class SearchEvaluationFramework:
272 ranked.sort(key=lambda item: item["score"], reverse=True) 272 ranked.sort(key=lambda item: item["score"], reverse=True)
273 return ranked 273 return ranked
274 274
  275 + def _assign_fixed_rerank_scores(
  276 + self,
  277 + query: str,
  278 + spu_ids: Sequence[str],
  279 + *,
  280 + score: float,
  281 + force_refresh: bool = False,
  282 + ) -> Dict[str, float]:
  283 + """Persist a fixed rerank score for a deduplicated ``spu_id`` list."""
  284 + normalized_ids: List[str] = []
  285 + seen: set[str] = set()
  286 + for spu_id in spu_ids:
  287 + sid = str(spu_id or "").strip()
  288 + if not sid or sid in seen:
  289 + continue
  290 + seen.add(sid)
  291 + normalized_ids.append(sid)
  292 + if not normalized_ids:
  293 + return {}
  294 +
  295 + cached = {} if force_refresh else self.store.get_rerank_scores(self.tenant_id, query)
  296 + to_store: Dict[str, float] = {}
  297 + for sid in normalized_ids:
  298 + if force_refresh or sid not in cached or float(cached[sid]) != float(score):
  299 + to_store[sid] = float(score)
  300 + if to_store:
  301 + self.store.upsert_rerank_scores(
  302 + self.tenant_id,
  303 + query,
  304 + to_store,
  305 + model_name="search_recall_pool_fixed",
  306 + )
  307 + return {sid: float(score) for sid in normalized_ids}
  308 +
275 def _rerank_batch_with_retry(self, query: str, docs: Sequence[Dict[str, Any]]) -> List[float]: 309 def _rerank_batch_with_retry(self, query: str, docs: Sequence[Dict[str, Any]]) -> List[float]:
276 if not docs: 310 if not docs:
277 return [] 311 return []
@@ -631,12 +665,25 @@ class SearchEvaluationFramework: @@ -631,12 +665,25 @@ class SearchEvaluationFramework:
631 search_size = max(int(search_depth), int(search_recall_top_k)) 665 search_size = max(int(search_depth), int(search_recall_top_k))
632 search_payload = self.search_client.search(query=query, size=search_size, from_=0, language=language) 666 search_payload = self.search_client.search(query=query, size=search_size, from_=0, language=language)
633 search_results = list(search_payload.get("results") or []) 667 search_results = list(search_payload.get("results") or [])
634 - recall_n = min(int(search_recall_top_k), len(search_results))  
635 - pool_search_docs = search_results[:recall_n]  
636 - pool_spu_ids = {str(d.get("spu_id")) for d in pool_search_docs if str(d.get("spu_id") or "").strip()} 668 + search_result_spu_ids = [str(doc.get("spu_id") or "").strip() for doc in search_results]
  669 + recall_spu_ids: List[str] = []
  670 + seen_recall_spu_ids: set[str] = set()
  671 + for spu_id in search_result_spu_ids[: int(search_recall_top_k)]:
  672 + if not spu_id or spu_id in seen_recall_spu_ids:
  673 + continue
  674 + seen_recall_spu_ids.add(spu_id)
  675 + recall_spu_ids.append(spu_id)
  676 + recall_n = len(recall_spu_ids)
  677 + pool_spu_ids = set(recall_spu_ids)
637 678
638 corpus = self.corpus_docs(refresh=False) 679 corpus = self.corpus_docs(refresh=False)
639 corpus_by_id = {str(d.get("spu_id")): d for d in corpus if str(d.get("spu_id") or "").strip()} 680 corpus_by_id = {str(d.get("spu_id")): d for d in corpus if str(d.get("spu_id") or "").strip()}
  681 + self._assign_fixed_rerank_scores(
  682 + query=query,
  683 + spu_ids=recall_spu_ids,
  684 + score=1.0,
  685 + force_refresh=force_refresh_rerank,
  686 + )
640 687
641 rerank_pending_n = sum( 688 rerank_pending_n = sum(
642 1 689 1
@@ -697,12 +744,13 @@ class SearchEvaluationFramework: @@ -697,12 +744,13 @@ class SearchEvaluationFramework:
697 else: 744 else:
698 ordered_docs: List[Dict[str, Any]] = [] 745 ordered_docs: List[Dict[str, Any]] = []
699 seen_ordered: set[str] = set() 746 seen_ordered: set[str] = set()
700 - for doc in pool_search_docs:  
701 - sid = str(doc.get("spu_id") or "") 747 + for sid in recall_spu_ids:
702 if not sid or sid in seen_ordered: 748 if not sid or sid in seen_ordered:
703 continue 749 continue
704 seen_ordered.add(sid) 750 seen_ordered.add(sid)
705 - ordered_docs.append(corpus_by_id.get(sid, doc)) 751 + doc = corpus_by_id.get(sid)
  752 + if doc is not None:
  753 + ordered_docs.append(doc)
706 for item in ranked_outside: 754 for item in ranked_outside:
707 sid = str(item["spu_id"]) 755 sid = str(item["spu_id"])
708 if sid in seen_ordered: 756 if sid in seen_ordered:
@@ -730,9 +778,10 @@ class SearchEvaluationFramework: @@ -730,9 +778,10 @@ class SearchEvaluationFramework:
730 778
731 rerank_depth_effective = min(int(rerank_depth), len(ranked_outside)) 779 rerank_depth_effective = min(int(rerank_depth), len(ranked_outside))
732 search_labeled_results: List[Dict[str, Any]] = [] 780 search_labeled_results: List[Dict[str, Any]] = []
733 - for rank, doc in enumerate(search_results, start=1):  
734 - spu_id = str(doc.get("spu_id"))  
735 - in_pool = rank <= recall_n 781 + for rank, search_doc in enumerate(search_results, start=1):
  782 + spu_id = str(search_doc.get("spu_id") or "")
  783 + doc = corpus_by_id.get(spu_id, search_doc)
  784 + in_pool = spu_id in pool_spu_ids
736 search_labeled_results.append( 785 search_labeled_results.append(
737 { 786 {
738 "rank": rank, 787 "rank": rank,
@@ -998,4 +1047,3 @@ class SearchEvaluationFramework: @@ -998,4 +1047,3 @@ class SearchEvaluationFramework:
998 output_json_path, 1047 output_json_path,
999 ) 1048 )
1000 return payload 1049 return payload
1001 -  
search/rerank_client.py
@@ -252,17 +252,18 @@ def _build_hit_signal_bundle( @@ -252,17 +252,18 @@ def _build_hit_signal_bundle(
252 hit: Dict[str, Any], 252 hit: Dict[str, Any],
253 fusion: CoarseRankFusionConfig | RerankFusionConfig, 253 fusion: CoarseRankFusionConfig | RerankFusionConfig,
254 ) -> Dict[str, Any]: 254 ) -> Dict[str, Any]:
255 - es_score = _to_score(hit.get("_score")) 255 + raw_es_score = _to_score(hit.get("_raw_es_score", hit.get("_original_score", hit.get("_score"))))
  256 + hit["_raw_es_score"] = raw_es_score
256 matched_queries = hit.get("matched_queries") 257 matched_queries = hit.get("matched_queries")
257 text_components = _collect_text_score_components( 258 text_components = _collect_text_score_components(
258 matched_queries, 259 matched_queries,
259 - es_score, 260 + raw_es_score,
260 translation_weight=fusion.text_translation_weight, 261 translation_weight=fusion.text_translation_weight,
261 ) 262 )
262 knn_components = _collect_knn_score_components(matched_queries, fusion) 263 knn_components = _collect_knn_score_components(matched_queries, fusion)
263 return { 264 return {
264 "doc_id": hit.get("_id"), 265 "doc_id": hit.get("_id"),
265 - "es_score": es_score, 266 + "es_score": raw_es_score,
266 "matched_queries": matched_queries, 267 "matched_queries": matched_queries,
267 "text_components": text_components, 268 "text_components": text_components,
268 "knn_components": knn_components, 269 "knn_components": knn_components,
@@ -294,6 +295,7 @@ def _build_formula_summary( @@ -294,6 +295,7 @@ def _build_formula_summary(
294 295
295 def _compute_multiplicative_fusion( 296 def _compute_multiplicative_fusion(
296 *, 297 *,
  298 + es_score: float,
297 text_score: float, 299 text_score: float,
298 knn_score: float, 300 knn_score: float,
299 fusion: RerankFusionConfig, 301 fusion: RerankFusionConfig,
@@ -317,6 +319,7 @@ def _compute_multiplicative_fusion( @@ -317,6 +319,7 @@ def _compute_multiplicative_fusion(
317 } 319 }
318 ) 320 )
319 321
  322 + _add_term("es_score", es_score, fusion.es_bias, fusion.es_exponent)
320 _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) 323 _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent)
321 _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) 324 _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent)
322 _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) 325 _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent)
@@ -341,13 +344,15 @@ def _compute_multiplicative_fusion( @@ -341,13 +344,15 @@ def _compute_multiplicative_fusion(
341 344
342 345
343 def _multiply_coarse_fusion_factors( 346 def _multiply_coarse_fusion_factors(
  347 + es_score: float,
344 text_score: float, 348 text_score: float,
345 knn_score: float, 349 knn_score: float,
346 fusion: CoarseRankFusionConfig, 350 fusion: CoarseRankFusionConfig,
347 -) -> Tuple[float, float, float]: 351 +) -> Tuple[float, float, float, float]:
  352 + es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent
348 text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent 353 text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
349 knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent 354 knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
350 - return text_factor, knn_factor, text_factor * knn_factor 355 + return es_factor, text_factor, knn_factor, es_factor * text_factor * knn_factor
351 356
352 357
353 def _has_selected_sku(hit: Dict[str, Any]) -> bool: 358 def _has_selected_sku(hit: Dict[str, Any]) -> bool:
@@ -359,7 +364,7 @@ def coarse_resort_hits( @@ -359,7 +364,7 @@ def coarse_resort_hits(
359 fusion: Optional[CoarseRankFusionConfig] = None, 364 fusion: Optional[CoarseRankFusionConfig] = None,
360 debug: bool = False, 365 debug: bool = False,
361 ) -> List[Dict[str, Any]]: 366 ) -> List[Dict[str, Any]]:
362 - """Coarse rank with text/knn fusion only.""" 367 + """Coarse rank with es/text/knn multiplicative fusion."""
363 if not es_hits: 368 if not es_hits:
364 return [] 369 return []
365 370
@@ -373,7 +378,8 @@ def coarse_resort_hits( @@ -373,7 +378,8 @@ def coarse_resort_hits(
373 knn_components = signal_bundle["knn_components"] 378 knn_components = signal_bundle["knn_components"]
374 text_score = signal_bundle["text_score"] 379 text_score = signal_bundle["text_score"]
375 knn_score = signal_bundle["knn_score"] 380 knn_score = signal_bundle["knn_score"]
376 - text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors( 381 + es_factor, text_factor, knn_factor, coarse_score = _multiply_coarse_fusion_factors(
  382 + es_score=es_score,
377 text_score=text_score, 383 text_score=text_score,
378 knn_score=knn_score, 384 knn_score=knn_score,
379 fusion=f, 385 fusion=f,
@@ -409,6 +415,7 @@ def coarse_resort_hits( @@ -409,6 +415,7 @@ def coarse_resort_hits(
409 "knn_primary_score": knn_components["primary_knn_score"], 415 "knn_primary_score": knn_components["primary_knn_score"],
410 "knn_support_score": knn_components["support_knn_score"], 416 "knn_support_score": knn_components["support_knn_score"],
411 "knn_score": knn_score, 417 "knn_score": knn_score,
  418 + "coarse_es_factor": es_factor,
412 "coarse_text_factor": text_factor, 419 "coarse_text_factor": text_factor,
413 "coarse_knn_factor": knn_factor, 420 "coarse_knn_factor": knn_factor,
414 "coarse_score": coarse_score, 421 "coarse_score": coarse_score,
@@ -435,13 +442,19 @@ def fuse_scores_and_resort( @@ -435,13 +442,19 @@ def fuse_scores_and_resort(
435 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 442 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。
436 443
437 融合形式(由 ``fusion`` 配置 bias / exponent):: 444 融合形式(由 ``fusion`` 配置 bias / exponent)::
438 - fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k * sku_boost 445 + fused = (max(es,0)+b_es)^e_es
  446 + * (max(rerank,0)+b_r)^e_r
  447 + * (max(fine,0)+b_f)^e_f
  448 + * (max(text,0)+b_t)^e_t
  449 + * (max(knn,0)+b_k)^e_k
  450 + * sku_boost
439 451
440 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过 452 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过
441 ``query.style_intent.selected_sku_boost`` 配置。 453 ``query.style_intent.selected_sku_boost`` 配置。
442 454
443 对每条 hit 会写入: 455 对每条 hit 会写入:
444 - _original_score: 原始 ES 分数 456 - _original_score: 原始 ES 分数
  457 + - _raw_es_score: ES 原始总分(后续阶段始终复用,不依赖可能被改写的 `_score`)
445 - _rerank_score: 重排服务返回的分数 458 - _rerank_score: 重排服务返回的分数
446 - _fused_score: 融合分数 459 - _fused_score: 融合分数
447 - _text_score: 文本相关性分数(优先取 named queries 的 base_query 分数) 460 - _text_score: 文本相关性分数(优先取 named queries 的 base_query 分数)
@@ -475,6 +488,7 @@ def fuse_scores_and_resort( @@ -475,6 +488,7 @@ def fuse_scores_and_resort(
475 sku_selected = _has_selected_sku(hit) 488 sku_selected = _has_selected_sku(hit)
476 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 489 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
477 fusion_result = _compute_multiplicative_fusion( 490 fusion_result = _compute_multiplicative_fusion(
  491 + es_score=signal_bundle["es_score"],
478 rerank_score=rerank_score, 492 rerank_score=rerank_score,
479 fine_score=fine_score, 493 fine_score=fine_score,
480 text_score=text_score, 494 text_score=text_score,
@@ -526,6 +540,7 @@ def fuse_scores_and_resort( @@ -526,6 +540,7 @@ def fuse_scores_and_resort(
526 ), 540 ),
527 "rerank_factor": fusion_result["factors"].get("rerank_score"), 541 "rerank_factor": fusion_result["factors"].get("rerank_score"),
528 "fine_factor": fusion_result["factors"].get("fine_score"), 542 "fine_factor": fusion_result["factors"].get("fine_score"),
  543 + "es_factor": fusion_result["factors"].get("es_score"),
529 "text_factor": fusion_result["factors"].get("text_score"), 544 "text_factor": fusion_result["factors"].get("text_score"),
530 "knn_factor": fusion_result["factors"].get("knn_score"), 545 "knn_factor": fusion_result["factors"].get("knn_score"),
531 "style_intent_selected_sku": sku_selected, 546 "style_intent_selected_sku": sku_selected,
@@ -654,6 +669,7 @@ def run_lightweight_rerank( @@ -654,6 +669,7 @@ def run_lightweight_rerank(
654 sku_selected = _has_selected_sku(hit) 669 sku_selected = _has_selected_sku(hit)
655 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 670 style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
656 fusion_result = _compute_multiplicative_fusion( 671 fusion_result = _compute_multiplicative_fusion(
  672 + es_score=signal_bundle["es_score"],
657 fine_score=fine_score, 673 fine_score=fine_score,
658 text_score=text_score, 674 text_score=text_score,
659 knn_score=knn_score, 675 knn_score=knn_score,
@@ -679,7 +695,9 @@ def run_lightweight_rerank( @@ -679,7 +695,9 @@ def run_lightweight_rerank(
679 "fusion_inputs": fusion_result["inputs"], 695 "fusion_inputs": fusion_result["inputs"],
680 "fusion_factors": fusion_result["factors"], 696 "fusion_factors": fusion_result["factors"],
681 "fusion_summary": fusion_result["summary"], 697 "fusion_summary": fusion_result["summary"],
  698 + "es_score": signal_bundle["es_score"],
682 "fine_factor": fusion_result["factors"].get("fine_score"), 699 "fine_factor": fusion_result["factors"].get("fine_score"),
  700 + "es_factor": fusion_result["factors"].get("es_score"),
683 "text_factor": fusion_result["factors"].get("text_score"), 701 "text_factor": fusion_result["factors"].get("text_score"),
684 "knn_factor": fusion_result["factors"].get("knn_score"), 702 "knn_factor": fusion_result["factors"].get("knn_score"),
685 "style_intent_selected_sku": sku_selected, 703 "style_intent_selected_sku": sku_selected,
search/searcher.py
@@ -994,7 +994,7 @@ class Searcher: @@ -994,7 +994,7 @@ class Searcher:
994 if decision is not None: 994 if decision is not None:
995 style_intent_debug = decision.to_dict() 995 style_intent_debug = decision.to_dict()
996 996
997 - raw_score = hit.get("_score") 997 + raw_score = hit.get("_raw_es_score", hit.get("_original_score", hit.get("_score")))
998 try: 998 try:
999 es_score = float(raw_score) if raw_score is not None else 0.0 999 es_score = float(raw_score) if raw_score is not None else 0.0
1000 except (TypeError, ValueError): 1000 except (TypeError, ValueError):
@@ -1024,6 +1024,7 @@ class Searcher: @@ -1024,6 +1024,7 @@ class Searcher:
1024 1024
1025 if coarse_debug: 1025 if coarse_debug:
1026 debug_entry["coarse_score"] = coarse_debug.get("coarse_score") 1026 debug_entry["coarse_score"] = coarse_debug.get("coarse_score")
  1027 + debug_entry["coarse_es_factor"] = coarse_debug.get("coarse_es_factor")
1027 debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor") 1028 debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor")
1028 debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor") 1029 debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor")
1029 1030
@@ -1033,6 +1034,7 @@ class Searcher: @@ -1033,6 +1034,7 @@ class Searcher:
1033 debug_entry["score"] = rerank_debug.get("score") 1034 debug_entry["score"] = rerank_debug.get("score")
1034 debug_entry["rerank_score"] = rerank_debug.get("rerank_score") 1035 debug_entry["rerank_score"] = rerank_debug.get("rerank_score")
1035 debug_entry["fine_score"] = rerank_debug.get("fine_score") 1036 debug_entry["fine_score"] = rerank_debug.get("fine_score")
  1037 + debug_entry["es_score"] = rerank_debug.get("es_score", es_score)
1036 debug_entry["text_score"] = rerank_debug.get("text_score") 1038 debug_entry["text_score"] = rerank_debug.get("text_score")
1037 debug_entry["knn_score"] = rerank_debug.get("knn_score") 1039 debug_entry["knn_score"] = rerank_debug.get("knn_score")
1038 debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs") 1040 debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs")
@@ -1040,6 +1042,7 @@ class Searcher: @@ -1040,6 +1042,7 @@ class Searcher:
1040 debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary") 1042 debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary")
1041 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") 1043 debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
1042 debug_entry["fine_factor"] = rerank_debug.get("fine_factor") 1044 debug_entry["fine_factor"] = rerank_debug.get("fine_factor")
  1045 + debug_entry["es_factor"] = rerank_debug.get("es_factor")
1043 debug_entry["text_factor"] = rerank_debug.get("text_factor") 1046 debug_entry["text_factor"] = rerank_debug.get("text_factor")
1044 debug_entry["knn_factor"] = rerank_debug.get("knn_factor") 1047 debug_entry["knn_factor"] = rerank_debug.get("knn_factor")
1045 debug_entry["fused_score"] = rerank_debug.get("fused_score") 1048 debug_entry["fused_score"] = rerank_debug.get("fused_score")
@@ -1049,11 +1052,13 @@ class Searcher: @@ -1049,11 +1052,13 @@ class Searcher:
1049 debug_entry["doc_id"] = fine_debug.get("doc_id") 1052 debug_entry["doc_id"] = fine_debug.get("doc_id")
1050 debug_entry["score"] = fine_debug.get("score") 1053 debug_entry["score"] = fine_debug.get("score")
1051 debug_entry["fine_score"] = fine_debug.get("fine_score") 1054 debug_entry["fine_score"] = fine_debug.get("fine_score")
  1055 + debug_entry["es_score"] = fine_debug.get("es_score", es_score)
1052 debug_entry["text_score"] = fine_debug.get("text_score") 1056 debug_entry["text_score"] = fine_debug.get("text_score")
1053 debug_entry["knn_score"] = fine_debug.get("knn_score") 1057 debug_entry["knn_score"] = fine_debug.get("knn_score")
1054 debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs") 1058 debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs")
1055 debug_entry["fusion_factors"] = fine_debug.get("fusion_factors") 1059 debug_entry["fusion_factors"] = fine_debug.get("fusion_factors")
1056 debug_entry["fusion_summary"] = fine_debug.get("fusion_summary") 1060 debug_entry["fusion_summary"] = fine_debug.get("fusion_summary")
  1061 + debug_entry["es_factor"] = fine_debug.get("es_factor")
1057 debug_entry["rerank_input"] = fine_debug.get("rerank_input") 1062 debug_entry["rerank_input"] = fine_debug.get("rerank_input")
1058 1063
1059 initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None 1064 initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
@@ -1061,6 +1066,14 @@ class Searcher: @@ -1061,6 +1066,14 @@ class Searcher:
1061 fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None 1066 fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
1062 rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None 1067 rerank_rank = rerank_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
1063 final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None 1068 final_rank = final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
  1069 + rerank_previous_rank = fine_rank if fine_rank is not None else coarse_rank
  1070 + final_previous_rank = rerank_rank
  1071 + if final_previous_rank is None:
  1072 + final_previous_rank = fine_rank
  1073 + if final_previous_rank is None:
  1074 + final_previous_rank = coarse_rank
  1075 + if final_previous_rank is None:
  1076 + final_previous_rank = initial_rank
1064 1077
1065 def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: 1078 def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]:
1066 if previous_rank is None or current_rank is None: 1079 if previous_rank is None or current_rank is None:
@@ -1078,8 +1091,10 @@ class Searcher: @@ -1078,8 +1091,10 @@ class Searcher:
1078 "rank": coarse_rank, 1091 "rank": coarse_rank,
1079 "rank_change": _rank_change(initial_rank, coarse_rank), 1092 "rank_change": _rank_change(initial_rank, coarse_rank),
1080 "score": coarse_debug.get("coarse_score") if coarse_debug else None, 1093 "score": coarse_debug.get("coarse_score") if coarse_debug else None,
  1094 + "es_score": coarse_debug.get("es_score") if coarse_debug else es_score,
1081 "text_score": coarse_debug.get("text_score") if coarse_debug else None, 1095 "text_score": coarse_debug.get("text_score") if coarse_debug else None,
1082 "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, 1096 "knn_score": coarse_debug.get("knn_score") if coarse_debug else None,
  1097 + "es_factor": coarse_debug.get("coarse_es_factor") if coarse_debug else None,
1083 "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None, 1098 "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None,
1084 "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None, 1099 "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None,
1085 "signals": coarse_debug, 1100 "signals": coarse_debug,
@@ -1093,8 +1108,10 @@ class Searcher: @@ -1093,8 +1108,10 @@ class Searcher:
1093 else hit.get("_fine_fused_score", hit.get("_fine_score")) 1108 else hit.get("_fine_fused_score", hit.get("_fine_score"))
1094 ), 1109 ),
1095 "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), 1110 "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"),
  1111 + "es_score": fine_debug.get("es_score") if fine_debug else es_score,
1096 "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), 1112 "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"),
1097 "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), 1113 "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"),
  1114 + "es_factor": fine_debug.get("es_factor") if fine_debug else None,
1098 "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None, 1115 "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None,
1099 "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None, 1116 "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None,
1100 "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None, 1117 "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None,
@@ -1103,8 +1120,9 @@ class Searcher: @@ -1103,8 +1120,9 @@ class Searcher:
1103 }, 1120 },
1104 "rerank": { 1121 "rerank": {
1105 "rank": rerank_rank, 1122 "rank": rerank_rank,
1106 - "rank_change": _rank_change(fine_rank, rerank_rank), 1123 + "rank_change": _rank_change(rerank_previous_rank, rerank_rank),
1107 "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), 1124 "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"),
  1125 + "es_score": rerank_debug.get("es_score") if rerank_debug else es_score,
1108 "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), 1126 "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"),
1109 "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), 1127 "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"),
1110 "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), 1128 "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"),
@@ -1115,13 +1133,14 @@ class Searcher: @@ -1115,13 +1133,14 @@ class Searcher:
1115 "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None, 1133 "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None,
1116 "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, 1134 "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None,
1117 "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, 1135 "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None,
  1136 + "es_factor": rerank_debug.get("es_factor") if rerank_debug else None,
1118 "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, 1137 "text_factor": rerank_debug.get("text_factor") if rerank_debug else None,
1119 "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None, 1138 "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None,
1120 "signals": rerank_debug, 1139 "signals": rerank_debug,
1121 }, 1140 },
1122 "final_page": { 1141 "final_page": {
1123 "rank": final_rank, 1142 "rank": final_rank,
1124 - "rank_change": _rank_change(rerank_rank, final_rank), 1143 + "rank_change": _rank_change(final_previous_rank, final_rank),
1125 }, 1144 },
1126 } 1145 }
1127 1146
tests/test_rerank_client.py
@@ -258,3 +258,35 @@ def test_fuse_scores_and_resort_uses_hit_level_fine_score_when_not_passed_separa @@ -258,3 +258,35 @@ def test_fuse_scores_and_resort_uses_hit_level_fine_score_when_not_passed_separa
258 assert isclose(debug[0]["fine_factor"], (0.7 + 0.00001), rel_tol=1e-9) 258 assert isclose(debug[0]["fine_factor"], (0.7 + 0.00001), rel_tol=1e-9)
259 assert debug[0]["fusion_inputs"]["fine_score"] == 0.7 259 assert debug[0]["fusion_inputs"]["fine_score"] == 0.7
260 assert "fine_score=" in debug[0]["fusion_summary"] 260 assert "fine_score=" in debug[0]["fusion_summary"]
  261 +
  262 +
  263 +def test_fuse_scores_and_resort_can_include_raw_es_score_as_factor():
  264 + hits = [
  265 + {
  266 + "_id": "es-strong",
  267 + "_score": 100.0,
  268 + "matched_queries": {"base_query": 1.0, "knn_query": 0.0},
  269 + },
  270 + {
  271 + "_id": "es-weak",
  272 + "_score": 1.0,
  273 + "matched_queries": {"base_query": 1.0, "knn_query": 0.0},
  274 + },
  275 + ]
  276 + fusion = RerankFusionConfig(
  277 + es_bias=0.0,
  278 + es_exponent=1.0,
  279 + rerank_bias=0.0,
  280 + rerank_exponent=1.0,
  281 + text_bias=0.0,
  282 + text_exponent=0.0,
  283 + knn_bias=1.0,
  284 + knn_exponent=0.0,
  285 + )
  286 +
  287 + debug = fuse_scores_and_resort(hits, [1.0, 1.0], fusion=fusion, debug=True)
  288 +
  289 + assert [hit["_id"] for hit in hits] == ["es-strong", "es-weak"]
  290 + assert isclose(hits[0]["_raw_es_score"], 100.0, rel_tol=1e-9)
  291 + assert isclose(debug[0]["es_factor"], 100.0, rel_tol=1e-9)
  292 + assert debug[0]["fusion_inputs"]["es_score"] == 100.0
tests/test_search_rerank_window.py
@@ -10,6 +10,7 @@ import yaml @@ -10,6 +10,7 @@ import yaml
10 10
11 from config import ( 11 from config import (
12 ConfigLoader, 12 ConfigLoader,
  13 + FineRankConfig,
13 FunctionScoreConfig, 14 FunctionScoreConfig,
14 IndexConfig, 15 IndexConfig,
15 QueryConfig, 16 QueryConfig,
@@ -944,3 +945,70 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp @@ -944,3 +945,70 @@ def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeyp
944 assert result.debug_info["per_result"][0]["final_rank"] == 1 945 assert result.debug_info["per_result"][0]["final_rank"] == 1
945 assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0 946 assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0
946 assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0 947 assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0
  948 +
  949 +
  950 +def test_searcher_rerank_rank_change_falls_back_to_coarse_rank_when_fine_disabled(monkeypatch):
  951 + es_client = _FakeESClient(total_hits=5)
  952 + config = _build_search_config(rerank_enabled=True, rerank_window=5)
  953 + config = SearchConfig(
  954 + field_boosts=config.field_boosts,
  955 + indexes=config.indexes,
  956 + query_config=config.query_config,
  957 + function_score=config.function_score,
  958 + coarse_rank=config.coarse_rank,
  959 + fine_rank=FineRankConfig(enabled=False, input_window=5, output_window=5),
  960 + rerank=config.rerank,
  961 + spu_config=config.spu_config,
  962 + es_index_name=config.es_index_name,
  963 + es_settings=config.es_settings,
  964 + )
  965 + searcher = _build_searcher(config, es_client)
  966 + context = create_request_context(reqid="rank-fallback", uid="u-rank-fallback")
  967 +
  968 + monkeypatch.setattr(
  969 + "search.searcher.get_tenant_config_loader",
  970 + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
  971 + )
  972 +
  973 + def _fake_run_rerank(**kwargs):
  974 + hits = kwargs["es_response"]["hits"]["hits"]
  975 + hits.reverse()
  976 + fused_debug = []
  977 + for idx, hit in enumerate(hits):
  978 + hit["_fused_score"] = 100.0 - idx
  979 + hit["_rerank_score"] = 1.0 - 0.1 * idx
  980 + fused_debug.append(
  981 + {
  982 + "doc_id": hit["_id"],
  983 + "score": hit["_fused_score"],
  984 + "es_score": hit.get("_raw_es_score", hit.get("_score")),
  985 + "rerank_score": hit["_rerank_score"],
  986 + "text_score": hit.get("_text_score", hit.get("_score")),
  987 + "knn_score": hit.get("_knn_score", 0.0),
  988 + "es_factor": 1.0,
  989 + "rerank_factor": 1.0,
  990 + "text_factor": 1.0,
  991 + "knn_factor": 1.0,
  992 + "fused_score": hit["_fused_score"],
  993 + }
  994 + )
  995 + return kwargs["es_response"], {"model": "final-reranker"}, fused_debug
  996 +
  997 + monkeypatch.setattr("search.rerank_client.run_rerank", _fake_run_rerank)
  998 +
  999 + result = searcher.search(
  1000 + query="toy",
  1001 + tenant_id="162",
  1002 + from_=0,
  1003 + size=5,
  1004 + context=context,
  1005 + enable_rerank=True,
  1006 + debug=True,
  1007 + )
  1008 +
  1009 + per_result = {row["spu_id"]: row for row in result.debug_info["per_result"]}
  1010 + moved = per_result["4"]["ranking_funnel"]
  1011 + assert moved["fine_rank"]["rank"] is None
  1012 + assert moved["rerank"]["rank"] == 1
  1013 + assert moved["rerank"]["rank_change"] == 4
  1014 + assert moved["final_page"]["rank_change"] == 0