Commit 77ab67adeae5b5b41985c5f24abf93311682d320
1 parent
2260eed2
更新测试用例
Showing
12 changed files
with
24 additions
and
70 deletions
Show diff stats
.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md
| @@ -187,11 +187,8 @@ class RerankEngine: | @@ -187,11 +187,8 @@ class RerankEngine: | ||
| 187 | 用途:复杂的自定义排序逻辑、实时个性化等 | 187 | 用途:复杂的自定义排序逻辑、实时个性化等 |
| 188 | """ | 188 | """ |
| 189 | 189 | ||
| 190 | - def __init__(self, ranking_expression: str, enabled: bool = False): | 190 | + def __init__(self, enabled: bool = False): |
| 191 | self.enabled = enabled | 191 | self.enabled = enabled |
| 192 | - self.ranking_expression = ranking_expression | ||
| 193 | - if enabled: | ||
| 194 | - self.parsed_terms = self._parse_expression(ranking_expression) | ||
| 195 | ``` | 192 | ``` |
| 196 | 193 | ||
| 197 | #### `/home/tw/saas-search/search/__init__.py` | 194 | #### `/home/tw/saas-search/search/__init__.py` |
| @@ -206,14 +203,6 @@ from .rerank_engine import RerankEngine # 原 RankingEngine | @@ -206,14 +203,6 @@ from .rerank_engine import RerankEngine # 原 RankingEngine | ||
| 206 | 203 | ||
| 207 | **修改初始化**(约88行): | 204 | **修改初始化**(约88行): |
| 208 | 205 | ||
| 209 | -```python | ||
| 210 | -# 改为RerankEngine,默认禁用 | ||
| 211 | -self.rerank_engine = RerankEngine( | ||
| 212 | - config.ranking.expression, | ||
| 213 | - enabled=False # 暂时禁用 | ||
| 214 | -) | ||
| 215 | -``` | ||
| 216 | - | ||
| 217 | **修改search方法中的rerank逻辑**(约356-383行): | 206 | **修改search方法中的rerank逻辑**(约356-383行): |
| 218 | 207 | ||
| 219 | ```python | 208 | ```python |
| @@ -295,7 +284,6 @@ class FunctionScoreConfig: | @@ -295,7 +284,6 @@ class FunctionScoreConfig: | ||
| 295 | @dataclass | 284 | @dataclass |
| 296 | class TenantConfig: | 285 | class TenantConfig: |
| 297 | # ... 其他字段 ... | 286 | # ... 其他字段 ... |
| 298 | - ranking: RankingConfig # 保留用于兼容 | ||
| 299 | rerank: RerankConfig # 新增 | 287 | rerank: RerankConfig # 新增 |
| 300 | function_score: FunctionScoreConfig # 新增 | 288 | function_score: FunctionScoreConfig # 新增 |
| 301 | ``` | 289 | ``` |
| @@ -327,11 +315,18 @@ curl -X POST http://localhost:6002/search/ \ | @@ -327,11 +315,18 @@ curl -X POST http://localhost:6002/search/ \ | ||
| 327 | 315 | ||
| 328 | ### 4. 配置迁移 | 316 | ### 4. 配置迁移 |
| 329 | 317 | ||
| 330 | -对于现有的`ranking.expression`配置,建议: | 318 | +```python |
| 319 | +# 改为RerankEngine,默认禁用 | ||
| 320 | +self.rerank_engine = RerankEngine( | ||
| 321 | + enabled=False # 暂时禁用 | ||
| 322 | +) | ||
| 323 | +``` | ||
| 324 | + | ||
| 325 | + | ||
| 326 | +对于现有的排序/打分配置,建议: | ||
| 331 | 327 | ||
| 332 | -- 保留`ranking`配置用于文档说明 | ||
| 333 | -- 新增`rerank.enabled=false`明确禁用状态 | ||
| 334 | -- 新增`function_score`配置用于ES层打分 | 328 | +- 使用 `function_score` 配置 ES 层打分 |
| 329 | +- 使用 `rerank` 配置控制本地/AI 重排行为 | ||
| 335 | 330 | ||
| 336 | ### 5. 后续优化空间 | 331 | ### 5. 后续优化空间 |
| 337 | 332 |
CLAUDE.md
| @@ -588,8 +588,7 @@ python main.py search "query" --tenant-id 1 # Quick search test | @@ -588,8 +588,7 @@ python main.py search "query" --tenant-id 1 # Quick search test | ||
| 588 | 1. **Modifying Search Behavior**: Edit `config/config.yaml` | 588 | 1. **Modifying Search Behavior**: Edit `config/config.yaml` |
| 589 | 2. **Changing Index Structure**: Update `mappings/search_products.json` | 589 | 2. **Changing Index Structure**: Update `mappings/search_products.json` |
| 590 | 3. **Adding New Filters**: Extend `api/models.py` with new Pydantic models | 590 | 3. **Adding New Filters**: Extend `api/models.py` with new Pydantic models |
| 591 | -4. **Updating Ranking**: Modify `ranking.expression` in config | ||
| 592 | -5. **Testing Queries**: Use frontend UI at http://localhost:6003 | 591 | +4. **Testing Queries**: Use frontend UI at http://localhost:6003 |
| 593 | 592 | ||
| 594 | ## Key Implementation Details | 593 | ## Key Implementation Details |
| 595 | 594 |
api/routes/admin.py
| @@ -56,7 +56,6 @@ async def get_configuration(): | @@ -56,7 +56,6 @@ async def get_configuration(): | ||
| 56 | "shared_fields": config.query_config.shared_fields, | 56 | "shared_fields": config.query_config.shared_fields, |
| 57 | "core_multilingual_fields": config.query_config.core_multilingual_fields, | 57 | "core_multilingual_fields": config.query_config.core_multilingual_fields, |
| 58 | "supported_languages": config.query_config.supported_languages, | 58 | "supported_languages": config.query_config.supported_languages, |
| 59 | - "ranking_expression": config.ranking.expression, | ||
| 60 | "spu_enabled": config.spu_config.enabled | 59 | "spu_enabled": config.spu_config.enabled |
| 61 | } | 60 | } |
| 62 | 61 |
config/__init__.py
| @@ -9,7 +9,6 @@ from .config_loader import ( | @@ -9,7 +9,6 @@ from .config_loader import ( | ||
| 9 | QueryConfig, | 9 | QueryConfig, |
| 10 | IndexConfig, | 10 | IndexConfig, |
| 11 | SPUConfig, | 11 | SPUConfig, |
| 12 | - RankingConfig, | ||
| 13 | FunctionScoreConfig, | 12 | FunctionScoreConfig, |
| 14 | RerankConfig, | 13 | RerankConfig, |
| 15 | ConfigLoader, | 14 | ConfigLoader, |
| @@ -38,7 +37,6 @@ __all__ = [ | @@ -38,7 +37,6 @@ __all__ = [ | ||
| 38 | 'QueryConfig', | 37 | 'QueryConfig', |
| 39 | 'IndexConfig', | 38 | 'IndexConfig', |
| 40 | 'SPUConfig', | 39 | 'SPUConfig', |
| 41 | - 'RankingConfig', | ||
| 42 | 'FunctionScoreConfig', | 40 | 'FunctionScoreConfig', |
| 43 | 'RerankConfig', | 41 | 'RerankConfig', |
| 44 | 42 |
config/config.yaml
| @@ -100,11 +100,6 @@ query_config: | @@ -100,11 +100,6 @@ query_config: | ||
| 100 | # KNN boost配置(向量召回的boost值) | 100 | # KNN boost配置(向量召回的boost值) |
| 101 | knn_boost: 0.25 # Lower boost for embedding recall | 101 | knn_boost: 0.25 # Lower boost for embedding recall |
| 102 | 102 | ||
| 103 | -# Ranking Configuration(排序配置) | ||
| 104 | -ranking: | ||
| 105 | - expression: "bm25() + 0.25*text_embedding_relevance()" | ||
| 106 | - description: "BM25 text relevance combined with semantic embedding similarity" | ||
| 107 | - | ||
| 108 | # Function Score配置(ES层打分规则) | 103 | # Function Score配置(ES层打分规则) |
| 109 | function_score: | 104 | function_score: |
| 110 | score_mode: "sum" | 105 | score_mode: "sum" |
config/config_loader.py
| @@ -97,13 +97,6 @@ class FunctionScoreConfig: | @@ -97,13 +97,6 @@ class FunctionScoreConfig: | ||
| 97 | 97 | ||
| 98 | 98 | ||
| 99 | @dataclass | 99 | @dataclass |
| 100 | -class RankingConfig: | ||
| 101 | - """Configuration for ranking expressions.""" | ||
| 102 | - expression: str = "bm25()" | ||
| 103 | - description: str = "Default BM25 ranking" | ||
| 104 | - | ||
| 105 | - | ||
| 106 | -@dataclass | ||
| 107 | class RerankConfig: | 100 | class RerankConfig: |
| 108 | """重排配置(provider/URL 在 services.rerank)""" | 101 | """重排配置(provider/URL 在 services.rerank)""" |
| 109 | enabled: bool = True | 102 | enabled: bool = True |
| @@ -128,9 +121,6 @@ class SearchConfig: | @@ -128,9 +121,6 @@ class SearchConfig: | ||
| 128 | # Query processing | 121 | # Query processing |
| 129 | query_config: QueryConfig | 122 | query_config: QueryConfig |
| 130 | 123 | ||
| 131 | - # Ranking configuration | ||
| 132 | - ranking: RankingConfig | ||
| 133 | - | ||
| 134 | # Function Score configuration (ES层打分) | 124 | # Function Score configuration (ES层打分) |
| 135 | function_score: FunctionScoreConfig | 125 | function_score: FunctionScoreConfig |
| 136 | 126 | ||
| @@ -293,13 +283,6 @@ class ConfigLoader: | @@ -293,13 +283,6 @@ class ConfigLoader: | ||
| 293 | tie_breaker_keywords=float(text_strategy_cfg.get("tie_breaker_keywords", 0.9)), | 283 | tie_breaker_keywords=float(text_strategy_cfg.get("tie_breaker_keywords", 0.9)), |
| 294 | ) | 284 | ) |
| 295 | 285 | ||
| 296 | - # Parse ranking config | ||
| 297 | - ranking_data = config_data.get("ranking", {}) | ||
| 298 | - ranking = RankingConfig( | ||
| 299 | - expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()", | ||
| 300 | - description=ranking_data.get("description") or "Default BM25 + text embedding ranking" | ||
| 301 | - ) | ||
| 302 | - | ||
| 303 | # Parse Function Score configuration | 286 | # Parse Function Score configuration |
| 304 | fs_data = config_data.get("function_score", {}) | 287 | fs_data = config_data.get("function_score", {}) |
| 305 | function_score = FunctionScoreConfig( | 288 | function_score = FunctionScoreConfig( |
| @@ -336,7 +319,6 @@ class ConfigLoader: | @@ -336,7 +319,6 @@ class ConfigLoader: | ||
| 336 | field_boosts=field_boosts, | 319 | field_boosts=field_boosts, |
| 337 | indexes=indexes, | 320 | indexes=indexes, |
| 338 | query_config=query_config, | 321 | query_config=query_config, |
| 339 | - ranking=ranking, | ||
| 340 | function_score=function_score, | 322 | function_score=function_score, |
| 341 | rerank=rerank, | 323 | rerank=rerank, |
| 342 | spu_config=spu_config, | 324 | spu_config=spu_config, |
| @@ -510,10 +492,6 @@ class ConfigLoader: | @@ -510,10 +492,6 @@ class ConfigLoader: | ||
| 510 | "field_boosts": config.field_boosts, | 492 | "field_boosts": config.field_boosts, |
| 511 | "indexes": [self._index_to_dict(index) for index in config.indexes], | 493 | "indexes": [self._index_to_dict(index) for index in config.indexes], |
| 512 | "query_config": query_config_dict, | 494 | "query_config": query_config_dict, |
| 513 | - "ranking": { | ||
| 514 | - "expression": config.ranking.expression, | ||
| 515 | - "description": config.ranking.description | ||
| 516 | - }, | ||
| 517 | "function_score": { | 495 | "function_score": { |
| 518 | "score_mode": config.function_score.score_mode, | 496 | "score_mode": config.function_score.score_mode, |
| 519 | "boost_mode": config.function_score.boost_mode, | 497 | "boost_mode": config.function_score.boost_mode, |
docs/QUICKSTART.md
| @@ -357,7 +357,6 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: | @@ -357,7 +357,6 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: | ||
| 357 | - `query_config.search_fields`:动态多语言检索字段(multilingual/shared/core) | 357 | - `query_config.search_fields`:动态多语言检索字段(multilingual/shared/core) |
| 358 | - `query_config.text_query_strategy`:文本召回策略参数(minimum_should_match、翻译boost、翻译失败原文兜底boost等) | 358 | - `query_config.text_query_strategy`:文本召回策略参数(minimum_should_match、翻译boost、翻译失败原文兜底boost等) |
| 359 | - `query_config`:语言、embedding 开关、source_fields、knn_boost、翻译提示词等 | 359 | - `query_config`:语言、embedding 开关、source_fields、knn_boost、翻译提示词等 |
| 360 | -- `ranking.expression`:融合表达式(例如 `bm25() + 0.25*text_embedding_relevance()`) | ||
| 361 | - `function_score`:ES 层加权函数 | 360 | - `function_score`:ES 层加权函数 |
| 362 | - `rerank`:重排窗口、超时、ES/AI 融合权重 | 361 | - `rerank`:重排窗口、超时、ES/AI 融合权重 |
| 363 | 362 |
docs/搜索API对接指南.md
| @@ -1622,7 +1622,6 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ | @@ -1622,7 +1622,6 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ | ||
| 1622 | "num_fields": 20, | 1622 | "num_fields": 20, |
| 1623 | "num_indexes": 4, | 1623 | "num_indexes": 4, |
| 1624 | "supported_languages": ["zh", "en", "ru"], | 1624 | "supported_languages": ["zh", "en", "ru"], |
| 1625 | - "ranking_expression": "bm25() + 0.2*text_embedding_relevance()", | ||
| 1626 | "spu_enabled": false | 1625 | "spu_enabled": false |
| 1627 | } | 1626 | } |
| 1628 | ``` | 1627 | ``` |
tests/ci/test_service_api_contracts.py
| @@ -152,7 +152,7 @@ class _FakeBulkService: | @@ -152,7 +152,7 @@ class _FakeBulkService: | ||
| 152 | 152 | ||
| 153 | 153 | ||
| 154 | class _FakeTransformer: | 154 | class _FakeTransformer: |
| 155 | - def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options): | 155 | + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options, **kwargs): |
| 156 | return { | 156 | return { |
| 157 | "tenant_id": tenant_id, | 157 | "tenant_id": tenant_id, |
| 158 | "spu_id": str(spu_row.get("id", "0")), | 158 | "spu_id": str(spu_row.get("id", "0")), |
| @@ -545,7 +545,7 @@ class _FakeTextModel: | @@ -545,7 +545,7 @@ class _FakeTextModel: | ||
| 545 | 545 | ||
| 546 | 546 | ||
| 547 | class _FakeImageModel: | 547 | class _FakeImageModel: |
| 548 | - def encode_image_urls(self, urls, batch_size=8): | 548 | + def encode_image_urls(self, urls, batch_size=8, normalize_embeddings=True): |
| 549 | return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] | 549 | return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] |
| 550 | 550 | ||
| 551 | 551 |
tests/conftest.py
| @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock | @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock | ||
| 15 | project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | 15 | project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
| 16 | sys.path.insert(0, project_root) | 16 | sys.path.insert(0, project_root) |
| 17 | 17 | ||
| 18 | -from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig | 18 | +from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, FunctionScoreConfig, RerankConfig |
| 19 | from utils.es_client import ESClient | 19 | from utils.es_client import ESClient |
| 20 | from search import Searcher | 20 | from search import Searcher |
| 21 | from query import QueryParser | 21 | from query import QueryParser |
| @@ -48,11 +48,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: | @@ -48,11 +48,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: | ||
| 48 | inner_hits_size=3 | 48 | inner_hits_size=3 |
| 49 | ) | 49 | ) |
| 50 | 50 | ||
| 51 | - ranking_config = RankingConfig( | ||
| 52 | - expression="static_bm25() + text_embedding_relevance() * 0.2", | ||
| 53 | - description="Test ranking" | ||
| 54 | - ) | ||
| 55 | - | ||
| 56 | function_score_config = FunctionScoreConfig() | 51 | function_score_config = FunctionScoreConfig() |
| 57 | rerank_config = RerankConfig() | 52 | rerank_config = RerankConfig() |
| 58 | 53 | ||
| @@ -67,7 +62,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: | @@ -67,7 +62,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: | ||
| 67 | }, | 62 | }, |
| 68 | indexes=[sample_index_config], | 63 | indexes=[sample_index_config], |
| 69 | query_config=query_config, | 64 | query_config=query_config, |
| 70 | - ranking=ranking_config, | ||
| 71 | function_score=function_score_config, | 65 | function_score=function_score_config, |
| 72 | rerank=rerank_config, | 66 | rerank=rerank_config, |
| 73 | spu_config=spu_config | 67 | spu_config=spu_config |
tests/test_embedding_pipeline.py
| @@ -8,7 +8,6 @@ from config import ( | @@ -8,7 +8,6 @@ from config import ( | ||
| 8 | FunctionScoreConfig, | 8 | FunctionScoreConfig, |
| 9 | IndexConfig, | 9 | IndexConfig, |
| 10 | QueryConfig, | 10 | QueryConfig, |
| 11 | - RankingConfig, | ||
| 12 | RerankConfig, | 11 | RerankConfig, |
| 13 | SPUConfig, | 12 | SPUConfig, |
| 14 | SearchConfig, | 13 | SearchConfig, |
| @@ -82,7 +81,7 @@ def _build_test_config() -> SearchConfig: | @@ -82,7 +81,7 @@ def _build_test_config() -> SearchConfig: | ||
| 82 | text_embedding_field="title_embedding", | 81 | text_embedding_field="title_embedding", |
| 83 | image_embedding_field=None, | 82 | image_embedding_field=None, |
| 84 | ), | 83 | ), |
| 85 | - ranking=RankingConfig(expression="bm25()", description="test"), | 84 | + function_score=FunctionScoreConfig(), |
| 86 | function_score=FunctionScoreConfig(), | 85 | function_score=FunctionScoreConfig(), |
| 87 | rerank=RerankConfig(), | 86 | rerank=RerankConfig(), |
| 88 | spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3), | 87 | spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3), |
| @@ -97,7 +96,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch): | @@ -97,7 +96,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch): | ||
| 97 | fake_redis = _FakeRedis() | 96 | fake_redis = _FakeRedis() |
| 98 | monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) | 97 | monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) |
| 99 | 98 | ||
| 100 | - def _fake_post(url, json, timeout): | 99 | + def _fake_post(url, json, timeout, **kwargs): |
| 101 | assert url.endswith("/embed/text") | 100 | assert url.endswith("/embed/text") |
| 102 | assert json == ["hello", "world"] | 101 | assert json == ["hello", "world"] |
| 103 | return _FakeResponse([[0.1, 0.2], [0.3, 0.4]]) | 102 | return _FakeResponse([[0.1, 0.2], [0.3, 0.4]]) |
| @@ -118,7 +117,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch): | @@ -118,7 +117,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch): | ||
| 118 | fake_redis = _FakeRedis() | 117 | fake_redis = _FakeRedis() |
| 119 | monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) | 118 | monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) |
| 120 | 119 | ||
| 121 | - def _fake_post(url, json, timeout): | 120 | + def _fake_post(url, json, timeout, **kwargs): |
| 122 | return _FakeResponse([[0.1, 0.2], None]) | 121 | return _FakeResponse([[0.1, 0.2], None]) |
| 123 | 122 | ||
| 124 | monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post) | 123 | monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post) |
| @@ -136,7 +135,7 @@ def test_text_embedding_encoder_cache_hit(monkeypatch): | @@ -136,7 +135,7 @@ def test_text_embedding_encoder_cache_hit(monkeypatch): | ||
| 136 | 135 | ||
| 137 | calls = {"count": 0} | 136 | calls = {"count": 0} |
| 138 | 137 | ||
| 139 | - def _fake_post(url, json, timeout): | 138 | + def _fake_post(url, json, timeout, **kwargs): |
| 140 | calls["count"] += 1 | 139 | calls["count"] += 1 |
| 141 | return _FakeResponse([[0.3, 0.4]]) | 140 | return _FakeResponse([[0.3, 0.4]]) |
| 142 | 141 |
tests/test_search_rerank_window.py
| @@ -12,7 +12,6 @@ from config import ( | @@ -12,7 +12,6 @@ from config import ( | ||
| 12 | FunctionScoreConfig, | 12 | FunctionScoreConfig, |
| 13 | IndexConfig, | 13 | IndexConfig, |
| 14 | QueryConfig, | 14 | QueryConfig, |
| 15 | - RankingConfig, | ||
| 16 | RerankConfig, | 15 | RerankConfig, |
| 17 | SPUConfig, | 16 | SPUConfig, |
| 18 | SearchConfig, | 17 | SearchConfig, |
| @@ -141,7 +140,6 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38 | @@ -141,7 +140,6 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38 | ||
| 141 | field_boosts={"title.en": 3.0}, | 140 | field_boosts={"title.en": 3.0}, |
| 142 | indexes=[IndexConfig(name="default", label="default", fields=["title.en"])], | 141 | indexes=[IndexConfig(name="default", label="default", fields=["title.en"])], |
| 143 | query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False), | 142 | query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False), |
| 144 | - ranking=RankingConfig(), | ||
| 145 | function_score=FunctionScoreConfig(), | 143 | function_score=FunctionScoreConfig(), |
| 146 | rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), | 144 | rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), |
| 147 | spu_config=SPUConfig(enabled=False), | 145 | spu_config=SPUConfig(enabled=False), |
| @@ -169,7 +167,6 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): | @@ -169,7 +167,6 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): | ||
| 169 | "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}], | 167 | "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}], |
| 170 | "query_config": {"supported_languages": ["en"], "default_language": "en"}, | 168 | "query_config": {"supported_languages": ["en"], "default_language": "en"}, |
| 171 | "spu_config": {"enabled": False}, | 169 | "spu_config": {"enabled": False}, |
| 172 | - "ranking": {"expression": "bm25()", "description": "test"}, | ||
| 173 | "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []}, | 170 | "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []}, |
| 174 | "rerank": {"rerank_window": 384}, | 171 | "rerank": {"rerank_window": 384}, |
| 175 | } | 172 | } |
| @@ -211,9 +208,11 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): | @@ -211,9 +208,11 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): | ||
| 211 | ) | 208 | ) |
| 212 | 209 | ||
| 213 | assert called["count"] == 1 | 210 | assert called["count"] == 1 |
| 214 | - assert called["docs"] == 1000 | 211 | + # 应当对配置的 rerank_window 条文档做重排预取 |
| 212 | + window = searcher.config.rerank.rerank_window | ||
| 213 | + assert called["docs"] == window | ||
| 215 | assert es_client.calls[0]["from_"] == 0 | 214 | assert es_client.calls[0]["from_"] == 0 |
| 216 | - assert es_client.calls[0]["size"] == 1000 | 215 | + assert es_client.calls[0]["size"] == window |
| 217 | assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]} | 216 | assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]} |
| 218 | assert len(es_client.calls) == 2 | 217 | assert len(es_client.calls) == 2 |
| 219 | assert es_client.calls[1]["size"] == 10 | 218 | assert es_client.calls[1]["size"] == 10 |