Commit 77ab67adeae5b5b41985c5f24abf93311682d320

Authored by tangwang
1 parent 2260eed2

更新测试用例

.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md
@@ -187,11 +187,8 @@ class RerankEngine: @@ -187,11 +187,8 @@ class RerankEngine:
187 用途:复杂的自定义排序逻辑、实时个性化等 187 用途:复杂的自定义排序逻辑、实时个性化等
188 """ 188 """
189 189
190 - def __init__(self, ranking_expression: str, enabled: bool = False): 190 + def __init__(self, enabled: bool = False):
191 self.enabled = enabled 191 self.enabled = enabled
192 - self.ranking_expression = ranking_expression  
193 - if enabled:  
194 - self.parsed_terms = self._parse_expression(ranking_expression)  
195 ``` 192 ```
196 193
197 #### `/home/tw/saas-search/search/__init__.py` 194 #### `/home/tw/saas-search/search/__init__.py`
@@ -206,14 +203,6 @@ from .rerank_engine import RerankEngine # 原 RankingEngine @@ -206,14 +203,6 @@ from .rerank_engine import RerankEngine # 原 RankingEngine
206 203
207 **修改初始化**(约88行): 204 **修改初始化**(约88行):
208 205
209 -```python  
210 -# 改为RerankEngine,默认禁用  
211 -self.rerank_engine = RerankEngine(  
212 - config.ranking.expression,  
213 - enabled=False # 暂时禁用  
214 -)  
215 -```  
216 -  
217 **修改search方法中的rerank逻辑**(约356-383行): 206 **修改search方法中的rerank逻辑**(约356-383行):
218 207
219 ```python 208 ```python
@@ -295,7 +284,6 @@ class FunctionScoreConfig: @@ -295,7 +284,6 @@ class FunctionScoreConfig:
295 @dataclass 284 @dataclass
296 class TenantConfig: 285 class TenantConfig:
297 # ... 其他字段 ... 286 # ... 其他字段 ...
298 - ranking: RankingConfig # 保留用于兼容  
299 rerank: RerankConfig # 新增 287 rerank: RerankConfig # 新增
300 function_score: FunctionScoreConfig # 新增 288 function_score: FunctionScoreConfig # 新增
301 ``` 289 ```
@@ -327,11 +315,18 @@ curl -X POST http://localhost:6002/search/ \ @@ -327,11 +315,18 @@ curl -X POST http://localhost:6002/search/ \
327 315
328 ### 4. 配置迁移 316 ### 4. 配置迁移
329 317
330 -对于现有的`ranking.expression`配置,建议: 318 +```python
  319 +# 改为RerankEngine,默认禁用
  320 +self.rerank_engine = RerankEngine(
  321 + enabled=False # 暂时禁用
  322 +)
  323 +```
  324 +
  325 +
  326 +对于现有的排序/打分配置,建议:
331 327
332 -- 保留`ranking`配置用于文档说明  
333 -- 新增`rerank.enabled=false`明确禁用状态  
334 -- 新增`function_score`配置用于ES层打分 328 +- 使用 `function_score` 配置 ES 层打分
  329 +- 使用 `rerank` 配置控制本地/AI 重排行为
335 330
336 ### 5. 后续优化空间 331 ### 5. 后续优化空间
337 332
@@ -588,8 +588,7 @@ python main.py search "query" --tenant-id 1 # Quick search test @@ -588,8 +588,7 @@ python main.py search "query" --tenant-id 1 # Quick search test
588 1. **Modifying Search Behavior**: Edit `config/config.yaml` 588 1. **Modifying Search Behavior**: Edit `config/config.yaml`
589 2. **Changing Index Structure**: Update `mappings/search_products.json` 589 2. **Changing Index Structure**: Update `mappings/search_products.json`
590 3. **Adding New Filters**: Extend `api/models.py` with new Pydantic models 590 3. **Adding New Filters**: Extend `api/models.py` with new Pydantic models
591 -4. **Updating Ranking**: Modify `ranking.expression` in config  
592 -5. **Testing Queries**: Use frontend UI at http://localhost:6003 591 +4. **Testing Queries**: Use frontend UI at http://localhost:6003
593 592
594 ## Key Implementation Details 593 ## Key Implementation Details
595 594
api/routes/admin.py
@@ -56,7 +56,6 @@ async def get_configuration(): @@ -56,7 +56,6 @@ async def get_configuration():
56 "shared_fields": config.query_config.shared_fields, 56 "shared_fields": config.query_config.shared_fields,
57 "core_multilingual_fields": config.query_config.core_multilingual_fields, 57 "core_multilingual_fields": config.query_config.core_multilingual_fields,
58 "supported_languages": config.query_config.supported_languages, 58 "supported_languages": config.query_config.supported_languages,
59 - "ranking_expression": config.ranking.expression,  
60 "spu_enabled": config.spu_config.enabled 59 "spu_enabled": config.spu_config.enabled
61 } 60 }
62 61
config/__init__.py
@@ -9,7 +9,6 @@ from .config_loader import ( @@ -9,7 +9,6 @@ from .config_loader import (
9 QueryConfig, 9 QueryConfig,
10 IndexConfig, 10 IndexConfig,
11 SPUConfig, 11 SPUConfig,
12 - RankingConfig,  
13 FunctionScoreConfig, 12 FunctionScoreConfig,
14 RerankConfig, 13 RerankConfig,
15 ConfigLoader, 14 ConfigLoader,
@@ -38,7 +37,6 @@ __all__ = [ @@ -38,7 +37,6 @@ __all__ = [
38 'QueryConfig', 37 'QueryConfig',
39 'IndexConfig', 38 'IndexConfig',
40 'SPUConfig', 39 'SPUConfig',
41 - 'RankingConfig',  
42 'FunctionScoreConfig', 40 'FunctionScoreConfig',
43 'RerankConfig', 41 'RerankConfig',
44 42
config/config.yaml
@@ -100,11 +100,6 @@ query_config: @@ -100,11 +100,6 @@ query_config:
100 # KNN boost配置(向量召回的boost值) 100 # KNN boost配置(向量召回的boost值)
101 knn_boost: 0.25 # Lower boost for embedding recall 101 knn_boost: 0.25 # Lower boost for embedding recall
102 102
103 -# Ranking Configuration(排序配置)  
104 -ranking:  
105 - expression: "bm25() + 0.25*text_embedding_relevance()"  
106 - description: "BM25 text relevance combined with semantic embedding similarity"  
107 -  
108 # Function Score配置(ES层打分规则) 103 # Function Score配置(ES层打分规则)
109 function_score: 104 function_score:
110 score_mode: "sum" 105 score_mode: "sum"
config/config_loader.py
@@ -97,13 +97,6 @@ class FunctionScoreConfig: @@ -97,13 +97,6 @@ class FunctionScoreConfig:
97 97
98 98
99 @dataclass 99 @dataclass
100 -class RankingConfig:  
101 - """Configuration for ranking expressions."""  
102 - expression: str = "bm25()"  
103 - description: str = "Default BM25 ranking"  
104 -  
105 -  
106 -@dataclass  
107 class RerankConfig: 100 class RerankConfig:
108 """重排配置(provider/URL 在 services.rerank)""" 101 """重排配置(provider/URL 在 services.rerank)"""
109 enabled: bool = True 102 enabled: bool = True
@@ -128,9 +121,6 @@ class SearchConfig: @@ -128,9 +121,6 @@ class SearchConfig:
128 # Query processing 121 # Query processing
129 query_config: QueryConfig 122 query_config: QueryConfig
130 123
131 - # Ranking configuration  
132 - ranking: RankingConfig  
133 -  
134 # Function Score configuration (ES层打分) 124 # Function Score configuration (ES层打分)
135 function_score: FunctionScoreConfig 125 function_score: FunctionScoreConfig
136 126
@@ -293,13 +283,6 @@ class ConfigLoader: @@ -293,13 +283,6 @@ class ConfigLoader:
293 tie_breaker_keywords=float(text_strategy_cfg.get("tie_breaker_keywords", 0.9)), 283 tie_breaker_keywords=float(text_strategy_cfg.get("tie_breaker_keywords", 0.9)),
294 ) 284 )
295 285
296 - # Parse ranking config  
297 - ranking_data = config_data.get("ranking", {})  
298 - ranking = RankingConfig(  
299 - expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()",  
300 - description=ranking_data.get("description") or "Default BM25 + text embedding ranking"  
301 - )  
302 -  
303 # Parse Function Score configuration 286 # Parse Function Score configuration
304 fs_data = config_data.get("function_score", {}) 287 fs_data = config_data.get("function_score", {})
305 function_score = FunctionScoreConfig( 288 function_score = FunctionScoreConfig(
@@ -336,7 +319,6 @@ class ConfigLoader: @@ -336,7 +319,6 @@ class ConfigLoader:
336 field_boosts=field_boosts, 319 field_boosts=field_boosts,
337 indexes=indexes, 320 indexes=indexes,
338 query_config=query_config, 321 query_config=query_config,
339 - ranking=ranking,  
340 function_score=function_score, 322 function_score=function_score,
341 rerank=rerank, 323 rerank=rerank,
342 spu_config=spu_config, 324 spu_config=spu_config,
@@ -510,10 +492,6 @@ class ConfigLoader: @@ -510,10 +492,6 @@ class ConfigLoader:
510 "field_boosts": config.field_boosts, 492 "field_boosts": config.field_boosts,
511 "indexes": [self._index_to_dict(index) for index in config.indexes], 493 "indexes": [self._index_to_dict(index) for index in config.indexes],
512 "query_config": query_config_dict, 494 "query_config": query_config_dict,
513 - "ranking": {  
514 - "expression": config.ranking.expression,  
515 - "description": config.ranking.description  
516 - },  
517 "function_score": { 495 "function_score": {
518 "score_mode": config.function_score.score_mode, 496 "score_mode": config.function_score.score_mode,
519 "boost_mode": config.function_score.boost_mode, 497 "boost_mode": config.function_score.boost_mode,
docs/QUICKSTART.md
@@ -357,7 +357,6 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: @@ -357,7 +357,6 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源:
357 - `query_config.search_fields`:动态多语言检索字段(multilingual/shared/core) 357 - `query_config.search_fields`:动态多语言检索字段(multilingual/shared/core)
358 - `query_config.text_query_strategy`:文本召回策略参数(minimum_should_match、翻译boost、翻译失败原文兜底boost等) 358 - `query_config.text_query_strategy`:文本召回策略参数(minimum_should_match、翻译boost、翻译失败原文兜底boost等)
359 - `query_config`:语言、embedding 开关、source_fields、knn_boost、翻译提示词等 359 - `query_config`:语言、embedding 开关、source_fields、knn_boost、翻译提示词等
360 -- `ranking.expression`:融合表达式(例如 `bm25() + 0.25*text_embedding_relevance()`)  
361 - `function_score`:ES 层加权函数 360 - `function_score`:ES 层加权函数
362 - `rerank`:重排窗口、超时、ES/AI 融合权重 361 - `rerank`:重排窗口、超时、ES/AI 融合权重
363 362
docs/搜索API对接指南.md
@@ -1622,7 +1622,6 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ @@ -1622,7 +1622,6 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \
1622 "num_fields": 20, 1622 "num_fields": 20,
1623 "num_indexes": 4, 1623 "num_indexes": 4,
1624 "supported_languages": ["zh", "en", "ru"], 1624 "supported_languages": ["zh", "en", "ru"],
1625 - "ranking_expression": "bm25() + 0.2*text_embedding_relevance()",  
1626 "spu_enabled": false 1625 "spu_enabled": false
1627 } 1626 }
1628 ``` 1627 ```
tests/ci/test_service_api_contracts.py
@@ -152,7 +152,7 @@ class _FakeBulkService: @@ -152,7 +152,7 @@ class _FakeBulkService:
152 152
153 153
154 class _FakeTransformer: 154 class _FakeTransformer:
155 - def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options): 155 + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options, **kwargs):
156 return { 156 return {
157 "tenant_id": tenant_id, 157 "tenant_id": tenant_id,
158 "spu_id": str(spu_row.get("id", "0")), 158 "spu_id": str(spu_row.get("id", "0")),
@@ -545,7 +545,7 @@ class _FakeTextModel: @@ -545,7 +545,7 @@ class _FakeTextModel:
545 545
546 546
547 class _FakeImageModel: 547 class _FakeImageModel:
548 - def encode_image_urls(self, urls, batch_size=8): 548 + def encode_image_urls(self, urls, batch_size=8, normalize_embeddings=True):
549 return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] 549 return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls]
550 550
551 551
@@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock
15 project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 15 project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
16 sys.path.insert(0, project_root) 16 sys.path.insert(0, project_root)
17 17
18 -from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig 18 +from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, FunctionScoreConfig, RerankConfig
19 from utils.es_client import ESClient 19 from utils.es_client import ESClient
20 from search import Searcher 20 from search import Searcher
21 from query import QueryParser 21 from query import QueryParser
@@ -48,11 +48,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: @@ -48,11 +48,6 @@ def sample_search_config(sample_index_config) -> SearchConfig:
48 inner_hits_size=3 48 inner_hits_size=3
49 ) 49 )
50 50
51 - ranking_config = RankingConfig(  
52 - expression="static_bm25() + text_embedding_relevance() * 0.2",  
53 - description="Test ranking"  
54 - )  
55 -  
56 function_score_config = FunctionScoreConfig() 51 function_score_config = FunctionScoreConfig()
57 rerank_config = RerankConfig() 52 rerank_config = RerankConfig()
58 53
@@ -67,7 +62,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: @@ -67,7 +62,6 @@ def sample_search_config(sample_index_config) -> SearchConfig:
67 }, 62 },
68 indexes=[sample_index_config], 63 indexes=[sample_index_config],
69 query_config=query_config, 64 query_config=query_config,
70 - ranking=ranking_config,  
71 function_score=function_score_config, 65 function_score=function_score_config,
72 rerank=rerank_config, 66 rerank=rerank_config,
73 spu_config=spu_config 67 spu_config=spu_config
tests/test_embedding_pipeline.py
@@ -8,7 +8,6 @@ from config import ( @@ -8,7 +8,6 @@ from config import (
8 FunctionScoreConfig, 8 FunctionScoreConfig,
9 IndexConfig, 9 IndexConfig,
10 QueryConfig, 10 QueryConfig,
11 - RankingConfig,  
12 RerankConfig, 11 RerankConfig,
13 SPUConfig, 12 SPUConfig,
14 SearchConfig, 13 SearchConfig,
@@ -82,7 +81,7 @@ def _build_test_config() -> SearchConfig: @@ -82,7 +81,7 @@ def _build_test_config() -> SearchConfig:
82 text_embedding_field="title_embedding", 81 text_embedding_field="title_embedding",
83 image_embedding_field=None, 82 image_embedding_field=None,
84 ), 83 ),
85 - ranking=RankingConfig(expression="bm25()", description="test"), 84 + function_score=FunctionScoreConfig(),
86 function_score=FunctionScoreConfig(), 85 function_score=FunctionScoreConfig(),
87 rerank=RerankConfig(), 86 rerank=RerankConfig(),
88 spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3), 87 spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3),
@@ -97,7 +96,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch): @@ -97,7 +96,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch):
97 fake_redis = _FakeRedis() 96 fake_redis = _FakeRedis()
98 monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) 97 monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis)
99 98
100 - def _fake_post(url, json, timeout): 99 + def _fake_post(url, json, timeout, **kwargs):
101 assert url.endswith("/embed/text") 100 assert url.endswith("/embed/text")
102 assert json == ["hello", "world"] 101 assert json == ["hello", "world"]
103 return _FakeResponse([[0.1, 0.2], [0.3, 0.4]]) 102 return _FakeResponse([[0.1, 0.2], [0.3, 0.4]])
@@ -118,7 +117,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch): @@ -118,7 +117,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch):
118 fake_redis = _FakeRedis() 117 fake_redis = _FakeRedis()
119 monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) 118 monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis)
120 119
121 - def _fake_post(url, json, timeout): 120 + def _fake_post(url, json, timeout, **kwargs):
122 return _FakeResponse([[0.1, 0.2], None]) 121 return _FakeResponse([[0.1, 0.2], None])
123 122
124 monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post) 123 monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post)
@@ -136,7 +135,7 @@ def test_text_embedding_encoder_cache_hit(monkeypatch): @@ -136,7 +135,7 @@ def test_text_embedding_encoder_cache_hit(monkeypatch):
136 135
137 calls = {"count": 0} 136 calls = {"count": 0}
138 137
139 - def _fake_post(url, json, timeout): 138 + def _fake_post(url, json, timeout, **kwargs):
140 calls["count"] += 1 139 calls["count"] += 1
141 return _FakeResponse([[0.3, 0.4]]) 140 return _FakeResponse([[0.3, 0.4]])
142 141
tests/test_search_rerank_window.py
@@ -12,7 +12,6 @@ from config import ( @@ -12,7 +12,6 @@ from config import (
12 FunctionScoreConfig, 12 FunctionScoreConfig,
13 IndexConfig, 13 IndexConfig,
14 QueryConfig, 14 QueryConfig,
15 - RankingConfig,  
16 RerankConfig, 15 RerankConfig,
17 SPUConfig, 16 SPUConfig,
18 SearchConfig, 17 SearchConfig,
@@ -141,7 +140,6 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38 @@ -141,7 +140,6 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38
141 field_boosts={"title.en": 3.0}, 140 field_boosts={"title.en": 3.0},
142 indexes=[IndexConfig(name="default", label="default", fields=["title.en"])], 141 indexes=[IndexConfig(name="default", label="default", fields=["title.en"])],
143 query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False), 142 query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False),
144 - ranking=RankingConfig(),  
145 function_score=FunctionScoreConfig(), 143 function_score=FunctionScoreConfig(),
146 rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), 144 rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window),
147 spu_config=SPUConfig(enabled=False), 145 spu_config=SPUConfig(enabled=False),
@@ -169,7 +167,6 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): @@ -169,7 +167,6 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
169 "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}], 167 "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}],
170 "query_config": {"supported_languages": ["en"], "default_language": "en"}, 168 "query_config": {"supported_languages": ["en"], "default_language": "en"},
171 "spu_config": {"enabled": False}, 169 "spu_config": {"enabled": False},
172 - "ranking": {"expression": "bm25()", "description": "test"},  
173 "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []}, 170 "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []},
174 "rerank": {"rerank_window": 384}, 171 "rerank": {"rerank_window": 384},
175 } 172 }
@@ -211,9 +208,11 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): @@ -211,9 +208,11 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
211 ) 208 )
212 209
213 assert called["count"] == 1 210 assert called["count"] == 1
214 - assert called["docs"] == 1000 211 + # 应当对配置的 rerank_window 条文档做重排预取
  212 + window = searcher.config.rerank.rerank_window
  213 + assert called["docs"] == window
215 assert es_client.calls[0]["from_"] == 0 214 assert es_client.calls[0]["from_"] == 0
216 - assert es_client.calls[0]["size"] == 1000 215 + assert es_client.calls[0]["size"] == window
217 assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]} 216 assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]}
218 assert len(es_client.calls) == 2 217 assert len(es_client.calls) == 2
219 assert es_client.calls[1]["size"] == 10 218 assert es_client.calls[1]["size"] == 10