diff --git a/.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md b/.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md index 86d2b72..37389f3 100644 --- a/.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md +++ b/.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md @@ -187,11 +187,8 @@ class RerankEngine: 用途:复杂的自定义排序逻辑、实时个性化等 """ - def __init__(self, ranking_expression: str, enabled: bool = False): + def __init__(self, enabled: bool = False): self.enabled = enabled - self.ranking_expression = ranking_expression - if enabled: - self.parsed_terms = self._parse_expression(ranking_expression) ``` #### `/home/tw/saas-search/search/__init__.py` @@ -206,14 +203,13 @@ from .rerank_engine import RerankEngine # 原 RankingEngine **修改初始化**(约88行): ```python # 改为RerankEngine,默认禁用 self.rerank_engine = RerankEngine( - config.ranking.expression, enabled=False # 暂时禁用 ) ``` **修改search方法中的rerank逻辑**(约356-383行): ```python @@ -295,7 +291,6 @@ class FunctionScoreConfig: @dataclass class TenantConfig: # ... 其他字段 ... - ranking: RankingConfig # 保留用于兼容 rerank: RerankConfig # 新增 function_score: FunctionScoreConfig # 新增 ``` @@ -327,11 +322,10 @@ curl -X POST http://localhost:6002/search/ \ ### 4. 配置迁移 -对于现有的`ranking.expression`配置,建议: +对于现有的排序/打分配置,建议: -- 保留`ranking`配置用于文档说明 -- 新增`rerank.enabled=false`明确禁用状态 -- 新增`function_score`配置用于ES层打分 +- 使用 `function_score` 配置 ES 层打分 +- 使用 `rerank` 配置控制本地/AI 重排行为 ### 5. 后续优化空间 diff --git a/CLAUDE.md b/CLAUDE.md index 6d30d42..810cbb1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -588,8 +588,7 @@ python main.py search "query" --tenant-id 1 # Quick search test 1. **Modifying Search Behavior**: Edit `config/config.yaml` 2. **Changing Index Structure**: Update `mappings/search_products.json` 3. **Adding New Filters**: Extend `api/models.py` with new Pydantic models -4.
**Updating Ranking**: Modify `ranking.expression` in config -5. **Testing Queries**: Use frontend UI at http://localhost:6003 +4. **Testing Queries**: Use frontend UI at http://localhost:6003 ## Key Implementation Details diff --git a/api/routes/admin.py b/api/routes/admin.py index 2ff27db..a1fe373 100644 --- a/api/routes/admin.py +++ b/api/routes/admin.py @@ -56,7 +56,6 @@ async def get_configuration(): "shared_fields": config.query_config.shared_fields, "core_multilingual_fields": config.query_config.core_multilingual_fields, "supported_languages": config.query_config.supported_languages, - "ranking_expression": config.ranking.expression, "spu_enabled": config.spu_config.enabled } diff --git a/config/__init__.py b/config/__init__.py index d9e58f3..ee693e9 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -9,7 +9,6 @@ from .config_loader import ( QueryConfig, IndexConfig, SPUConfig, - RankingConfig, FunctionScoreConfig, RerankConfig, ConfigLoader, @@ -38,7 +37,6 @@ __all__ = [ 'QueryConfig', 'IndexConfig', 'SPUConfig', - 'RankingConfig', 'FunctionScoreConfig', 'RerankConfig', diff --git a/config/config.yaml b/config/config.yaml index 67f2204..8d29fd7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -100,11 +100,6 @@ query_config: # KNN boost配置(向量召回的boost值) knn_boost: 0.25 # Lower boost for embedding recall -# Ranking Configuration(排序配置) -ranking: - expression: "bm25() + 0.25*text_embedding_relevance()" - description: "BM25 text relevance combined with semantic embedding similarity" - # Function Score配置(ES层打分规则) function_score: score_mode: "sum" diff --git a/config/config_loader.py b/config/config_loader.py index 29614d5..9de8578 100644 --- a/config/config_loader.py +++ b/config/config_loader.py @@ -97,13 +97,6 @@ class FunctionScoreConfig: @dataclass -class RankingConfig: - """Configuration for ranking expressions.""" - expression: str = "bm25()" - description: str = "Default BM25 ranking" - - -@dataclass class RerankConfig: 
"""重排配置(provider/URL 在 services.rerank)""" enabled: bool = True @@ -128,9 +121,6 @@ class SearchConfig: # Query processing query_config: QueryConfig - # Ranking configuration - ranking: RankingConfig - # Function Score configuration (ES层打分) function_score: FunctionScoreConfig @@ -293,13 +283,6 @@ class ConfigLoader: tie_breaker_keywords=float(text_strategy_cfg.get("tie_breaker_keywords", 0.9)), ) - # Parse ranking config - ranking_data = config_data.get("ranking", {}) - ranking = RankingConfig( - expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()", - description=ranking_data.get("description") or "Default BM25 + text embedding ranking" - ) - # Parse Function Score configuration fs_data = config_data.get("function_score", {}) function_score = FunctionScoreConfig( @@ -336,7 +319,6 @@ class ConfigLoader: field_boosts=field_boosts, indexes=indexes, query_config=query_config, - ranking=ranking, function_score=function_score, rerank=rerank, spu_config=spu_config, @@ -510,10 +492,6 @@ class ConfigLoader: "field_boosts": config.field_boosts, "indexes": [self._index_to_dict(index) for index in config.indexes], "query_config": query_config_dict, - "ranking": { - "expression": config.ranking.expression, - "description": config.ranking.description - }, "function_score": { "score_mode": config.function_score.score_mode, "boost_mode": config.function_score.boost_mode, diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index cdfb69e..a394346 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -357,7 +357,6 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: - `query_config.search_fields`:动态多语言检索字段(multilingual/shared/core) - `query_config.text_query_strategy`:文本召回策略参数(minimum_should_match、翻译boost、翻译失败原文兜底boost等) - `query_config`:语言、embedding 开关、source_fields、knn_boost、翻译提示词等 -- `ranking.expression`:融合表达式(例如 `bm25() + 0.25*text_embedding_relevance()`) - `function_score`:ES 层加权函数 - `rerank`:重排窗口、超时、ES/AI 融合权重 diff --git a/docs/搜索API对接指南.md 
b/docs/搜索API对接指南.md index a74c575..3bb2470 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -1622,7 +1622,6 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ "num_fields": 20, "num_indexes": 4, "supported_languages": ["zh", "en", "ru"], - "ranking_expression": "bm25() + 0.2*text_embedding_relevance()", "spu_enabled": false } ``` diff --git a/tests/ci/test_service_api_contracts.py b/tests/ci/test_service_api_contracts.py index a7488b1..330f5ae 100644 --- a/tests/ci/test_service_api_contracts.py +++ b/tests/ci/test_service_api_contracts.py @@ -152,7 +152,7 @@ class _FakeBulkService: class _FakeTransformer: - def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options): + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options, **kwargs): return { "tenant_id": tenant_id, "spu_id": str(spu_row.get("id", "0")), @@ -545,7 +545,7 @@ class _FakeTextModel: class _FakeImageModel: - def encode_image_urls(self, urls, batch_size=8): + def encode_image_urls(self, urls, batch_size=8, normalize_embeddings=True): return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] diff --git a/tests/conftest.py b/tests/conftest.py index f8a80df..d0c857d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, project_root) -from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig +from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, FunctionScoreConfig, RerankConfig from utils.es_client import ESClient from search import Searcher from query import QueryParser @@ -48,11 +48,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: inner_hits_size=3 ) - ranking_config = RankingConfig( - expression="static_bm25() + text_embedding_relevance() * 0.2", - description="Test ranking" - ) - 
function_score_config = FunctionScoreConfig() rerank_config = RerankConfig() @@ -67,7 +62,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: }, indexes=[sample_index_config], query_config=query_config, - ranking=ranking_config, function_score=function_score_config, rerank=rerank_config, spu_config=spu_config diff --git a/tests/test_embedding_pipeline.py b/tests/test_embedding_pipeline.py index 1eab72c..9fb135f 100644 --- a/tests/test_embedding_pipeline.py +++ b/tests/test_embedding_pipeline.py @@ -8,7 +8,6 @@ from config import ( FunctionScoreConfig, IndexConfig, QueryConfig, - RankingConfig, RerankConfig, SPUConfig, SearchConfig, @@ -82,7 +81,6 @@ def _build_test_config() -> SearchConfig: text_embedding_field="title_embedding", image_embedding_field=None, ), - ranking=RankingConfig(expression="bm25()", description="test"), function_score=FunctionScoreConfig(), rerank=RerankConfig(), spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3), @@ -97,7 +95,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch): fake_redis = _FakeRedis() monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) - def _fake_post(url, json, timeout): + def _fake_post(url, json, timeout, **kwargs): assert url.endswith("/embed/text") assert json == ["hello", "world"] return _FakeResponse([[0.1, 0.2], [0.3, 0.4]]) @@ -118,7 +116,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch): fake_redis = _FakeRedis() monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) - def _fake_post(url, json, timeout): + def _fake_post(url, json, timeout, **kwargs): return _FakeResponse([[0.1, 0.2], None]) monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post) @@ -136,7 +134,7 @@ def test_text_embedding_encoder_cache_hit(monkeypatch): calls = {"count": 0} - def _fake_post(url, json, timeout): + def _fake_post(url, json,
timeout, **kwargs): calls["count"] += 1 return _FakeResponse([[0.3, 0.4]]) diff --git a/tests/test_search_rerank_window.py b/tests/test_search_rerank_window.py index 84c2a8b..f7e5c3f 100644 --- a/tests/test_search_rerank_window.py +++ b/tests/test_search_rerank_window.py @@ -12,7 +12,6 @@ from config import ( FunctionScoreConfig, IndexConfig, QueryConfig, - RankingConfig, RerankConfig, SPUConfig, SearchConfig, @@ -141,7 +140,6 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38 field_boosts={"title.en": 3.0}, indexes=[IndexConfig(name="default", label="default", fields=["title.en"])], query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False), - ranking=RankingConfig(), function_score=FunctionScoreConfig(), rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), spu_config=SPUConfig(enabled=False), @@ -169,7 +167,6 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}], "query_config": {"supported_languages": ["en"], "default_language": "en"}, "spu_config": {"enabled": False}, - "ranking": {"expression": "bm25()", "description": "test"}, "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []}, "rerank": {"rerank_window": 384}, } @@ -211,9 +208,11 @@ def test_searcher_reranks_top_window_by_default(monkeypatch): ) assert called["count"] == 1 - assert called["docs"] == 1000 + # 应当对配置的 rerank_window 条文档做重排预取 + window = searcher.config.rerank.rerank_window + assert called["docs"] == window assert es_client.calls[0]["from_"] == 0 - assert es_client.calls[0]["size"] == 1000 + assert es_client.calls[0]["size"] == window assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]} assert len(es_client.calls) == 2 assert es_client.calls[1]["size"] == 10 -- libgit2 0.21.2