更新测试用例

tangwang
1 parent 2260eed2
Showing 12 changed files with 24 additions and 70 deletions Show diff stats
.cursor/plans/es-query-25a9f060.plan.检索表达式优化.ES_function表达式.md
CLAUDE.md
api/routes/admin.py
config/__init__.py
config/config.yaml
config/config_loader.py
docs/QUICKSTART.md
docs/搜索API对接指南.md
tests/ci/test_service_api_contracts.py
tests/conftest.py
tests/test_embedding_pipeline.py
tests/test_search_rerank_window.py
@@ -187,11 +187,8 @@ class RerankEngine:
     用途：复杂的自定义排序逻辑、实时个性化等
     """
  
-    def __init__(self, ranking_expression: str, enabled: bool = False):
+    def __init__(self, enabled: bool = False):
         self.enabled = enabled
-        self.ranking_expression = ranking_expression
-        if enabled:
-            self.parsed_terms = self._parse_expression(ranking_expression)
 ```
  
 #### `/home/tw/saas-search/search/__init__.py`
@@ -206,14 +203,6 @@ from .rerank_engine import RerankEngine  # 原 RankingEngine
  
 **修改初始化**（约88行）：
  
-```python
-# 改为RerankEngine，默认禁用
-self.rerank_engine = RerankEngine(
-    config.ranking.expression,
-    enabled=False  # 暂时禁用
-)
-```
-
 **修改search方法中的rerank逻辑**（约356-383行）：
  
 ```python
@@ -295,7 +284,6 @@ class FunctionScoreConfig:
 @dataclass
 class TenantConfig:
     # ... 其他字段 ...
-    ranking: RankingConfig  # 保留用于兼容
     rerank: RerankConfig  # 新增
     function_score: FunctionScoreConfig  # 新增
 ```
@@ -327,11 +315,18 @@ curl -X POST http://localhost:6002/search/ \
  
 ### 4. 配置迁移
  
-对于现有的`ranking.expression`配置，建议：
+```python
+# 改为RerankEngine，默认禁用
+self.rerank_engine = RerankEngine(
+    enabled=False  # 暂时禁用
+)
+```
+
+@@
+对于现有的排序/打分配置，建议：
  
-- 保留`ranking`配置用于文档说明
-- 新增`rerank.enabled=false`明确禁用状态
-- 新增`function_score`配置用于ES层打分
+- 使用 `function_score` 配置 ES 层打分
+- 使用 `rerank` 配置控制本地/AI 重排行为
  
 ### 5. 后续优化空间
  
@@ -588,8 +588,7 @@ python main.py search "query" --tenant-id 1  # Quick search test
 1. **Modifying Search Behavior**: Edit `config/config.yaml`
 2. **Changing Index Structure**: Update `mappings/search_products.json`
 3. **Adding New Filters**: Extend `api/models.py` with new Pydantic models
-4. **Updating Ranking**: Modify `ranking.expression` in config
-5. **Testing Queries**: Use frontend UI at http://localhost:6003
+4. **Testing Queries**: Use frontend UI at http://localhost:6003
  
 ## Key Implementation Details
  
@@ -56,7 +56,6 @@ async def get_configuration():
             "shared_fields": config.query_config.shared_fields,
             "core_multilingual_fields": config.query_config.core_multilingual_fields,
             "supported_languages": config.query_config.supported_languages,
-            "ranking_expression": config.ranking.expression,
             "spu_enabled": config.spu_config.enabled
         }
  
@@ -9,7 +9,6 @@ from .config_loader import (
     QueryConfig,
     IndexConfig,
     SPUConfig,
-    RankingConfig,
     FunctionScoreConfig,
     RerankConfig,
     ConfigLoader,
@@ -38,7 +37,6 @@ __all__ = [
     'QueryConfig',
     'IndexConfig',
     'SPUConfig',
-    'RankingConfig',
     'FunctionScoreConfig',
     'RerankConfig',
  
@@ -100,11 +100,6 @@ query_config:
   # KNN boost配置（向量召回的boost值）
   knn_boost: 0.25  # Lower boost for embedding recall
  
-# Ranking Configuration（排序配置）
-ranking:
-  expression: "bm25() + 0.25*text_embedding_relevance()"
-  description: "BM25 text relevance combined with semantic embedding similarity"
-
 # Function Score配置（ES层打分规则）
 function_score:
   score_mode: "sum"
@@ -97,13 +97,6 @@ class FunctionScoreConfig:
  
  
 @dataclass
-class RankingConfig:
-    """Configuration for ranking expressions."""
-    expression: str = "bm25()"
-    description: str = "Default BM25 ranking"
-
-
-@dataclass
 class RerankConfig:
     """重排配置（provider/URL 在 services.rerank）"""
     enabled: bool = True
@@ -128,9 +121,6 @@ class SearchConfig:
     # Query processing
     query_config: QueryConfig
  
-    # Ranking configuration
-    ranking: RankingConfig
-    
     # Function Score configuration (ES层打分)
     function_score: FunctionScoreConfig
  
@@ -293,13 +283,6 @@ class ConfigLoader:
             tie_breaker_keywords=float(text_strategy_cfg.get("tie_breaker_keywords", 0.9)),
         )
  
-        # Parse ranking config
-        ranking_data = config_data.get("ranking", {})
-        ranking = RankingConfig(
-            expression=ranking_data.get("expression") or "bm25() + 0.2*text_embedding_relevance()",
-            description=ranking_data.get("description") or "Default BM25 + text embedding ranking"
-        )
-        
         # Parse Function Score configuration
         fs_data = config_data.get("function_score", {})
         function_score = FunctionScoreConfig(
@@ -336,7 +319,6 @@ class ConfigLoader:
             field_boosts=field_boosts,
             indexes=indexes,
             query_config=query_config,
-            ranking=ranking,
             function_score=function_score,
             rerank=rerank,
             spu_config=spu_config,
@@ -510,10 +492,6 @@ class ConfigLoader:
             "field_boosts": config.field_boosts,
             "indexes": [self._index_to_dict(index) for index in config.indexes],
             "query_config": query_config_dict,
-            "ranking": {
-                "expression": config.ranking.expression,
-                "description": config.ranking.description
-            },
             "function_score": {
                 "score_mode": config.function_score.score_mode,
                 "boost_mode": config.function_score.boost_mode,
@@ -357,7 +357,6 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源：
 - `query_config.search_fields`：动态多语言检索字段（multilingual/shared/core）
 - `query_config.text_query_strategy`：文本召回策略参数（minimum_should_match、翻译boost、翻译失败原文兜底boost等）
 - `query_config`：语言、embedding 开关、source_fields、knn_boost、翻译提示词等
-- `ranking.expression`：融合表达式（例如 `bm25() + 0.25*text_embedding_relevance()`）
 - `function_score`：ES 层加权函数
 - `rerank`：重排窗口、超时、ES/AI 融合权重
  
@@ -1622,7 +1622,6 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \
   "num_fields": 20,
   "num_indexes": 4,
   "supported_languages": ["zh", "en", "ru"],
-  "ranking_expression": "bm25() + 0.2*text_embedding_relevance()",
   "spu_enabled": false
 }
 ```
@@ -152,7 +152,7 @@ class _FakeBulkService:
  
  
 class _FakeTransformer:
-    def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options):
+    def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options, **kwargs):
         return {
             "tenant_id": tenant_id,
             "spu_id": str(spu_row.get("id", "0")),
@@ -545,7 +545,7 @@ class _FakeTextModel:
  
  
 class _FakeImageModel:
-    def encode_image_urls(self, urls, batch_size=8):
+    def encode_image_urls(self, urls, batch_size=8, normalize_embeddings=True):
         return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls]
  
  
@@ -15,7 +15,7 @@ from unittest.mock import Mock, MagicMock
 project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, project_root)
  
-from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, RankingConfig, FunctionScoreConfig, RerankConfig
+from config import SearchConfig, QueryConfig, IndexConfig, SPUConfig, FunctionScoreConfig, RerankConfig
 from utils.es_client import ESClient
 from search import Searcher
 from query import QueryParser
@@ -48,11 +48,6 @@ def sample_search_config(sample_index_config) -> SearchConfig:
         inner_hits_size=3
     )
  
-    ranking_config = RankingConfig(
-        expression="static_bm25() + text_embedding_relevance() * 0.2",
-        description="Test ranking"
-    )
-
     function_score_config = FunctionScoreConfig()
     rerank_config = RerankConfig()
  
@@ -67,7 +62,6 @@ def sample_search_config(sample_index_config) -> SearchConfig:
         },
         indexes=[sample_index_config],
         query_config=query_config,
-        ranking=ranking_config,
         function_score=function_score_config,
         rerank=rerank_config,
         spu_config=spu_config
@@ -8,7 +8,6 @@ from config import (
     FunctionScoreConfig,
     IndexConfig,
     QueryConfig,
-    RankingConfig,
     RerankConfig,
     SPUConfig,
     SearchConfig,
@@ -82,7 +81,7 @@ def _build_test_config() -> SearchConfig:
             text_embedding_field="title_embedding",
             image_embedding_field=None,
         ),
-        ranking=RankingConfig(expression="bm25()", description="test"),
+        function_score=FunctionScoreConfig(),
         function_score=FunctionScoreConfig(),
         rerank=RerankConfig(),
         spu_config=SPUConfig(enabled=True, spu_field="spu_id", inner_hits_size=3),
@@ -97,7 +96,7 @@ def test_text_embedding_encoder_response_alignment(monkeypatch):
     fake_redis = _FakeRedis()
     monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis)
  
-    def _fake_post(url, json, timeout):
+    def _fake_post(url, json, timeout, **kwargs):
         assert url.endswith("/embed/text")
         assert json == ["hello", "world"]
         return _FakeResponse([[0.1, 0.2], [0.3, 0.4]])
@@ -118,7 +117,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch):
     fake_redis = _FakeRedis()
     monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis)
  
-    def _fake_post(url, json, timeout):
+    def _fake_post(url, json, timeout, **kwargs):
         return _FakeResponse([[0.1, 0.2], None])
  
     monkeypatch.setattr("embeddings.text_encoder.requests.post", _fake_post)
@@ -136,7 +135,7 @@ def test_text_embedding_encoder_cache_hit(monkeypatch):
  
     calls = {"count": 0}
  
-    def _fake_post(url, json, timeout):
+    def _fake_post(url, json, timeout, **kwargs):
         calls["count"] += 1
         return _FakeResponse([[0.3, 0.4]])
  
@@ -12,7 +12,6 @@ from config import (
     FunctionScoreConfig,
     IndexConfig,
     QueryConfig,
-    RankingConfig,
     RerankConfig,
     SPUConfig,
     SearchConfig,
@@ -141,7 +140,6 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38
         field_boosts={"title.en": 3.0},
         indexes=[IndexConfig(name="default", label="default", fields=["title.en"])],
         query_config=QueryConfig(enable_text_embedding=False, enable_query_rewrite=False),
-        ranking=RankingConfig(),
         function_score=FunctionScoreConfig(),
         rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window),
         spu_config=SPUConfig(enabled=False),
@@ -169,7 +167,6 @@ def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
         "indexes": [{"name": "default", "label": "default", "fields": ["title.en"]}],
         "query_config": {"supported_languages": ["en"], "default_language": "en"},
         "spu_config": {"enabled": False},
-        "ranking": {"expression": "bm25()", "description": "test"},
         "function_score": {"score_mode": "sum", "boost_mode": "multiply", "functions": []},
         "rerank": {"rerank_window": 384},
     }
@@ -211,9 +208,11 @@ def test_searcher_reranks_top_window_by_default(monkeypatch):
     )
  
     assert called["count"] == 1
-    assert called["docs"] == 1000
+    # 应当对配置的 rerank_window 条文档做重排预取
+    window = searcher.config.rerank.rerank_window
+    assert called["docs"] == window
     assert es_client.calls[0]["from_"] == 0
-    assert es_client.calls[0]["size"] == 1000
+    assert es_client.calls[0]["size"] == window
     assert es_client.calls[0]["body"]["_source"] == {"includes": ["title"]}
     assert len(es_client.calls) == 2
     assert es_client.calls[1]["size"] == 10