Commit 0d3e73ba8718d32d00a4f66eb749375ba333e634
1 parent
d387e05d
rerank mini batch
Showing
9 changed files
with
306 additions
and
45 deletions
Show diff stats
| ... | ... | @@ -35,9 +35,10 @@ CACHE_DIR=.cache |
| 35 | 35 | API_BASE_URL=http://43.166.252.75:6002 |
| 36 | 36 | |
| 37 | 37 | |
| 38 | -# 国内 | |
| 38 | +# 通用 DashScope key(翻译/内容理解等模块) | |
| 39 | 39 | DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b |
| 40 | -# 美国 | |
| 41 | -DASHSCOPE_API_KEY=sk-482cc3ff37a8467dab134a7a46830556 | |
| 40 | +# Reranker 专用 key(按地域) | |
| 41 | +RERANK_DASHSCOPE_API_KEY_CN=sk-c3b8d4db061840aa8effb748df2a997b | |
| 42 | +RERANK_DASHSCOPE_API_KEY_US=sk-482cc3ff37a8467dab134a7a46830556 | |
| 42 | 43 | |
| 43 | 44 | OPENAI_API_KEY=sk-HvmTMKtuznibZ75l7L2uF2jiaYocCthqd8Cbdkl09KTE7Ft0 | ... | ... |
.env.example
| ... | ... | @@ -45,7 +45,9 @@ TEI_HEALTH_TIMEOUT_SEC=300 |
| 45 | 45 | RERANK_PROVIDER=http |
| 46 | 46 | RERANK_BACKEND=qwen3_vllm |
| 47 | 47 | # Optional for cloud rerank backend (RERANK_BACKEND=dashscope_rerank) |
| 48 | -DASHSCOPE_API_KEY= | |
| 48 | +# Reranker cloud API keys by region | |
| 49 | +RERANK_DASHSCOPE_API_KEY_CN= | |
| 50 | +RERANK_DASHSCOPE_API_KEY_US= | |
| 49 | 51 | # Example: |
| 50 | 52 | # RERANK_DASHSCOPE_ENDPOINT=https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks |
| 51 | 53 | RERANK_DASHSCOPE_ENDPOINT= | ... | ... |
config/config.yaml
| ... | ... | @@ -166,7 +166,7 @@ services: |
| 166 | 166 | base_url: "http://127.0.0.1:6007" |
| 167 | 167 | service_url: "http://127.0.0.1:6007/rerank" |
| 168 | 168 | # 服务内后端(reranker 进程启动时读取) |
| 169 | - backend: "qwen3_vllm" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank | |
| 169 | + backend: "dashscope_rerank" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank | |
| 170 | 170 | backends: |
| 171 | 171 | bge: |
| 172 | 172 | model_name: "BAAI/bge-reranker-v2-m3" |
| ... | ... | @@ -203,9 +203,10 @@ services: |
| 203 | 203 | # 新加坡: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks |
| 204 | 204 | # 美国: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks |
| 205 | 205 | endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" |
| 206 | - api_key: null # 推荐通过环境变量 DASHSCOPE_API_KEY 设置 | |
| 207 | - timeout_sec: 15.0 | |
| 206 | + api_key_env: "RERANK_DASHSCOPE_API_KEY_CN" | |
| 207 | + timeout_sec: 10.0 # 单请求超时(秒) | |
| 208 | 208 | top_n_cap: 0 # 0 表示 top_n=当前请求文档数;>0 则限制 top_n 上限 |
| 209 | + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) | |
| 209 | 210 | instruct: "Given a shopping query, rank product titles by relevance" |
| 210 | 211 | max_retries: 2 |
| 211 | 212 | retry_backoff_sec: 0.2 | ... | ... |
docs/DEVELOPER_GUIDE.md
| ... | ... | @@ -334,7 +334,7 @@ services: |
| 334 | 334 | |
| 335 | 335 | - **单一路径**:Provider 和 backend 必须由 `config/config.yaml` 的 `services` 块显式指定;未知配置应直接报错。 |
| 336 | 336 | - **无兼容回退**:不保留“旧配置自动推导/兜底默认值”机制,避免静默行为偏差。 |
| 337 | -- **环境变量覆盖**:允许环境变量覆盖(如 `RERANKER_SERVICE_URL`、`RERANK_BACKEND`、`DASHSCOPE_API_KEY`、`RERANK_DASHSCOPE_ENDPOINT`、`EMBEDDING_SERVICE_URL`、`EMBEDDING_BACKEND`、`TEI_BASE_URL`),但覆盖后仍需满足合法性校验。 | |
| 337 | +- **环境变量覆盖**:允许环境变量覆盖(如 `RERANKER_SERVICE_URL`、`RERANK_BACKEND`、`RERANK_DASHSCOPE_API_KEY_CN`/`RERANK_DASHSCOPE_API_KEY_US`、`RERANK_DASHSCOPE_ENDPOINT`、`EMBEDDING_SERVICE_URL`、`EMBEDDING_BACKEND`、`TEI_BASE_URL`),但覆盖后仍需满足合法性校验。 | |
| 338 | 338 | |
| 339 | 339 | --- |
| 340 | 340 | ... | ... |
docs/QUICKSTART.md
| ... | ... | @@ -423,7 +423,7 @@ services: |
| 423 | 423 | - `TEI_BASE_URL` |
| 424 | 424 | - `RERANKER_SERVICE_URL` |
| 425 | 425 | - `RERANK_BACKEND`(服务内后端) |
| 426 | -- `DASHSCOPE_API_KEY`(`dashscope_rerank` 后端鉴权) | |
| 426 | +- `RERANK_DASHSCOPE_API_KEY_CN` / `RERANK_DASHSCOPE_API_KEY_US`(`dashscope_rerank` 后端鉴权) | |
| 427 | 427 | - `RERANK_DASHSCOPE_ENDPOINT`(`dashscope_rerank` 地域 endpoint 覆盖) |
| 428 | 428 | |
| 429 | 429 | ### 3.3 新增 provider 的最小步骤 | ... | ... |
reranker/README.md
| ... | ... | @@ -69,9 +69,10 @@ services: |
| 69 | 69 | dashscope_rerank: |
| 70 | 70 | model_name: "qwen3-rerank" |
| 71 | 71 | endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" |
| 72 | - api_key: null # 推荐使用环境变量 DASHSCOPE_API_KEY | |
| 72 | + api_key_env: "RERANK_DASHSCOPE_API_KEY_CN" | |
| 73 | 73 | timeout_sec: 15.0 |
| 74 | 74 | top_n_cap: 0 |
| 75 | + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) | |
| 75 | 76 | instruct: "Given a shopping query, rank product titles by relevance" |
| 76 | 77 | max_retries: 2 |
| 77 | 78 | retry_backoff_sec: 0.2 |
| ... | ... | @@ -83,8 +84,10 @@ DashScope endpoint 地域示例: |
| 83 | 84 | - 美国:`https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks` |
| 84 | 85 | |
| 85 | 86 | DashScope 认证: |
| 86 | -- `api_key` 支持配置在 `config.yaml` | |
| 87 | -- 推荐通过环境变量注入:`DASHSCOPE_API_KEY=...` | |
| 87 | +- `api_key_env` 必填,表示该后端读取哪个环境变量作为 API Key | |
| 88 | +- 推荐按地域分别注入: | |
| 89 | + - `RERANK_DASHSCOPE_API_KEY_CN=...` | |
| 90 | + - `RERANK_DASHSCOPE_API_KEY_US=...` | |
| 88 | 91 | |
| 89 | 92 | - 服务端口、请求限制等仍在 `reranker/config.py`(或环境变量 `RERANKER_PORT`、`RERANKER_HOST`)。 |
| 90 | 93 | ... | ... |
reranker/backends/dashscope_rerank.py
| ... | ... | @@ -16,11 +16,12 @@ import logging |
| 16 | 16 | import math |
| 17 | 17 | import os |
| 18 | 18 | import time |
| 19 | +from concurrent.futures import ThreadPoolExecutor, as_completed | |
| 19 | 20 | from typing import Any, Dict, List, Tuple |
| 20 | 21 | from urllib import error as urllib_error |
| 21 | 22 | from urllib import request as urllib_request |
| 22 | 23 | |
| 23 | -from reranker.backends.batching_utils import deduplicate_with_positions | |
| 24 | +from reranker.backends.batching_utils import deduplicate_with_positions, iter_batches | |
| 24 | 25 | |
| 25 | 26 | logger = logging.getLogger("reranker.backends.dashscope_rerank") |
| 26 | 27 | |
| ... | ... | @@ -32,19 +33,20 @@ class DashScopeRerankBackend: |
| 32 | 33 | Config from services.rerank.backends.dashscope_rerank: |
| 33 | 34 | - model_name: str, default "qwen3-rerank" |
| 34 | 35 | - endpoint: str, default "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" |
| 35 | - - api_key: optional str (or env DASHSCOPE_API_KEY) | |
| 36 | + - api_key_env: str, required env var name for this backend key | |
| 36 | 37 | - timeout_sec: float, default 15.0 |
| 37 | 38 | - top_n_cap: int, optional cap; 0 means use all docs in request |
| 39 | + - batchsize: int, optional; 0 disables batching; >0 enables concurrent small-batch scheduling | |
| 38 | 40 | - instruct: optional str |
| 39 | 41 | - max_retries: int, default 1 |
| 40 | 42 | - retry_backoff_sec: float, default 0.2 |
| 41 | 43 | |
| 42 | 44 | Env overrides: |
| 43 | - - DASHSCOPE_API_KEY | |
| 44 | 45 | - RERANK_DASHSCOPE_ENDPOINT |
| 45 | 46 | - RERANK_DASHSCOPE_MODEL |
| 46 | 47 | - RERANK_DASHSCOPE_TIMEOUT_SEC |
| 47 | 48 | - RERANK_DASHSCOPE_TOP_N_CAP |
| 49 | + - RERANK_DASHSCOPE_BATCHSIZE | |
| 48 | 50 | """ |
| 49 | 51 | |
| 50 | 52 | def __init__(self, config: Dict[str, Any]) -> None: |
| ... | ... | @@ -59,11 +61,8 @@ class DashScopeRerankBackend: |
| 59 | 61 | or self._config.get("endpoint") |
| 60 | 62 | or "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" |
| 61 | 63 | ).strip() |
| 62 | - self._api_key = str( | |
| 63 | - os.getenv("DASHSCOPE_API_KEY") | |
| 64 | - or self._config.get("api_key") | |
| 65 | - or "" | |
| 66 | - ).strip().strip('"').strip("'") | |
| 64 | + self._api_key_env = str(self._config.get("api_key_env") or "").strip() | |
| 65 | + self._api_key = str(os.getenv(self._api_key_env) or "").strip().strip('"').strip("'") | |
| 67 | 66 | self._timeout_sec = float( |
| 68 | 67 | os.getenv("RERANK_DASHSCOPE_TIMEOUT_SEC") |
| 69 | 68 | or self._config.get("timeout_sec") |
| ... | ... | @@ -74,21 +73,29 @@ class DashScopeRerankBackend: |
| 74 | 73 | or self._config.get("top_n_cap") |
| 75 | 74 | or 0 |
| 76 | 75 | ) |
| 76 | + self._batchsize = int( | |
| 77 | + os.getenv("RERANK_DASHSCOPE_BATCHSIZE") | |
| 78 | + or self._config.get("batchsize") | |
| 79 | + or 0 | |
| 80 | + ) | |
| 77 | 81 | self._instruct = str(self._config.get("instruct") or "").strip() |
| 78 | 82 | self._max_retries = int(self._config.get("max_retries", 1)) |
| 79 | 83 | self._retry_backoff_sec = float(self._config.get("retry_backoff_sec", 0.2)) |
| 80 | 84 | |
| 81 | 85 | if not self._endpoint: |
| 82 | 86 | raise ValueError("dashscope_rerank endpoint is required") |
| 87 | + if not self._api_key_env: | |
| 88 | + raise ValueError("dashscope_rerank api_key_env is required") | |
| 83 | 89 | if not self._api_key: |
| 84 | 90 | raise ValueError( |
| 85 | - "dashscope_rerank api_key is required (set services.rerank.backends.dashscope_rerank.api_key " | |
| 86 | - "or env DASHSCOPE_API_KEY)" | |
| 91 | + f"dashscope_rerank api key is required (set env {self._api_key_env})" | |
| 87 | 92 | ) |
| 88 | 93 | if self._timeout_sec <= 0: |
| 89 | 94 | raise ValueError(f"dashscope_rerank timeout_sec must be > 0, got {self._timeout_sec}") |
| 90 | 95 | if self._top_n_cap < 0: |
| 91 | 96 | raise ValueError(f"dashscope_rerank top_n_cap must be >= 0, got {self._top_n_cap}") |
| 97 | + if self._batchsize < 0: | |
| 98 | + raise ValueError(f"dashscope_rerank batchsize must be >= 0, got {self._batchsize}") | |
| 92 | 99 | if self._max_retries <= 0: |
| 93 | 100 | raise ValueError(f"dashscope_rerank max_retries must be > 0, got {self._max_retries}") |
| 94 | 101 | if self._retry_backoff_sec < 0: |
| ... | ... | @@ -97,11 +104,12 @@ class DashScopeRerankBackend: |
| 97 | 104 | ) |
| 98 | 105 | |
| 99 | 106 | logger.info( |
| 100 | - "DashScope reranker ready | endpoint=%s model=%s timeout_sec=%s top_n_cap=%s", | |
| 107 | + "DashScope reranker ready | endpoint=%s model=%s timeout_sec=%s top_n_cap=%s batchsize=%s", | |
| 101 | 108 | self._endpoint, |
| 102 | 109 | self._model_name, |
| 103 | 110 | self._timeout_sec, |
| 104 | 111 | self._top_n_cap, |
| 112 | + self._batchsize, | |
| 105 | 113 | ) |
| 106 | 114 | |
| 107 | 115 | def _http_post_json(self, payload: Dict[str, Any]) -> Dict[str, Any]: |
| ... | ... | @@ -162,6 +170,95 @@ class DashScopeRerankBackend: |
| 162 | 170 | |
| 163 | 171 | raise RuntimeError(str(last_exc) if last_exc else "DashScope rerank failed with unknown error") |
| 164 | 172 | |
| 173 | + def _score_single_request( | |
| 174 | + self, | |
| 175 | + query: str, | |
| 176 | + unique_texts: List[str], | |
| 177 | + normalize: bool, | |
| 178 | + top_n: int, | |
| 179 | + ) -> Tuple[List[float], int]: | |
| 180 | + response = self._post_rerank(query=query, docs=unique_texts, top_n=top_n) | |
| 181 | + results = self._extract_results(response) | |
| 182 | + | |
| 183 | + unique_scores: List[float] = [0.0] * len(unique_texts) | |
| 184 | + for rank, item in enumerate(results): | |
| 185 | + raw_idx = item.get("index", rank) | |
| 186 | + try: | |
| 187 | + idx = int(raw_idx) | |
| 188 | + except (TypeError, ValueError): | |
| 189 | + continue | |
| 190 | + if idx < 0 or idx >= len(unique_scores): | |
| 191 | + continue | |
| 192 | + raw_score = item.get("relevance_score", item.get("score")) | |
| 193 | + unique_scores[idx] = self._coerce_score(raw_score, normalize=normalize) | |
| 194 | + return unique_scores, len(results) | |
| 195 | + | |
| 196 | + def _score_batched_concurrent( | |
| 197 | + self, | |
| 198 | + query: str, | |
| 199 | + unique_texts: List[str], | |
| 200 | + normalize: bool, | |
| 201 | + ) -> Tuple[List[float], Dict[str, int]]: | |
| 202 | + """ | |
| 203 | + Concurrent batch scoring. | |
| 204 | + | |
| 205 | + We intentionally request full local scores in each batch (top_n=len(batch)), | |
| 206 | + then apply global top_n/top_n_cap truncation after merge if needed. | |
| 207 | + """ | |
| 208 | + indices = list(range(len(unique_texts))) | |
| 209 | + batches = list(iter_batches(indices, batch_size=self._batchsize)) | |
| 210 | + num_batches = len(batches) | |
| 211 | + max_workers = min(8, num_batches) if num_batches > 0 else 1 | |
| 212 | + unique_scores: List[float] = [0.0] * len(unique_texts) | |
| 213 | + response_results = 0 | |
| 214 | + | |
| 215 | + def _run_one(batch_no: int, batch_indices: List[int]) -> Tuple[int, List[int], Dict[str, Any], float]: | |
| 216 | + docs = [unique_texts[i] for i in batch_indices] | |
| 217 | + # Ask each batch for all docs to avoid local truncation. | |
| 218 | + start_ts = time.perf_counter() | |
| 219 | + data = self._post_rerank(query=query, docs=docs, top_n=len(docs)) | |
| 220 | + elapsed_ms = round((time.perf_counter() - start_ts) * 1000.0, 3) | |
| 221 | + return batch_no, batch_indices, data, elapsed_ms | |
| 222 | + | |
| 223 | + with ThreadPoolExecutor(max_workers=max_workers) as ex: | |
| 224 | + future_to_batch = {ex.submit(_run_one, i + 1, b): b for i, b in enumerate(batches)} | |
| 225 | + for fut in as_completed(future_to_batch): | |
| 226 | + batch_indices = future_to_batch[fut] | |
| 227 | + try: | |
| 228 | + batch_no, _, data, batch_elapsed_ms = fut.result() | |
| 229 | + except Exception as exc: | |
| 230 | + raise RuntimeError( | |
| 231 | + f"DashScope rerank batch failed | batch_size={len(batch_indices)} error={exc}" | |
| 232 | + ) from exc | |
| 233 | + results = self._extract_results(data) | |
| 234 | + logger.info( | |
| 235 | + "DashScope batch response | batch=%d/%d docs=%d elapsed_ms=%s results=%d query=%r", | |
| 236 | + batch_no, | |
| 237 | + num_batches, | |
| 238 | + len(batch_indices), | |
| 239 | + batch_elapsed_ms, | |
| 240 | + len(results), | |
| 241 | + query[:80], | |
| 242 | + ) | |
| 243 | + response_results += len(results) | |
| 244 | + for rank, item in enumerate(results): | |
| 245 | + raw_idx = item.get("index", rank) | |
| 246 | + try: | |
| 247 | + local_idx = int(raw_idx) | |
| 248 | + except (TypeError, ValueError): | |
| 249 | + continue | |
| 250 | + if local_idx < 0 or local_idx >= len(batch_indices): | |
| 251 | + continue | |
| 252 | + global_idx = batch_indices[local_idx] | |
| 253 | + raw_score = item.get("relevance_score", item.get("score")) | |
| 254 | + unique_scores[global_idx] = self._coerce_score(raw_score, normalize=normalize) | |
| 255 | + | |
| 256 | + return unique_scores, { | |
| 257 | + "batches": num_batches, | |
| 258 | + "batch_concurrency": max_workers, | |
| 259 | + "response_results": response_results, | |
| 260 | + } | |
| 261 | + | |
| 165 | 262 | @staticmethod |
| 166 | 263 | def _extract_results(data: Dict[str, Any]) -> List[Dict[str, Any]]: |
| 167 | 264 | # Compatible API style: {"results":[...]} |
| ... | ... | @@ -240,21 +337,34 @@ class DashScopeRerankBackend: |
| 240 | 337 | top_n_effective = min(top_n_effective, int(top_n)) |
| 241 | 338 | if self._top_n_cap > 0: |
| 242 | 339 | top_n_effective = min(top_n_effective, self._top_n_cap) |
| 243 | - | |
| 244 | - response = self._post_rerank(query=query, docs=unique_texts, top_n=top_n_effective) | |
| 245 | - results = self._extract_results(response) | |
| 246 | - | |
| 247 | - unique_scores: List[float] = [0.0] * len(unique_texts) | |
| 248 | - for rank, item in enumerate(results): | |
| 249 | - raw_idx = item.get("index", rank) | |
| 250 | - try: | |
| 251 | - idx = int(raw_idx) | |
| 252 | - except (TypeError, ValueError): | |
| 253 | - continue | |
| 254 | - if idx < 0 or idx >= len(unique_scores): | |
| 255 | - continue | |
| 256 | - raw_score = item.get("relevance_score", item.get("score")) | |
| 257 | - unique_scores[idx] = self._coerce_score(raw_score, normalize=normalize) | |
| 340 | + can_batch = ( | |
| 341 | + self._batchsize > 0 | |
| 342 | + and len(unique_texts) > self._batchsize | |
| 343 | + ) | |
| 344 | + if can_batch: | |
| 345 | + unique_scores, batch_meta = self._score_batched_concurrent( | |
| 346 | + query=query, | |
| 347 | + unique_texts=unique_texts, | |
| 348 | + normalize=normalize, | |
| 349 | + ) | |
| 350 | + if top_n_effective < len(unique_scores): | |
| 351 | + order = sorted(range(len(unique_scores)), key=lambda i: (-unique_scores[i], i)) | |
| 352 | + keep = set(order[:top_n_effective]) | |
| 353 | + for i in range(len(unique_scores)): | |
| 354 | + if i not in keep: | |
| 355 | + unique_scores[i] = 0.0 | |
| 356 | + response_results = int(batch_meta["response_results"]) | |
| 357 | + batches = int(batch_meta["batches"]) | |
| 358 | + batch_concurrency = int(batch_meta["batch_concurrency"]) | |
| 359 | + else: | |
| 360 | + unique_scores, response_results = self._score_single_request( | |
| 361 | + query=query, | |
| 362 | + unique_texts=unique_texts, | |
| 363 | + normalize=normalize, | |
| 364 | + top_n=top_n_effective, | |
| 365 | + ) | |
| 366 | + batches = 1 | |
| 367 | + batch_concurrency = 1 | |
| 258 | 368 | |
| 259 | 369 | for (orig_idx, _), unique_idx in zip(indexed, position_to_unique): |
| 260 | 370 | output_scores[orig_idx] = float(unique_scores[unique_idx]) |
| ... | ... | @@ -275,7 +385,10 @@ class DashScopeRerankBackend: |
| 275 | 385 | "normalize": normalize, |
| 276 | 386 | "top_n": top_n_effective, |
| 277 | 387 | "requested_top_n": int(top_n) if top_n is not None else None, |
| 278 | - "response_results": len(results), | |
| 388 | + "response_results": response_results, | |
| 389 | + "batchsize": self._batchsize, | |
| 390 | + "batches": batches, | |
| 391 | + "batch_concurrency": batch_concurrency, | |
| 279 | 392 | "endpoint": self._endpoint, |
| 280 | 393 | } |
| 281 | 394 | ... | ... |
reranker/server.py
| ... | ... | @@ -154,11 +154,14 @@ def rerank(request: RerankRequest) -> RerankResponse: |
| 154 | 154 | meta.update({"service_elapsed_ms": round((time.time() - start_ts) * 1000.0, 3)}) |
| 155 | 155 | score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]] |
| 156 | 156 | logger.info( |
| 157 | - "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s", | |
| 157 | + "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s", | |
| 158 | 158 | meta.get("input_docs"), |
| 159 | 159 | meta.get("unique_docs"), |
| 160 | 160 | meta.get("dedup_ratio"), |
| 161 | 161 | meta.get("service_elapsed_ms"), |
| 162 | + meta.get("batches"), | |
| 163 | + meta.get("batchsize"), | |
| 164 | + meta.get("batch_concurrency"), | |
| 162 | 165 | _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), |
| 163 | 166 | score_preview, |
| 164 | 167 | ) | ... | ... |
tests/test_reranker_dashscope_backend.py
| 1 | 1 | from __future__ import annotations |
| 2 | 2 | |
| 3 | +import time | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 3 | 7 | from reranker.backends import get_rerank_backend |
| 4 | 8 | from reranker.backends.dashscope_rerank import DashScopeRerankBackend |
| 5 | 9 | |
| 6 | 10 | |
| 7 | -def test_dashscope_backend_factory_loads(): | |
| 11 | +@pytest.fixture(autouse=True) | |
| 12 | +def _clear_global_dashscope_key(monkeypatch): | |
| 13 | + # Prevent accidental pass-through from unrelated global key. | |
| 14 | + monkeypatch.delenv("DASHSCOPE_API_KEY", raising=False) | |
| 15 | + | |
| 16 | + | |
| 17 | +def test_dashscope_backend_factory_loads(monkeypatch): | |
| 18 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 8 | 19 | backend = get_rerank_backend( |
| 9 | 20 | "dashscope_rerank", |
| 10 | 21 | { |
| 11 | 22 | "model_name": "qwen3-rerank", |
| 12 | 23 | "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", |
| 13 | - "api_key": "test-key", | |
| 24 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 14 | 25 | }, |
| 15 | 26 | ) |
| 16 | 27 | assert isinstance(backend, DashScopeRerankBackend) |
| 17 | 28 | |
| 18 | 29 | |
| 19 | 30 | def test_dashscope_backend_score_with_meta_dedup_and_restore(monkeypatch): |
| 31 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 20 | 32 | backend = DashScopeRerankBackend( |
| 21 | 33 | { |
| 22 | 34 | "model_name": "qwen3-rerank", |
| 23 | 35 | "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", |
| 24 | - "api_key": "test-key", | |
| 36 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 25 | 37 | "top_n_cap": 0, |
| 26 | 38 | } |
| 27 | 39 | ) |
| ... | ... | @@ -55,11 +67,12 @@ def test_dashscope_backend_score_with_meta_dedup_and_restore(monkeypatch): |
| 55 | 67 | |
| 56 | 68 | |
| 57 | 69 | def test_dashscope_backend_top_n_cap_and_normalize_fallback(monkeypatch): |
| 70 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 58 | 71 | backend = DashScopeRerankBackend( |
| 59 | 72 | { |
| 60 | 73 | "model_name": "qwen3-rerank", |
| 61 | 74 | "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", |
| 62 | - "api_key": "test-key", | |
| 75 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 63 | 76 | "top_n_cap": 1, |
| 64 | 77 | } |
| 65 | 78 | ) |
| ... | ... | @@ -81,11 +94,12 @@ def test_dashscope_backend_top_n_cap_and_normalize_fallback(monkeypatch): |
| 81 | 94 | |
| 82 | 95 | |
| 83 | 96 | def test_dashscope_backend_score_with_meta_topn_request(monkeypatch): |
| 97 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 84 | 98 | backend = DashScopeRerankBackend( |
| 85 | 99 | { |
| 86 | 100 | "model_name": "qwen3-rerank", |
| 87 | 101 | "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", |
| 88 | - "api_key": "test-key", | |
| 102 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 89 | 103 | "top_n_cap": 0, |
| 90 | 104 | } |
| 91 | 105 | ) |
| ... | ... | @@ -101,3 +115,127 @@ def test_dashscope_backend_score_with_meta_topn_request(monkeypatch): |
| 101 | 115 | assert scores == [0.3, 0.0, 0.8] |
| 102 | 116 | assert meta["top_n"] == 2 |
| 103 | 117 | assert meta["requested_top_n"] == 2 |
| 118 | + | |
| 119 | + | |
| 120 | +def test_dashscope_backend_batchsize_concurrent_full_topn(monkeypatch): | |
| 121 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 122 | + backend = DashScopeRerankBackend( | |
| 123 | + { | |
| 124 | + "model_name": "qwen3-rerank", | |
| 125 | + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", | |
| 126 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 127 | + "top_n_cap": 0, | |
| 128 | + "batchsize": 2, | |
| 129 | + } | |
| 130 | + ) | |
| 131 | + | |
| 132 | + def _fake_post(query: str, docs: list[str], top_n: int): | |
| 133 | + assert query == "q" | |
| 134 | + # batching path asks every batch for full local list | |
| 135 | + assert top_n == len(docs) | |
| 136 | + time.sleep(0.05) | |
| 137 | + return { | |
| 138 | + "results": [ | |
| 139 | + {"index": i, "relevance_score": float(i + 1) / 10.0} | |
| 140 | + for i, _ in enumerate(docs) | |
| 141 | + ] | |
| 142 | + } | |
| 143 | + | |
| 144 | + monkeypatch.setattr(backend, "_post_rerank", _fake_post) | |
| 145 | + start = time.perf_counter() | |
| 146 | + scores, meta = backend.score_with_meta(query="q", docs=["d1", "d2", "d3", "d4", "d5", "d6"]) | |
| 147 | + elapsed = time.perf_counter() - start | |
| 148 | + | |
| 149 | + # 3 batches * 50ms run serially would take ~150ms; concurrent execution should be significantly faster. | |
| 150 | + assert elapsed < 0.14 | |
| 151 | + assert len(scores) == 6 | |
| 152 | + assert meta["batches"] == 3 | |
| 153 | + assert meta["batch_concurrency"] == 3 | |
| 154 | + assert meta["response_results"] == 6 | |
| 155 | + | |
| 156 | + | |
| 157 | +def test_dashscope_backend_batchsize_still_effective_when_topn_limited(monkeypatch): | |
| 158 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 159 | + backend = DashScopeRerankBackend( | |
| 160 | + { | |
| 161 | + "model_name": "qwen3-rerank", | |
| 162 | + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", | |
| 163 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 164 | + "top_n_cap": 0, | |
| 165 | + "batchsize": 2, | |
| 166 | + } | |
| 167 | + ) | |
| 168 | + | |
| 169 | + called = {"count": 0} | |
| 170 | + | |
| 171 | + def _fake_post(query: str, docs: list[str], top_n: int): | |
| 172 | + called["count"] += 1 | |
| 173 | + # batching remains enabled; each batch asks for full local scores | |
| 174 | + assert top_n == len(docs) | |
| 175 | + score_map = {"d1": 0.9, "d2": 0.1, "d3": 0.8, "d4": 0.2} | |
| 176 | + return { | |
| 177 | + "results": [ | |
| 178 | + {"index": i, "relevance_score": score_map[doc]} | |
| 179 | + for i, doc in enumerate(docs) | |
| 180 | + ] | |
| 181 | + } | |
| 182 | + | |
| 183 | + monkeypatch.setattr(backend, "_post_rerank", _fake_post) | |
| 184 | + scores, meta = backend.score_with_meta_topn(query="q", docs=["d1", "d2", "d3", "d4"], top_n=2) | |
| 185 | + | |
| 186 | + assert called["count"] == 2 | |
| 187 | + assert scores == [0.9, 0.0, 0.8, 0.0] | |
| 188 | + assert meta["batches"] == 2 | |
| 189 | + assert meta["top_n"] == 2 | |
| 190 | + | |
| 191 | + | |
| 192 | +def test_dashscope_backend_batchsize_raises_when_one_batch_fails(monkeypatch): | |
| 193 | + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") | |
| 194 | + backend = DashScopeRerankBackend( | |
| 195 | + { | |
| 196 | + "model_name": "qwen3-rerank", | |
| 197 | + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", | |
| 198 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 199 | + "top_n_cap": 0, | |
| 200 | + "batchsize": 2, | |
| 201 | + } | |
| 202 | + ) | |
| 203 | + | |
| 204 | + def _fake_post(query: str, docs: list[str], top_n: int): | |
| 205 | + if docs == ["d3", "d4"]: | |
| 206 | + raise RuntimeError("provider temporary error") | |
| 207 | + return { | |
| 208 | + "results": [ | |
| 209 | + {"index": i, "relevance_score": 0.1} | |
| 210 | + for i, _ in enumerate(docs) | |
| 211 | + ] | |
| 212 | + } | |
| 213 | + | |
| 214 | + monkeypatch.setattr(backend, "_post_rerank", _fake_post) | |
| 215 | + | |
| 216 | + with pytest.raises(RuntimeError, match="DashScope rerank batch failed"): | |
| 217 | + backend.score_with_meta(query="q", docs=["d1", "d2", "d3", "d4"]) | |
| 218 | + | |
| 219 | + | |
| 220 | +def test_dashscope_backend_requires_api_key_env(): | |
| 221 | + with pytest.raises(ValueError, match="api_key_env is required"): | |
| 222 | + DashScopeRerankBackend( | |
| 223 | + { | |
| 224 | + "model_name": "qwen3-rerank", | |
| 225 | + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", | |
| 226 | + "top_n_cap": 0, | |
| 227 | + } | |
| 228 | + ) | |
| 229 | + | |
| 230 | + | |
| 231 | +def test_dashscope_backend_requires_api_key_env_value(monkeypatch): | |
| 232 | + monkeypatch.delenv("TEST_RERANK_DASHSCOPE_API_KEY", raising=False) | |
| 233 | + with pytest.raises(ValueError, match="set env TEST_RERANK_DASHSCOPE_API_KEY"): | |
| 234 | + DashScopeRerankBackend( | |
| 235 | + { | |
| 236 | + "model_name": "qwen3-rerank", | |
| 237 | + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", | |
| 238 | + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", | |
| 239 | + "top_n_cap": 0, | |
| 240 | + } | |
| 241 | + ) | ... | ... |