diff --git a/.env b/.env index 6fa43db..03604e1 100644 --- a/.env +++ b/.env @@ -35,9 +35,10 @@ CACHE_DIR=.cache API_BASE_URL=http://43.166.252.75:6002 -# 国内 +# 通用 DashScope key(翻译/内容理解等模块) DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b -# 美国 -DASHSCOPE_API_KEY=sk-482cc3ff37a8467dab134a7a46830556 +# Reranker 专用 key(按地域) +RERANK_DASHSCOPE_API_KEY_CN=sk-c3b8d4db061840aa8effb748df2a997b +RERANK_DASHSCOPE_API_KEY_US=sk-482cc3ff37a8467dab134a7a46830556 OPENAI_API_KEY=sk-HvmTMKtuznibZ75l7L2uF2jiaYocCthqd8Cbdkl09KTE7Ft0 diff --git a/.env.example b/.env.example index a43a538..9818f30 100644 --- a/.env.example +++ b/.env.example @@ -45,7 +45,9 @@ TEI_HEALTH_TIMEOUT_SEC=300 RERANK_PROVIDER=http RERANK_BACKEND=qwen3_vllm # Optional for cloud rerank backend (RERANK_BACKEND=dashscope_rerank) -DASHSCOPE_API_KEY= +# Reranker cloud API keys by region +RERANK_DASHSCOPE_API_KEY_CN= +RERANK_DASHSCOPE_API_KEY_US= # Example: # RERANK_DASHSCOPE_ENDPOINT=https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks RERANK_DASHSCOPE_ENDPOINT= diff --git a/config/config.yaml b/config/config.yaml index 73c68df..67f2204 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -166,7 +166,7 @@ services: base_url: "http://127.0.0.1:6007" service_url: "http://127.0.0.1:6007/rerank" # 服务内后端(reranker 进程启动时读取) - backend: "qwen3_vllm" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank + backend: "dashscope_rerank" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank backends: bge: model_name: "BAAI/bge-reranker-v2-m3" @@ -203,9 +203,10 @@ services: # 新加坡: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks # 美国: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" - api_key: null # 推荐通过环境变量 DASHSCOPE_API_KEY 设置 - timeout_sec: 15.0 + api_key_env: "RERANK_DASHSCOPE_API_KEY_CN" + timeout_sec: 10.0 # top_n_cap: 0 # 0 表示 top_n=当前请求文档数;>0 则限制 top_n 上限 + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) instruct: "Given a shopping query, rank product titles by relevance" max_retries: 2 retry_backoff_sec: 0.2 diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md index 70dd1ad..13f5f6b 100644 --- a/docs/DEVELOPER_GUIDE.md +++ b/docs/DEVELOPER_GUIDE.md @@ -334,7 +334,7 @@ services: - **单一路径**:Provider 和 backend 必须由 `config/config.yaml` 的 `services` 块显式指定;未知配置应直接报错。 - **无兼容回退**:不保留“旧配置自动推导/兜底默认值”机制,避免静默行为偏差。 -- **环境变量覆盖**:允许环境变量覆盖(如 `RERANKER_SERVICE_URL`、`RERANK_BACKEND`、`DASHSCOPE_API_KEY`、`RERANK_DASHSCOPE_ENDPOINT`、`EMBEDDING_SERVICE_URL`、`EMBEDDING_BACKEND`、`TEI_BASE_URL`),但覆盖后仍需满足合法性校验。 +- **环境变量覆盖**:允许环境变量覆盖(如 `RERANKER_SERVICE_URL`、`RERANK_BACKEND`、`RERANK_DASHSCOPE_API_KEY_CN`/`RERANK_DASHSCOPE_API_KEY_US`、`RERANK_DASHSCOPE_ENDPOINT`、`EMBEDDING_SERVICE_URL`、`EMBEDDING_BACKEND`、`TEI_BASE_URL`),但覆盖后仍需满足合法性校验。 --- diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index 846682d..63db1b4 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -423,7 +423,7 @@ services: - `TEI_BASE_URL` - `RERANKER_SERVICE_URL` - `RERANK_BACKEND`(服务内后端) -- `DASHSCOPE_API_KEY`(`dashscope_rerank` 后端鉴权) +- `RERANK_DASHSCOPE_API_KEY_CN` / `RERANK_DASHSCOPE_API_KEY_US`(`dashscope_rerank` 后端鉴权) - `RERANK_DASHSCOPE_ENDPOINT`(`dashscope_rerank` 地域 endpoint 覆盖) ### 3.3 新增 provider 的最小步骤 diff --git a/reranker/README.md b/reranker/README.md index 19d9153..102516e 100644 --- a/reranker/README.md +++ b/reranker/README.md @@ -69,9 +69,10 @@ services: dashscope_rerank: model_name: "qwen3-rerank" endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" - api_key: null # 推荐使用环境变量 DASHSCOPE_API_KEY + api_key_env: "RERANK_DASHSCOPE_API_KEY_CN" timeout_sec: 15.0 top_n_cap: 0 + batchsize: 64 # 0关闭;>0并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) instruct: "Given a shopping query, rank product titles by relevance" max_retries: 2 retry_backoff_sec: 0.2 @@ -83,8 +84,10 @@ DashScope endpoint 地域示例: - 美国:`https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks` DashScope 认证: -- `api_key` 支持配置在 `config.yaml` -- 推荐通过环境变量注入:`DASHSCOPE_API_KEY=...` +- `api_key_env` 必填,表示该后端读取哪个环境变量作为 API Key +- 推荐按地域分别注入: + - `RERANK_DASHSCOPE_API_KEY_CN=...` + - `RERANK_DASHSCOPE_API_KEY_US=...` - 服务端口、请求限制等仍在 `reranker/config.py`(或环境变量 `RERANKER_PORT`、`RERANKER_HOST`)。 diff --git a/reranker/backends/dashscope_rerank.py b/reranker/backends/dashscope_rerank.py index 2b73f6b..eb5ef24 100644 --- a/reranker/backends/dashscope_rerank.py +++ b/reranker/backends/dashscope_rerank.py @@ -16,11 +16,12 @@ import logging import math import os import time +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any, Dict, List, Tuple from urllib import error as urllib_error from urllib import request as urllib_request -from reranker.backends.batching_utils import deduplicate_with_positions +from reranker.backends.batching_utils import deduplicate_with_positions, iter_batches logger = logging.getLogger("reranker.backends.dashscope_rerank") @@ -32,19 +33,20 @@ class DashScopeRerankBackend: Config from services.rerank.backends.dashscope_rerank: - model_name: str, default "qwen3-rerank" - endpoint: str, default "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" - - api_key: optional str (or env DASHSCOPE_API_KEY) + - api_key_env: str, required env var name for this backend key - timeout_sec: float, default 15.0 - top_n_cap: int, optional cap; 0 means use all docs in request + - batchsize: int, optional; 0 disables batching; >0 enables concurrent small-batch scheduling - instruct: optional str - max_retries: int, default 1 - retry_backoff_sec: float, default 0.2 Env overrides: - - DASHSCOPE_API_KEY - RERANK_DASHSCOPE_ENDPOINT - RERANK_DASHSCOPE_MODEL - RERANK_DASHSCOPE_TIMEOUT_SEC - RERANK_DASHSCOPE_TOP_N_CAP + - RERANK_DASHSCOPE_BATCHSIZE """ def __init__(self, config: Dict[str, Any]) -> None: @@ -59,11 +61,8 @@ class DashScopeRerankBackend: or self._config.get("endpoint") or "https://dashscope.aliyuncs.com/compatible-api/v1/reranks" ).strip() - self._api_key = str( - os.getenv("DASHSCOPE_API_KEY") - or self._config.get("api_key") - or "" - ).strip().strip('"').strip("'") + self._api_key_env = str(self._config.get("api_key_env") or "").strip() + self._api_key = str(os.getenv(self._api_key_env) or "").strip().strip('"').strip("'") self._timeout_sec = float( os.getenv("RERANK_DASHSCOPE_TIMEOUT_SEC") or self._config.get("timeout_sec") @@ -74,21 +73,29 @@ class DashScopeRerankBackend: or self._config.get("top_n_cap") or 0 ) + self._batchsize = int( + os.getenv("RERANK_DASHSCOPE_BATCHSIZE") + or self._config.get("batchsize") + or 0 + ) self._instruct = str(self._config.get("instruct") or "").strip() self._max_retries = int(self._config.get("max_retries", 1)) self._retry_backoff_sec = float(self._config.get("retry_backoff_sec", 0.2)) if not self._endpoint: raise ValueError("dashscope_rerank endpoint is required") + if not self._api_key_env: + raise ValueError("dashscope_rerank api_key_env is required") if not self._api_key: raise ValueError( - "dashscope_rerank api_key is required (set services.rerank.backends.dashscope_rerank.api_key " - "or env DASHSCOPE_API_KEY)" + f"dashscope_rerank api key is required (set env {self._api_key_env})" ) if self._timeout_sec <= 0: raise ValueError(f"dashscope_rerank timeout_sec must be > 0, got {self._timeout_sec}") if self._top_n_cap < 0: raise ValueError(f"dashscope_rerank top_n_cap must be >= 0, got {self._top_n_cap}") + if self._batchsize < 0: + raise ValueError(f"dashscope_rerank batchsize must be >= 0, got {self._batchsize}") if self._max_retries <= 0: raise ValueError(f"dashscope_rerank max_retries must be > 0, got {self._max_retries}") if self._retry_backoff_sec < 0: @@ -97,11 +104,12 @@ class DashScopeRerankBackend: ) logger.info( - "DashScope reranker ready | endpoint=%s model=%s timeout_sec=%s top_n_cap=%s", + "DashScope reranker ready | endpoint=%s model=%s timeout_sec=%s top_n_cap=%s batchsize=%s", self._endpoint, self._model_name, self._timeout_sec, self._top_n_cap, + self._batchsize, ) def _http_post_json(self, payload: Dict[str, Any]) -> Dict[str, Any]: @@ -162,6 +170,95 @@ class DashScopeRerankBackend: raise RuntimeError(str(last_exc) if last_exc else "DashScope rerank failed with unknown error") + def _score_single_request( + self, + query: str, + unique_texts: List[str], + normalize: bool, + top_n: int, + ) -> Tuple[List[float], int]: + response = self._post_rerank(query=query, docs=unique_texts, top_n=top_n) + results = self._extract_results(response) + + unique_scores: List[float] = [0.0] * len(unique_texts) + for rank, item in enumerate(results): + raw_idx = item.get("index", rank) + try: + idx = int(raw_idx) + except (TypeError, ValueError): + continue + if idx < 0 or idx >= len(unique_scores): + continue + raw_score = item.get("relevance_score", item.get("score")) + unique_scores[idx] = self._coerce_score(raw_score, normalize=normalize) + return unique_scores, len(results) + + def _score_batched_concurrent( + self, + query: str, + unique_texts: List[str], + normalize: bool, + ) -> Tuple[List[float], Dict[str, int]]: + """ + Concurrent batch scoring. + + We intentionally request full local scores in each batch (top_n=len(batch)), + then apply global top_n/top_n_cap truncation after merge if needed. + """ + indices = list(range(len(unique_texts))) + batches = list(iter_batches(indices, batch_size=self._batchsize)) + num_batches = len(batches) + max_workers = min(8, num_batches) if num_batches > 0 else 1 + unique_scores: List[float] = [0.0] * len(unique_texts) + response_results = 0 + + def _run_one(batch_no: int, batch_indices: List[int]) -> Tuple[int, List[int], Dict[str, Any], float]: + docs = [unique_texts[i] for i in batch_indices] + # Ask each batch for all docs to avoid local truncation. + start_ts = time.perf_counter() + data = self._post_rerank(query=query, docs=docs, top_n=len(docs)) + elapsed_ms = round((time.perf_counter() - start_ts) * 1000.0, 3) + return batch_no, batch_indices, data, elapsed_ms + + with ThreadPoolExecutor(max_workers=max_workers) as ex: + future_to_batch = {ex.submit(_run_one, i + 1, b): b for i, b in enumerate(batches)} + for fut in as_completed(future_to_batch): + batch_indices = future_to_batch[fut] + try: + batch_no, _, data, batch_elapsed_ms = fut.result() + except Exception as exc: + raise RuntimeError( + f"DashScope rerank batch failed | batch_size={len(batch_indices)} error={exc}" + ) from exc + results = self._extract_results(data) + logger.info( + "DashScope batch response | batch=%d/%d docs=%d elapsed_ms=%s results=%d query=%r", + batch_no, + num_batches, + len(batch_indices), + batch_elapsed_ms, + len(results), + query[:80], + ) + response_results += len(results) + for rank, item in enumerate(results): + raw_idx = item.get("index", rank) + try: + local_idx = int(raw_idx) + except (TypeError, ValueError): + continue + if local_idx < 0 or local_idx >= len(batch_indices): + continue + global_idx = batch_indices[local_idx] + raw_score = item.get("relevance_score", item.get("score")) + unique_scores[global_idx] = self._coerce_score(raw_score, normalize=normalize) + + return unique_scores, { + "batches": num_batches, + "batch_concurrency": max_workers, + "response_results": response_results, + } + @staticmethod def _extract_results(data: Dict[str, Any]) -> List[Dict[str, Any]]: # Compatible API style: {"results":[...]} @@ -240,21 +337,34 @@ class DashScopeRerankBackend: top_n_effective = min(top_n_effective, int(top_n)) if self._top_n_cap > 0: top_n_effective = min(top_n_effective, self._top_n_cap) - - response = self._post_rerank(query=query, docs=unique_texts, top_n=top_n_effective) - results = self._extract_results(response) - - unique_scores: List[float] = [0.0] * len(unique_texts) - for rank, item in enumerate(results): - raw_idx = item.get("index", rank) - try: - idx = int(raw_idx) - except (TypeError, ValueError): - continue - if idx < 0 or idx >= len(unique_scores): - continue - raw_score = item.get("relevance_score", item.get("score")) - unique_scores[idx] = self._coerce_score(raw_score, normalize=normalize) + can_batch = ( + self._batchsize > 0 + and len(unique_texts) > self._batchsize + ) + if can_batch: + unique_scores, batch_meta = self._score_batched_concurrent( + query=query, + unique_texts=unique_texts, + normalize=normalize, + ) + if top_n_effective < len(unique_scores): + order = sorted(range(len(unique_scores)), key=lambda i: (-unique_scores[i], i)) + keep = set(order[:top_n_effective]) + for i in range(len(unique_scores)): + if i not in keep: + unique_scores[i] = 0.0 + response_results = int(batch_meta["response_results"]) + batches = int(batch_meta["batches"]) + batch_concurrency = int(batch_meta["batch_concurrency"]) + else: + unique_scores, response_results = self._score_single_request( + query=query, + unique_texts=unique_texts, + normalize=normalize, + top_n=top_n_effective, + ) + batches = 1 + batch_concurrency = 1 for (orig_idx, _), unique_idx in zip(indexed, position_to_unique): output_scores[orig_idx] = float(unique_scores[unique_idx]) @@ -275,7 +385,10 @@ class DashScopeRerankBackend: "normalize": normalize, "top_n": top_n_effective, "requested_top_n": int(top_n) if top_n is not None else None, - "response_results": len(results), + "response_results": response_results, + "batchsize": self._batchsize, + "batches": batches, + "batch_concurrency": batch_concurrency, "endpoint": self._endpoint, } diff --git a/reranker/server.py b/reranker/server.py index f944d77..11034db 100644 --- a/reranker/server.py +++ b/reranker/server.py @@ -154,11 +154,14 @@ def rerank(request: RerankRequest) -> RerankResponse: meta.update({"service_elapsed_ms": round((time.time() - start_ts) * 1000.0, 3)}) score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]] logger.info( - "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s", + "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s", meta.get("input_docs"), meta.get("unique_docs"), meta.get("dedup_ratio"), meta.get("service_elapsed_ms"), + meta.get("batches"), + meta.get("batchsize"), + meta.get("batch_concurrency"), _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), score_preview, ) diff --git a/tests/test_reranker_dashscope_backend.py b/tests/test_reranker_dashscope_backend.py index e1b0bb4..4ace050 100644 --- a/tests/test_reranker_dashscope_backend.py +++ b/tests/test_reranker_dashscope_backend.py @@ -1,27 +1,39 @@ from __future__ import annotations +import time + +import pytest + from reranker.backends import get_rerank_backend from reranker.backends.dashscope_rerank import DashScopeRerankBackend -def test_dashscope_backend_factory_loads(): +@pytest.fixture(autouse=True) +def _clear_global_dashscope_key(monkeypatch): + # Prevent accidental pass-through from unrelated global key. + monkeypatch.delenv("DASHSCOPE_API_KEY", raising=False) + + +def test_dashscope_backend_factory_loads(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") backend = get_rerank_backend( "dashscope_rerank", { "model_name": "qwen3-rerank", "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", - "api_key": "test-key", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", }, ) assert isinstance(backend, DashScopeRerankBackend) def test_dashscope_backend_score_with_meta_dedup_and_restore(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") backend = DashScopeRerankBackend( { "model_name": "qwen3-rerank", "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", - "api_key": "test-key", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", "top_n_cap": 0, } ) @@ -55,11 +67,12 @@ def test_dashscope_backend_score_with_meta_dedup_and_restore(monkeypatch): def test_dashscope_backend_top_n_cap_and_normalize_fallback(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") backend = DashScopeRerankBackend( { "model_name": "qwen3-rerank", "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", - "api_key": "test-key", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", "top_n_cap": 1, } ) @@ -81,11 +94,12 @@ def test_dashscope_backend_top_n_cap_and_normalize_fallback(monkeypatch): def test_dashscope_backend_score_with_meta_topn_request(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") backend = DashScopeRerankBackend( { "model_name": "qwen3-rerank", "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", - "api_key": "test-key", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", "top_n_cap": 0, } ) @@ -101,3 +115,127 @@ def test_dashscope_backend_score_with_meta_topn_request(monkeypatch): assert scores == [0.3, 0.0, 0.8] assert meta["top_n"] == 2 assert meta["requested_top_n"] == 2 + + +def test_dashscope_backend_batchsize_concurrent_full_topn(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") + backend = DashScopeRerankBackend( + { + "model_name": "qwen3-rerank", + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", + "top_n_cap": 0, + "batchsize": 2, + } + ) + + def _fake_post(query: str, docs: list[str], top_n: int): + assert query == "q" + # batching path asks every batch for full local list + assert top_n == len(docs) + time.sleep(0.05) + return { + "results": [ + {"index": i, "relevance_score": float(i + 1) / 10.0} + for i, _ in enumerate(docs) + ] + } + + monkeypatch.setattr(backend, "_post_rerank", _fake_post) + start = time.perf_counter() + scores, meta = backend.score_with_meta(query="q", docs=["d1", "d2", "d3", "d4", "d5", "d6"]) + elapsed = time.perf_counter() - start + + # 3 batches * 50ms serial ~=150ms; concurrent should be significantly lower. + assert elapsed < 0.14 + assert len(scores) == 6 + assert meta["batches"] == 3 + assert meta["batch_concurrency"] == 3 + assert meta["response_results"] == 6 + + +def test_dashscope_backend_batchsize_still_effective_when_topn_limited(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") + backend = DashScopeRerankBackend( + { + "model_name": "qwen3-rerank", + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", + "top_n_cap": 0, + "batchsize": 2, + } + ) + + called = {"count": 0} + + def _fake_post(query: str, docs: list[str], top_n: int): + called["count"] += 1 + # batching remains enabled; each batch asks for full local scores + assert top_n == len(docs) + score_map = {"d1": 0.9, "d2": 0.1, "d3": 0.8, "d4": 0.2} + return { + "results": [ + {"index": i, "relevance_score": score_map[doc]} + for i, doc in enumerate(docs) + ] + } + + monkeypatch.setattr(backend, "_post_rerank", _fake_post) + scores, meta = backend.score_with_meta_topn(query="q", docs=["d1", "d2", "d3", "d4"], top_n=2) + + assert called["count"] == 2 + assert scores == [0.9, 0.0, 0.8, 0.0] + assert meta["batches"] == 2 + assert meta["top_n"] == 2 + + +def test_dashscope_backend_batchsize_raises_when_one_batch_fails(monkeypatch): + monkeypatch.setenv("TEST_RERANK_DASHSCOPE_API_KEY", "test-key") + backend = DashScopeRerankBackend( + { + "model_name": "qwen3-rerank", + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", + "top_n_cap": 0, + "batchsize": 2, + } + ) + + def _fake_post(query: str, docs: list[str], top_n: int): + if docs == ["d3", "d4"]: + raise RuntimeError("provider temporary error") + return { + "results": [ + {"index": i, "relevance_score": 0.1} + for i, _ in enumerate(docs) + ] + } + + monkeypatch.setattr(backend, "_post_rerank", _fake_post) + + with pytest.raises(RuntimeError, match="DashScope rerank batch failed"): + backend.score_with_meta(query="q", docs=["d1", "d2", "d3", "d4"]) + + +def test_dashscope_backend_requires_api_key_env(): + with pytest.raises(ValueError, match="api_key_env is required"): + DashScopeRerankBackend( + { + "model_name": "qwen3-rerank", + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", + "top_n_cap": 0, + } + ) + + +def test_dashscope_backend_requires_api_key_env_value(monkeypatch): + monkeypatch.delenv("TEST_RERANK_DASHSCOPE_API_KEY", raising=False) + with pytest.raises(ValueError, match="set env TEST_RERANK_DASHSCOPE_API_KEY"): + DashScopeRerankBackend( + { + "model_name": "qwen3-rerank", + "endpoint": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", + "api_key_env": "TEST_RERANK_DASHSCOPE_API_KEY", + "top_n_cap": 0, + } + ) -- libgit2 0.21.2