From a3734f13ec48c457670d4cd68fe3e865bc0a18fe Mon Sep 17 00:00:00 2001 From: tangwang Date: Wed, 1 Apr 2026 10:40:32 +0800 Subject: [PATCH] eval任务 美国地区不支持batch调用,改为在线调用 --- scripts/evaluation/eval_framework/clients.py | 17 ++++++++++++++--- scripts/evaluation/eval_framework/constants.py | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/scripts/evaluation/eval_framework/clients.py b/scripts/evaluation/eval_framework/clients.py index 77edd20..3775638 100644 --- a/scripts/evaluation/eval_framework/clients.py +++ b/scripts/evaluation/eval_framework/clients.py @@ -74,6 +74,10 @@ class DashScopeLabelClient: """DashScope OpenAI-compatible chat: synchronous or Batch File API (JSONL job). Batch flow: https://help.aliyun.com/zh/model-studio/batch-interfaces-compatible-with-openai/ + + Some regional endpoints (e.g. ``dashscope-us`` compatible-mode) do not implement ``/batches``; + on HTTP 404 from batch calls we fall back to synchronous ``/chat/completions`` and stop using batch + for subsequent requests on this client. """ def __init__( @@ -86,7 +90,7 @@ class DashScopeLabelClient: batch_completion_window: str = "24h", batch_poll_interval_sec: float = 10.0, enable_thinking: bool = True, - use_batch: bool = True, + use_batch: bool = False, ): self.model = model self.base_url = base_url.rstrip("/") @@ -205,9 +209,16 @@ class DashScopeLabelClient: return content, safe_json_dumps(row) def _chat(self, prompt: str) -> Tuple[str, str]: - if self.use_batch: + if not self.use_batch: + return self._chat_sync(prompt) + try: return self._chat_batch(prompt) - return self._chat_sync(prompt) + except requests.exceptions.HTTPError as e: + resp = getattr(e, "response", None) + if resp is not None and resp.status_code == 404: + self.use_batch = False + return self._chat_sync(prompt) + raise def _find_batch_line_for_custom_id( self, diff --git a/scripts/evaluation/eval_framework/constants.py b/scripts/evaluation/eval_framework/constants.py index 4dacc1d..395d96c 100644 --- a/scripts/evaluation/eval_framework/constants.py +++ b/scripts/evaluation/eval_framework/constants.py @@ -37,7 +37,7 @@ DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt" # Judge LLM (eval_framework only; override via CLI --judge-model / constructor kwargs) DEFAULT_JUDGE_MODEL = "qwen3.5-flash" DEFAULT_JUDGE_ENABLE_THINKING = True -DEFAULT_JUDGE_DASHSCOPE_BATCH = True +DEFAULT_JUDGE_DASHSCOPE_BATCH = False DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW = "24h" DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC = 10.0 -- libgit2 0.21.2