From a3734f13ec48c457670d4cd68fe3e865bc0a18fe Mon Sep 17 00:00:00 2001
From: tangwang <tangwang@hsyl>
Date: Wed, 1 Apr 2026 10:40:32 +0800
Subject: [PATCH] eval任务 美国地区不支持batch调用，改为在线调用

---
 scripts/evaluation/eval_framework/clients.py   | 17 ++++++++++++++---
 scripts/evaluation/eval_framework/constants.py |  2 +-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/scripts/evaluation/eval_framework/clients.py b/scripts/evaluation/eval_framework/clients.py
index 77edd20..3775638 100644
--- a/scripts/evaluation/eval_framework/clients.py
+++ b/scripts/evaluation/eval_framework/clients.py
@@ -74,6 +74,10 @@ class DashScopeLabelClient:
     """DashScope OpenAI-compatible chat: synchronous or Batch File API (JSONL job).
 
     Batch flow: https://help.aliyun.com/zh/model-studio/batch-interfaces-compatible-with-openai/
+
+    Some regional endpoints (e.g. ``dashscope-us`` compatible-mode) do not implement ``/batches``.
+    When ``use_batch`` is enabled and a batch call returns HTTP 404, the client falls back to
+    synchronous ``/chat/completions`` and stops using batch for subsequent requests on this client.
     """
 
     def __init__(
@@ -86,7 +90,7 @@ class DashScopeLabelClient:
         batch_completion_window: str = "24h",
         batch_poll_interval_sec: float = 10.0,
         enable_thinking: bool = True,
-        use_batch: bool = True,
+        use_batch: bool = False,
     ):
         self.model = model
         self.base_url = base_url.rstrip("/")
@@ -205,9 +209,16 @@ class DashScopeLabelClient:
         return content, safe_json_dumps(row)
 
     def _chat(self, prompt: str) -> Tuple[str, str]:
-        if self.use_batch:
+        if not self.use_batch:  # batch disabled (or already turned off by a 404 below)
+            return self._chat_sync(prompt)
+        try:
             return self._chat_batch(prompt)
-        return self._chat_sync(prompt)
+        except requests.exceptions.HTTPError as e:
+            resp = getattr(e, "response", None)  # tolerate an error with no response attached
+            if resp is not None and resp.status_code == 404:  # treated as "/batches not implemented"
+                self.use_batch = False  # sticky: later calls on this client go straight to sync
+                return self._chat_sync(prompt)
+            raise  # every other HTTP error propagates unchanged
 
     def _find_batch_line_for_custom_id(
         self,
diff --git a/scripts/evaluation/eval_framework/constants.py b/scripts/evaluation/eval_framework/constants.py
index 4dacc1d..395d96c 100644
--- a/scripts/evaluation/eval_framework/constants.py
+++ b/scripts/evaluation/eval_framework/constants.py
@@ -37,7 +37,7 @@ DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt"
 # Judge LLM (eval_framework only; override via CLI --judge-model / constructor kwargs)
 DEFAULT_JUDGE_MODEL = "qwen3.5-flash"
 DEFAULT_JUDGE_ENABLE_THINKING = True
-DEFAULT_JUDGE_DASHSCOPE_BATCH = True
+DEFAULT_JUDGE_DASHSCOPE_BATCH = False  # batch off by default; presumably feeds use_batch - confirm wiring
 DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW = "24h"
 DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC = 10.0
 
--
libgit2 0.21.2