Commit a3734f13ec48c457670d4cd68fe3e865bc0a18fe
1 parent: a345b01f
eval task: the US region does not support batch calls; switched to online (synchronous) calls
Showing 2 changed files with 15 additions and 4 deletions.
Show diff stats
scripts/evaluation/eval_framework/clients.py
| ... | ... | @@ -74,6 +74,10 @@ class DashScopeLabelClient: |
| 74 | 74 | """DashScope OpenAI-compatible chat: synchronous or Batch File API (JSONL job). |
| 75 | 75 | |
| 76 | 76 | Batch flow: https://help.aliyun.com/zh/model-studio/batch-interfaces-compatible-with-openai/ |
| 77 | + | |
| 78 | + Some regional endpoints (e.g. ``dashscope-us`` compatible-mode) do not implement ``/batches``; | |
| 79 | + on HTTP 404 from batch calls we fall back to synchronous ``/chat/completions`` and stop using batch | |
| 80 | + for subsequent requests on this client. | |
| 77 | 81 | """ |
| 78 | 82 | |
| 79 | 83 | def __init__( |
| ... | ... | @@ -86,7 +90,7 @@ class DashScopeLabelClient: |
| 86 | 90 | batch_completion_window: str = "24h", |
| 87 | 91 | batch_poll_interval_sec: float = 10.0, |
| 88 | 92 | enable_thinking: bool = True, |
| 89 | - use_batch: bool = True, | |
| 93 | + use_batch: bool = False, | |
| 90 | 94 | ): |
| 91 | 95 | self.model = model |
| 92 | 96 | self.base_url = base_url.rstrip("/") |
| ... | ... | @@ -205,9 +209,16 @@ class DashScopeLabelClient: |
| 205 | 209 | return content, safe_json_dumps(row) |
| 206 | 210 | |
| 207 | 211 | def _chat(self, prompt: str) -> Tuple[str, str]: |
| 208 | - if self.use_batch: | |
| 212 | + if not self.use_batch: | |
| 213 | + return self._chat_sync(prompt) | |
| 214 | + try: | |
| 209 | 215 | return self._chat_batch(prompt) |
| 210 | - return self._chat_sync(prompt) | |
| 216 | + except requests.exceptions.HTTPError as e: | |
| 217 | + resp = getattr(e, "response", None) | |
| 218 | + if resp is not None and resp.status_code == 404: | |
| 219 | + self.use_batch = False | |
| 220 | + return self._chat_sync(prompt) | |
| 221 | + raise | |
| 211 | 222 | |
| 212 | 223 | def _find_batch_line_for_custom_id( |
| 213 | 224 | self, | ... | ... |
scripts/evaluation/eval_framework/constants.py
| ... | ... | @@ -37,7 +37,7 @@ DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt" |
| 37 | 37 | # Judge LLM (eval_framework only; override via CLI --judge-model / constructor kwargs) |
| 38 | 38 | DEFAULT_JUDGE_MODEL = "qwen3.5-flash" |
| 39 | 39 | DEFAULT_JUDGE_ENABLE_THINKING = True |
| 40 | -DEFAULT_JUDGE_DASHSCOPE_BATCH = True | |
| 40 | +DEFAULT_JUDGE_DASHSCOPE_BATCH = False | |
| 41 | 41 | DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW = "24h" |
| 42 | 42 | DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC = 10.0 |
| 43 | 43 | ... | ... |