# test_eval_framework_clients.py 2.35 KB
import pytest
import requests

from scripts.evaluation.eval_framework.clients import DashScopeLabelClient
from scripts.evaluation.eval_framework.utils import build_label_doc_line


def _http_error(status_code: int, body: str) -> requests.exceptions.HTTPError:
    """Build an ``HTTPError`` that wraps a hand-made ``requests.Response``.

    Args:
        status_code: HTTP status to stamp on the synthetic response.
        body: Response payload; stored UTF-8 encoded on the response.

    Returns:
        An ``HTTPError`` whose message is ``"<status_code> error"`` and whose
        ``response`` attribute is the synthetic response.
    """
    fake_response = requests.Response()
    fake_response.url = "https://dashscope-us.aliyuncs.com/compatible-mode/v1/chat/completions"
    fake_response.status_code = status_code
    # No real request is made, so the body bytes are assigned directly.
    fake_response._content = body.encode("utf-8")

    message = f"{status_code} error"
    return requests.exceptions.HTTPError(message, response=fake_response)


def test_build_label_doc_line_truncates_long_fields():
    """Oversized title/SKU option values yield a numbered, capped, ellipsized line."""
    long_title = "Minimalist Top " * 40
    oversized_sku = {
        "option1_value": "Very Long Color Name " * 10,
        "option2_value": "Very Long Size Name " * 10,
        "option3_value": "Very Long Material Name " * 10,
    }
    doc = {"title": {"en": long_title}, "skus": [oversized_sku]}

    rendered = build_label_doc_line(1, doc)

    # The line keeps its "<index>. " prefix, stays within 260 characters,
    # and ends with "...".
    assert rendered.startswith("1. ")
    assert len(rendered) <= 260
    assert rendered.endswith("...")


def test_dashscope_chat_does_not_retry_non_transient_400(monkeypatch):
    """An HTTP 400 from ``_chat_sync`` propagates out of ``_chat`` after one attempt."""
    seen_prompts = []

    def always_bad_request(prompt: str):
        # Record every invocation so the attempt count can be checked afterwards.
        seen_prompts.append(prompt)
        raise _http_error(400, '{"code":"InvalidParameter"}')

    client = DashScopeLabelClient(
        model="qwen3.5-plus",
        base_url="https://example.com",
        api_key="test",
    )
    # Allow several attempts with no delay; the assertion below checks that
    # only one of them is used.
    client.retry_attempts = 4
    client.retry_delay_sec = 0
    monkeypatch.setattr(client, "_chat_sync", always_bad_request)

    with pytest.raises(requests.exceptions.HTTPError):
        client._chat("prompt", phase="relevance_classify")

    assert len(seen_prompts) == 1


def test_dashscope_chat_retries_transient_500(monkeypatch):
    """Two HTTP 500 failures followed by a success: ``_chat`` returns the third result."""
    seen_prompts = []

    def fails_twice_then_succeeds(prompt: str):
        seen_prompts.append(prompt)
        if len(seen_prompts) >= 3:
            return "Fully Relevant", '{"choices":[{"message":{"content":"Fully Relevant"}}]}'
        # The first two attempts fail with a server error.
        raise _http_error(500, '{"code":"InternalError"}')

    client = DashScopeLabelClient(
        model="qwen3.5-plus",
        base_url="https://example.com",
        api_key="test",
    )
    client.retry_attempts = 4
    client.retry_delay_sec = 0
    monkeypatch.setattr(client, "_chat_sync", fails_twice_then_succeeds)

    result = client._chat("prompt", phase="relevance_classify")
    content, raw = result

    assert content == "Fully Relevant"
    assert "Fully Relevant" in raw
    assert len(seen_prompts) == 3