# test_service_api_contracts.py - API contract tests for the search, indexer,
# embedding, translator, and reranker services.
from __future__ import annotations

from types import SimpleNamespace
from typing import Any, Dict, List

import numpy as np
import pytest
from fastapi.testclient import TestClient


class _FakeSearcher:
    """Stub searcher that answers every request with one canned product hit."""

    def search(self, **kwargs):
        """Build the canned search response.

        Only the ``query`` keyword is read (defaulting to ``""``); it is echoed
        back as ``query_info["normalized_query"]``. Every other keyword is
        accepted and ignored.

        Returns:
            A ``SimpleNamespace`` carrying a single result plus fixed totals,
            timing, and empty facet/suggestion collections.
        """
        canned_hit = {
            "spu_id": "spu-1",
            "title": "测试商品",
            "price": 99.0,
            "currency": "USD",
            "in_stock": True,
            "skus": [],
            "relevance_score": 1.2,
        }
        fields = {
            "results": [canned_hit],
            "total": 1,
            "max_score": 1.2,
            "took_ms": 8,
            "facets": [],
            "query_info": {"normalized_query": kwargs.get("query", "")},
            "suggestions": [],
            "related_searches": [],
            "debug_info": None,
        }
        return SimpleNamespace(**fields)

    def search_by_image(self, **kwargs):
        """Delegate to :meth:`search`, forwarding all keywords unchanged."""
        return self.search(**kwargs)


class _FakeSuggestionService:
    """Stub suggestion service that always proposes ``"iphone 15"``."""

    def search(self, **kwargs):
        """Return a fixed suggestion payload for the given keywords.

        ``query`` is required (a missing key raises ``KeyError``); ``language``
        is optional and defaults to ``"en"``. The language value is reported
        both as ``language`` and ``resolved_language``.
        """
        lang = kwargs.get("language", "en")
        payload = {"query": kwargs["query"]}
        payload["language"] = lang
        payload["resolved_language"] = lang
        payload["suggestions"] = [{"text": "iphone 15", "score": 1.0}]
        payload["took_ms"] = 3
        return payload


@pytest.fixture
def search_client(monkeypatch):
    """Yield a ``TestClient`` for ``api.app`` with its services stubbed out.

    ``init_service`` is replaced by a no-op, and the searcher / suggestion
    service accessors return fresh fake instances on every call.
    """
    import api.app as search_app

    stubs = {
        "init_service": lambda es_host="": None,
        "get_searcher": lambda: _FakeSearcher(),
        "get_suggestion_service": lambda: _FakeSuggestionService(),
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(search_app, attr_name, stub)

    with TestClient(search_app.app) as http:
        yield http


def test_search_api_contract(search_client: TestClient):
    """POST /search/ answers 200 with ``total == 1`` and ``spu-1`` as first hit."""
    tenant_headers = {"X-Tenant-ID": "162"}
    request_body = {"query": "toy", "size": 5}

    resp = search_client.post("/search/", headers=tenant_headers, json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["total"] == 1
    first_hit = body["results"][0]
    assert first_hit["spu_id"] == "spu-1"


def test_image_search_api_contract(search_client: TestClient):
    """POST /search/image answers 200 and the first result is ``spu-1``."""
    tenant_headers = {"X-Tenant-ID": "162"}
    request_body = {"image_url": "https://example.com/a.jpg", "size": 3}

    resp = search_client.post("/search/image", headers=tenant_headers, json=request_body)

    assert resp.status_code == 200
    hits = resp.json()["results"]
    assert hits[0]["spu_id"] == "spu-1"


def test_suggestion_api_contract(search_client: TestClient):
    """GET /search/suggestions echoes the query and returns one suggestion."""
    url = "/search/suggestions?q=iph&size=5&language=en"

    resp = search_client.get(url, headers={"X-Tenant-ID": "162"})

    assert resp.status_code == 200
    body = resp.json()
    assert body["query"] == "iph"
    suggestion_count = len(body["suggestions"])
    assert suggestion_count == 1


class _FakeBulkService:
    """Stub bulk indexer that reports success without doing any work."""

    def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int):
        """Echo the three arguments back in a dict flagged ``success=True``."""
        report = dict(
            tenant_id=tenant_id,
            recreate_index=recreate_index,
            batch_size=batch_size,
        )
        report["success"] = True
        return report


class _FakeTransformer:
    """Stub SPU-to-document transformer producing a minimal document."""

    def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options):
        """Build a three-field document from ``spu_row``.

        ``spu_row`` must offer ``.get``; ``id`` falls back to ``"0"`` and
        ``title`` to ``""``, both stringified. ``skus`` and ``options`` are
        accepted but not used.
        """
        spu_id = str(spu_row.get("id", "0"))
        zh_title = str(spu_row.get("title", ""))
        return {"tenant_id": tenant_id, "spu_id": spu_id, "title": {"zh": zh_title}}


class _FakeIncrementalService:
    """Stub incremental indexer that marks every requested SPU as indexed."""

    def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None):
        """Report all ``spu_ids`` as successfully indexed.

        ``es_client`` and ``delete_spu_ids`` are accepted but ignored; the
        returned ``delete_spu_ids`` list is always empty and ``failed_count``
        is always ``0``.
        """
        statuses = []
        for spu_id in spu_ids:
            statuses.append({"spu_id": spu_id, "status": "indexed"})
        requested = len(spu_ids)

        summary = {"tenant_id": tenant_id, "spu_ids": statuses, "delete_spu_ids": []}
        summary["total"] = requested
        summary["success_count"] = requested
        summary["failed_count"] = 0
        return summary

    def _get_transformer_bundle(self, tenant_id: str):
        """Return a ``(_FakeTransformer(), None, False)`` triple for any tenant."""
        transformer = _FakeTransformer()
        return transformer, None, False


@pytest.fixture
def indexer_client(monkeypatch):
    """Yield a ``TestClient`` for ``api.indexer_app`` with its services stubbed.

    Service initialisation becomes a no-op, and the route-level accessors for
    the bulk service, incremental service, and ES client return fakes (the ES
    client is a bare ``object()``).
    """
    import api.indexer_app as indexer_app
    import api.routes.indexer as indexer_routes

    replacements = (
        (indexer_app, "init_indexer_service", lambda es_host="": None),
        (indexer_routes, "get_bulk_indexing_service", lambda: _FakeBulkService()),
        (indexer_routes, "get_incremental_service", lambda: _FakeIncrementalService()),
        (indexer_routes, "get_es_client", lambda: object()),
    )
    for target, attr_name, stub in replacements:
        monkeypatch.setattr(target, attr_name, stub)

    with TestClient(indexer_app.app) as http:
        yield http


def test_indexer_reindex_contract(indexer_client: TestClient):
    """POST /indexer/reindex answers 200 with ``success`` set to ``True``."""
    request_body = {"tenant_id": "162", "batch_size": 100}

    resp = indexer_client.post("/indexer/reindex", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["success"] is True


def test_indexer_incremental_contract(indexer_client: TestClient):
    """POST /indexer/index with two SPU ids reports ``success_count == 2``."""
    request_body = {"tenant_id": "162", "spu_ids": ["1001", "1002"]}

    resp = indexer_client.post("/indexer/index", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["success_count"] == 2


def test_indexer_build_docs_contract(indexer_client: TestClient):
    """POST /indexer/build-docs with one item returns one doc with ``spu_id == "1"``."""
    item = {"spu": {"id": 1, "title": "T-shirt"}, "skus": [], "options": []}
    request_body = {"tenant_id": "162", "items": [item]}

    resp = indexer_client.post("/indexer/build-docs", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["success_count"] == 1
    first_doc = body["docs"][0]
    assert first_doc["spu_id"] == "1"


class _FakeTextModel:
    """Stub text encoder emitting the same 3-element float32 vector per text."""

    def encode_batch(self, texts, batch_size=32, device="cpu"):
        """Return one fresh ``[0.1, 0.2, 0.3]`` float32 array for each text.

        ``batch_size`` and ``device`` are accepted but ignored.
        """
        vectors = []
        for _ in texts:
            vectors.append(np.array([0.1, 0.2, 0.3], dtype=np.float32))
        return vectors


class _FakeImageModel:
    """Stub image encoder emitting the same 3-element float32 vector per URL."""

    def encode_image_urls(self, urls, batch_size=8):
        """Return one fresh ``[0.3, 0.2, 0.1]`` float32 array for each URL.

        ``batch_size`` is accepted but ignored.
        """
        vectors = []
        for _ in urls:
            vectors.append(np.array([0.3, 0.2, 0.1], dtype=np.float32))
        return vectors


@pytest.fixture
def embedding_client(monkeypatch):
    """Yield a ``TestClient`` for ``embeddings.server`` backed by fake models.

    Startup handlers are disabled (presumably so the real models are never
    loaded - confirm against ``embeddings.server``) and the module-level
    ``_text_model`` / ``_image_model`` are replaced with fakes.

    All three changes go through ``monkeypatch`` so they are undone at
    teardown. Previously the handler list was cleared in place and the model
    globals were overwritten directly, which leaked the stubbed state into any
    later test that imports the same module.
    """
    import embeddings.server as emb_server

    # Swap in an empty handler list instead of clearing the real one, so the
    # original startup handlers come back once the fixture is torn down.
    monkeypatch.setattr(emb_server.app.router, "on_startup", [])
    # raising=False keeps the old "assign regardless" behaviour if the module
    # does not predefine these globals.
    monkeypatch.setattr(emb_server, "_text_model", _FakeTextModel(), raising=False)
    monkeypatch.setattr(emb_server, "_image_model", _FakeImageModel(), raising=False)

    with TestClient(emb_server.app) as client:
        yield client


def test_embedding_text_contract(embedding_client: TestClient):
    """POST /embed/text with two texts returns two vectors of length 3."""
    texts = ["hello", "world"]

    resp = embedding_client.post("/embed/text", json=texts)

    assert resp.status_code == 200
    vectors = resp.json()
    assert len(vectors) == 2
    first_vector = vectors[0]
    assert len(first_vector) == 3


def test_embedding_image_contract(embedding_client: TestClient):
    """POST /embed/image with one URL returns a first vector of length 3."""
    image_urls = ["https://example.com/a.jpg"]

    resp = embedding_client.post("/embed/image", json=image_urls)

    assert resp.status_code == 200
    first_vector = resp.json()[0]
    assert len(first_vector) == 3


class _FakeTranslator:
    """Stub translator whose output is the input text tagged with the target language."""

    # Fixed attributes exposed on the fake translator.
    model = "qwen"
    use_cache = True

    def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None):
        """Return ``"<text>-<target_lang>"``.

        ``source_lang`` and ``prompt`` are accepted but ignored.
        """
        translated = f"{text}-{target_lang}"
        return translated


@pytest.fixture
def translator_client(monkeypatch):
    """Yield a ``TestClient`` for ``api.translator_app`` using a fake translator.

    Startup handlers are disabled and ``get_translator`` is replaced with a
    factory returning a fake translator. Both changes go through
    ``monkeypatch`` so they are reverted at teardown; previously the handler
    list was cleared in place and never restored, leaking into later tests
    that use the same app.
    """
    import api.translator_app as translator_app

    # Swap in an empty handler list instead of clearing the real one, so the
    # original startup handlers come back once the fixture is torn down.
    monkeypatch.setattr(translator_app.app.router, "on_startup", [])
    monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FakeTranslator())

    with TestClient(translator_app.app) as client:
        yield client


def test_translator_api_contract(translator_client: TestClient):
    """POST /translate answers 200 with the fake translator's tagged text."""
    request_body = {"text": "商品名称", "target_lang": "en", "source_lang": "zh"}

    resp = translator_client.post("/translate", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["translated_text"] == "商品名称-en"


def test_translator_health_contract(translator_client: TestClient):
    """GET /health on the translator app answers 200 with status ``healthy``."""
    resp = translator_client.get("/health")

    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "healthy"


class _FakeReranker:
    """Stub reranker scoring documents by their 1-based position."""

    _model_name = "fake-reranker"

    def score_with_meta(self, query: str, docs: List[str], normalize: bool = True):
        """Return ``(scores, meta)`` for ``docs``.

        ``scores`` is ``[1.0, 2.0, ...]`` in input order; ``meta`` reports the
        number of documents and the number of distinct documents. ``query``
        and ``normalize`` are accepted but ignored.
        """
        scores = [float(position) for position, _ in enumerate(docs, start=1)]
        meta: Dict[str, Any] = {}
        meta["input_docs"] = len(docs)
        meta["unique_docs"] = len(set(docs))
        return scores, meta


@pytest.fixture
def reranker_client(monkeypatch):
    """Yield a ``TestClient`` for ``reranker.server`` backed by a fake reranker.

    Startup handlers are disabled (presumably so the real reranker is never
    loaded - confirm against ``reranker.server``) and the module-level
    ``_reranker`` / ``_backend_name`` are replaced with fake values.

    All changes go through ``monkeypatch`` so they are undone at teardown.
    Previously the handler list was cleared in place and the globals were
    overwritten directly, which leaked the stubbed state into any later test
    that imports the same module.
    """
    import reranker.server as reranker_server

    # Swap in an empty handler list instead of clearing the real one, so the
    # original startup handlers come back once the fixture is torn down.
    monkeypatch.setattr(reranker_server.app.router, "on_startup", [])
    # raising=False keeps the old "assign regardless" behaviour if the module
    # does not predefine these globals.
    monkeypatch.setattr(reranker_server, "_reranker", _FakeReranker(), raising=False)
    monkeypatch.setattr(reranker_server, "_backend_name", "fake", raising=False)

    with TestClient(reranker_server.app) as client:
        yield client


def test_reranker_api_contract(reranker_client: TestClient):
    """POST /rerank with two docs returns scores ``[1.0, 2.0]`` and ``input_docs == 2``."""
    request_body = {"query": "wireless mouse", "docs": ["doc-a", "doc-b"]}

    resp = reranker_client.post("/rerank", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["scores"] == [1.0, 2.0]
    meta = body["meta"]
    assert meta["input_docs"] == 2


def test_reranker_health_contract(reranker_client: TestClient):
    """GET /health on the reranker app answers 200 with status ``ok``."""
    resp = reranker_client.get("/health")

    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "ok"