# scripts/evaluation/eval_framework/reports.py

"""Markdown and text reports for batch evaluation."""
from __future__ import annotations
from typing import Any, Dict
from .constants import RELEVANCE_EXACT, RELEVANCE_HIGH, RELEVANCE_IRRELEVANT, RELEVANCE_LOW
def _distribution_bullets(counts: Dict[Any, Any]) -> list:
    """Return the four relevance-label bullet lines for *counts*.

    Each line has the form ``- <label>: <count>``; a label missing from
    *counts* is reported as ``0``. The order is fixed: Exact Match,
    High Relevant, Low Relevant, Irrelevant.
    """
    labelled_levels = (
        ("Exact Match", RELEVANCE_EXACT),
        ("High Relevant", RELEVANCE_HIGH),
        ("Low Relevant", RELEVANCE_LOW),
        ("Irrelevant", RELEVANCE_IRRELEVANT),
    )
    return [f"- {title}: {counts.get(level, 0)}" for title, level in labelled_levels]


def render_batch_report_markdown(payload: Dict[str, Any]) -> str:
    """Render a batch evaluation payload as a Markdown report.

    Args:
        payload: Batch evaluation result. The keys ``batch_id``,
            ``created_at``, ``tenant_id``, ``queries`` and ``top_k`` are
            required. ``aggregate_metrics``, ``aggregate_distribution`` and
            ``per_query`` are optional; a missing or falsy value is treated
            as empty. Every ``per_query`` entry must carry ``query`` and may
            carry ``metrics`` and ``distribution``.

    Returns:
        The report as one newline-joined string. Metrics are listed sorted
        by key. The "Label Distribution" section is emitted only when the
        aggregate distribution is non-empty, whereas each per-query section
        always lists all four label counts (defaulting to 0).

    Raises:
        KeyError: If a required key is missing from *payload* or from a
            ``per_query`` entry.
    """
    report = ["# Search Batch Evaluation", ""]
    report += [
        f"- Batch ID: {payload['batch_id']}",
        f"- Created at: {payload['created_at']}",
        f"- Tenant ID: {payload['tenant_id']}",
        f"- Query count: {len(payload['queries'])}",
        f"- Top K: {payload['top_k']}",
    ]

    report += ["", "## Aggregate Metrics", ""]
    aggregate_metrics = payload.get("aggregate_metrics") or {}
    report.extend(
        f"- {name}: {score}" for name, score in sorted(aggregate_metrics.items())
    )

    totals = payload.get("aggregate_distribution") or {}
    if totals:
        report += ["", "## Label Distribution", ""]
        report += _distribution_bullets(totals)

    report += ["", "## Per Query", ""]
    for entry in payload.get("per_query") or []:
        report += [f"### {entry['query']}", ""]
        query_metrics = entry.get("metrics") or {}
        report.extend(
            f"- {name}: {score}" for name, score in sorted(query_metrics.items())
        )
        # Unlike the aggregate section, label counts are listed even when
        # the entry has no distribution at all.
        report += _distribution_bullets(entry.get("distribution") or {})
        report.append("")

    return "\n".join(report)