"""Markdown and text reports for batch evaluation.""" from __future__ import annotations from typing import Any, Dict from .constants import RELEVANCE_EXACT, RELEVANCE_HIGH, RELEVANCE_IRRELEVANT, RELEVANCE_LOW from .metrics import PRIMARY_METRIC_KEYS def _append_metric_block(lines: list[str], metrics: Dict[str, Any]) -> None: primary_keys = ( "Primary_Metric_Score", *PRIMARY_METRIC_KEYS, "ERR@10", ) included = set() for key in primary_keys: if key in metrics: lines.append(f"- {key}: {metrics[key]}") included.add(key) for key, value in sorted(metrics.items()): if key in included: continue lines.append(f"- {key}: {value}") def render_batch_report_markdown(payload: Dict[str, Any]) -> str: lines = [ "# Search Batch Evaluation", "", f"- Batch ID: {payload['batch_id']}", f"- Created at: {payload['created_at']}", f"- Tenant ID: {payload['tenant_id']}", f"- Query count: {len(payload['queries'])}", f"- Top K: {payload['top_k']}", "", "## Aggregate Metrics", "", ] metric_context = payload.get("metric_context") or {} if metric_context: lines.extend( [ f"- Primary metric: {metric_context.get('primary_metric', 'N/A')}", f"- Gain scheme (NDCG): {metric_context.get('gain_scheme', {})}", f"- Stop probabilities (ERR): {metric_context.get('stop_prob_scheme', {})}", "", ] ) _append_metric_block(lines, payload.get("aggregate_metrics") or {}) distribution = payload.get("aggregate_distribution") or {} if distribution: lines.extend( [ "", "## Label Distribution", "", f"- Fully Relevant: {distribution.get(RELEVANCE_EXACT, 0)}", f"- Mostly Relevant: {distribution.get(RELEVANCE_HIGH, 0)}", f"- Weakly Relevant: {distribution.get(RELEVANCE_LOW, 0)}", f"- Irrelevant: {distribution.get(RELEVANCE_IRRELEVANT, 0)}", ] ) lines.extend(["", "## Per Query", ""]) for item in payload.get("per_query") or []: lines.append(f"### {item['query']}") lines.append("") _append_metric_block(lines, item.get("metrics") or {}) distribution = item.get("distribution") or {} lines.append(f"- Fully Relevant: {distribution.get(RELEVANCE_EXACT, 0)}") lines.append(f"- Mostly Relevant: {distribution.get(RELEVANCE_HIGH, 0)}") lines.append(f"- Weakly Relevant: {distribution.get(RELEVANCE_LOW, 0)}") lines.append(f"- Irrelevant: {distribution.get(RELEVANCE_IRRELEVANT, 0)}") lines.append("") return "\n".join(lines)