Blame view

scripts/evaluation/eval_framework/reports.py 1.77 KB
c81b0fc1   tangwang   scripts/evaluatio...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  """Markdown and text reports for batch evaluation."""
  
  from __future__ import annotations
  
  from typing import Any, Dict
  
  from .constants import RELEVANCE_EXACT, RELEVANCE_IRRELEVANT, RELEVANCE_PARTIAL
  
  
  def render_batch_report_markdown(payload: Dict[str, Any]) -> str:
      """Render a batch-evaluation payload as a Markdown document.

      The report has a header block, an "Aggregate Metrics" section, an
      optional "Label Distribution" section, and a "Per Query" section with
      one sub-heading per evaluated query.

      Args:
          payload: Mapping describing the batch. ``batch_id``, ``created_at``,
              ``tenant_id``, ``queries`` (anything supporting ``len``) and
              ``top_k`` are read with ``[]`` and must be present.
              ``aggregate_metrics``, ``aggregate_distribution`` and
              ``per_query`` are optional; a missing or falsy value is treated
              as empty. Each ``per_query`` entry must carry ``query`` and may
              carry ``metrics`` and ``distribution``.

      Returns:
          The report as a single newline-joined string.

      Raises:
          KeyError: If a required payload key or an entry's ``query`` is absent.
      """

      def _metric_bullets(metrics: Any) -> list:
          # Metrics are listed in sorted key order so the output is stable.
          return [f"- {name}: {score}" for name, score in sorted((metrics or {}).items())]

      def _label_bullets(counts: Dict[Any, Any]) -> list:
          # Labels absent from the mapping are reported with a count of 0.
          return [
              f"- Exact: {counts.get(RELEVANCE_EXACT, 0)}",
              f"- Partial: {counts.get(RELEVANCE_PARTIAL, 0)}",
              f"- Irrelevant: {counts.get(RELEVANCE_IRRELEVANT, 0)}",
          ]

      report = [
          "# Search Batch Evaluation",
          "",
          f"- Batch ID: {payload['batch_id']}",
          f"- Created at: {payload['created_at']}",
          f"- Tenant ID: {payload['tenant_id']}",
          f"- Query count: {len(payload['queries'])}",
          f"- Top K: {payload['top_k']}",
          "",
          "## Aggregate Metrics",
          "",
      ]
      report += _metric_bullets(payload.get("aggregate_metrics"))

      # The aggregate label section is omitted entirely when there is no data.
      overall_counts = payload.get("aggregate_distribution") or {}
      if overall_counts:
          report += ["", "## Label Distribution", ""]
          report += _label_bullets(overall_counts)

      report += ["", "## Per Query", ""]
      for entry in payload.get("per_query") or []:
          report += [f"### {entry['query']}", ""]
          report += _metric_bullets(entry.get("metrics"))
          # Unlike the aggregate section, per-query label counts are always
          # emitted, defaulting to zeros when no distribution is supplied.
          report += _label_bullets(entry.get("distribution") or {})
          report.append("")

      return "\n".join(report)