// eval_web.js (13.3 KB)
/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string} url - Request URL, passed straight to `fetch`.
 * @param {object} [options] - Optional `fetch` init object (method, headers, body, ...).
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} On a non-OK response. The message is the response body, or
 *   `HTTP <status> <statusText>` when the body is empty or cannot be read.
 */
async function fetchJSON(url, options) {
  const res = await fetch(url, options);
  if (!res.ok) {
    // An empty error body used to yield an Error with a blank message, which
    // the UI then rendered as nothing at all; fall back to the status line.
    const detail = await res.text().catch(() => "");
    const statusLine = `HTTP ${res.status} ${res.statusText || ""}`.trim();
    throw new Error(detail.trim() ? detail : statusLine);
  }
  return res.json();
}

// Dataset descriptors from the most recent /api/datasets response (assigned in loadDatasets).
let _datasets = [];
// dataset_id of the dataset currently selected in the UI; "" until loadDatasets picks one.
let _currentDatasetId = "";

/**
 * Format a value as a fixed-point decimal string.
 *
 * @param {*} value - Anything `Number()` can convert.
 * @param {number} [digits=3] - Number of digits after the decimal point.
 * @returns {string} The formatted number, or "-" when the value is null,
 *   undefined, or does not convert to a number.
 */
function fmtNumber(value, digits = 3) {
  if (value == null) return "-";
  const numeric = Number(value);
  return Number.isNaN(numeric) ? "-" : numeric.toFixed(digits);
}

/**
 * Group a flat metrics object into titled display columns.
 *
 * Each predefined column lists the metric keys it shows, in order; a key may
 * appear in more than one column. Only keys that are own properties of
 * `metrics` are emitted, columns that end up empty are dropped, and any keys
 * not claimed by a predefined column are collected, sorted by name, into a
 * trailing "Other" column.
 *
 * @param {Object<string, *>|null|undefined} metrics - Metric name to value.
 * @returns {Array<{title: string, rows: Array<[string, *]>}>} Columns in display order.
 */
function metricColumns(metrics) {
  const layout = [
    [
      "Primary",
      [
        "Primary_Metric_Score",
        "NDCG@20",
        "NDCG@50",
        "ERR@10",
        "Strong_Precision@10",
        "Strong_Precision@20",
        "Useful_Precision@50",
        "Avg_Grade@10",
        "Gain_Recall@20",
      ],
    ],
    ["NDCG", ["NDCG@5", "NDCG@10", "NDCG@20", "NDCG@50"]],
    ["ERR", ["ERR@5", "ERR@10", "ERR@20", "ERR@50"]],
    [
      "Top slot",
      [
        "Exact_Precision@5",
        "Exact_Precision@10",
        "Strong_Precision@5",
        "Strong_Precision@10",
        "Strong_Precision@20",
      ],
    ],
    [
      "Recall",
      [
        "Useful_Precision@10",
        "Useful_Precision@20",
        "Useful_Precision@50",
        "Gain_Recall@10",
        "Gain_Recall@20",
        "Gain_Recall@50",
      ],
    ],
    [
      "First good",
      [
        "Exact_Success@5",
        "Exact_Success@10",
        "Strong_Success@5",
        "Strong_Success@10",
        "MRR_Exact@10",
        "MRR_Strong@10",
        "Avg_Grade@10",
      ],
    ],
  ];

  const source = metrics || {};
  const placed = new Set();
  const columns = [];

  for (const [title, keys] of layout) {
    const rows = [];
    for (const key of keys) {
      if (!Object.prototype.hasOwnProperty.call(source, key)) continue;
      placed.add(key);
      rows.push([key, source[key]]);
    }
    if (rows.length > 0) columns.push({ title, rows });
  }

  // Whatever no predefined column claimed goes into "Other", alphabetically.
  const leftovers = Object.keys(source)
    .filter((key) => !placed.has(key))
    .sort();
  if (leftovers.length > 0) {
    columns.push({ title: "Other", rows: leftovers.map((key) => [key, source[key]]) });
  }
  return columns;
}

/**
 * Render the metrics panel (#metrics) and its explanatory line (#metricContext).
 *
 * Metric names and values are written with `textContent` rather than being
 * interpolated into an HTML string, so a metric key containing markup is
 * displayed literally instead of being parsed as HTML.
 *
 * @param {Object<string, *>|null|undefined} metrics - Metric name to value.
 * @param {Object|null|undefined} metricContext - Optional description of the
 *   metric setup; the fields read are `primary_metric`, `primary_metrics`,
 *   `primary_metric_formula`, `gain_scheme` and `stop_prob_scheme`.
 */
function renderMetrics(metrics, metricContext) {
  const root = document.getElementById("metrics");
  root.innerHTML = "";

  const context = metricContext || {};
  const describeScheme = (scheme) =>
    Object.entries(scheme)
      .map(([label, amount]) => `${label}=${amount}`)
      .join(", ");

  const parts = [];
  if (context.primary_metric) {
    parts.push(`Primary: ${context.primary_metric}`);
  }
  if (Array.isArray(context.primary_metrics) && context.primary_metrics.length) {
    parts.push(`Primary metrics: ${context.primary_metrics.join(", ")}`);
  }
  if (context.primary_metric_formula) {
    parts.push(context.primary_metric_formula);
  }
  if (context.gain_scheme) {
    parts.push(`NDCG gains: ${describeScheme(context.gain_scheme)}`);
  }
  if (context.stop_prob_scheme) {
    parts.push(`ERR P(stop): ${describeScheme(context.stop_prob_scheme)}`);
  }
  document.getElementById("metricContext").textContent = parts.length ? `${parts.join(". ")}.` : "";

  const bar = document.createElement("div");
  bar.className = "metrics-columns";
  metricColumns(metrics || {}).forEach((col) => {
    const column = document.createElement("div");
    column.className = "metric-column";

    const heading = document.createElement("h4");
    heading.className = "metric-column-title";
    heading.textContent = col.title;
    column.appendChild(heading);

    col.rows.forEach(([key, value]) => {
      const row = document.createElement("div");
      row.className = "metric-row";

      const nameEl = document.createElement("span");
      nameEl.className = "metric-row-name";
      nameEl.textContent = `${key}:`;

      const valueEl = document.createElement("span");
      valueEl.className = "metric-row-value";
      valueEl.textContent = fmtNumber(value);

      row.appendChild(nameEl);
      // Keep the single space that separated the two spans in the old markup.
      row.appendChild(document.createTextNode(" "));
      row.appendChild(valueEl);
      column.appendChild(row);
    });
    bar.appendChild(column);
  });
  root.appendChild(bar);
}

/**
 * Map a relevance label to the CSS class used for its badge.
 *
 * @param {*} label - Label text; falsy values and "Unknown" share one class.
 * @returns {string} "badge-unknown", or "label-" followed by the lower-cased
 *   label with each run of whitespace replaced by a single hyphen.
 */
function labelBadgeClass(label) {
  const isUnknown = !label || label === "Unknown";
  if (isUnknown) return "badge-unknown";
  const slug = String(label).toLowerCase().replace(/\s+/g, "-");
  return `label-${slug}`;
}

/**
 * Render a list of result items into the element with id `rootId`.
 *
 * Every value taken from an item (label, title, title_zh, option values, image
 * URL) is written through `textContent` / `setAttribute` instead of being
 * interpolated into an HTML string, so markup inside those values is shown
 * literally and never parsed.
 *
 * @param {Array<Object>|null|undefined} results - Items to render. Fields read:
 *   `label`, `rank`, `rerank_score`, `image_url`, `title`, `title_zh`,
 *   `option_values` (only the first three entries are shown).
 * @param {string} [rootId="results"] - Id of the container element.
 * @param {boolean} [showRank=true] - When true the line under the badge shows
 *   `#<rank>`; otherwise it shows the rerank score, or "not recalled" when the
 *   item has none.
 */
function renderResults(results, rootId = "results", showRank = true) {
  // Small element factory; `text` is always assigned as plain text.
  const make = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text != null) node.textContent = String(text);
    return node;
  };

  const mount = document.getElementById(rootId);
  mount.innerHTML = "";

  const items = results || [];
  if (!items.length) {
    mount.innerHTML = '<div class="muted">None.</div>';
    return;
  }

  items.forEach((item) => {
    const label = item.label || "Unknown";

    let position;
    if (showRank) {
      position = `#${item.rank || "-"}`;
    } else if (item.rerank_score != null) {
      const score = item.rerank_score;
      // Non-numeric scores (no toFixed) are shown as-is.
      position = `rerank=${score.toFixed ? score.toFixed(4) : score}`;
    } else {
      position = "not recalled";
    }

    const badgeCell = make("div");
    badgeCell.appendChild(make("span", `badge ${labelBadgeClass(label)}`, label));
    const positionEl = make("div", "muted", position);
    positionEl.style.marginTop = "8px";
    badgeCell.appendChild(positionEl);

    const thumb = make("img", "thumb");
    thumb.setAttribute("src", item.image_url || "");
    thumb.setAttribute("alt", "");

    const details = make("div");
    details.appendChild(make("div", "title", item.title || ""));
    if (item.title_zh) details.appendChild(make("div", "title-zh", item.title_zh));
    const options = make("div", "options");
    const values = item.option_values || [];
    for (let i = 0; i < 3; i += 1) {
      options.appendChild(make("div", "", values[i] || ""));
    }
    details.appendChild(options);

    const box = make("div", "result");
    box.appendChild(badgeCell);
    box.appendChild(thumb);
    box.appendChild(details);
    mount.appendChild(box);
  });
}

/**
 * Render the tips panel (#tips): a label-statistics summary line followed by
 * the tips supplied in `data.tips`.
 *
 * Tips are written with `textContent`, so any markup inside a tip is displayed
 * literally rather than parsed as HTML.
 * NOTE(review): this assumes server tips are plain text — confirm none rely on
 * embedded HTML formatting.
 *
 * @param {Object} data - Evaluation response; reads `tips` (array) and
 *   `label_stats` (counters, each defaulting to 0 when missing).
 */
function renderTips(data) {
  const root = document.getElementById("tips");
  const stats = data.label_stats || {};
  const summary = `Cached labels: ${stats.total || 0}. Recalled hits: ${stats.recalled_hits || 0}. Missed judged useful results: ${stats.missing_relevant_count || 0} (Exact ${stats.missing_exact_count || 0}, High ${stats.missing_high_count || 0}, Low ${stats.missing_low_count || 0}).`;
  const tips = [summary, ...(data.tips || [])];

  root.innerHTML = "";
  tips.forEach((text) => {
    const tip = document.createElement("div");
    tip.className = "tip";
    tip.textContent = String(text);
    root.appendChild(tip);
  });
}

/**
 * Load the query list of the currently selected dataset and refresh every
 * dataset-dependent label on the page (#queriesMeta, #datasetMeta,
 * #pageSubtitle, #batchButton, #queryList).
 *
 * Does nothing when no dataset is selected. A response that arrives after the
 * selection has changed is discarded so it cannot overwrite newer content.
 * The query file name is inserted as text, not as HTML.
 *
 * @returns {Promise<void>}
 * @throws {Error} Whatever `fetchJSON` throws for a failed request.
 */
async function loadQueries() {
  const datasetId = _currentDatasetId;
  if (!datasetId) return;
  const data = await fetchJSON("/api/datasets/" + encodeURIComponent(datasetId) + "/queries");
  // The user may have switched datasets while the request was in flight.
  if (datasetId !== _currentDatasetId) return;

  const root = document.getElementById("queryList");
  root.innerHTML = "";
  const dataset = data.dataset || {};
  const datasetName = dataset.display_name || dataset.dataset_id || "";

  const queriesMeta = document.getElementById("queriesMeta");
  queriesMeta.textContent = "Loaded from ";
  const fileEl = document.createElement("code");
  fileEl.textContent = dataset.query_file || "";
  queriesMeta.appendChild(fileEl);

  document.getElementById("datasetMeta").textContent =
    `${datasetName} · ${dataset.query_count || 0} queries`;
  document.getElementById("pageSubtitle").textContent =
    `Current dataset: ${datasetName}. Single-query evaluation and batch evaluation share the same service on port 6010.`;
  document.getElementById("batchButton").textContent =
    `Batch Evaluation: ${datasetName}`;

  // A response without `queries` renders an empty list instead of throwing.
  (data.queries || []).forEach((query) => {
    const btn = document.createElement("button");
    btn.className = "query-item";
    btn.textContent = query;
    btn.onclick = () => {
      document.getElementById("queryInput").value = query;
      runSingle();
    };
    root.appendChild(btn);
  });
}

/**
 * Fetch the dataset list, remember it in `_datasets`, pick a current dataset if
 * none is selected yet, and rebuild the #datasetSelect dropdown.
 *
 * The current dataset defaults to the server's `current_dataset_id`, then to
 * the first dataset in the list, then to "". Changing the dropdown updates
 * `_currentDatasetId` and reloads the queries and then the history.
 *
 * @returns {Promise<void>}
 */
async function loadDatasets() {
  const payload = await fetchJSON("/api/datasets");
  _datasets = payload.datasets || [];

  if (!_currentDatasetId) {
    const first = _datasets[0];
    _currentDatasetId = payload.current_dataset_id || (first && first.dataset_id) || "";
  }

  const picker = document.getElementById("datasetSelect");
  picker.innerHTML = "";
  for (const { dataset_id: id, display_name: name, query_count: count } of _datasets) {
    const option = document.createElement("option");
    option.value = id;
    option.textContent = `${name || id} (${count || 0})`;
    if (id === _currentDatasetId) option.selected = true;
    picker.appendChild(option);
  }

  picker.onchange = async (event) => {
    _currentDatasetId = event.target.value;
    await loadQueries();
    await loadHistory();
  };
}

/**
 * Build the one-line HTML summary shown under a history entry.
 *
 * The query count comes from `query_count`, else the length of `queries`, else
 * the length of `per_query`; a count of 0 is treated as missing. Selected
 * aggregate metrics are appended when present (not null/undefined).
 *
 * @param {Object|null|undefined} meta - Batch metadata.
 * @returns {string} A `<div class="hstats">` fragment, or "" when there is
 *   nothing to show.
 */
function historySummaryHtml(meta) {
  const record = meta || {};
  const aggregate = record.aggregate_metrics;
  const queryCount =
    record.query_count ||
    (record.queries && record.queries.length) ||
    (record.per_query && record.per_query.length) ||
    null;

  const fragments = [];
  if (queryCount != null) fragments.push(`<span>Queries</span> ${queryCount}`);

  if (aggregate) {
    // [caption shown in the UI, key in aggregate_metrics], in display order.
    const shown = [
      ["Primary", "Primary_Metric_Score"],
      ["NDCG@20", "NDCG@20"],
      ["ERR@10", "ERR@10"],
      ["Strong@10", "Strong_Precision@10"],
      ["Gain Recall@20", "Gain_Recall@20"],
    ];
    for (const [caption, key] of shown) {
      const value = aggregate[key];
      if (value != null) fragments.push(`<span>${caption}</span> ${fmtNumber(value)}`);
    }
  }

  return fragments.length ? `<div class="hstats">${fragments.join(" · ")}</div>` : "";
}

/**
 * Load the batch-evaluation history of the current dataset into #history.
 *
 * Does nothing when no dataset is selected. A response that arrives after the
 * selection has changed is discarded. Batch id, timestamp, tenant and dataset
 * name are inserted as text; only the summary produced by
 * `historySummaryHtml` is inserted as HTML. Clicking an entry opens its report.
 *
 * @returns {Promise<void>}
 * @throws {Error} Whatever `fetchJSON` throws for a failed request.
 */
async function loadHistory() {
  const datasetId = _currentDatasetId;
  if (!datasetId) return;
  const data = await fetchJSON("/api/history?dataset_id=" + encodeURIComponent(datasetId));
  // The user may have switched datasets while the request was in flight.
  if (datasetId !== _currentDatasetId) return;

  const root = document.getElementById("history");
  root.classList.remove("muted");
  const items = data.history || [];
  if (!items.length) {
    root.innerHTML = '<span class="muted">No history yet.</span>';
    return;
  }

  root.innerHTML = "";
  const list = document.createElement("div");
  list.className = "history-list";

  items.forEach((item) => {
    const dataset = (item.metadata && item.metadata.dataset) || {};
    const datasetName = dataset.display_name || dataset.dataset_id || item.dataset_id || "";

    const btn = document.createElement("button");
    btn.type = "button";
    btn.className = "history-item";
    btn.setAttribute("aria-label", `Open report ${item.batch_id}`);

    const idEl = document.createElement("div");
    idEl.className = "hid";
    idEl.textContent = String(item.batch_id);
    btn.appendChild(idEl);

    const metaEl = document.createElement("div");
    metaEl.className = "hmeta";
    metaEl.textContent = `${item.created_at} · tenant ${item.tenant_id}${datasetName ? ` · ${datasetName}` : ""}`;
    btn.appendChild(metaEl);

    // The summary is markup assembled by historySummaryHtml, so it goes in as HTML.
    const summary = historySummaryHtml(item.metadata);
    if (summary) btn.insertAdjacentHTML("beforeend", summary);

    btn.onclick = () => openBatchReport(item.batch_id);
    list.appendChild(btn);
  });

  root.appendChild(list);
}

// Markdown path of the report most recently loaded by openBatchReport; read by the #reportCopyPath click handler.
let _lastReportPath = "";

/**
 * Hide the report modal (#reportModal) and empty its body and meta line.
 */
function closeReportModal() {
  const modal = document.getElementById("reportModal");
  modal.classList.remove("is-open");
  modal.setAttribute("aria-hidden", "true");

  // Element id -> property that gets blanked when the modal closes.
  const toClear = [
    ["reportModalBody", "innerHTML"],
    ["reportModalMeta", "textContent"],
  ];
  for (const [id, prop] of toClear) {
    document.getElementById(id)[prop] = "";
  }
}

/**
 * Open the report modal for a batch and load its Markdown report.
 *
 * The modal is shown immediately with a loading placeholder. The Markdown is
 * converted with `marked` and the resulting HTML sanitized with `DOMPurify`
 * before being inserted. Any failure (request, parsing, sanitizing) is shown
 * inside the modal body instead of being thrown.
 *
 * `_lastReportPath` is cleared up front so that, while loading or after a
 * failed load, the "copy path" button cannot copy the path of a previously
 * opened report.
 *
 * @param {string} batchId - Identifier of the batch whose report to show.
 * @returns {Promise<void>}
 */
async function openBatchReport(batchId) {
  const el = document.getElementById("reportModal");
  const body = document.getElementById("reportModalBody");
  const metaEl = document.getElementById("reportModalMeta");
  const titleEl = document.getElementById("reportModalTitle");
  // Forget the previous report's path until this one has actually loaded.
  _lastReportPath = "";
  el.classList.add("is-open");
  el.setAttribute("aria-hidden", "false");
  titleEl.textContent = batchId;
  metaEl.textContent = "";
  body.className = "report-modal-body batch-report-md report-modal-loading";
  body.textContent = "Loading report…";
  try {
    const rep = await fetchJSON("/api/history/" + encodeURIComponent(batchId) + "/report");
    _lastReportPath = rep.report_markdown_path || "";
    const dataset = rep.dataset || {};
    metaEl.textContent = [dataset.display_name || dataset.dataset_id || "", rep.report_markdown_path || ""]
      .filter(Boolean)
      .join(" · ");
    const raw = marked.parse(rep.markdown || "", { gfm: true });
    const safe = DOMPurify.sanitize(raw, { USE_PROFILES: { html: true } });
    body.className = "report-modal-body batch-report-md";
    body.innerHTML = safe;
  } catch (e) {
    body.className = "report-modal-body report-modal-error";
    body.textContent = e && e.message ? e.message : String(e);
  }
}

// Close the report modal when a click lands on an element carrying data-close-report="1".
document.getElementById("reportModal").addEventListener("click", (ev) => {
  if (ev.target && ev.target.getAttribute("data-close-report") === "1") closeReportModal();
});

// Pressing Escape anywhere on the page closes the report modal.
document.addEventListener("keydown", (ev) => {
  if (ev.key === "Escape") closeReportModal();
});

// Copy the path of the most recently loaded report to the clipboard.
document.getElementById("reportCopyPath").addEventListener("click", async () => {
  if (!_lastReportPath) return;
  try {
    await navigator.clipboard.writeText(_lastReportPath);
  } catch (err) {
    // Copying stays best-effort (the clipboard may be unavailable or denied),
    // but leave a trace instead of failing without any signal.
    console.warn("Could not copy report path to clipboard:", err);
  }
});

/**
 * Evaluate the query typed into #queryInput against the current dataset and
 * render metrics, results, missed relevant items and tips.
 *
 * Does nothing for a blank query. When the request fails, the status line
 * shows the error (instead of staying on "Evaluating…") and the error is
 * re-thrown so the returned promise still rejects as before. The history
 * refresh at the end is not awaited; its failure is only logged.
 *
 * @returns {Promise<void>}
 * @throws {Error} Whatever `fetchJSON` throws for a failed request.
 */
async function runSingle() {
  const query = document.getElementById("queryInput").value.trim();
  if (!query) return;
  const status = document.getElementById("status");
  status.textContent = `Evaluating "${query}" on ${_currentDatasetId}...`;

  let data;
  try {
    data = await fetchJSON("/api/search-eval", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ query, dataset_id: _currentDatasetId, top_k: 100, auto_annotate: false }),
    });
  } catch (err) {
    status.textContent = `Evaluation failed: ${err && err.message ? err.message : String(err)}`;
    throw err;
  }

  status.textContent = `Done. total=${data.total}`;
  renderMetrics(data.metrics, data.metric_context);
  renderResults(data.results, "results", true);
  renderResults(data.missing_relevant, "missingRelevant", false);
  renderTips(data);
  // Fire-and-forget: the evaluation itself is already rendered.
  loadHistory().catch((err) => console.error("Failed to refresh history:", err));
}

/**
 * Run a batch evaluation for the current dataset and render its aggregate
 * metrics; the per-query result panels are reset to empty.
 *
 * When the request fails, the status line shows the error (instead of staying
 * on "Running batch evaluation…") and the error is re-thrown so the returned
 * promise still rejects as before. The history refresh at the end is not
 * awaited; its failure is only logged.
 *
 * @returns {Promise<void>}
 * @throws {Error} Whatever `fetchJSON` throws for a failed request.
 */
async function runBatch() {
  const status = document.getElementById("status");
  status.textContent = `Running batch evaluation for ${_currentDatasetId}...`;

  let data;
  try {
    data = await fetchJSON("/api/batch-eval", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ dataset_id: _currentDatasetId, top_k: 100, auto_annotate: false }),
    });
  } catch (err) {
    status.textContent = `Batch evaluation failed: ${err && err.message ? err.message : String(err)}`;
    throw err;
  }

  status.textContent = `Batch done. report=${data.batch_id}`;
  renderMetrics(data.aggregate_metrics, data.metric_context);
  renderResults([], "results", true);
  renderResults([], "missingRelevant", false);
  document.getElementById("tips").innerHTML =
    '<div class="tip">Batch evaluation uses cached labels only unless force refresh is requested via CLI/API.</div>';
  // Fire-and-forget: the batch result itself is already rendered.
  loadHistory().catch((err) => console.error("Failed to refresh history:", err));
}

// Initial page load: datasets first (this picks the current dataset), then its
// queries, then its history. A failure at any step is reported rather than
// left as an unhandled rejection.
loadDatasets()
  .then(() => loadQueries())
  .then(() => loadHistory())
  .catch((err) => {
    console.error("Initial load failed:", err);
    const status = document.getElementById("status");
    if (status) status.textContent = `Failed to load: ${err && err.message ? err.message : String(err)}`;
  });