async function fetchJSON(url, options) { const res = await fetch(url, options); if (!res.ok) throw new Error(await res.text()); return await res.json(); } let _datasets = []; let _currentDatasetId = ""; function fmtNumber(value, digits = 3) { if (value == null || Number.isNaN(Number(value))) return "-"; return Number(value).toFixed(digits); } function metricColumns(metrics) { const defs = [ { title: "Primary", keys: [ "Primary_Metric_Score", "NDCG@20", "NDCG@50", "ERR@10", "Strong_Precision@10", "Strong_Precision@20", "Useful_Precision@50", "Avg_Grade@10", "Gain_Recall@20", ], }, { title: "NDCG", keys: ["NDCG@5", "NDCG@10", "NDCG@20", "NDCG@50"] }, { title: "ERR", keys: ["ERR@5", "ERR@10", "ERR@20", "ERR@50"] }, { title: "Top slot", keys: [ "Exact_Precision@5", "Exact_Precision@10", "Strong_Precision@5", "Strong_Precision@10", "Strong_Precision@20", ], }, { title: "Recall", keys: [ "Useful_Precision@10", "Useful_Precision@20", "Useful_Precision@50", "Gain_Recall@10", "Gain_Recall@20", "Gain_Recall@50", ], }, { title: "First good", keys: [ "Exact_Success@5", "Exact_Success@10", "Strong_Success@5", "Strong_Success@10", "MRR_Exact@10", "MRR_Strong@10", "Avg_Grade@10", ], }, ]; const seen = new Set(); const columns = defs .map((col) => { const rows = col.keys .filter((key) => metrics && Object.prototype.hasOwnProperty.call(metrics, key)) .map((key) => { seen.add(key); return [key, metrics[key]]; }); return { title: col.title, rows }; }) .filter((col) => col.rows.length); const rest = Object.keys(metrics || {}) .filter((key) => !seen.has(key)) .sort() .map((key) => [key, metrics[key]]); if (rest.length) columns.push({ title: "Other", rows: rest }); return columns; } function renderMetrics(metrics, metricContext) { const root = document.getElementById("metrics"); root.innerHTML = ""; const ctx = document.getElementById("metricContext"); const parts = []; if (metricContext && metricContext.primary_metric) { parts.push(`Primary: ${metricContext.primary_metric}`); } if 
(metricContext && Array.isArray(metricContext.primary_metrics) && metricContext.primary_metrics.length) { parts.push(`Primary metrics: ${metricContext.primary_metrics.join(", ")}`); } if (metricContext && metricContext.primary_metric_formula) { parts.push(metricContext.primary_metric_formula); } if (metricContext && metricContext.gain_scheme) { parts.push( `NDCG gains: ${Object.entries(metricContext.gain_scheme) .map(([label, gain]) => `${label}=${gain}`) .join(", ")}` ); } if (metricContext && metricContext.stop_prob_scheme) { parts.push( `ERR P(stop): ${Object.entries(metricContext.stop_prob_scheme) .map(([label, p]) => `${label}=${p}`) .join(", ")}` ); } ctx.textContent = parts.length ? `${parts.join(". ")}.` : ""; const bar = document.createElement("div"); bar.className = "metrics-columns"; metricColumns(metrics || {}).forEach((col) => { const column = document.createElement("div"); column.className = "metric-column"; const h = document.createElement("h4"); h.className = "metric-column-title"; h.textContent = col.title; column.appendChild(h); col.rows.forEach(([key, value]) => { const row = document.createElement("div"); row.className = "metric-row"; row.innerHTML = `${key}: ${fmtNumber(value)}`; column.appendChild(row); }); bar.appendChild(column); }); root.appendChild(bar); } function labelBadgeClass(label) { if (!label || label === "Unknown") return "badge-unknown"; return "label-" + String(label).toLowerCase().replace(/\s+/g, "-"); } function renderResults(results, rootId = "results", showRank = true) { const mount = document.getElementById(rootId); mount.innerHTML = ""; (results || []).forEach((item) => { const label = item.label || "Unknown"; const box = document.createElement("div"); box.className = "result"; box.innerHTML = `
${dataset.query_file || ""}`;
document.getElementById("datasetMeta").textContent =
`${dataset.display_name || dataset.dataset_id || ""} · ${dataset.query_count || 0} queries`;
document.getElementById("pageSubtitle").textContent =
`Current dataset: ${dataset.display_name || dataset.dataset_id || ""}. Single-query evaluation and batch evaluation share the same service on port 6010.`;
document.getElementById("batchButton").textContent =
`Batch Evaluation: ${dataset.display_name || dataset.dataset_id || ""}`;
data.queries.forEach((query) => {
const btn = document.createElement("button");
btn.className = "query-item";
btn.textContent = query;
btn.onclick = () => {
document.getElementById("queryInput").value = query;
runSingle();
};
root.appendChild(btn);
});
}
/**
 * Load the dataset list from `/api/datasets` and rebuild the `#datasetSelect`
 * dropdown from it.
 *
 * Stores the list in `_datasets`. When no dataset is selected yet, picks the
 * server's `current_dataset_id`, falling back to the first dataset's id, then
 * to the empty string. Installs a change handler that records the new
 * selection and then reloads queries followed by history.
 *
 * @returns {Promise<void>}
 */
async function loadDatasets() {
  const payload = await fetchJSON("/api/datasets");
  _datasets = payload.datasets || [];

  // Keep an existing selection; only choose a default when none is set.
  if (!_currentDatasetId) {
    const first = _datasets[0];
    _currentDatasetId = payload.current_dataset_id || (first && first.dataset_id) || "";
  }

  const picker = document.getElementById("datasetSelect");
  picker.innerHTML = "";

  for (const entry of _datasets) {
    const option = document.createElement("option");
    option.value = entry.dataset_id;
    const caption = entry.display_name || entry.dataset_id;
    const total = entry.query_count || 0;
    option.textContent = `${caption} (${total})`;
    if (entry.dataset_id === _currentDatasetId) {
      option.selected = true;
    }
    picker.appendChild(option);
  }

  picker.onchange = async (event) => {
    _currentDatasetId = event.target.value;
    await loadQueries();
    await loadHistory();
  };
}
function historySummaryHtml(meta) {
const m = meta && meta.aggregate_metrics;
const nq = (meta && meta.query_count) || (meta && meta.queries && meta.queries.length) || (meta && meta.per_query && meta.per_query.length) || null;
const parts = [];
if (nq != null) parts.push(`Queries ${nq}`);
if (m && m["Primary_Metric_Score"] != null) parts.push(`Primary ${fmtNumber(m["Primary_Metric_Score"])}`);
if (m && m["NDCG@20"] != null) parts.push(`NDCG@20 ${fmtNumber(m["NDCG@20"])}`);
if (m && m["ERR@10"] != null) parts.push(`ERR@10 ${fmtNumber(m["ERR@10"])}`);
if (m && m["Strong_Precision@10"] != null) parts.push(`Strong@10 ${fmtNumber(m["Strong_Precision@10"])}`);
if (m && m["Gain_Recall@20"] != null) parts.push(`Gain Recall@20 ${fmtNumber(m["Gain_Recall@20"])}`);
if (!parts.length) return "";
return `