/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string} url - Request URL.
 * @param {object} [options] - Options forwarded to `fetch`.
 * @returns {Promise<any>} Parsed JSON body.
 * @throws {Error} When the response is not ok. The message is the response
 *   body text, or `HTTP <status>` when the body is empty.
 */
async function fetchJSON(url, options) {
  const res = await fetch(url, options);
  if (!res.ok) {
    const detail = await res.text();
    // An empty body would otherwise produce an Error with no message at all.
    throw new Error(detail || `HTTP ${res.status}`);
  }
  return await res.json();
}

// Page-level state: datasets returned by /api/datasets and the selected id.
let _datasets = [];
let _currentDatasetId = "";

/**
 * Escape a value for safe interpolation into an HTML string.
 *
 * The value is stringified with `String()` first, so `undefined` renders as
 * "undefined" exactly as it would in an unescaped template literal.
 *
 * @param {*} value - Value to render as text.
 * @returns {string} Text with `& < > " '` replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Show an error in the `status` element (when present) and log it.
 *
 * @param {*} err - Error or arbitrary rejection value.
 */
function reportError(err) {
  console.error(err);
  const status = document.getElementById("status");
  if (status) {
    status.textContent = `Error: ${err && err.message ? err.message : String(err)}`;
  }
}

/**
 * Format a numeric value with a fixed number of decimals.
 *
 * @param {*} value - Value to format; coerced with `Number()`.
 * @param {number} [digits=3] - Number of decimals.
 * @returns {string} Fixed-point text, or "-" for null/undefined/non-numeric.
 */
function fmtNumber(value, digits = 3) {
  if (value == null || Number.isNaN(Number(value))) return "-";
  return Number(value).toFixed(digits);
}

/**
 * Group a metrics object into titled display columns.
 *
 * Each predefined column lists the keys it shows; a key may appear in more
 * than one column. Keys present in `metrics` but in no predefined column are
 * collected, sorted, into a trailing "Other" column. Empty columns are dropped.
 *
 * @param {object|null|undefined} metrics - Map of metric name to value.
 * @returns {Array<{title: string, rows: Array<[string, *]>}>} Columns to render.
 */
function metricColumns(metrics) {
  const defs = [
    {
      title: "Primary",
      keys: [
        "Primary_Metric_Score",
        "NDCG@20",
        "NDCG@50",
        "ERR@10",
        "Strong_Precision@10",
        "Strong_Precision@20",
        "Useful_Precision@50",
        "Avg_Grade@10",
        "Gain_Recall@20",
      ],
    },
    { title: "NDCG", keys: ["NDCG@5", "NDCG@10", "NDCG@20", "NDCG@50"] },
    { title: "ERR", keys: ["ERR@5", "ERR@10", "ERR@20", "ERR@50"] },
    {
      title: "Top slot",
      keys: [
        "Exact_Precision@5",
        "Exact_Precision@10",
        "Strong_Precision@5",
        "Strong_Precision@10",
        "Strong_Precision@20",
      ],
    },
    {
      title: "Recall",
      keys: [
        "Useful_Precision@10",
        "Useful_Precision@20",
        "Useful_Precision@50",
        "Gain_Recall@10",
        "Gain_Recall@20",
        "Gain_Recall@50",
      ],
    },
    {
      title: "First good",
      keys: [
        "Exact_Success@5",
        "Exact_Success@10",
        "Strong_Success@5",
        "Strong_Success@10",
        "MRR_Exact@10",
        "MRR_Strong@10",
        "Avg_Grade@10",
      ],
    },
  ];

  // Tracks every key shown in a predefined column so "Other" only gets the rest.
  const seen = new Set();
  const columns = defs
    .map((col) => {
      const rows = col.keys
        .filter((key) => metrics && Object.prototype.hasOwnProperty.call(metrics, key))
        .map((key) => {
          seen.add(key);
          return [key, metrics[key]];
        });
      return { title: col.title, rows };
    })
    .filter((col) => col.rows.length);

  const rest = Object.keys(metrics || {})
    .filter((key) => !seen.has(key))
    .sort()
    .map((key) => [key, metrics[key]]);
  if (rest.length) columns.push({ title: "Other", rows: rest });
  return columns;
}

/**
 * Render the metric columns into `#metrics` and a one-line description of the
 * metric configuration into `#metricContext`.
 *
 * @param {object|null|undefined} metrics - Map of metric name to value.
 * @param {object|null|undefined} metricContext - Optional description fields
 *   (`primary_metric`, `primary_metrics`, `primary_metric_formula`,
 *   `gain_scheme`, `stop_prob_scheme`).
 */
function renderMetrics(metrics, metricContext) {
  const root = document.getElementById("metrics");
  root.innerHTML = "";

  const ctx = document.getElementById("metricContext");
  const info = metricContext || {};
  const parts = [];
  if (info.primary_metric) {
    parts.push(`Primary: ${info.primary_metric}`);
  }
  if (Array.isArray(info.primary_metrics) && info.primary_metrics.length) {
    parts.push(`Primary metrics: ${info.primary_metrics.join(", ")}`);
  }
  if (info.primary_metric_formula) {
    parts.push(info.primary_metric_formula);
  }
  if (info.gain_scheme) {
    parts.push(
      `NDCG gains: ${Object.entries(info.gain_scheme)
        .map(([label, gain]) => `${label}=${gain}`)
        .join(", ")}`
    );
  }
  if (info.stop_prob_scheme) {
    parts.push(
      `ERR P(stop): ${Object.entries(info.stop_prob_scheme)
        .map(([label, p]) => `${label}=${p}`)
        .join(", ")}`
    );
  }
  ctx.textContent = parts.length ? `${parts.join(". ")}.` : "";

  const bar = document.createElement("div");
  bar.className = "metrics-columns";
  metricColumns(metrics || {}).forEach((col) => {
    const column = document.createElement("div");
    column.className = "metric-column";

    const h = document.createElement("h4");
    h.className = "metric-column-title";
    h.textContent = col.title;
    column.appendChild(h);

    col.rows.forEach(([key, value]) => {
      const row = document.createElement("div");
      row.className = "metric-row";
      // Metric names come from the API response; escape before using innerHTML.
      row.innerHTML = `${escapeHtml(key)}: ${fmtNumber(value)}`;
      column.appendChild(row);
    });
    bar.appendChild(column);
  });
  root.appendChild(bar);
}

/**
 * Map a relevance label to a CSS class name.
 *
 * @param {string|null|undefined} label - Label text.
 * @returns {string} "badge-unknown" for empty/"Unknown" labels, otherwise
 *   "label-" followed by the lower-cased label with whitespace runs as "-".
 */
function labelBadgeClass(label) {
  if (!label || label === "Unknown") return "badge-unknown";
  return "label-" + String(label).toLowerCase().replace(/\s+/g, "-");
}

/**
 * Render a list of result items into the element with id `rootId`.
 *
 * @param {Array<object>|null|undefined} results - Items to show. Reads
 *   `label`, `rank`, `rerank_score`, `title`, `title_zh`, `option_values`.
 * @param {string} [rootId="results"] - Id of the mount element.
 * @param {boolean} [showRank=true] - When true show `#rank`; otherwise show
 *   the rerank score, or "not recalled" when there is none.
 */
function renderResults(results, rootId = "results", showRank = true) {
  const mount = document.getElementById(rootId);
  mount.innerHTML = "";

  const items = results || [];
  items.forEach((item) => {
    const label = item.label || "Unknown";
    const options = item.option_values || [];

    let position;
    if (showRank) {
      position = `#${item.rank || "-"}`;
    } else if (item.rerank_score != null) {
      // Non-numeric scores (no toFixed) are shown as-is.
      position = `rerank=${item.rerank_score.toFixed ? item.rerank_score.toFixed(4) : item.rerank_score}`;
    } else {
      position = "not recalled";
    }

    const zhBlock = item.title_zh ? `\n${escapeHtml(item.title_zh)}\n` : "";

    const box = document.createElement("div");
    box.className = "result";
    // All item fields come from the API; escape them before using innerHTML.
    box.innerHTML = [
      "",
      escapeHtml(label),
      escapeHtml(position),
      escapeHtml(item.title || ""),
      zhBlock,
      escapeHtml(options[0] || ""),
      escapeHtml(options[1] || ""),
      escapeHtml(options[2] || ""),
      "",
    ].join("\n");
    mount.appendChild(box);
  });

  if (!items.length) {
    mount.innerHTML = "\nNone.\n";
  }
}

/**
 * Render the tips list into `#tips`, prefixed with a label-statistics summary.
 *
 * @param {object} data - Evaluation response; reads `tips` and `label_stats`.
 */
function renderTips(data) {
  const root = document.getElementById("tips");
  const tips = [...(data.tips || [])];
  const stats = data.label_stats || {};
  tips.unshift(
    `Cached labels: ${stats.total || 0}. Recalled hits: ${stats.recalled_hits || 0}. Missed judged useful results: ${stats.missing_relevant_count || 0} (Exact ${stats.missing_exact_count || 0}, High ${stats.missing_high_count || 0}, Low ${stats.missing_low_count || 0}).`
  );
  // Tips are treated as plain text, so they are escaped before insertion.
  root.innerHTML = tips.map((text) => `\n${escapeHtml(text)}\n`).join("");
}

/**
 * Load the query list for the current dataset and update the dataset-related
 * captions. Does nothing when no dataset is selected.
 *
 * @returns {Promise<void>}
 */
async function loadQueries() {
  if (!_currentDatasetId) return;
  const data = await fetchJSON(`/api/datasets/${encodeURIComponent(_currentDatasetId)}/queries`);
  const root = document.getElementById("queryList");
  root.innerHTML = "";

  const dataset = data.dataset || {};
  const datasetName = dataset.display_name || dataset.dataset_id || "";
  document.getElementById("queriesMeta").innerHTML = `Loaded from ${escapeHtml(dataset.query_file || "")}`;
  document.getElementById("datasetMeta").textContent = `${datasetName} · ${dataset.query_count || 0} queries`;
  document.getElementById("pageSubtitle").textContent = `Current dataset: ${datasetName}. Single-query evaluation and batch evaluation share the same service on port 6010.`;
  document.getElementById("batchButton").textContent = `Batch Evaluation: ${datasetName}`;

  // Tolerate a response without a `queries` array instead of throwing.
  (data.queries || []).forEach((query) => {
    const btn = document.createElement("button");
    btn.className = "query-item";
    btn.textContent = query;
    btn.onclick = () => {
      document.getElementById("queryInput").value = query;
      runSingle();
    };
    root.appendChild(btn);
  });
}

/**
 * Load the dataset list, pick a current dataset if none is selected yet, and
 * populate `#datasetSelect`. Changing the selection reloads queries and history.
 *
 * @returns {Promise<void>}
 */
async function loadDatasets() {
  const data = await fetchJSON("/api/datasets");
  _datasets = data.datasets || [];
  if (!_currentDatasetId) {
    _currentDatasetId = data.current_dataset_id || (_datasets[0] && _datasets[0].dataset_id) || "";
  }

  const select = document.getElementById("datasetSelect");
  select.innerHTML = "";
  _datasets.forEach((dataset) => {
    const opt = document.createElement("option");
    opt.value = dataset.dataset_id;
    opt.textContent = `${dataset.display_name || dataset.dataset_id} (${dataset.query_count || 0})`;
    if (dataset.dataset_id === _currentDatasetId) opt.selected = true;
    select.appendChild(opt);
  });

  select.onchange = async (ev) => {
    _currentDatasetId = ev.target.value;
    try {
      await loadQueries();
      await loadHistory();
    } catch (err) {
      // Nothing awaits an event handler, so surface the failure here.
      reportError(err);
    }
  };
}

/**
 * Build the one-line summary for a history entry.
 *
 * @param {object|null|undefined} meta - Batch metadata; reads `query_count`,
 *   `queries`, `per_query` and `aggregate_metrics`.
 * @returns {string} HTML-safe summary wrapped in newlines, or "" when there
 *   is nothing to show.
 */
function historySummaryHtml(meta) {
  const info = meta || {};
  const m = info.aggregate_metrics;
  const nq =
    info.query_count ||
    (info.queries && info.queries.length) ||
    (info.per_query && info.per_query.length) ||
    null;

  const parts = [];
  if (nq != null) parts.push(`Queries ${escapeHtml(nq)}`);

  // [metric key, caption] pairs, in display order.
  const summaryFields = [
    ["Primary_Metric_Score", "Primary"],
    ["NDCG@20", "NDCG@20"],
    ["ERR@10", "ERR@10"],
    ["Strong_Precision@10", "Strong@10"],
    ["Gain_Recall@20", "Gain Recall@20"],
  ];
  summaryFields.forEach(([key, caption]) => {
    if (m && m[key] != null) parts.push(`${caption} ${fmtNumber(m[key])}`);
  });

  if (!parts.length) return "";
  return `\n${parts.join(" · ")}\n`;
}

/**
 * Load batch history for the current dataset and render it into `#history`.
 * Does nothing when no dataset is selected.
 *
 * @returns {Promise<void>}
 */
async function loadHistory() {
  if (!_currentDatasetId) return;
  const data = await fetchJSON(`/api/history?dataset_id=${encodeURIComponent(_currentDatasetId)}`);
  const root = document.getElementById("history");
  root.classList.remove("muted");

  const items = data.history || [];
  if (!items.length) {
    root.innerHTML = "No history yet.";
    return;
  }

  // Build the list container explicitly: looking it up with querySelector after
  // resetting innerHTML finds nothing and appendChild then throws.
  // NOTE(review): a <div> is assumed for the ".history-list" container.
  root.innerHTML = "";
  const list = document.createElement("div");
  list.className = "history-list";
  root.appendChild(list);

  items.forEach((item) => {
    const btn = document.createElement("button");
    btn.type = "button";
    btn.className = "history-item";
    btn.setAttribute("aria-label", `Open report ${item.batch_id}`);

    const sum = historySummaryHtml(item.metadata);
    const dataset = (item.metadata && item.metadata.dataset) || {};
    const datasetName = dataset.display_name || dataset.dataset_id || item.dataset_id || "";
    const datasetSuffix = datasetName ? ` · ${escapeHtml(datasetName)}` : "";

    // `sum` is already HTML-safe; every other value is escaped here.
    btn.innerHTML = `\n${escapeHtml(item.batch_id)}\n${escapeHtml(item.created_at)} · tenant ${escapeHtml(item.tenant_id)}${datasetSuffix}\n${sum}`;
    btn.onclick = () => openBatchReport(item.batch_id);
    list.appendChild(btn);
  });
}

// Markdown path of the most recently opened report, used by the copy button.
let _lastReportPath = "";

/**
 * Hide the report modal and clear its body and meta line.
 */
function closeReportModal() {
  const el = document.getElementById("reportModal");
  el.classList.remove("is-open");
  el.setAttribute("aria-hidden", "true");
  document.getElementById("reportModalBody").innerHTML = "";
  document.getElementById("reportModalMeta").textContent = "";
}

/**
 * Open the report modal for a batch and load its markdown report.
 *
 * The markdown is converted with `marked` and sanitized with `DOMPurify`
 * before insertion. Load failures are shown inside the modal body.
 *
 * @param {string} batchId - Batch identifier.
 * @returns {Promise<void>}
 */
async function openBatchReport(batchId) {
  const el = document.getElementById("reportModal");
  const body = document.getElementById("reportModalBody");
  const metaEl = document.getElementById("reportModalMeta");
  const titleEl = document.getElementById("reportModalTitle");

  el.classList.add("is-open");
  el.setAttribute("aria-hidden", "false");
  titleEl.textContent = batchId;
  metaEl.textContent = "";
  body.className = "report-modal-body batch-report-md report-modal-loading";
  body.textContent = "Loading report…";

  try {
    const rep = await fetchJSON(`/api/history/${encodeURIComponent(batchId)}/report`);
    _lastReportPath = rep.report_markdown_path || "";
    const dataset = rep.dataset || {};
    metaEl.textContent = [dataset.display_name || dataset.dataset_id || "", rep.report_markdown_path || ""]
      .filter(Boolean)
      .join(" · ");
    const raw = marked.parse(rep.markdown || "", { gfm: true });
    const safe = DOMPurify.sanitize(raw, { USE_PROFILES: { html: true } });
    body.className = "report-modal-body batch-report-md";
    body.innerHTML = safe;
  } catch (e) {
    body.className = "report-modal-body report-modal-error";
    body.textContent = e && e.message ? e.message : String(e);
  }
}

/**
 * Evaluate the query currently typed into `#queryInput` against the current
 * dataset and render metrics, results, missed results and tips. Failures are
 * shown in the status element instead of rejecting.
 *
 * @returns {Promise<void>}
 */
async function runSingle() {
  const query = document.getElementById("queryInput").value.trim();
  if (!query) return;
  document.getElementById("status").textContent = `Evaluating "${query}" on ${_currentDatasetId}...`;
  try {
    const data = await fetchJSON("/api/search-eval", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ query, dataset_id: _currentDatasetId, top_k: 100, auto_annotate: false }),
    });
    document.getElementById("status").textContent = `Done. total=${data.total}`;
    renderMetrics(data.metrics, data.metric_context);
    renderResults(data.results, "results", true);
    renderResults(data.missing_relevant, "missingRelevant", false);
    renderTips(data);
  } catch (err) {
    // Without this the status would stay on "Evaluating..." forever.
    reportError(err);
    return;
  }
  // History refresh stays in the background; log a failure but keep the
  // successful "Done" status.
  loadHistory().catch((err) => console.error("Failed to refresh history:", err));
}

/**
 * Run a batch evaluation for the current dataset and render the aggregate
 * metrics. Failures are shown in the status element instead of rejecting.
 *
 * @returns {Promise<void>}
 */
async function runBatch() {
  document.getElementById("status").textContent = `Running batch evaluation for ${_currentDatasetId}...`;
  try {
    const data = await fetchJSON("/api/batch-eval", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ dataset_id: _currentDatasetId, top_k: 100, auto_annotate: false }),
    });
    document.getElementById("status").textContent = `Batch done. report=${data.batch_id}`;
    renderMetrics(data.aggregate_metrics, data.metric_context);
    renderResults([], "results", true);
    renderResults([], "missingRelevant", false);
    document.getElementById("tips").innerHTML =
      "\nBatch evaluation uses cached labels only unless force refresh is requested via CLI/API.\n";
  } catch (err) {
    reportError(err);
    return;
  }
  loadHistory().catch((err) => console.error("Failed to refresh history:", err));
}

// Wire up page-level listeners and start the initial load. Skipped when no DOM
// is available so the helper functions can be loaded outside a browser.
if (typeof document !== "undefined") {
  document.getElementById("reportModal").addEventListener("click", (ev) => {
    if (ev.target && ev.target.getAttribute("data-close-report") === "1") closeReportModal();
  });

  document.addEventListener("keydown", (ev) => {
    if (ev.key === "Escape") closeReportModal();
  });

  document.getElementById("reportCopyPath").addEventListener("click", async () => {
    if (!_lastReportPath) return;
    try {
      await navigator.clipboard.writeText(_lastReportPath);
    } catch (err) {
      // Copying is best-effort; keep it non-fatal but leave a trace.
      console.warn("Could not copy report path:", err);
    }
  });

  loadDatasets()
    .then(() => loadQueries())
    .then(() => loadHistory())
    .catch(reportError);
}