<!--
leaderboard / index.html
msana's picture
Update index.html
98ae972 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<meta name="author" content="Dr. Mohamed Sana">
<title>GSMA Open-Telco LLM Benchmarks</title>
<!-- Tailwind + Chart.js -->
<script src="https://cdn.tailwindcss.com"></script>
<script>
  // The theme toggle adds/removes the `dark` class on <html>. Tell Tailwind to
  // key its `dark:` variants off that class; the Play CDN default follows the
  // OS colour-scheme media query, which ignores the user's toggle.
  tailwind.config = { darkMode: 'class' };
</script>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<!-- Fonts -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap" rel="stylesheet">
<style>
:root{
color-scheme: light dark;
--text: #0f172a;
--bg: #f8fafc;
--card-bg: rgba(255,255,255,.75);
--border: #e2e8f0;
--header-bg: rgba(241,245,249,.85);
--sticky-bg: rgba(248,250,252,.92);
--chip-bg:#f1f5f9;
}
.dark{
--text: #e2e8f0;
--bg: #020617;
--card-bg: rgba(2,6,23,.6);
--border:#334155;
--header-bg: rgba(30,41,59,.75);
--sticky-bg: rgba(2,6,23,.92);
--chip-bg: rgba(15,23,42,.6);
}
html { font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, "Helvetica Neue", Arial, "Noto Sans"; }
body { color: var(--text); background: var(--bg); }
.card{ border-radius:1rem; box-shadow:0 10px 25px rgba(2,6,23,.08); background:var(--card-bg); backdrop-filter: blur(8px); border:1px solid var(--border); }
.btn{ display:inline-flex; align-items:center; justify-content:center; gap:.5rem; border-radius:.8rem; padding:.625rem 1rem; font-weight:700; transition: transform .05s ease; }
.btn:active{ transform: scale(.98); }
.btn-primary{ background:#4f46e5; color:#fff; } .btn-primary:hover{ background:#6366f1; }
.btn-ghost{ background:transparent; border:1px solid var(--border); } .btn-ghost:hover{ background:var(--chip-bg); }
.btn-outline{ border:1px solid #4f46e5; color:#4338ca; } .dark .btn-outline{ color:#a5b4fc; }
.btn-outline:hover{ background:#eef2ff; } .dark .btn-outline:hover{ background: rgba(30,27,75,.5); }
.input{ width:100%; border:1px solid var(--border); border-radius:.8rem; padding:.6rem .8rem; background:#fff; color:#0f172a; }
.dark .input{ background:#0b1220; color:#e2e8f0; }
.input:focus{ outline:none; box-shadow:0 0 0 2px rgba(99,102,241,.6); }
.label{ font-size:.875rem; font-weight:600; color:#334155; } .dark .label{ color:#cbd5e1; }
.tab{ padding:.6rem 1rem; border-radius:.8rem; cursor:pointer; font-weight:700; }
.tab-active{ background:#fff; border:1px solid var(--border); box-shadow:0 1px 2px rgba(0,0,0,.04); } .dark .tab-active{ background:#0b1220; }
.pill{ display:inline-flex; align-items:center; padding:.1rem .5rem; border-radius:999px; font-size:.53rem; font-weight:700; }
.metric-badge{ background:#ecfdf5; color:#065f46; } .dark .metric-badge{ background:rgba(16,185,129,.18); color:#d1fae5; }
.metric-badge-judge{ background:#e0f2fe; color:#075985; } .dark .metric-badge-judge{ background:rgba(56,189,248,.18); color:#bae6fd; }
.kpi{ color:#475569; font-size:.875rem; } .dark .kpi{ color:#94a3b8; }
/* Column widths */
/* .rank-col{ width:1.5rem; }
.provider-col{ width:4rem; }
.model-col{ width:10rem; white-space:nowrap; }
.mean-col{ width:3rem; white-space:nowrap; }
.ds-col{ min-width:9.5rem; white-space:nowrap; } */
.rank-col{ }
.provider-col{ }
.model-col{ white-space:nowrap; }
.mean-col{ white-space:nowrap; }
.ds-col{ white-space:nowrap; }
/* Sticky columns: use calc so offsets update easily */
.sticky-rank{ position: sticky; left: 0; z-index: 3; background: var(--sticky-bg); }
.sticky-provider{ position: sticky; left: 2.3rem; z-index: 3; background: var(--sticky-bg); }
.sticky-model{ position: sticky; left: calc(1.85rem + 6rem); z-index: 3; background: var(--sticky-bg); }
.sticky-mean{ position: sticky; left: calc(2.3rem + 4rem + 10rem + 0.5rem); z-index: 3; background: var(--sticky-bg); }
/* Mobile: hide rank & dataset columns; update sticky offsets */
@media (max-width: 768px){
.rank-col, .ds-col, .ds-head { display:none !important; }
.sticky-provider{ left: 0; }
.sticky-model{ left: 14rem; }
.sticky-mean{ left: calc(14rem + 18rem); }
}
/* Header background */
thead th{ background: var(--header-bg); }
/* Global gradient */
.gradient-bg{
background:
radial-gradient(1200px 600px at 20% -10%, rgba(99,102,241,.25), rgba(99,102,241,0) 60%),
radial-gradient(1200px 600px at 80% -10%, rgba(20,184,166,.2), rgba(20,184,166,0) 60%);
}
/* Scrollbar for horizontal overflow */
.scrollbar-thin::-webkit-scrollbar{ height:10px; }
.scrollbar-thin::-webkit-scrollbar-thumb{ background:#c7d2fe; border-radius:999px; }
.scrollbar-thin::-webkit-scrollbar-track{ background:transparent; }
/* Chart container: responsive height */
#chartWrap{ height: clamp(260px, 42vh, 460px); }
</style>
</head>
<body class="min-h-screen dark:bg-gray-800">
<!-- Header -->
<header class="sticky top-0 z-40 backdrop-blur dark:bg-white-800 border-b" style="border-color: var(--border)">
<div class="mx-auto max-w-7xl px-4 md:px-6 py-3 md:py-4 flex items-center justify-between">
<div class="flex items-center gap-3 md:gap-4">
<div class="h-10 w-10 rounded-xl bg-indigo-600 text-white grid place-items-center shadow-lg">📡</div>
<div>
<h1 class="text-lg md:text-xl font-extrabold tracking-tight" style="color: var(--text)">GSMA Open-Telco LLM Benchmarks</h1>
<p class="text-xs md:text-sm">Benchmarking models across telecom datasets</p>
</div>
</div>
<div class="md:flex items-center gap-2 hidden">
<button id="refreshBtn" class="btn btn-ghost" title="Refresh results">⟲ Refresh</button>
<!--button id="exportCsvBtn" class="btn btn-outline" title="Export current view to CSV">⭳ Export CSV</button-->
<button id="themeToggle" class="btn btn-ghost" title="Toggle dark mode">🌙</button>
</div>
</div>
</header>
<!-- Main -->
<main class="mx-auto max-w-7xl px-4 md:px-6 py-6 md:py-8 space-y-6 md:space-y-8">
<!-- Tabs -->
<div class="flex gap-2 md:gap-3">
<button class="tab tab-active" data-tab="leaderboard">🏆 Leaderboard</button>
<button class="tab" data-tab="submit">📤 Submit Model</button>
<button class="tab" data-tab="about">ℹ️ About</button>
</div>
<!-- Leaderboard Tab -->
<section id="tab-leaderboard" class="space-y-6 md:space-y-8">
<!-- Controls -->
<div class="grid lg:grid-cols-4 gap-4 md:gap-6">
<div class="card p-4 md:p-6 lg:col-span-2">
<div class="flex items-center justify-between">
<h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">Select Datasets</h2>
<div class="flex gap-2">
<button id="selectAllBtn" class="btn btn-ghost text-xs md:text-sm px-3">Select all</button>
<button id="clearAllBtn" class="btn btn-ghost text-xs md:text-sm px-3">Clear</button>
</div>
</div>
<div id="datasetFilters" class="mt-3 md:mt-4 flex flex-wrap gap-2.5"></div> <!--grid-cols-1 sm:grid-cols-2 lg:grid-cols-3-->
</div>
<div class="card p-4 md:p-6">
<h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">Search & Filter</h2>
<div class="mt-2 md:mt-3 space-y-2.5">
<!-- Neither control has a visible <label>; aria-label gives each an accessible name (a placeholder is not a label). -->
<input id="searchInput" class="input" placeholder="Search provider/model…" aria-label="Search provider or model"/>
<select id="providerSelect" class="input" aria-label="Filter by provider">
<option value="">All providers</option>
</select>
</div>
</div>
<div class="card p-4 md:p-6">
<h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">KPI</h2>
<div class="mt-2 grid grid-cols-2 gap-3">
<div>
<div class="text-2xl md:text-3xl font-extrabold" id="kpiModels"></div>
<div class="kpi">Models</div>
</div>
<div>
<div class="text-2xl md:text-3xl font-extrabold" id="kpiDatasets"></div>
<div class="kpi">Datasets</div>
</div>
<div class="col-span-2 text-xs text-slate-500 dark:text-slate-400" id="lastUpdated">Last updated —</div>
</div>
</div>
</div>
<!-- Visualization -->
<div class="card p-5 md:p-6 lg:p-8">
<div class="flex items-center justify-between gap-3">
<h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">Model Comparison</h2>
<div class="text-xs md:text-sm text-slate-500">Click ⭐ on rows to compare (max 3)</div>
</div>
<div id="chartWrap" class="mt-3 md:mt-4">
<canvas id="scoresChart"></canvas>
</div>
</div>
<!-- Table (always horizontally scrollable) -->
<div class="card overflow-hidden">
<div class="px-4 md:px-6 py-3 md:py-4 flex items-center justify-between">
<div class="font-semibold" style="color: var(--text)">Leaderboard</div>
<div class="text-xs md:text-sm text-slate-500">Sorted by mean score across selected datasets</div>
</div>
<div class="overflow-x-auto overscroll-x-contain scrollbar-thin pb-2">
<table class="min-w-[64rem] w-full text-sm">
<thead class="border-t border-b" style="border-color: var(--border)">
<tr id="tableHeaderRow">
<th class="p-3 text-left rank-col sticky-rank">#</th>
<th class="p-3 text-left provider-col sticky-provider border-l">Provider</th>
<th class="p-3 text-left model-col sticky-model border-l">Model</th>
<th class="p-3 text-left mean-col sticky-mean border-l">
Mean
<div class="text-[11px] text-slate-500">on selected</div>
</th>
<!-- Dataset columns injected here -->
</tr>
</thead>
<tbody id="tableBody"></tbody>
</table>
</div>
<div class="flex items-center gap-2 justify-end p-3">
<button id="refreshBtn" class="btn btn-ghost md:hidden" title="Refresh results">⟲ Refresh</button>
<button id="exportCsvBtn" class="btn btn-outline" title="Export current view to CSV">⭳ Export CSV</button>
<button id="themeToggle" class="btn btn-ghost md:hidden" title="Toggle dark mode">🌙</button>
</div>
</div>
</section>
<!-- Submit Tab -->
<section id="tab-submit" class="hidden">
<div class="card p-5 md:p-6 lg:p-8">
<h2 class="text-lg md:text-xl font-bold" style="color: var(--text)">Submit a Model for Evaluation</h2>
<p class="text-sm text-slate-600 dark:text-slate-300 mt-1">
Provide your model details. Submissions are queued (status: <span class="font-semibold">pending</span>) and evaluated automatically. Results will appear on the leaderboard when ready.
</p>
<form id="submitForm" class="mt-4 grid md:grid-cols-2 gap-4 md:gap-6">
<div>
<label class="label" for="modelProvider">Model Provider</label>
<input class="input" id="modelProvider" name="model_provider" required placeholder="e.g., TelcoAI Labs"/>
</div>
<div>
<label class="label" for="modelName">Model Name</label>
<input class="input" id="modelName" name="model_name" required placeholder="e.g., T-LLM-7B"/>
</div>
<div>
<label class="label" for="hfRepo">Hugging Face Repo</label>
<input class="input" id="hfRepo" name="hf_repo" required placeholder="e.g., telcoai/t-llm-7b"/>
</div>
<div>
<label class="label" for="contactEmail">Contact Email</label>
<input class="input" id="contactEmail" name="contact_email" required type="email" placeholder="[email protected]"/>
</div>
<div class="md:col-span-2">
<label class="label" for="notes">Notes (optional)</label>
<textarea class="input" id="notes" name="notes" rows="3" placeholder="Anything we should know about your model or expected behavior"></textarea>
</div>
<div class="md:col-span-2 flex items-center gap-2">
<input id="agree" type="checkbox" required class="h-4 w-4 accent-indigo-600"/>
<label for="agree" class="text-sm">I agree to have my model evaluated and results published.</label>
</div>
<div class="md:col-span-2 flex items-center gap-3">
<button class="btn btn-primary" type="submit">Submit to Queue</button>
<button class="btn btn-ghost" type="reset">Reset</button>
<span id="submitStatus" class="text-sm"></span>
</div>
</form>
</div>
</section>
<!-- About Tab -->
<section id="tab-about" class="hidden">
<div class="card p-5 md:p-6 lg:p-8 space-y-3">
<h2 class="text-lg md:text-xl font-bold" style="color: var(--text)">About this Leaderboard</h2>
<p class="text-sm text-slate-600 dark:text-slate-300">
This dashboard ranks LLMs on telecom-focused datasets. Each cell shows the score and the metric type used
(<span class="pill metric-badge">standard</span> or <span class="pill metric-badge-judge">llm-as-judge</span>). Energy & CO₂ (TODO) appear on hover.
</p>
<p class="text-sm text-slate-600 dark:text-slate-300">The metric <span class="pill metric-badge-judge">llm-as-judge</span> adopts OpenAI OSS-120B as a judge model.</p>
</div>
</section>
</main>
<div id="toast" class="fixed bottom-4 left-1/2 -translate-x-1/2 hidden">
<div class="rounded-xl bg-slate-900 text-white px-4 py-2 shadow-lg"><span id="toastMsg">Done</span></div>
</div>
<script>
// ===== Config =====
// Prefix prepended to every request path in apiGet/apiPost ("" = same origin).
const API_BASE = "";
// When true, apiGet/apiPost answer failed requests with mock data instead of
// rethrowing. NOTE(review): init() falls back to mock data in its own catch
// blocks regardless of this flag — confirm that is intended.
const USE_MOCK_FALLBACK = false;
// Maximum number of models that can be starred for the comparison chart.
const MAX_COMPARE = 3;
// ===== State =====
// Single mutable UI state object; persisted (partly) by savePrefs/loadPrefs.
const state = {
// Dataset names as loaded by init() from /api/datasets (or the mock list).
datasets: [],
// Model result objects as loaded by init() from /api/results (or the mock).
models: [],
// Dataset names currently ticked in the "Select Datasets" card.
selectedDatasets: new Set(),
// Exact provider name to filter by; "" means all providers.
providerFilter: "",
// Free-text search, matched case-insensitively against provider/name/repo.
search: "",
// slug(provider + "-" + name) ids of the models starred for comparison.
compare: new Set(),
// Dark theme flag; starts from the OS preference, may be overridden by loadPrefs().
dark: window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches,
};
// ===== Utils =====
// First element matching a CSS selector, or null when nothing matches.
const $ = (selector) => document.querySelector(selector);
// Every element matching a CSS selector, as a real Array (not a NodeList).
const $$ = (selector) => [...document.querySelectorAll(selector)];
// Show message `m` in the #toast element and hide it again after 2.2 s.
// The pending hide timer is kept on the function object and cancelled on each
// call, so a toast fired while another is still visible gets its full display
// time instead of being hidden early by the older timer.
const showToast = (m) => {
  const t = $("#toast");
  $("#toastMsg").textContent = m;
  t.classList.remove("hidden");
  clearTimeout(showToast.hideTimer);
  showToast.hideTimer = setTimeout(() => t.classList.add("hidden"), 2200);
};
// Format `n` with `d` decimals (default 2).
// Returns an em dash for anything that is not a finite number: null,
// undefined, "", NaN, ±Infinity and non-numeric strings. Numeric strings such
// as "7.1" are still accepted and formatted.
const fmt = (n, d = 2) => {
  if (n == null || n === "") return "—";
  const value = Number(n);
  return Number.isFinite(value) ? value.toFixed(d) : "—";
};
// Lower-case `s` and collapse every run of characters outside [a-z0-9] into a
// single hyphen. Leading/trailing runs also become a hyphen (not trimmed).
const slug = (s) => {
  const lowered = s.toLowerCase();
  return lowered.split(/[^a-z0-9]+/).join('-');
};
// Persist the user-adjustable parts of `state` under the 'llm_lb_prefs' key.
// Storage access can throw (blocked storage, quota exceeded); persistence is
// best-effort, so a failure is logged instead of aborting the caller.
function savePrefs(){
  const snapshot = {
    selectedDatasets: [...state.selectedDatasets],
    datasets: [...state.datasets], // written for compatibility; loadPrefs does not read it
    providerFilter: state.providerFilter,
    search: state.search,
    dark: state.dark,
    compare: [...state.compare],
  };
  try {
    localStorage.setItem('llm_lb_prefs', JSON.stringify(snapshot));
  } catch (err) {
    console.warn('Could not save preferences:', err);
  }
}
// Restore preferences written by savePrefs() into `state`.
// Unreadable or malformed storage is ignored (with a console warning) and each
// field is type-checked before use, so a corrupted entry cannot put a value of
// the wrong shape into `state`.
function loadPrefs(){
  let p;
  try {
    p = JSON.parse(localStorage.getItem('llm_lb_prefs') || '{}');
  } catch (err) {
    console.warn('Ignoring unreadable saved preferences:', err);
    return;
  }
  if (!p || typeof p !== 'object') return;
  if (Array.isArray(p.selectedDatasets)) state.selectedDatasets = new Set(p.selectedDatasets);
  if (typeof p.providerFilter === 'string' && p.providerFilter) state.providerFilter = p.providerFilter;
  if (typeof p.search === 'string' && p.search) state.search = p.search;
  if (typeof p.dark === 'boolean') state.dark = p.dark;
  if (Array.isArray(p.compare)) state.compare = new Set(p.compare);
}
// ===== Mock =====
// Dataset names used as stand-in data; returns a fresh array on every call.
function mockDatasets(){
  const names = [];
  names.push("3GPP-TSG", "NetBench", "TeleQna", "TeleLogs", "TeleMath");
  return names;
}
// Build a stand-in /api/results payload: one entry per catalog row, each with
// a randomly generated score record for every mock dataset.
function mockResults(){
  // [provider, model name, repo]
  const catalog = [
    ["Qwen", "Qwen3-32B", "qwen/qwen3-32b"],
    ["Qwen", "QwQ-32B", "qwen/qwq-32b"],
    ["OpenAI", "GPT-OSS-120B", "openai/gpt-oss-120b"],
    ["OpenAI", "GPT-OSS-20B", "openai/gpt-oss-20b"],
    ["DeepSeek", "R1-Distill-Llama-70B", "deepseek/r1-distill-llama-70b"],
    ["ByteDance", "Seed-OSS-36B", "bytedance/seed-oss-36b"],
    ["LLama", "Llama-8B", "llama/llama-8B-instruct"],
  ];
  const datasetNames = mockDatasets();
  const timestamp = new Date().toISOString();
  const metricTypes = ["raw", "llm-as-judge"];
  const pick = (list) => list[Math.floor(Math.random() * list.length)];
  const randomScore = () => Math.round((45 + Math.random() * 50) * 100) / 100;

  const models = catalog.map(([provider, name, repo]) => ({
    provider,
    name,
    repo,
    updated_at: timestamp,
    scores: datasetNames.map((dataset_name) => ({
      dataset_name,
      metric_type: pick(metricTypes),
      score: randomScore(),
      energy_consumed: +(0.1 + Math.random() * 2.4).toFixed(3),
      co2_consumed: +(0.05 + Math.random() * 0.95).toFixed(3),
    })),
  }));
  return { models };
}
// ===== API =====
// GET `${API_BASE}${path}` and return the parsed JSON body.
// A non-2xx response is turned into an Error carrying the status code. When
// USE_MOCK_FALLBACK is on, failures on the two known endpoints are answered
// with mock data; every other failure is rethrown.
async function apiGet(path){
  try {
    const response = await fetch(`${API_BASE}${path}`);
    if (!response.ok) throw new Error(response.status);
    return await response.json();
  } catch (err) {
    if (!USE_MOCK_FALLBACK) throw err;
    switch (path) {
      case '/api/datasets': return { datasets: mockDatasets() };
      case '/api/results': return mockResults();
      default: throw err;
    }
  }
}
// POST `body` as JSON to `${API_BASE}${path}` and return the parsed JSON reply.
// A non-2xx response becomes an Error carrying the status code. When
// USE_MOCK_FALLBACK is on, any failure yields a fake "pending" receipt instead.
async function apiPost(path, body){
  try {
    const response = await fetch(`${API_BASE}${path}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!response.ok) throw new Error(response.status);
    return await response.json();
  } catch (err) {
    if (!USE_MOCK_FALLBACK) throw err;
    return { status: 'pending', id: `mock-${Date.now()}` };
  }
}
// ===== Rendering =====
// Selected dataset names, in the order they appear in state.datasets.
// Returns an empty list when nothing is selected.
function visibleDatasets(){
  const chosen = state.selectedDatasets;
  if (!chosen.size) return [];
  return state.datasets.filter((name) => chosen.has(name));
}
// function modelRowMean(model, dsList){
// const map=Object.fromEntries(model.scores.map(s=>[s.dataset_name,s]));
// const vals=dsList.map(d=>map[d]?.score).filter(v=>typeof v==='number');
// return vals.length? vals.reduce((a,b)=>a+b,0)/vals.length : null;
// }
// Mean score of `model` over the datasets in `dsList`.
// Returns null unless the model has a real numeric score for EVERY listed
// dataset, so models with partial coverage are not ranked on an easier subset.
// Also returns null for an empty `dsList` (previously 0/0 = NaN) and tolerates
// a model without a `scores` array.
function modelRowMean(model, dsList){
  if (!dsList.length) return null;
  // A Map keeps arbitrary dataset names safe as keys; a later duplicate wins.
  const scoreByDataset = new Map((model.scores || []).map((s) => [s.dataset_name, s.score]));
  let total = 0;
  for (const name of dsList) {
    const value = scoreByDataset.get(name);
    if (typeof value !== 'number' || Number.isNaN(value)) return null;
    total += value;
  }
  return total / dsList.length;
}
// Rebuild the dataset checkbox list in #datasetFilters and refresh the
// dataset KPI. Elements are created through the DOM API (textContent /
// properties) so that dataset names coming from the API can never be parsed
// as markup or break out of an attribute.
function renderDatasetFilters(){
  const container = $("#datasetFilters");
  container.innerHTML = '';
  state.datasets.forEach((name) => {
    const label = document.createElement('label');
    label.className = 'flex flex-nowrap items-center gap-2 rounded-lg border';
    label.style.cssText = 'border-color: var(--border); padding:.5rem .6rem;';

    const cb = document.createElement('input');
    cb.type = 'checkbox';
    cb.className = 'h-4 w-4 accent-indigo-600';
    cb.dataset.dataset = name;
    cb.checked = state.selectedDatasets.has(name);
    cb.addEventListener('change', () => {
      if (cb.checked) state.selectedDatasets.add(name);
      else state.selectedDatasets.delete(name);
      // Nothing ticked: also drop any names that are no longer listed.
      if (!container.querySelector('input:checked')) state.selectedDatasets = new Set();
      savePrefs(); renderTable(); updateChart();
    });

    const text = document.createElement('span');
    text.className = 'text-sm';
    text.style.color = 'var(--text)';
    text.textContent = name;

    label.append(cb, text);
    container.appendChild(label);
  });
  $("#kpiDatasets").textContent = state.datasets.length;
}
// Fill #providerSelect with the distinct providers found in state.models.
// Options are created with textContent/value so provider names are never
// parsed as HTML. If the remembered provider filter is not among the loaded
// providers it is cleared; otherwise the select would show "All providers"
// while the table stayed filtered to a provider that no longer exists.
function renderProviders(){
  const providers = [...new Set(state.models.map((m) => m.provider))].sort();
  const select = $("#providerSelect");
  select.innerHTML = '<option value="">All providers</option>';
  providers.forEach((p) => {
    const option = document.createElement('option');
    option.value = p;
    option.textContent = p;
    select.appendChild(option);
  });
  if (state.providerFilter && !providers.includes(state.providerFilter)) state.providerFilter = '';
  select.value = state.providerFilter;
}
// Replace the per-dataset <th> cells in the table header with one cell per
// visible dataset. Names are written with textContent so that API-provided
// dataset names cannot inject markup.
function renderHeader(){
  const head = $("#tableHeaderRow");
  head.querySelectorAll('th[data-ds]').forEach((el) => el.remove());
  visibleDatasets().forEach((name) => {
    const th = document.createElement('th');
    th.dataset.ds = name;
    th.scope = 'col';
    th.className = 'p-3 text-left ds-col ds-head';

    const title = document.createElement('div');
    title.className = 'font-semibold';
    title.style.color = 'var(--text)';
    title.textContent = name;

    const hint = document.createElement('div');
    hint.className = 'text-[11px] text-slate-500';
    hint.textContent = 'score • metric';

    th.append(title, hint);
    head.appendChild(th);
  });
}
// Re-render the leaderboard: header, one row per model that passes the
// provider/search filters (sorted by mean over the visible datasets, models
// without a mean last), the model-count KPI and the compare (star) buttons.
//
// Provider, model name and repo can originate from user submissions, so every
// value interpolated into the row template is HTML-escaped first.
function renderTable(){
  const esc = (v) => String(v ?? '').replace(/[&<>"']/g,
    (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]));
  // Models without a usable mean (null/NaN) sort after every scored model.
  const sortKey = (m) => (Number.isFinite(m.mean) ? m.mean : -1);

  renderHeader();
  const ds = visibleDatasets();
  const tb = $("#tableBody");
  tb.innerHTML = '';

  const needle = String(state.search || '').toLowerCase();
  const models = state.models
    .filter((m) => {
      const text = (m.provider + " " + m.name + " " + m.repo).toLowerCase();
      const okProvider = !state.providerFilter || m.provider === state.providerFilter;
      const okSearch = !needle || text.includes(needle);
      return okProvider && okSearch;
    })
    .map((m) => ({ ...m, mean: modelRowMean(m, ds) }))
    .sort((a, b) => sortKey(b) - sortKey(a));

  models.forEach((m, i) => {
    const scoreByDataset = new Map((m.scores || []).map((s) => [s.dataset_name, s]));
    const id = slug(m.provider + "-" + m.name);
    const favOn = state.compare.has(id);

    const row = document.createElement('tr');
    row.className = 'border-b';
    // Reference the CSS variable (not its computed value) so the border
    // follows a later theme toggle without needing a re-render.
    row.style.borderColor = 'var(--border)';
    row.innerHTML = `
      <td class="p-3 rank-col sticky-rank">${i + 1}</td>
      <td class="p-3 provider-col sticky-provider border-l">
        <div class="font-medium">${esc(m.provider)}</div>
      </td>
      <td class="p-3 model-col sticky-model border-l">
        <div class="flex items-center gap-2">
          <button type="button" class="text-lg" data-fav="${esc(id)}" title="Add to compare" aria-pressed="${favOn}">${favOn ? '⭐' : '☆'}</button>
          <div class="font-semibold" style="color: var(--text)">${esc(m.name)}</div>
        </div>
        <div class="text-xs text-slate-500">${esc(m.repo)}</div>
      </td>
      <td class="p-3 mean-col sticky-mean border-l">
        <div class="text-base font-semibold text-blue-500">${Number.isFinite(m.mean) ? fmt(m.mean, 2) : '—'}</div>
      </td>
    `;

    ds.forEach((d) => {
      const s = scoreByDataset.get(d);
      const cell = document.createElement('td');
      cell.className = 'p-3 ds-col';
      if (!s) {
        cell.textContent = '—';
        row.appendChild(cell);
        return;
      }
      cell.title = `Energy: ${fmt(s.energy_consumed, 3)} kWh\nCO₂: ${fmt(s.co2_consumed, 3)} kg`;
      // Only judge-scored cells carry a badge; standard metrics show the bare score.
      const badge = s.metric_type === 'llm-as-judge'
        ? '<div class="pill metric-badge-judge ms-2">llm-as-judge</div>'
        : '';
      cell.innerHTML = `
        <div class="flex items-center">
          <div class="text-base font-semibold" style="color: var(--text)">${esc(fmt(s.score, 2))}</div>
          ${badge}
        </div>
      `;
      row.appendChild(cell);
    });
    tb.appendChild(row);
  });

  $("#kpiModels").textContent = models.length;

  // Star buttons toggle membership in the compare set (capped at MAX_COMPARE).
  tb.querySelectorAll('button[data-fav]').forEach((b) => {
    b.onclick = () => {
      const id = b.dataset.fav;
      if (state.compare.has(id)) {
        state.compare.delete(id);
      } else {
        if (state.compare.size >= MAX_COMPARE) {
          showToast(`You can compare up to ${MAX_COMPARE} models.`);
          return;
        }
        state.compare.add(id);
      }
      savePrefs(); renderTable(); updateChart();
    };
  });
}
// ===== Chart =====
// Chart.js instance for #scoresChart; created on the first updateChart() call.
let chart;
// Draw (or refresh) the comparison line chart: one line per starred model,
// one x-axis point per visible dataset. Missing scores are plotted as gaps.
function updateChart(){
  const labels = visibleDatasets();
  const isStarred = (m) => state.compare.has(slug(m.provider+"-"+m.name));
  const datasets = state.models.filter(isStarred).map((m) => {
    const scoreOf = Object.fromEntries(m.scores.map((s) => [s.dataset_name, s.score]));
    return {
      label: `${m.provider} / ${m.name}`,
      data: labels.map((d) => scoreOf[d] ?? null),
      tension: .25,
      spanGaps: true,
      borderWidth: 3,
      pointRadius: 3,
      pointHoverRadius: 5,
    };
  });
  const ctx = document.getElementById('scoresChart').getContext('2d');

  if (chart) {
    // Existing chart: swap in the new data and redraw.
    chart.data.labels = labels;
    chart.data.datasets = datasets;
    chart.update();
    return;
  }

  const options = {
    responsive: true,
    maintainAspectRatio: false, // height comes from #chartWrap (clamp -> responsive)
    plugins: {
      legend: { display: true, labels: { boxWidth: 18, usePointStyle: true } },
      tooltip: { mode: 'index', intersect: false },
    },
    interaction: { mode: 'nearest', intersect: false },
    scales: {
      y: { beginAtZero: true, max: 100, title: { display: true, text: 'Score' }, grid: { drawBorder: false } },
      x: { grid: { display: false } },
    },
    layout: { padding: 0 },
  };
  chart = new Chart(ctx, { type: 'line', data: { labels, datasets }, options });
}
// ===== Tabs / Theme / Export =====
// Wire the tab buttons: clicking one marks it active and shows only the
// matching #tab-<name> section.
function bindTabs(){
  const panelNames = ["leaderboard", "submit", "about"];
  for (const btn of $$(".tab")) {
    btn.addEventListener('click', () => {
      const target = btn.dataset.tab;
      for (const other of $$(".tab")) other.classList.remove('tab-active');
      btn.classList.add('tab-active');
      for (const name of panelNames) {
        const panel = document.getElementById(`tab-${name}`);
        panel.classList.toggle('hidden', name !== target);
      }
    });
  }
}
// Apply state.dark to the page and persist it.
// The markup contains two buttons with id="themeToggle" (desktop header and
// mobile footer); querySelectorAll matches both, so both icons stay in sync.
function applyTheme(){
  document.documentElement.classList.toggle('dark', state.dark);
  const icon = state.dark ? '☀️' : '🌙';
  $$("#themeToggle").forEach((btn) => { btn.textContent = icon; });
  savePrefs();
}
// Download the current leaderboard view (same filters and ordering as the
// table) as 'telecom-llm-leaderboard.csv'. Every field is double-quoted with
// embedded quotes doubled.
function exportCSV(){
  const ds = visibleDatasets();
  const headers = ['Rank', 'Provider', 'Model', 'Repo', 'Mean', ...ds];
  const needle = String(state.search || '').toLowerCase();

  const models = state.models
    .map((m) => ({ ...m, mean: modelRowMean(m, ds) }))
    .sort((a, b) => (b.mean ?? -1) - (a.mean ?? -1))
    .filter((m) => {
      const text = (m.provider + " " + m.name + " " + m.repo).toLowerCase();
      const okProvider = !state.providerFilter || m.provider === state.providerFilter;
      const okSearch = !needle || text.includes(needle);
      return okProvider && okSearch;
    });

  const rows = models.map((m, i) => {
    const byDataset = Object.fromEntries(m.scores.map((s) => [s.dataset_name, s]));
    const cells = ds.map((d) => {
      const s = byDataset[d];
      return s ? `${fmt(s.score, 2)} (${s.metric_type})` : '';
    });
    return [i + 1, m.provider, m.name, m.repo, fmt(m.mean, 2), ...cells];
  });

  const quote = (x) => '"' + String(x).replaceAll('"', '""') + '"';
  const csv = [headers, ...rows].map((r) => r.map(quote).join(',')).join('\n');

  const blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'telecom-llm-leaderboard.csv';
  // Attach the link before clicking and release the object URL on a later
  // tick: revoking synchronously right after click() can cancel the download
  // in browsers that start it asynchronously.
  document.body.appendChild(a);
  a.click();
  a.remove();
  setTimeout(() => URL.revokeObjectURL(url), 0);
}
// ===== Controls & Submit =====
// Attach handlers to the dataset/search/provider controls, the refresh,
// export and theme buttons, and the submission form.
function bindControls(){
  // Replace the dataset selection, then redraw everything that depends on it.
  const selectDatasets = (names) => {
    state.selectedDatasets = new Set(names);
    renderDatasetFilters(); renderTable(); updateChart(); savePrefs();
  };
  $("#selectAllBtn").onclick = () => selectDatasets(state.datasets);
  $("#clearAllBtn").onclick = () => selectDatasets([]);

  $("#searchInput").addEventListener('input', (e) => { state.search = e.target.value; savePrefs(); renderTable(); });
  $("#providerSelect").addEventListener('change', (e) => { state.providerFilter = e.target.value; savePrefs(); renderTable(); });

  // The Refresh and theme buttons each exist twice under the same id (desktop
  // header + mobile footer). querySelector would only reach the first copy and
  // leave the mobile buttons dead, so bind every match.
  $$("#refreshBtn").forEach((btn) => { btn.onclick = init; });
  $("#exportCsvBtn").onclick = exportCSV;
  $$("#themeToggle").forEach((btn) => {
    btn.onclick = () => { state.dark = !state.dark; applyTheme(); };
  });

  $("#submitForm").addEventListener('submit', async (e) => {
    e.preventDefault();
    const payload = {
      model_provider: $("#modelProvider").value.trim(),
      model_name: $("#modelName").value.trim(),
      hf_repo: $("#hfRepo").value.trim(),
      contact_email: $("#contactEmail").value.trim(),
      notes: $("#notes").value.trim(),
    };
    const statusEl = $("#submitStatus");
    statusEl.textContent = 'Submitting…';
    try {
      const res = await apiPost('/api/submit', payload);
      // Build the status line from text nodes so the server reply is never
      // interpreted as HTML.
      const strong = document.createElement('span');
      strong.className = 'font-semibold';
      strong.textContent = res.status;
      statusEl.textContent = 'Status: ';
      statusEl.append(strong, ` (id: ${res.id})`);
      showToast('Submission received — queued as pending');
      e.target.reset();
    } catch (err) {
      // The message points the user at the console, so actually log the error.
      console.error('Submission failed:', err);
      statusEl.textContent = 'Submission failed. See console.';
    }
  });
}
// ===== Init =====
// init() doubles as the Refresh handler, so event listeners must only be
// attached on its first run; re-binding would stack duplicate listeners
// (e.g. the submit form would post once per refresh).
let uiBound = false;

// Load preferences and data, then render the whole leaderboard view.
// Safe to call repeatedly (used for Refresh).
async function init(){
  loadPrefs();
  applyTheme();
  if (!uiBound) {
    bindTabs();
    bindControls();
    uiBound = true;
  }
  // Show the restored search term; otherwise the table is filtered by a
  // query the user cannot see.
  $("#searchInput").value = state.search;

  // NOTE(review): these fallbacks apply even though USE_MOCK_FALLBACK is false,
  // which means randomly generated scores are shown when the API is down.
  // Behaviour is kept, but the fallback is now logged.
  try {
    state.datasets = (await apiGet('/api/datasets')).datasets;
  } catch (err) {
    console.warn('Could not load datasets; using mock data:', err);
    state.datasets = mockDatasets();
  }
  try {
    state.models = (await apiGet('/api/results')).models;
  } catch (err) {
    console.warn('Could not load results; using mock data:', err);
    state.models = mockResults().models;
  }

  // Most recent valid updated_at among the models; invalid dates are skipped.
  const stamps = state.models
    .map((m) => new Date(m.updated_at).getTime())
    .filter((t) => Number.isFinite(t));
  if (stamps.length) {
    $("#lastUpdated").textContent = 'Last updated ' + new Date(Math.max(...stamps)).toLocaleString();
  }

  // Drop remembered selections that no longer exist, then default to "all"
  // when nothing valid remains. Stale names would otherwise leave the table
  // with no dataset columns.
  const known = new Set(state.datasets);
  state.selectedDatasets = new Set([...state.selectedDatasets].filter((d) => known.has(d)));
  if (!state.selectedDatasets.size) {
    state.selectedDatasets = new Set(state.datasets);
  }

  renderDatasetFilters(); renderProviders(); renderTable(); updateChart();
}
init();
</script>
</body>
</html>