Spaces:
Running
Running
| <!doctype html> | |
| <html lang="en"> | |
| <head> | |
|   <meta charset="UTF-8"> | |
|   <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
|   <meta name="author" content="Dr. Mohamed Sana"> | |
|   <title>GSMA Open-Telco LLM Benchmarks</title> | |
|   <!-- Tailwind (Play CDN) + Chart.js --> | |
|   <script src="https://cdn.tailwindcss.com"></script> | |
|   <!-- The theme toggle flips a "dark" class on the root element, so Tailwind's dark: variants must follow that class instead of the OS colour-scheme media query (the Play CDN default). --> | |
|   <script>tailwind.config = { darkMode: 'class' };</script> | |
|   <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> | |
|   <!-- Fonts --> | |
|   <link rel="preconnect" href="https://fonts.googleapis.com"> | |
|   <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> | |
|   <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap" rel="stylesheet"> | |
| <style> | |
| /* Theme tokens: light values live on :root; the .dark block below overrides them when an ancestor carries the "dark" class. */ | |
| :root{ | |
| color-scheme: light dark; | |
| --text: #0f172a; | |
| --bg: #f8fafc; | |
| --card-bg: rgba(255,255,255,.75); | |
| --border: #e2e8f0; | |
| --header-bg: rgba(241,245,249,.85); | |
| --sticky-bg: rgba(248,250,252,.92); | |
| --chip-bg:#f1f5f9; | |
| } | |
| .dark{ | |
| --text: #e2e8f0; | |
| --bg: #020617; | |
| --card-bg: rgba(2,6,23,.6); | |
| --border:#334155; | |
| --header-bg: rgba(30,41,59,.75); | |
| --sticky-bg: rgba(2,6,23,.92); | |
| --chip-bg: rgba(15,23,42,.6); | |
| } | |
| /* Base typography and page colours, driven by the tokens above. */ | |
| html { font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, "Helvetica Neue", Arial, "Noto Sans"; } | |
| body { color: var(--text); background: var(--bg); } | |
| /* Reusable components: translucent card, button variants, form inputs, labels, tabs, pills and metric badges. */ | |
| .card{ border-radius:1rem; box-shadow:0 10px 25px rgba(2,6,23,.08); background:var(--card-bg); backdrop-filter: blur(8px); border:1px solid var(--border); } | |
| .btn{ display:inline-flex; align-items:center; justify-content:center; gap:.5rem; border-radius:.8rem; padding:.625rem 1rem; font-weight:700; transition: transform .05s ease; } | |
| .btn:active{ transform: scale(.98); } | |
| .btn-primary{ background:#4f46e5; color:#fff; } .btn-primary:hover{ background:#6366f1; } | |
| .btn-ghost{ background:transparent; border:1px solid var(--border); } .btn-ghost:hover{ background:var(--chip-bg); } | |
| .btn-outline{ border:1px solid #4f46e5; color:#4338ca; } .dark .btn-outline{ color:#a5b4fc; } | |
| .btn-outline:hover{ background:#eef2ff; } .dark .btn-outline:hover{ background: rgba(30,27,75,.5); } | |
| .input{ width:100%; border:1px solid var(--border); border-radius:.8rem; padding:.6rem .8rem; background:#fff; color:#0f172a; } | |
| .dark .input{ background:#0b1220; color:#e2e8f0; } | |
| /* The default outline is replaced by an indigo ring so keyboard focus stays visible. */ | |
| .input:focus{ outline:none; box-shadow:0 0 0 2px rgba(99,102,241,.6); } | |
| .label{ font-size:.875rem; font-weight:600; color:#334155; } .dark .label{ color:#cbd5e1; } | |
| .tab{ padding:.6rem 1rem; border-radius:.8rem; cursor:pointer; font-weight:700; } | |
| .tab-active{ background:#fff; border:1px solid var(--border); box-shadow:0 1px 2px rgba(0,0,0,.04); } .dark .tab-active{ background:#0b1220; } | |
| .pill{ display:inline-flex; align-items:center; padding:.1rem .5rem; border-radius:999px; font-size:.53rem; font-weight:700; } | |
| .metric-badge{ background:#ecfdf5; color:#065f46; } .dark .metric-badge{ background:rgba(16,185,129,.18); color:#d1fae5; } | |
| .metric-badge-judge{ background:#e0f2fe; color:#075985; } .dark .metric-badge-judge{ background:rgba(56,189,248,.18); color:#bae6fd; } | |
| .kpi{ color:#475569; font-size:.875rem; } .dark .kpi{ color:#94a3b8; } | |
| /* Column widths: the fixed widths are disabled (kept below for reference); the active rules only prevent wrapping. */ | |
| /* .rank-col{ width:1.5rem; } | |
| .provider-col{ width:4rem; } | |
| .model-col{ width:10rem; white-space:nowrap; } | |
| .mean-col{ width:3rem; white-space:nowrap; } | |
| .ds-col{ min-width:9.5rem; white-space:nowrap; } */ | |
| .rank-col{ } | |
| .provider-col{ } | |
| .model-col{ white-space:nowrap; } | |
| .mean-col{ white-space:nowrap; } | |
| .ds-col{ white-space:nowrap; } | |
| /* Sticky columns: the first four columns stay pinned while the dataset columns scroll horizontally. */ | |
| /* NOTE(review): the left offsets are hard-coded while the column rules above set no widths, so they may not match the rendered column widths; confirm visually. */ | |
| .sticky-rank{ position: sticky; left: 0; z-index: 3; background: var(--sticky-bg); } | |
| .sticky-provider{ position: sticky; left: 2.3rem; z-index: 3; background: var(--sticky-bg); } | |
| .sticky-model{ position: sticky; left: calc(1.85rem + 6rem); z-index: 3; background: var(--sticky-bg); } | |
| .sticky-mean{ position: sticky; left: calc(2.3rem + 4rem + 10rem + 0.5rem); z-index: 3; background: var(--sticky-bg); } | |
| /* Mobile: hide rank & dataset columns; update sticky offsets */ | |
| @media (max-width: 768px){ | |
| .rank-col, .ds-col, .ds-head { display:none ; } | |
| .sticky-provider{ left: 0; } | |
| .sticky-model{ left: 14rem; } | |
| .sticky-mean{ left: calc(14rem + 18rem); } | |
| } | |
| /* Header background */ | |
| thead th{ background: var(--header-bg); } | |
| /* Global gradient: two soft radial glows (indigo on the left, teal on the right) anchored above the top edge. */ | |
| .gradient-bg{ | |
| background: | |
| radial-gradient(1200px 600px at 20% -10%, rgba(99,102,241,.25), rgba(99,102,241,0) 60%), | |
| radial-gradient(1200px 600px at 80% -10%, rgba(20,184,166,.2), rgba(20,184,166,0) 60%); | |
| } | |
| /* Scrollbar for horizontal overflow (WebKit-only pseudo-elements) */ | |
| .scrollbar-thin::-webkit-scrollbar{ height:10px; } | |
| .scrollbar-thin::-webkit-scrollbar-thumb{ background:#c7d2fe; border-radius:999px; } | |
| .scrollbar-thin::-webkit-scrollbar-track{ background:transparent; } | |
| /* Chart container: responsive height; the chart is created with maintainAspectRatio:false so it fills this box. */ | |
| #chartWrap{ height: clamp(260px, 42vh, 460px); } | |
| </style> | |
| </head> | |
| <body class="min-h-screen dark:bg-gray-800"> | |
| <!-- Header: sticky top bar with the page title and the desktop-only action buttons (hidden below the md breakpoint). --> | |
| <header class="sticky top-0 z-40 backdrop-blur border-b" style="border-color: var(--border)"> | |
|   <div class="mx-auto max-w-7xl px-4 md:px-6 py-3 md:py-4 flex items-center justify-between"> | |
|     <div class="flex items-center gap-3 md:gap-4"> | |
|       <!-- Decorative logo glyph: hidden from assistive technology, the heading carries the name. --> | |
|       <div class="h-10 w-10 rounded-xl bg-indigo-600 text-white grid place-items-center shadow-lg" aria-hidden="true">📡</div> | |
|       <div> | |
|         <h1 class="text-lg md:text-xl font-extrabold tracking-tight" style="color: var(--text)">GSMA Open-Telco LLM Benchmarks</h1> | |
|         <p class="text-xs md:text-sm">Benchmarking models across telecom datasets</p> | |
|       </div> | |
|     </div> | |
|     <div class="hidden md:flex items-center gap-2"> | |
|       <button class="btn btn-ghost" id="refreshBtn" type="button" title="Refresh results">⟲ Refresh</button> | |
|       <!-- The button text is only an emoji that the script swaps, so it needs an explicit accessible name. --> | |
|       <button class="btn btn-ghost" id="themeToggle" type="button" title="Toggle dark mode" aria-label="Toggle dark mode">🌙</button> | |
|     </div> | |
|   </div> | |
| </header> | |
| <!-- Main --> | |
| <main class="mx-auto max-w-7xl px-4 md:px-6 py-6 md:py-8 space-y-6 md:space-y-8"> | |
| <!-- Tabs: each button's data-tab names the section (id "tab-" + value) that the script un-hides on click. --> | |
| <div class="flex gap-2 md:gap-3"> | |
|   <button class="tab tab-active" type="button" data-tab="leaderboard">🏆 Leaderboard</button> | |
|   <button class="tab" type="button" data-tab="submit">📤 Submit Model</button> | |
|   <button class="tab" type="button" data-tab="about">ℹ️ About</button> | |
| </div> | |
| <!-- Leaderboard Tab: dataset/search controls, KPI card, comparison chart and the results table. --> | |
| <section id="tab-leaderboard" class="space-y-6 md:space-y-8"> | |
|   <!-- Controls --> | |
|   <div class="grid lg:grid-cols-4 gap-4 md:gap-6"> | |
|     <div class="card p-4 md:p-6 lg:col-span-2"> | |
|       <div class="flex items-center justify-between"> | |
|         <h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">Select Datasets</h2> | |
|         <div class="flex gap-2"> | |
|           <button class="btn btn-ghost text-xs md:text-sm px-3" id="selectAllBtn" type="button">Select all</button> | |
|           <button class="btn btn-ghost text-xs md:text-sm px-3" id="clearAllBtn" type="button">Clear</button> | |
|         </div> | |
|       </div> | |
|       <!-- One checkbox per dataset is injected here by the script. --> | |
|       <div id="datasetFilters" class="mt-3 md:mt-4 flex flex-wrap gap-2.5"></div> | |
|     </div> | |
|     <div class="card p-4 md:p-6"> | |
|       <h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">Search &amp; Filter</h2> | |
|       <div class="mt-2 md:mt-3 space-y-2.5"> | |
|         <!-- No visible labels in this compact card, so both controls carry an aria-label; a placeholder alone is not an accessible name. --> | |
|         <input class="input" id="searchInput" placeholder="Search provider/model…" aria-label="Search provider or model"> | |
|         <select class="input" id="providerSelect" aria-label="Filter by provider"> | |
|           <option value="">All providers</option> | |
|         </select> | |
|       </div> | |
|     </div> | |
|     <div class="card p-4 md:p-6"> | |
|       <h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">KPI</h2> | |
|       <div class="mt-2 grid grid-cols-2 gap-3"> | |
|         <div> | |
|           <div class="text-2xl md:text-3xl font-extrabold" id="kpiModels">—</div> | |
|           <div class="kpi">Models</div> | |
|         </div> | |
|         <div> | |
|           <div class="text-2xl md:text-3xl font-extrabold" id="kpiDatasets">—</div> | |
|           <div class="kpi">Datasets</div> | |
|         </div> | |
|         <div class="col-span-2 text-xs text-slate-500 dark:text-slate-400" id="lastUpdated">Last updated —</div> | |
|       </div> | |
|     </div> | |
|   </div> | |
|   <!-- Visualization --> | |
|   <div class="card p-5 md:p-6 lg:p-8"> | |
|     <div class="flex items-center justify-between gap-3"> | |
|       <h2 class="font-semibold text-base md:text-lg" style="color: var(--text)">Model Comparison</h2> | |
|       <div class="text-xs md:text-sm text-slate-500">Click ⭐ on rows to compare (max 3)</div> | |
|     </div> | |
|     <div id="chartWrap" class="mt-3 md:mt-4"> | |
|       <!-- Canvas content is invisible to assistive technology, so expose a text alternative. --> | |
|       <canvas id="scoresChart" role="img" aria-label="Line chart comparing the starred models across the selected datasets"></canvas> | |
|     </div> | |
|   </div> | |
|   <!-- Table (always horizontally scrollable) --> | |
|   <div class="card overflow-hidden"> | |
|     <div class="px-4 md:px-6 py-3 md:py-4 flex items-center justify-between"> | |
|       <div class="font-semibold" style="color: var(--text)">Leaderboard</div> | |
|       <div class="text-xs md:text-sm text-slate-500">Sorted by mean score across selected datasets</div> | |
|     </div> | |
|     <div class="overflow-x-auto overscroll-x-contain scrollbar-thin pb-2"> | |
|       <table class="min-w-[64rem] w-full text-sm"> | |
|         <caption class="sr-only">Leaderboard of models ranked by mean score across the selected datasets</caption> | |
|         <thead class="border-t border-b" style="border-color: var(--border)"> | |
|           <tr id="tableHeaderRow"> | |
|             <th class="p-3 text-left rank-col sticky-rank" scope="col">#</th> | |
|             <th class="p-3 text-left provider-col sticky-provider border-l" scope="col">Provider</th> | |
|             <th class="p-3 text-left model-col sticky-model border-l" scope="col">Model</th> | |
|             <th class="p-3 text-left mean-col sticky-mean border-l" scope="col"> | |
|               Mean | |
|               <div class="text-[11px] text-slate-500">on selected</div> | |
|             </th> | |
|             <!-- Dataset columns injected here --> | |
|           </tr> | |
|         </thead> | |
|         <tbody id="tableBody"></tbody> | |
|       </table> | |
|     </div> | |
|     <!-- Refresh and theme repeat the header buttons for small screens (md:hidden); they reuse the header buttons' ids. --> | |
|     <div class="flex items-center gap-2 justify-end p-3"> | |
|       <button class="btn btn-ghost md:hidden" id="refreshBtn" type="button" title="Refresh results">⟲ Refresh</button> | |
|       <button class="btn btn-outline" id="exportCsvBtn" type="button" title="Export current view to CSV">⭳ Export CSV</button> | |
|       <button class="btn btn-ghost md:hidden" id="themeToggle" type="button" title="Toggle dark mode" aria-label="Toggle dark mode">🌙</button> | |
|     </div> | |
|   </div> | |
| </section> | |
| <!-- Submit Tab: the form is intercepted by the script and posted as JSON; it never navigates. --> | |
| <section id="tab-submit" class="hidden"> | |
|   <div class="card p-5 md:p-6 lg:p-8"> | |
|     <h2 class="text-lg md:text-xl font-bold" style="color: var(--text)">Submit a Model for Evaluation</h2> | |
|     <p class="text-sm text-slate-600 dark:text-slate-300 mt-1"> | |
|       Provide your model details. Submissions are queued (status: <span class="font-semibold">pending</span>) and evaluated automatically. Results will appear on the leaderboard when ready. | |
|     </p> | |
|     <form id="submitForm" class="mt-4 grid md:grid-cols-2 gap-4 md:gap-6"> | |
|       <div> | |
|         <label class="label" for="modelProvider">Model Provider</label> | |
|         <input class="input" id="modelProvider" name="model_provider" required placeholder="e.g., TelcoAI Labs"> | |
|       </div> | |
|       <div> | |
|         <label class="label" for="modelName">Model Name</label> | |
|         <input class="input" id="modelName" name="model_name" required placeholder="e.g., T-LLM-7B"> | |
|       </div> | |
|       <div> | |
|         <label class="label" for="hfRepo">Hugging Face Repo</label> | |
|         <input class="input" id="hfRepo" name="hf_repo" required placeholder="e.g., telcoai/t-llm-7b"> | |
|       </div> | |
|       <div> | |
|         <label class="label" for="contactEmail">Contact Email</label> | |
|         <!-- The previous placeholder was the literal text "[email protected]", an e-mail obfuscation artefact rather than an example address. --> | |
|         <input class="input" id="contactEmail" name="contact_email" type="email" autocomplete="email" required placeholder="name@example.com"> | |
|       </div> | |
|       <div class="md:col-span-2"> | |
|         <label class="label" for="notes">Notes (optional)</label> | |
|         <textarea class="input" id="notes" name="notes" rows="3" placeholder="Anything we should know about your model or expected behavior"></textarea> | |
|       </div> | |
|       <div class="md:col-span-2 flex items-center gap-2"> | |
|         <input class="h-4 w-4 accent-indigo-600" id="agree" type="checkbox" required> | |
|         <label for="agree" class="text-sm">I agree to have my model evaluated and results published.</label> | |
|       </div> | |
|       <div class="md:col-span-2 flex items-center gap-3"> | |
|         <button class="btn btn-primary" type="submit">Submit to Queue</button> | |
|         <button class="btn btn-ghost" type="reset">Reset</button> | |
|         <!-- Live region: the script writes progress and result text here, so announce it politely. --> | |
|         <span id="submitStatus" class="text-sm" role="status" aria-live="polite"></span> | |
|       </div> | |
|     </form> | |
|   </div> | |
| </section> | |
| <!-- About Tab: static explanatory text; hidden until the "about" tab button is clicked. --> | |
| <!-- The two pills reuse the .metric-badge / .metric-badge-judge styles; in the table itself only llm-as-judge scores are rendered with a pill. --> | |
| <section id="tab-about" class="hidden"> | |
| <div class="card p-5 md:p-6 lg:p-8 space-y-3"> | |
| <h2 class="text-lg md:text-xl font-bold" style="color: var(--text)">About this Leaderboard</h2> | |
| <p class="text-sm text-slate-600 dark:text-slate-300"> | |
| This dashboard ranks LLMs on telecom-focused datasets. Each cell shows the score and the metric type used | |
| (<span class="pill metric-badge">standard</span> or <span class="pill metric-badge-judge">llm-as-judge</span>). Energy & CO₂ (TODO) appear on hover. | |
| </p> | |
| <p class="text-sm text-slate-600 dark:text-slate-300">The metric <span class="pill metric-badge-judge">llm-as-judge</span> adopts OpenAI OSS-120B as a judge model.</p> | |
| </div> | |
| </section> | |
| </main> | |
| <!-- Toast: hidden by default; showToast() writes the message into #toastMsg, removes the "hidden" class and re-adds it after 2200 ms. --> | |
| <div id="toast" class="fixed bottom-4 left-1/2 -translate-x-1/2 hidden"> | |
| <div class="rounded-xl bg-slate-900 text-white px-4 py-2 shadow-lg">✅ <span id="toastMsg">Done</span></div> | |
| </div> | |
| <script> | |
| // ===== Config ===== | |
| const API_BASE = ""; | |
| const USE_MOCK_FALLBACK = false; | |
| const MAX_COMPARE = 3; | |
| // ===== State ===== | |
| const state = { | |
| datasets: [], | |
| models: [], | |
| selectedDatasets: new Set(), | |
| providerFilter: "", | |
| search: "", | |
| compare: new Set(), | |
| dark: window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches, | |
| }; | |
| // ===== Utils ===== | |
| const $ = (s)=>document.querySelector(s); | |
| const $$ = (s)=>Array.from(document.querySelectorAll(s)); | |
| const showToast=(m)=>{const t=$("#toast");$("#toastMsg").textContent=m;t.classList.remove("hidden");setTimeout(()=>t.classList.add("hidden"),2200);}; | |
| const fmt=(n,d=2)=>(n==null||Number.isNaN(n))?"—":Number(n).toFixed(d); | |
| const slug=(s)=>s.toLowerCase().replace(/[^a-z0-9]+/g,'-'); | |
| function savePrefs(){ | |
| localStorage.setItem('llm_lb_prefs', JSON.stringify({ | |
| selectedDatasets:[...state.selectedDatasets], | |
| datasets:[...state.datasets], | |
| providerFilter:state.providerFilter, | |
| search:state.search, dark:state.dark, | |
| compare:[...state.compare], | |
| })); | |
| } | |
| function loadPrefs(){ | |
| try{ | |
| const p=JSON.parse(localStorage.getItem('llm_lb_prefs')||'{}'); | |
| if(p.selectedDatasets) state.selectedDatasets=new Set(p.selectedDatasets); | |
| if(p.providerFilter) state.providerFilter=p.providerFilter; | |
| if(p.search) state.search=p.search; | |
| if(typeof p.dark==='boolean') state.dark=p.dark; | |
| if(p.compare) state.compare=new Set(p.compare); | |
| }catch{} | |
| } | |
| // ===== Mock ===== | |
| function mockDatasets(){ return [ | |
| "3GPP-TSG", "NetBench", "TeleQna", "TeleLogs", "TeleMath", | |
| ]; } | |
| function mockResults(){ | |
| const providers=["Qwen","Qwen","OpenAI","OpenAI","DeepSeek","ByteDance", "LLama"]; | |
| const models=["Qwen3-32B","QwQ-32B","GPT-OSS-120B","GPT-OSS-20B","R1-Distill-Llama-70B","Seed-OSS-36B", "Llama-8B"]; | |
| const repos=["qwen/qwen3-32b","qwen/qwq-32b","openai/gpt-oss-120b","openai/gpt-oss-20b","deepseek/r1-distill-llama-70b","bytedance/seed-oss-36b", "llama/llama-8B-instruct"]; | |
| const ds=mockDatasets(); const now=new Date().toISOString(); | |
| const mtypes=["raw","llm-as-judge"]; | |
| const rnd=(a)=>a[Math.floor(Math.random()*a.length)]; | |
| const rScore=()=>Math.round((45+Math.random()*50)*100)/100; | |
| return {models: providers.map((p,i)=>({ | |
| provider:p, name:models[i], repo:repos[i], updated_at:now, | |
| scores: ds.map(d=>({dataset_name:d, metric_type:rnd(mtypes), score:rScore(), | |
| energy_consumed:+(0.1+Math.random()*2.4).toFixed(3), | |
| co2_consumed:+(0.05+Math.random()*0.95).toFixed(3)})) | |
| }))}; | |
| } | |
| // ===== API ===== | |
| async function apiGet(path){ | |
| try{ const r=await fetch(`${API_BASE}${path}`); if(!r.ok) throw new Error(r.status); return await r.json(); } | |
| catch(e){ if(USE_MOCK_FALLBACK){ if(path==='/api/datasets') return {datasets:mockDatasets()}; if(path==='/api/results') return mockResults(); } throw e; } | |
| } | |
| async function apiPost(path, body){ | |
| try{ const r=await fetch(`${API_BASE}${path}`,{method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(body)}); if(!r.ok) throw new Error(r.status); return await r.json(); } | |
| catch(e){ if(USE_MOCK_FALLBACK) return {status:'pending', id:`mock-${Date.now()}`}; throw e; } | |
| } | |
| // ===== Rendering ===== | |
| function visibleDatasets(){return state.selectedDatasets.size ? state.datasets.filter(d=>state.selectedDatasets.has(d)) : []; } | |
| // function modelRowMean(model, dsList){ | |
| // const map=Object.fromEntries(model.scores.map(s=>[s.dataset_name,s])); | |
| // const vals=dsList.map(d=>map[d]?.score).filter(v=>typeof v==='number'); | |
| // return vals.length? vals.reduce((a,b)=>a+b,0)/vals.length : null; | |
| // } | |
| function modelRowMean(model, dsList){ | |
| const map=Object.fromEntries(model.scores.map(s=>[s.dataset_name,s])); | |
| const vals=dsList.map(d=>map[d]?.score).filter(v=>typeof v==='number'); | |
| return (vals.length >= dsList.length)? vals.reduce((a,b)=>a+b,0)/vals.length : null; | |
| } | |
| function renderDatasetFilters(){ | |
| const c=$("#datasetFilters"); c.innerHTML=''; | |
| state.datasets.forEach(d=>{ | |
| const checked=state.selectedDatasets.has(d); | |
| c.insertAdjacentHTML('beforeend', ` | |
| <label class="flex flex-nowrap items-center gap-2 rounded-lg border" style="border-color: var(--border); padding:.5rem .6rem;"> | |
| <input type="checkbox" class="h-4 w-4 accent-indigo-600" data-dataset="${d}" ${checked?'checked':''}/> | |
| <span class="text-sm" style="color: var(--text)">${d}</span> | |
| </label> | |
| `); | |
| }); | |
| c.querySelectorAll('input[type="checkbox"]').forEach(cb=>{ | |
| cb.addEventListener('change', ()=>{ | |
| if(cb.checked) state.selectedDatasets.add(cb.dataset.dataset); | |
| else state.selectedDatasets.delete(cb.dataset.dataset); | |
| if([...c.querySelectorAll('input:checked')].length===0) state.selectedDatasets=new Set(); | |
| savePrefs(); renderTable(); updateChart(); | |
| }); | |
| }); | |
| $("#kpiDatasets").textContent = state.datasets.length; | |
| } | |
| function renderProviders(){ | |
| const provs=[...new Set(state.models.map(m=>m.provider))].sort(); | |
| const sel=$("#providerSelect"); | |
| sel.innerHTML='<option value="">All providers</option>'+provs.map(p=>`<option>${p}</option>`).join(''); | |
| if(state.providerFilter) sel.value=state.providerFilter; | |
| } | |
| function renderHeader(){ | |
| const head=$("#tableHeaderRow"); | |
| head.querySelectorAll('th[data-ds]').forEach(el=>el.remove()); | |
| visibleDatasets().forEach(d=>{ | |
| const th=document.createElement('th'); | |
| th.dataset.ds=d; th.className='p-3 text-left ds-col ds-head'; | |
| th.innerHTML = `<div class="font-semibold" style="color: var(--text)">${d}</div><div class="text-[11px] text-slate-500">score • metric</div>`; | |
| head.appendChild(th); | |
| }); | |
| } | |
| function renderTable(){ | |
| renderHeader(); | |
| const ds=visibleDatasets(); | |
| const tb=$("#tableBody"); tb.innerHTML=''; | |
| let models=state.models.filter(m=>{ | |
| const text=(m.provider+" "+m.name+" "+m.repo).toLowerCase(); | |
| const okProvider=!state.providerFilter || m.provider===state.providerFilter; | |
| const okSearch=!state.search || text.includes(state.search.toLowerCase()); | |
| return okProvider && okSearch; | |
| }).map(m=>({...m, mean:modelRowMean(m, ds)})) | |
| .sort((a,b)=>(b.mean??-1)-(a.mean??-1)); | |
| models.forEach((m, i)=>{ | |
| const sMap=Object.fromEntries(m.scores.map(s=>[s.dataset_name, s])); | |
| const id=slug(m.provider+"-"+m.name); | |
| const favOn=state.compare.has(id); | |
| const row=document.createElement('tr'); | |
| row.className='border-b'; row.style.borderColor=getComputedStyle(document.documentElement).getPropertyValue('--border'); | |
| row.innerHTML = ` | |
| <td class="p-3 rank-col sticky-rank">${i+1}</td> | |
| <td class="p-3 provider-col sticky-provider border-l"> | |
| <div class="font-medium">${m.provider}</div> | |
| </td> | |
| <td class="p-3 model-col sticky-model border-l"> | |
| <div class="flex items-center gap-2"> | |
| <button class="text-lg" data-fav="${id}" title="Add to compare">${favOn?'⭐':'☆'}</button> | |
| <div class="font-semibold" style="color: var(--text)">${m.name}</div> | |
| </div> | |
| <div class="text-xs text-slate-500">${m.repo}</div> | |
| </td> | |
| <td class="p-3 mean-col sticky-mean border-l"> | |
| <div class="text-base font-semibold text-blue-500">${m.mean==null?'—':fmt(m.mean,2)}</div> | |
| <!--div class="text-[11px] text-slate-500">mean across selected</div--> | |
| </td> | |
| `; | |
| ds.forEach(d=>{ | |
| const s=sMap[d]; const mt=s?.metric_type; | |
| // const badge= mt==='llm-as-judge' ? 'metric-badge-judge' : 'metric-badge'; | |
| const title= s ? `Energy: ${fmt(s.energy_consumed,3)} kWh\nCO₂: ${fmt(s.co2_consumed,3)} kg` : ''; | |
| const cell=document.createElement('td'); | |
| cell.className='p-3 ds-col'; cell.title=title; | |
| cell.innerHTML = s ? ( | |
| mt==='llm-as-judge' ? ` | |
| <div class="flex item-center"> | |
| <div class="text-base font-semibold" style="color: var(--text)">${fmt(s.score,2)}</div> | |
| <div class="pill metric-badge-judge ms-2">${mt}</div> | |
| </div> | |
| ` : | |
| ` | |
| <div class="flex item-center"> | |
| <div class="text-base font-semibold" style="color: var(--text)">${fmt(s.score,2)}</div> | |
| </div> | |
| ` | |
| ) : '—'; | |
| row.appendChild(cell); | |
| }); | |
| tb.appendChild(row); | |
| }); | |
| $("#kpiModels").textContent=models.length; | |
| $$('button[data-fav]').forEach(b=>{ | |
| b.onclick=()=>{ | |
| const id=b.dataset.fav; | |
| if(state.compare.has(id)) state.compare.delete(id); | |
| else{ | |
| if(state.compare.size>=MAX_COMPARE){ showToast(`You can compare up to ${MAX_COMPARE} models.`); return; } | |
| state.compare.add(id); | |
| } | |
| savePrefs(); renderTable(); updateChart(); | |
| }; | |
| }); | |
| } | |
| // ===== Chart ===== | |
| let chart; | |
| function updateChart(){ | |
| const ds=visibleDatasets(); | |
| const labels=ds; | |
| const datasets=[]; | |
| const chosen=state.models.filter(m=>state.compare.has(slug(m.provider+"-"+m.name))); | |
| chosen.forEach(m=>{ | |
| const map=Object.fromEntries(m.scores.map(s=>[s.dataset_name, s.score])); | |
| datasets.push({ | |
| label:`${m.provider} / ${m.name}`, | |
| data: labels.map(d=>map[d] ?? null), | |
| tension:.25, spanGaps:true, borderWidth:3, pointRadius:3, pointHoverRadius:5, | |
| }); | |
| }); | |
| const ctx=document.getElementById('scoresChart').getContext('2d'); | |
| if(!chart){ | |
| chart=new Chart(ctx,{ | |
| type:'line', | |
| data:{labels, datasets}, | |
| options:{ | |
| responsive:true, | |
| maintainAspectRatio:false, // uses #chartWrap height (clamp -> responsive) | |
| plugins:{ | |
| legend:{ display:true, labels:{ boxWidth:18, usePointStyle:true }}, | |
| tooltip:{ mode:'index', intersect:false } | |
| }, | |
| interaction:{ mode:'nearest', intersect:false }, | |
| scales:{ | |
| y:{ beginAtZero:true, max:100, title:{display:true, text:'Score'}, grid:{ drawBorder:false }}, | |
| x:{ grid:{ display:false } } | |
| }, | |
| layout:{ padding:0 } | |
| } | |
| }); | |
| }else{ | |
| chart.data.labels=labels; | |
| chart.data.datasets=datasets; | |
| chart.update(); | |
| } | |
| } | |
| // ===== Tabs / Theme / Export ===== | |
| function bindTabs(){ | |
| $$(".tab").forEach(btn=>{ | |
| btn.addEventListener('click', ()=>{ | |
| const t=btn.dataset.tab; | |
| $$(".tab").forEach(b=>b.classList.remove('tab-active')); btn.classList.add('tab-active'); | |
| ["leaderboard","submit","about"].forEach(x=>{ | |
| const el=document.getElementById(`tab-${x}`); (x===t)?el.classList.remove('hidden'):el.classList.add('hidden'); | |
| }); | |
| }); | |
| }); | |
| } | |
| function applyTheme(){ | |
| document.documentElement.classList.toggle('dark', state.dark); | |
| $("#themeToggle").textContent= state.dark ? '☀️' : '🌙'; savePrefs(); | |
| } | |
| function exportCSV(){ | |
| const ds=visibleDatasets(); | |
| const headers=['Rank','Provider','Model','Repo','Mean',...ds]; | |
| const rows=[]; | |
| let models=state.models.map(m=>({...m, mean:modelRowMean(m, ds)})).sort((a,b)=>(b.mean??-1)-(a.mean??-1)); | |
| models=models.filter(m=>{ | |
| const text=(m.provider+" "+m.name+" "+m.repo).toLowerCase(); | |
| const okProvider=!state.providerFilter || m.provider===state.providerFilter; | |
| const okSearch=!state.search || text.includes(state.search.toLowerCase()); | |
| return okProvider && okSearch; | |
| }); | |
| models.forEach((m,i)=>{ | |
| const map=Object.fromEntries(m.scores.map(s=>[s.dataset_name, s])); | |
| const row=[i+1, m.provider, m.name, m.repo, fmt(m.mean,2)]; | |
| ds.forEach(d=>{ const s=map[d]; row.push(s? `${fmt(s.score,2)} (${s.metric_type})`:''); }); | |
| rows.push(row); | |
| }); | |
| const csv=[headers, ...rows].map(r=> r.map(x=>'"'+String(x).replaceAll('"','""')+'"').join(',')).join('\n'); | |
| const blob=new Blob([csv],{type:'text/csv;charset=utf-8;'}); const url=URL.createObjectURL(blob); | |
| const a=document.createElement('a'); a.href=url; a.download='telecom-llm-leaderboard.csv'; a.click(); URL.revokeObjectURL(url); | |
| } | |
| // ===== Controls & Submit ===== | |
| function bindControls(){ | |
| $("#selectAllBtn").onclick=()=>{ state.selectedDatasets=new Set(state.datasets); renderDatasetFilters(); renderTable(); updateChart(); savePrefs(); }; | |
| $("#clearAllBtn").onclick=()=>{ state.selectedDatasets=new Set(); renderDatasetFilters(); renderTable(); updateChart(); savePrefs(); }; | |
| $("#searchInput").addEventListener('input', (e)=>{ state.search=e.target.value; savePrefs(); renderTable(); }); | |
| $("#providerSelect").addEventListener('change', (e)=>{ state.providerFilter=e.target.value; savePrefs(); renderTable(); }); | |
| $("#refreshBtn").onclick=init; | |
| $("#exportCsvBtn").onclick=exportCSV; | |
| $("#themeToggle").onclick=()=>{ state.dark=!state.dark; applyTheme(); }; | |
| $("#submitForm").addEventListener('submit', async (e)=>{ | |
| e.preventDefault(); | |
| const payload={ | |
| model_provider: $("#modelProvider").value.trim(), | |
| model_name: $("#modelName").value.trim(), | |
| hf_repo: $("#hfRepo").value.trim(), | |
| contact_email: $("#contactEmail").value.trim(), | |
| notes: $("#notes").value.trim(), | |
| }; | |
| $("#submitStatus").textContent='Submitting…'; | |
| try{ | |
| const res=await apiPost('/api/submit', payload); | |
| $("#submitStatus").innerHTML=`Status: <span class="font-semibold">${res.status}</span> (id: ${res.id})`; | |
| showToast('Submission received — queued as pending'); e.target.reset(); | |
| }catch(err){ $("#submitStatus").textContent='Submission failed. See console.'; } | |
| }); | |
| } | |
| // ===== Init ===== | |
| async function init(){ | |
| loadPrefs(); applyTheme(); bindTabs(); bindControls(); | |
| try{ state.datasets=(await apiGet('/api/datasets')).datasets; } catch{ state.datasets=mockDatasets(); } | |
| try{ state.models=(await apiGet('/api/results')).models; }catch{ state.models=mockResults().models; } | |
| const last=state.models.map(m=>new Date(m.updated_at)).sort((a,b)=>b-a)[0]; | |
| if(last) $("#lastUpdated").textContent='Last updated '+last.toLocaleString(); | |
| if (!state.selectedDatasets.size) { | |
| state.selectedDatasets = new Set (state.datasets); | |
| } | |
| renderDatasetFilters(); renderProviders(); renderTable(); updateChart(); | |
| } | |
| init(); | |
| </script> | |
| </body> | |
| </html> |