Spaces:

fhueni
/

on-device-vs-cloud-llm-inference

Running

App Files Community

fhueni committed 30 days ago

Commit

9674cf0

1 Parent(s): b1ed689

feat: implement OpenRouter as cloud model provider, optimize UI, fix some issues

Browse files

Files changed (6) hide show

index.html +51 -38
src/main.js +4 -3
src/requestManager.js +3 -1
src/scheduler.js +1 -1
src/services/cloudService.js +12 -4
src/services/onDeviceService.js +4 -3

index.html CHANGED Viewed

@@ -4,70 +4,83 @@
     <meta charset="utf-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1" />
     <title>Browser LLM Evaluation</title>
-    <link rel="stylesheet" href="styles.css">
-    <!-- Xenova transformers.js module import (no global script) -->
-    <!-- Remove HuggingFace CDN, use ES module import in main.js -->
 </head>
-<body>
-<main class="container">
-    <h1>Browser LLM Evaluation</h1>
-    <section class="grid">
-        <div class="card">
-            <h2>Cloud (OpenRouter)</h2>
-            <label>API Key <input id="apiKey" type="password" placeholder="sk-..." /></label>
-            <label>Model <input id="cloudModel" value="gpt-4o-mini" /></label>
         </div>
-        <div class="card">
-            <h2>On-Device</h2>
-            <label>Model (transformers.js) <input id="deviceModel" value="Xenova/distilgpt2" /></label>
-            <div id="deviceStatus">Not loaded</div>
-            <div id="deviceLoadingContainer" style="margin:8px 0; width:100%; max-width:300px;">
-                <div id="deviceLoadingBar" style="width:0%;height:8px;background:#4caf50;transition:width 0.2s;"></div>
-                <span id="deviceLoadingText" style="font-size:0.9em;"></span>
             </div>
-            <button id="loadDeviceModelBtn">Load Model</button>
         </div>
-        <div class="card">
-            <h2>Request Pattern</h2>
-            <select id="patternSelect">
                 <option value="once-per-sec">1 request / sec</option>
                 <option value="every-ten-sec">Every 10 sec 1 request</option>
                 <option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
                 <option disabled value="burst">(not implemented) Burst: 50 then idle</option>
             </select>
-            <label>Route strategy
-                <select id="routeStrategy">
                     <option value="roundrobin">Round Robin</option>
                     <option value="probabilistic">Probabilistic (p to cloud)</option>
-                    <option value="always_cloud">Always cloud</option>
                     <option value="always_device">Always device</option>
                 </select>
             </label>
-            <label>Cloud probability (for probabilistic) <input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5"/></label>
-            <div class="buttons">
-                <button id="startBtn">Start</button>
-                <button id="stopBtn" disabled>Stop</button>
             </div>
         </div>
-        <div class="card wide">
-            <h2>Live Log & Results</h2>
-            <div id="log" class="log"></div>
-            <div id="stats"></div>
-            <button id="downloadStats">Download Statistics</button>
         </div>
     </section>
 </main>
 <script type="module" src="./src/main.js"></script>
 </body>
-</html>

     <meta charset="utf-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1" />
     <title>Browser LLM Evaluation</title>
+    <script src="https://cdn.tailwindcss.com"></script>
 </head>
+<body class="bg-gray-100 text-gray-900 min-h-screen">
+<main class="max-w-6xl mx-auto p-6">
+    <h1 class="text-3xl font-bold mb-6 text-center">Browser LLM Evaluation</h1>
+    <section class="grid grid-cols-1 md:grid-cols-3 gap-6">
+        <!-- Cloud Card -->
+        <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
+            <h2 class="text-xl font-semibold mb-4">Cloud (OpenRouter)</h2>
+            <label class="block mb-4 text-sm font-medium">API Key
+                <input id="cloudApiKey" type="text" placeholder="Key..." class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
+            </label>
+            <select id="cloudModel" class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
+                <option value="openai/gpt-4o-mini">openai/gpt-4o-mini</option>
+            </select>
         </div>
+        <!-- On-Device Card -->
+        <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
+            <h2 class="text-xl font-semibold mb-4">On-Device</h2>
+            <label class="block mb-4 text-sm font-medium">Model (transformers.js)
+                <input id="deviceModel" value="Xenova/distilgpt2" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
+            </label>
+            <div id="deviceStatus" class="text-gray-700 text-sm mb-2">Not loaded</div>
+            <button id="loadDeviceModelBtn" class="mt-4 w-full bg-blue-600 text-white py-2 rounded-lg hover:bg-blue-700 transition">Load Model</button>
+            <div id="deviceLoadingContainer" class="w-full max-w-xs my-2">
+                <div id="deviceLoadingBar" class="h-2 bg-green-500 transition-all duration-200 w-0"></div>
+                <span id="deviceLoadingText" class="text-xs text-gray-600"></span>
             </div>
         </div>
+        <!-- Request Pattern Card -->
+        <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
+            <h2 class="text-xl font-semibold mb-4">Request Pattern</h2>
+            <select id="patternSelect" class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
                 <option value="once-per-sec">1 request / sec</option>
                 <option value="every-ten-sec">Every 10 sec 1 request</option>
                 <option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
                 <option disabled value="burst">(not implemented) Burst: 50 then idle</option>
             </select>
+            <label class="block mb-4 text-sm font-medium">Route strategy
+                <select id="routeStrategy" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
+                    <option value="always_cloud">Always cloud</option>
                     <option value="roundrobin">Round Robin</option>
                     <option value="probabilistic">Probabilistic (p to cloud)</option>
                     <option value="always_device">Always device</option>
                 </select>
             </label>
+            <label class="block mb-4 text-sm font-medium">Cloud probability (for probabilistic)
+                <input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
+            </label>
+            <div class="flex gap-3 mt-4">
+                <button id="startBtn" class="flex-1 bg-green-600 text-white py-2 rounded-lg hover:bg-green-700 transition">Start</button>
+                <button id="stopBtn" disabled class="flex-1 bg-gray-400 text-white py-2 rounded-lg">Stop</button>
             </div>
         </div>
+        <!-- Log Card -->
+        <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200 md:col-span-3">
+            <h2 class="text-xl font-semibold mb-4">Live Log & Results</h2>
+            <div id="log" class="h-64 overflow-auto bg-gray-50 p-3 rounded-lg border border-gray-200 text-sm"></div>
+            <div id="stats" class="mt-4 text-sm text-gray-800"></div>
+            <button id="downloadStats" class="mt-4 w-full bg-purple-600 text-white py-2 rounded-lg hover:bg-purple-700 transition">Download Statistics</button>
         </div>
     </section>
 </main>
 <script type="module" src="./src/main.js"></script>
 </body>
+</html>

src/main.js CHANGED Viewed

@@ -14,13 +14,13 @@ const deviceStatusEl = document.getElementById('deviceStatus');
 // instantiate services and components
 const onDeviceInferenceService = new OnDeviceService({modelName: document.getElementById('deviceModel').value});
-const cloudInferenceService = new CloudService({apiKey: '', model: document.getElementById('cloudModel').value});
 const evaluator = new Evaluator();
 const requestManager = new RequestManager({
     deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
-        logTo(logEl, `${evt.job.id} -> ${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} | question="${evt.job.prompt.substring(0, 30)}..."`);
         updateStats();
     }
 });
@@ -42,7 +42,7 @@ document.getElementById('deviceModel').addEventListener('change', (e) =>
 document.getElementById('cloudModel').addEventListener('change', (e) =>
     cloudInferenceService.updateConfig({model: e.target.value})
 );
-document.getElementById('apiKey').addEventListener('input', (e) =>
     cloudInferenceService.updateConfig({apiKey: e.target.value})
 );
@@ -90,6 +90,7 @@ document.getElementById('loadDeviceModelBtn').addEventListener('click', () => {
 async function loadDeviceModel() {
     deviceStatusEl.textContent = 'Loading...';
     document.getElementById('loadDeviceModelBtn').disabled = true;
     const loadingBar = document.getElementById('deviceLoadingBar');
     const loadingText = document.getElementById('deviceLoadingText');
     loadingBar.style.width = '0%';

 // instantiate services and components
 const onDeviceInferenceService = new OnDeviceService({modelName: document.getElementById('deviceModel').value});
+const cloudInferenceService = new CloudService({apiKey: document.getElementById('cloudApiKey').value, model: document.getElementById('cloudModel').value});
 const evaluator = new Evaluator();
 const requestManager = new RequestManager({
     deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
+        logTo(logEl, `${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} | question="${evt.job.prompt.substring(0, 30)}..."`);
         updateStats();
     }
 });
 document.getElementById('cloudModel').addEventListener('change', (e) =>
     cloudInferenceService.updateConfig({model: e.target.value})
 );
+document.getElementById('cloudApiKey').addEventListener('input', (e) =>
     cloudInferenceService.updateConfig({apiKey: e.target.value})
 );
 async function loadDeviceModel() {
     deviceStatusEl.textContent = 'Loading...';
     document.getElementById('loadDeviceModelBtn').disabled = true;
+    document.getElementById('loadDeviceModelBtn').textContent = 'Loading Model...';
     const loadingBar = document.getElementById('deviceLoadingBar');
     const loadingText = document.getElementById('deviceLoadingText');
     loadingBar.style.width = '0%';

src/requestManager.js CHANGED Viewed

@@ -95,9 +95,11 @@ export class RequestManager {
         const route = this._choose(job);
         const service = this._getInferenceService(route);
         let text, latencyMs;
         try {
-            const {res, ms} = await measureAsync(() => service.infer(job.prompt));
             text = res;
             latencyMs = ms;
         } catch (err) {

         const route = this._choose(job);
         const service = this._getInferenceService(route);
+        const full_prompt = "Please answer the following question with True or False: " + job.prompt + "\nAnswer: "; // ensure string input
         let text, latencyMs;
         try {
+            const {res, ms} = await measureAsync(() => service.infer(full_prompt));
             text = res;
             latencyMs = ms;
         } catch (err) {

src/scheduler.js CHANGED Viewed

@@ -42,7 +42,7 @@ export class JobScheduler {
             }
         } else if (patternName === 'every-ten-sec') {
             let i = 0;
-            const interval = 100; // ms
             while (this._dataset.length > 0 && this.running) {
                 const item = this._dataset.pop();
                 this._emit(item);

             }
         } else if (patternName === 'every-ten-sec') {
             let i = 0;
+            const interval = 10000; // ms
             while (this._dataset.length > 0 && this.running) {
                 const item = this._dataset.pop();
                 this._emit(item);

src/services/cloudService.js CHANGED Viewed

@@ -1,5 +1,4 @@
 // CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
 /**
  * Cloud inference service using a remote API from OpenRouter to access different models over one API.
  *
@@ -7,7 +6,7 @@
 export class CloudService {
     constructor({apiKey, model} = {}) {
         this.apiKey = apiKey;
-        this.model = model || 'gpt-4o-mini';
     }
@@ -32,13 +31,15 @@ export class CloudService {
     async infer(prompt) {
         if (!this.apiKey) throw new Error('No API key set for CloudService');
         const payload = {
             model: this.model,
             messages: [{role: 'user', content: prompt}]
         };
         // call the api
-        const resp = await fetch('https://api.openrouter.ai/v1/chat/completions', {
             method: 'POST',
             headers: {
                 'Content-Type': 'application/json',
@@ -66,6 +67,13 @@ export class CloudService {
         } catch (e) {
             text = JSON.stringify(json).slice(0, 200);
         }
-        return text;
     }
 }

 // CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
 /**
  * Cloud inference service using a remote API from OpenRouter to access different models over one API.
  *
 export class CloudService {
     constructor({apiKey, model} = {}) {
         this.apiKey = apiKey;
+        this.model = model;
     }
     async infer(prompt) {
         if (!this.apiKey) throw new Error('No API key set for CloudService');
+        // prepare payload with prompt
         const payload = {
             model: this.model,
+            max_tokens: 50,
             messages: [{role: 'user', content: prompt}]
         };
         // call the api
+        const resp = await fetch('https://openrouter.ai/api/v1/chat/completions', {
             method: 'POST',
             headers: {
                 'Content-Type': 'application/json',
         } catch (e) {
             text = JSON.stringify(json).slice(0, 200);
         }
+        return {
+            answer: text,
+            stats: {
+                input_tokens: json.usage?.prompt_tokens || 0,
+                output_tokens: json.usage?.completion_tokens || 0
+            }
+        };
     }
 }

src/services/onDeviceService.js CHANGED Viewed

@@ -70,7 +70,6 @@ export class OnDeviceService {
             console.log("model not ready:" , this._ready, this._model);
             throw new Error('Model not loaded. Call load() first.');
         }
-        prompt = "Please answer the following question: " + prompt + "\nAnswer: "; // ensure string input
         console.log("running inference on-device:\n", prompt);
         const output = await this._model(prompt, {
@@ -82,8 +81,10 @@ export class OnDeviceService {
             num_return_sequences: 1,
         });
-        // Return generated text
-        return output[0]?.generated_text?.trim() || '';
     }
     /**

             console.log("model not ready:" , this._ready, this._model);
             throw new Error('Model not loaded. Call load() first.');
         }
         console.log("running inference on-device:\n", prompt);
         const output = await this._model(prompt, {
             num_return_sequences: 1,
         });
+        const text = output[0]?.generated_text?.trim() || '';
+        // todo calculate input and output tokens
+        return {answer: text, stats: {input_tokens: undefined, output_tokens: undefined}};
     }
     /**