fhueni committed on
Commit
9674cf0
·
1 Parent(s): b1ed689

feat: implement OpenRouter as cloud model provider, optimize UI, fix some issues

Browse files
index.html CHANGED
@@ -4,70 +4,83 @@
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1" />
6
  <title>Browser LLM Evaluation</title>
7
- <link rel="stylesheet" href="styles.css">
8
- <!-- Xenova transformers.js module import (no global script) -->
9
- <!-- Remove HuggingFace CDN, use ES module import in main.js -->
10
  </head>
11
- <body>
12
- <main class="container">
13
- <h1>Browser LLM Evaluation</h1>
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- <section class="grid">
17
- <div class="card">
18
- <h2>Cloud (OpenRouter)</h2>
19
- <label>API Key <input id="apiKey" type="password" placeholder="sk-..." /></label>
20
- <label>Model <input id="cloudModel" value="gpt-4o-mini" /></label>
21
  </div>
22
 
 
 
 
 
 
 
 
 
23
 
24
- <div class="card">
25
- <h2>On-Device</h2>
26
- <label>Model (transformers.js) <input id="deviceModel" value="Xenova/distilgpt2" /></label>
27
- <div id="deviceStatus">Not loaded</div>
28
- <div id="deviceLoadingContainer" style="margin:8px 0; width:100%; max-width:300px;">
29
- <div id="deviceLoadingBar" style="width:0%;height:8px;background:#4caf50;transition:width 0.2s;"></div>
30
- <span id="deviceLoadingText" style="font-size:0.9em;"></span>
31
  </div>
32
- <button id="loadDeviceModelBtn">Load Model</button>
33
  </div>
34
 
 
 
 
35
 
36
- <div class="card">
37
- <h2>Request Pattern</h2>
38
- <select id="patternSelect">
39
  <option value="once-per-sec">1 request / sec</option>
40
  <option value="every-ten-sec">Every 10 sec 1 request</option>
41
  <option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
42
  <option disabled value="burst">(not implemented) Burst: 50 then idle</option>
43
  </select>
44
- <label>Route strategy
45
- <select id="routeStrategy">
 
 
46
  <option value="roundrobin">Round Robin</option>
47
  <option value="probabilistic">Probabilistic (p to cloud)</option>
48
- <option value="always_cloud">Always cloud</option>
49
  <option value="always_device">Always device</option>
50
  </select>
51
  </label>
52
- <label>Cloud probability (for probabilistic) <input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5"/></label>
53
- <div class="buttons">
54
- <button id="startBtn">Start</button>
55
- <button id="stopBtn" disabled>Stop</button>
 
 
 
 
56
  </div>
57
  </div>
58
 
59
- <div class="card wide">
60
- <h2>Live Log & Results</h2>
61
- <div id="log" class="log"></div>
62
- <div id="stats"></div>
63
- <button id="downloadStats">Download Statistics</button>
 
64
  </div>
65
  </section>
66
-
67
-
68
  </main>
69
 
70
-
71
  <script type="module" src="./src/main.js"></script>
72
  </body>
73
- </html>
 
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1" />
6
  <title>Browser LLM Evaluation</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
 
 
8
  </head>
9
+ <body class="bg-gray-100 text-gray-900 min-h-screen">
10
+ <main class="max-w-6xl mx-auto p-6">
11
+ <h1 class="text-3xl font-bold mb-6 text-center">Browser LLM Evaluation</h1>
12
 
13
+ <section class="grid grid-cols-1 md:grid-cols-3 gap-6">
14
+ <!-- Cloud Card -->
15
+ <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
16
+ <h2 class="text-xl font-semibold mb-4">Cloud (OpenRouter)</h2>
17
+ <label class="block mb-4 text-sm font-medium">API Key
18
+ <input id="cloudApiKey" type="text" placeholder="Key..." class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
19
+ </label>
20
+ <select id="cloudModel" class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
21
+ <option value="openai/gpt-4o-mini">openai/gpt-4o-mini</option>
22
+ </select>
23
 
 
 
 
 
 
24
  </div>
25
 
26
+ <!-- On-Device Card -->
27
+ <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
28
+ <h2 class="text-xl font-semibold mb-4">On-Device</h2>
29
+ <label class="block mb-4 text-sm font-medium">Model (transformers.js)
30
+ <input id="deviceModel" value="Xenova/distilgpt2" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
31
+ </label>
32
+
33
+ <div id="deviceStatus" class="text-gray-700 text-sm mb-2">Not loaded</div>
34
 
35
+ <button id="loadDeviceModelBtn" class="mt-4 w-full bg-blue-600 text-white py-2 rounded-lg hover:bg-blue-700 transition">Load Model</button>
36
+
37
+ <div id="deviceLoadingContainer" class="w-full max-w-xs my-2">
38
+ <div id="deviceLoadingBar" class="h-2 bg-green-500 transition-all duration-200 w-0"></div>
39
+ <span id="deviceLoadingText" class="text-xs text-gray-600"></span>
 
 
40
  </div>
41
+
42
  </div>
43
 
44
+ <!-- Request Pattern Card -->
45
+ <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
46
+ <h2 class="text-xl font-semibold mb-4">Request Pattern</h2>
47
 
48
+ <select id="patternSelect" class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
 
 
49
  <option value="once-per-sec">1 request / sec</option>
50
  <option value="every-ten-sec">Every 10 sec 1 request</option>
51
  <option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
52
  <option disabled value="burst">(not implemented) Burst: 50 then idle</option>
53
  </select>
54
+
55
+ <label class="block mb-4 text-sm font-medium">Route strategy
56
+ <select id="routeStrategy" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
57
+ <option value="always_cloud">Always cloud</option>
58
  <option value="roundrobin">Round Robin</option>
59
  <option value="probabilistic">Probabilistic (p to cloud)</option>
 
60
  <option value="always_device">Always device</option>
61
  </select>
62
  </label>
63
+
64
+ <label class="block mb-4 text-sm font-medium">Cloud probability (for probabilistic)
65
+ <input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
66
+ </label>
67
+
68
+ <div class="flex gap-3 mt-4">
69
+ <button id="startBtn" class="flex-1 bg-green-600 text-white py-2 rounded-lg hover:bg-green-700 transition">Start</button>
70
+ <button id="stopBtn" disabled class="flex-1 bg-gray-400 text-white py-2 rounded-lg">Stop</button>
71
  </div>
72
  </div>
73
 
74
+ <!-- Log Card -->
75
+ <div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200 md:col-span-3">
76
+ <h2 class="text-xl font-semibold mb-4">Live Log & Results</h2>
77
+ <div id="log" class="h-64 overflow-auto bg-gray-50 p-3 rounded-lg border border-gray-200 text-sm"></div>
78
+ <div id="stats" class="mt-4 text-sm text-gray-800"></div>
79
+ <button id="downloadStats" class="mt-4 w-full bg-purple-600 text-white py-2 rounded-lg hover:bg-purple-700 transition">Download Statistics</button>
80
  </div>
81
  </section>
 
 
82
  </main>
83
 
 
84
  <script type="module" src="./src/main.js"></script>
85
  </body>
86
+ </html>
src/main.js CHANGED
@@ -14,13 +14,13 @@ const deviceStatusEl = document.getElementById('deviceStatus');
14
 
15
  // instantiate services and components
16
  const onDeviceInferenceService = new OnDeviceService({modelName: document.getElementById('deviceModel').value});
17
- const cloudInferenceService = new CloudService({apiKey: '', model: document.getElementById('cloudModel').value});
18
  const evaluator = new Evaluator();
19
 
20
 
21
  const requestManager = new RequestManager({
22
  deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
23
- logTo(logEl, `${evt.job.id} -> ${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} | question="${evt.job.prompt.substring(0, 30)}..."`);
24
  updateStats();
25
  }
26
  });
@@ -42,7 +42,7 @@ document.getElementById('deviceModel').addEventListener('change', (e) =>
42
  document.getElementById('cloudModel').addEventListener('change', (e) =>
43
  cloudInferenceService.updateConfig({model: e.target.value})
44
  );
45
- document.getElementById('apiKey').addEventListener('input', (e) =>
46
  cloudInferenceService.updateConfig({apiKey: e.target.value})
47
  );
48
 
@@ -90,6 +90,7 @@ document.getElementById('loadDeviceModelBtn').addEventListener('click', () => {
90
  async function loadDeviceModel() {
91
  deviceStatusEl.textContent = 'Loading...';
92
  document.getElementById('loadDeviceModelBtn').disabled = true;
 
93
  const loadingBar = document.getElementById('deviceLoadingBar');
94
  const loadingText = document.getElementById('deviceLoadingText');
95
  loadingBar.style.width = '0%';
 
14
 
15
  // instantiate services and components
16
  const onDeviceInferenceService = new OnDeviceService({modelName: document.getElementById('deviceModel').value});
17
+ const cloudInferenceService = new CloudService({apiKey: document.getElementById('cloudApiKey').value, model: document.getElementById('cloudModel').value});
18
  const evaluator = new Evaluator();
19
 
20
 
21
  const requestManager = new RequestManager({
22
  deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
23
+ logTo(logEl, `${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} | question="${evt.job.prompt.substring(0, 30)}..."`);
24
  updateStats();
25
  }
26
  });
 
42
  document.getElementById('cloudModel').addEventListener('change', (e) =>
43
  cloudInferenceService.updateConfig({model: e.target.value})
44
  );
45
+ document.getElementById('cloudApiKey').addEventListener('input', (e) =>
46
  cloudInferenceService.updateConfig({apiKey: e.target.value})
47
  );
48
 
 
90
  async function loadDeviceModel() {
91
  deviceStatusEl.textContent = 'Loading...';
92
  document.getElementById('loadDeviceModelBtn').disabled = true;
93
+ document.getElementById('loadDeviceModelBtn').textContent = 'Loading Model...';
94
  const loadingBar = document.getElementById('deviceLoadingBar');
95
  const loadingText = document.getElementById('deviceLoadingText');
96
  loadingBar.style.width = '0%';
src/requestManager.js CHANGED
@@ -95,9 +95,11 @@ export class RequestManager {
95
  const route = this._choose(job);
96
  const service = this._getInferenceService(route);
97
 
 
 
98
  let text, latencyMs;
99
  try {
100
- const {res, ms} = await measureAsync(() => service.infer(job.prompt));
101
  text = res;
102
  latencyMs = ms;
103
  } catch (err) {
 
95
  const route = this._choose(job);
96
  const service = this._getInferenceService(route);
97
 
98
+ const full_prompt = "Please answer the following question with True or False: " + job.prompt + "\nAnswer: "; // ensure string input
99
+
100
  let text, latencyMs;
101
  try {
102
+ const {res, ms} = await measureAsync(() => service.infer(full_prompt));
103
  text = res;
104
  latencyMs = ms;
105
  } catch (err) {
src/scheduler.js CHANGED
@@ -42,7 +42,7 @@ export class JobScheduler {
42
  }
43
  } else if (patternName === 'every-ten-sec') {
44
  let i = 0;
45
- const interval = 100; // ms
46
  while (this._dataset.length > 0 && this.running) {
47
  const item = this._dataset.pop();
48
  this._emit(item);
 
42
  }
43
  } else if (patternName === 'every-ten-sec') {
44
  let i = 0;
45
+ const interval = 10000; // ms
46
  while (this._dataset.length > 0 && this.running) {
47
  const item = this._dataset.pop();
48
  this._emit(item);
src/services/cloudService.js CHANGED
@@ -1,5 +1,4 @@
1
  // CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
2
-
3
  /**
4
  * Cloud inference service using a remote API from OpenRouter to access different models over one API.
5
  *
@@ -7,7 +6,7 @@
7
  export class CloudService {
8
  constructor({apiKey, model} = {}) {
9
  this.apiKey = apiKey;
10
- this.model = model || 'gpt-4o-mini';
11
  }
12
 
13
 
@@ -32,13 +31,15 @@ export class CloudService {
32
  async infer(prompt) {
33
  if (!this.apiKey) throw new Error('No API key set for CloudService');
34
 
 
35
  const payload = {
36
  model: this.model,
 
37
  messages: [{role: 'user', content: prompt}]
38
  };
39
 
40
  // call the api
41
- const resp = await fetch('https://api.openrouter.ai/v1/chat/completions', {
42
  method: 'POST',
43
  headers: {
44
  'Content-Type': 'application/json',
@@ -66,6 +67,13 @@ export class CloudService {
66
  } catch (e) {
67
  text = JSON.stringify(json).slice(0, 200);
68
  }
69
- return text;
 
 
 
 
 
 
 
70
  }
71
  }
 
1
  // CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
 
2
  /**
3
  * Cloud inference service using a remote API from OpenRouter to access different models over one API.
4
  *
 
6
  export class CloudService {
7
  constructor({apiKey, model} = {}) {
8
  this.apiKey = apiKey;
9
+ this.model = model;
10
  }
11
 
12
 
 
31
  async infer(prompt) {
32
  if (!this.apiKey) throw new Error('No API key set for CloudService');
33
 
34
+ // prepare payload with prompt
35
  const payload = {
36
  model: this.model,
37
+ max_tokens: 50,
38
  messages: [{role: 'user', content: prompt}]
39
  };
40
 
41
  // call the api
42
+ const resp = await fetch('https://openrouter.ai/api/v1/chat/completions', {
43
  method: 'POST',
44
  headers: {
45
  'Content-Type': 'application/json',
 
67
  } catch (e) {
68
  text = JSON.stringify(json).slice(0, 200);
69
  }
70
+
71
+ return {
72
+ answer: text,
73
+ stats: {
74
+ input_tokens: json.usage?.prompt_tokens || 0,
75
+ output_tokens: json.usage?.completion_tokens || 0
76
+ }
77
+ };
78
  }
79
  }
src/services/onDeviceService.js CHANGED
@@ -70,7 +70,6 @@ export class OnDeviceService {
70
  console.log("model not ready:" , this._ready, this._model);
71
  throw new Error('Model not loaded. Call load() first.');
72
  }
73
- prompt = "Please answer the following question: " + prompt + "\nAnswer: "; // ensure string input
74
  console.log("running inference on-device:\n", prompt);
75
 
76
  const output = await this._model(prompt, {
@@ -82,8 +81,10 @@ export class OnDeviceService {
82
  num_return_sequences: 1,
83
  });
84
 
85
- // Return generated text
86
- return output[0]?.generated_text?.trim() || '';
 
 
87
  }
88
 
89
  /**
 
70
  console.log("model not ready:" , this._ready, this._model);
71
  throw new Error('Model not loaded. Call load() first.');
72
  }
 
73
  console.log("running inference on-device:\n", prompt);
74
 
75
  const output = await this._model(prompt, {
 
81
  num_return_sequences: 1,
82
  });
83
 
84
+ const text = output[0]?.generated_text?.trim() || '';
85
+
86
+ // todo calculate input and output tokens
87
+ return {answer: text, stats: {input_tokens: undefined, output_tokens: undefined}};
88
  }
89
 
90
  /**