feat: implement OpenRouter as cloud model provider, optimize UI, fix some issues
Browse files
- index.html +51 -38
- src/main.js +4 -3
- src/requestManager.js +3 -1
- src/scheduler.js +1 -1
- src/services/cloudService.js +12 -4
- src/services/onDeviceService.js +4 -3
index.html
CHANGED
|
@@ -4,70 +4,83 @@
|
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
<title>Browser LLM Evaluation</title>
|
| 7 |
-
<
|
| 8 |
-
<!-- Xenova transformers.js module import (no global script) -->
|
| 9 |
-
<!-- Remove HuggingFace CDN, use ES module import in main.js -->
|
| 10 |
</head>
|
| 11 |
-
<body>
|
| 12 |
-
<main class="
|
| 13 |
-
<h1>Browser LLM Evaluation</h1>
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
<section class="grid">
|
| 17 |
-
<div class="card">
|
| 18 |
-
<h2>Cloud (OpenRouter)</h2>
|
| 19 |
-
<label>API Key <input id="apiKey" type="password" placeholder="sk-..." /></label>
|
| 20 |
-
<label>Model <input id="cloudModel" value="gpt-4o-mini" /></label>
|
| 21 |
</div>
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
<
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
<div id="deviceLoadingBar" style="width:0%;height:8px;background:#4caf50;transition:width 0.2s;"></div>
|
| 30 |
-
<span id="deviceLoadingText" style="font-size:0.9em;"></span>
|
| 31 |
</div>
|
| 32 |
-
|
| 33 |
</div>
|
| 34 |
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
<h2>Request Pattern</h2>
|
| 38 |
-
<select id="patternSelect">
|
| 39 |
<option value="once-per-sec">1 request / sec</option>
|
| 40 |
<option value="every-ten-sec">Every 10 sec 1 request</option>
|
| 41 |
<option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
|
| 42 |
<option disabled value="burst">(not implemented) Burst: 50 then idle</option>
|
| 43 |
</select>
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
<option value="roundrobin">Round Robin</option>
|
| 47 |
<option value="probabilistic">Probabilistic (p to cloud)</option>
|
| 48 |
-
<option value="always_cloud">Always cloud</option>
|
| 49 |
<option value="always_device">Always device</option>
|
| 50 |
</select>
|
| 51 |
</label>
|
| 52 |
-
|
| 53 |
-
<
|
| 54 |
-
<
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
</div>
|
| 57 |
</div>
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
<
|
| 62 |
-
<div id="
|
| 63 |
-
<
|
|
|
|
| 64 |
</div>
|
| 65 |
</section>
|
| 66 |
-
|
| 67 |
-
|
| 68 |
</main>
|
| 69 |
|
| 70 |
-
|
| 71 |
<script type="module" src="./src/main.js"></script>
|
| 72 |
</body>
|
| 73 |
-
</html>
|
|
|
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
<title>Browser LLM Evaluation</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
|
|
|
|
|
|
| 8 |
</head>
|
| 9 |
+
<body class="bg-gray-100 text-gray-900 min-h-screen">
|
| 10 |
+
<main class="max-w-6xl mx-auto p-6">
|
| 11 |
+
<h1 class="text-3xl font-bold mb-6 text-center">Browser LLM Evaluation</h1>
|
| 12 |
|
| 13 |
+
<section class="grid grid-cols-1 md:grid-cols-3 gap-6">
|
| 14 |
+
<!-- Cloud Card -->
|
| 15 |
+
<div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
|
| 16 |
+
<h2 class="text-xl font-semibold mb-4">Cloud (OpenRouter)</h2>
|
| 17 |
+
<label class="block mb-4 text-sm font-medium">API Key
|
| 18 |
+
<!-- The API key is a secret: keep the field masked so it is not exposed on screen or in screenshots. Scripts still read it via .value and the 'input' event. -->
<input id="cloudApiKey" type="password" placeholder="Key..." autocomplete="off" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
|
| 19 |
+
</label>
|
| 20 |
+
<select id="cloudModel" class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
|
| 21 |
+
<option value="openai/gpt-4o-mini">openai/gpt-4o-mini</option>
|
| 22 |
+
</select>
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
</div>
|
| 25 |
|
| 26 |
+
<!-- On-Device Card -->
|
| 27 |
+
<div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
|
| 28 |
+
<h2 class="text-xl font-semibold mb-4">On-Device</h2>
|
| 29 |
+
<label class="block mb-4 text-sm font-medium">Model (transformers.js)
|
| 30 |
+
<input id="deviceModel" value="Xenova/distilgpt2" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
|
| 31 |
+
</label>
|
| 32 |
+
|
| 33 |
+
<div id="deviceStatus" class="text-gray-700 text-sm mb-2">Not loaded</div>
|
| 34 |
|
| 35 |
+
<button id="loadDeviceModelBtn" class="mt-4 w-full bg-blue-600 text-white py-2 rounded-lg hover:bg-blue-700 transition">Load Model</button>
|
| 36 |
+
|
| 37 |
+
<div id="deviceLoadingContainer" class="w-full max-w-xs my-2">
|
| 38 |
+
<div id="deviceLoadingBar" class="h-2 bg-green-500 transition-all duration-200 w-0"></div>
|
| 39 |
+
<span id="deviceLoadingText" class="text-xs text-gray-600"></span>
|
|
|
|
|
|
|
| 40 |
</div>
|
| 41 |
+
|
| 42 |
</div>
|
| 43 |
|
| 44 |
+
<!-- Request Pattern Card -->
|
| 45 |
+
<div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200">
|
| 46 |
+
<h2 class="text-xl font-semibold mb-4">Request Pattern</h2>
|
| 47 |
|
| 48 |
+
<select id="patternSelect" class="w-full mb-4 px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
|
|
|
|
|
|
|
| 49 |
<option value="once-per-sec">1 request / sec</option>
|
| 50 |
<option value="every-ten-sec">Every 10 sec 1 request</option>
|
| 51 |
<option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
|
| 52 |
<option disabled value="burst">(not implemented) Burst: 50 then idle</option>
|
| 53 |
</select>
|
| 54 |
+
|
| 55 |
+
<label class="block mb-4 text-sm font-medium">Route strategy
|
| 56 |
+
<select id="routeStrategy" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none">
|
| 57 |
+
<option value="always_cloud">Always cloud</option>
|
| 58 |
<option value="roundrobin">Round Robin</option>
|
| 59 |
<option value="probabilistic">Probabilistic (p to cloud)</option>
|
|
|
|
| 60 |
<option value="always_device">Always device</option>
|
| 61 |
</select>
|
| 62 |
</label>
|
| 63 |
+
|
| 64 |
+
<label class="block mb-4 text-sm font-medium">Cloud probability (for probabilistic)
|
| 65 |
+
<input id="cloudProb" type="number" min="0" max="1" step="0.1" value="0.5" class="mt-1 w-full px-3 py-2 rounded-lg border border-gray-300 focus:ring-2 focus:ring-blue-500 focus:outline-none" />
|
| 66 |
+
</label>
|
| 67 |
+
|
| 68 |
+
<div class="flex gap-3 mt-4">
|
| 69 |
+
<button id="startBtn" class="flex-1 bg-green-600 text-white py-2 rounded-lg hover:bg-green-700 transition">Start</button>
|
| 70 |
+
<button id="stopBtn" disabled class="flex-1 bg-gray-400 text-white py-2 rounded-lg">Stop</button>
|
| 71 |
</div>
|
| 72 |
</div>
|
| 73 |
|
| 74 |
+
<!-- Log Card -->
|
| 75 |
+
<div class="bg-white p-6 rounded-2xl shadow-xl border border-gray-200 md:col-span-3">
|
| 76 |
+
<h2 class="text-xl font-semibold mb-4">Live Log & Results</h2>
|
| 77 |
+
<div id="log" class="h-64 overflow-auto bg-gray-50 p-3 rounded-lg border border-gray-200 text-sm"></div>
|
| 78 |
+
<div id="stats" class="mt-4 text-sm text-gray-800"></div>
|
| 79 |
+
<button id="downloadStats" class="mt-4 w-full bg-purple-600 text-white py-2 rounded-lg hover:bg-purple-700 transition">Download Statistics</button>
|
| 80 |
</div>
|
| 81 |
</section>
|
|
|
|
|
|
|
| 82 |
</main>
|
| 83 |
|
|
|
|
| 84 |
<script type="module" src="./src/main.js"></script>
|
| 85 |
</body>
|
| 86 |
+
</html>
|
src/main.js
CHANGED
|
@@ -14,13 +14,13 @@ const deviceStatusEl = document.getElementById('deviceStatus');
|
|
| 14 |
|
| 15 |
// instantiate services and components
|
| 16 |
const onDeviceInferenceService = new OnDeviceService({modelName: document.getElementById('deviceModel').value});
|
| 17 |
-
const cloudInferenceService = new CloudService({apiKey: '', model: document.getElementById('cloudModel').value});
|
| 18 |
const evaluator = new Evaluator();
|
| 19 |
|
| 20 |
|
| 21 |
const requestManager = new RequestManager({
|
| 22 |
deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
|
| 23 |
-
logTo(logEl, `${evt.
|
| 24 |
updateStats();
|
| 25 |
}
|
| 26 |
});
|
|
@@ -42,7 +42,7 @@ document.getElementById('deviceModel').addEventListener('change', (e) =>
|
|
| 42 |
document.getElementById('cloudModel').addEventListener('change', (e) =>
|
| 43 |
cloudInferenceService.updateConfig({model: e.target.value})
|
| 44 |
);
|
| 45 |
-
document.getElementById('
|
| 46 |
cloudInferenceService.updateConfig({apiKey: e.target.value})
|
| 47 |
);
|
| 48 |
|
|
@@ -90,6 +90,7 @@ document.getElementById('loadDeviceModelBtn').addEventListener('click', () => {
|
|
| 90 |
async function loadDeviceModel() {
|
| 91 |
deviceStatusEl.textContent = 'Loading...';
|
| 92 |
document.getElementById('loadDeviceModelBtn').disabled = true;
|
|
|
|
| 93 |
const loadingBar = document.getElementById('deviceLoadingBar');
|
| 94 |
const loadingText = document.getElementById('deviceLoadingText');
|
| 95 |
loadingBar.style.width = '0%';
|
|
|
|
| 14 |
|
| 15 |
// instantiate services and components
|
| 16 |
const onDeviceInferenceService = new OnDeviceService({modelName: document.getElementById('deviceModel').value});
|
| 17 |
+
const cloudInferenceService = new CloudService({apiKey: document.getElementById('cloudApiKey').value, model: document.getElementById('cloudModel').value});
|
| 18 |
const evaluator = new Evaluator();
|
| 19 |
|
| 20 |
|
| 21 |
const requestManager = new RequestManager({
|
| 22 |
deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
|
| 23 |
+
logTo(logEl, `${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} | question="${evt.job.prompt.substring(0, 30)}..."`);
|
| 24 |
updateStats();
|
| 25 |
}
|
| 26 |
});
|
|
|
|
| 42 |
document.getElementById('cloudModel').addEventListener('change', (e) =>
|
| 43 |
cloudInferenceService.updateConfig({model: e.target.value})
|
| 44 |
);
|
| 45 |
+
document.getElementById('cloudApiKey').addEventListener('input', (e) =>
|
| 46 |
cloudInferenceService.updateConfig({apiKey: e.target.value})
|
| 47 |
);
|
| 48 |
|
|
|
|
| 90 |
async function loadDeviceModel() {
|
| 91 |
deviceStatusEl.textContent = 'Loading...';
|
| 92 |
document.getElementById('loadDeviceModelBtn').disabled = true;
|
| 93 |
+
document.getElementById('loadDeviceModelBtn').textContent = 'Loading Model...';
|
| 94 |
const loadingBar = document.getElementById('deviceLoadingBar');
|
| 95 |
const loadingText = document.getElementById('deviceLoadingText');
|
| 96 |
loadingBar.style.width = '0%';
|
src/requestManager.js
CHANGED
|
@@ -95,9 +95,11 @@ export class RequestManager {
|
|
| 95 |
const route = this._choose(job);
|
| 96 |
const service = this._getInferenceService(route);
|
| 97 |
|
|
|
|
|
|
|
| 98 |
let text, latencyMs;
|
| 99 |
try {
|
| 100 |
-
const {res, ms} = await measureAsync(() => service.infer(
|
| 101 |
text = res;
|
| 102 |
latencyMs = ms;
|
| 103 |
} catch (err) {
|
|
|
|
| 95 |
const route = this._choose(job);
|
| 96 |
const service = this._getInferenceService(route);
|
| 97 |
|
| 98 |
+
const full_prompt = "Please answer the following question with True or False: " + job.prompt + "\nAnswer: "; // wrap the job's question in the True/False instruction template passed to the selected service
|
| 99 |
+
|
| 100 |
let text, latencyMs;
|
| 101 |
try {
|
| 102 |
+
const {res, ms} = await measureAsync(() => service.infer(full_prompt));
|
| 103 |
text = res;
|
| 104 |
latencyMs = ms;
|
| 105 |
} catch (err) {
|
src/scheduler.js
CHANGED
|
@@ -42,7 +42,7 @@ export class JobScheduler {
|
|
| 42 |
}
|
| 43 |
} else if (patternName === 'every-ten-sec') {
|
| 44 |
let i = 0;
|
| 45 |
-
const interval =
|
| 46 |
while (this._dataset.length > 0 && this.running) {
|
| 47 |
const item = this._dataset.pop();
|
| 48 |
this._emit(item);
|
|
|
|
| 42 |
}
|
| 43 |
} else if (patternName === 'every-ten-sec') {
|
| 44 |
let i = 0;
|
| 45 |
+
const interval = 10000; // ms
|
| 46 |
while (this._dataset.length > 0 && this.running) {
|
| 47 |
const item = this._dataset.pop();
|
| 48 |
this._emit(item);
|
src/services/cloudService.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
// CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
|
| 2 |
-
|
| 3 |
/**
|
| 4 |
* Cloud inference service using a remote API from OpenRouter to access different models over one API.
|
| 5 |
*
|
|
@@ -7,7 +6,7 @@
|
|
| 7 |
export class CloudService {
|
| 8 |
constructor({apiKey, model} = {}) {
|
| 9 |
this.apiKey = apiKey;
|
| 10 |
-
this.model = model
|
| 11 |
}
|
| 12 |
|
| 13 |
|
|
@@ -32,13 +31,15 @@ export class CloudService {
|
|
| 32 |
async infer(prompt) {
|
| 33 |
if (!this.apiKey) throw new Error('No API key set for CloudService');
|
| 34 |
|
|
|
|
| 35 |
const payload = {
|
| 36 |
model: this.model,
|
|
|
|
| 37 |
messages: [{role: 'user', content: prompt}]
|
| 38 |
};
|
| 39 |
|
| 40 |
// call the api
|
| 41 |
-
const resp = await fetch('https://
|
| 42 |
method: 'POST',
|
| 43 |
headers: {
|
| 44 |
'Content-Type': 'application/json',
|
|
@@ -66,6 +67,13 @@ export class CloudService {
|
|
| 66 |
} catch (e) {
|
| 67 |
text = JSON.stringify(json).slice(0, 200);
|
| 68 |
}
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
}
|
| 71 |
}
|
|
|
|
| 1 |
// CloudService: example OpenRouter integration. Replace endpoint/payload per provider.
|
|
|
|
| 2 |
/**
|
| 3 |
* Cloud inference service using a remote API from OpenRouter to access different models over one API.
|
| 4 |
*
|
|
|
|
| 6 |
export class CloudService {
|
| 7 |
constructor({apiKey, model} = {}) {
|
| 8 |
this.apiKey = apiKey;
|
| 9 |
+
this.model = model;
|
| 10 |
}
|
| 11 |
|
| 12 |
|
|
|
|
| 31 |
async infer(prompt) {
|
| 32 |
if (!this.apiKey) throw new Error('No API key set for CloudService');
|
| 33 |
|
| 34 |
+
// prepare payload with prompt
|
| 35 |
const payload = {
|
| 36 |
model: this.model,
|
| 37 |
+
max_tokens: 50,
|
| 38 |
messages: [{role: 'user', content: prompt}]
|
| 39 |
};
|
| 40 |
|
| 41 |
// call the api
|
| 42 |
+
const resp = await fetch('https://openrouter.ai/api/v1/chat/completions', {
|
| 43 |
method: 'POST',
|
| 44 |
headers: {
|
| 45 |
'Content-Type': 'application/json',
|
|
|
|
| 67 |
} catch (e) {
|
| 68 |
text = JSON.stringify(json).slice(0, 200);
|
| 69 |
}
|
| 70 |
+
|
| 71 |
+
return {
|
| 72 |
+
answer: text,
|
| 73 |
+
stats: {
|
| 74 |
+
input_tokens: json.usage?.prompt_tokens || 0,
|
| 75 |
+
output_tokens: json.usage?.completion_tokens || 0
|
| 76 |
+
}
|
| 77 |
+
};
|
| 78 |
}
|
| 79 |
}
|
src/services/onDeviceService.js
CHANGED
|
@@ -70,7 +70,6 @@ export class OnDeviceService {
|
|
| 70 |
console.log("model not ready:" , this._ready, this._model);
|
| 71 |
throw new Error('Model not loaded. Call load() first.');
|
| 72 |
}
|
| 73 |
-
prompt = "Please answer the following question: " + prompt + "\nAnswer: "; // ensure string input
|
| 74 |
console.log("running inference on-device:\n", prompt);
|
| 75 |
|
| 76 |
const output = await this._model(prompt, {
|
|
@@ -82,8 +81,10 @@ export class OnDeviceService {
|
|
| 82 |
num_return_sequences: 1,
|
| 83 |
});
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
| 87 |
}
|
| 88 |
|
| 89 |
/**
|
|
|
|
| 70 |
console.log("model not ready:" , this._ready, this._model);
|
| 71 |
throw new Error('Model not loaded. Call load() first.');
|
| 72 |
}
|
|
|
|
| 73 |
console.log("running inference on-device:\n", prompt);
|
| 74 |
|
| 75 |
const output = await this._model(prompt, {
|
|
|
|
| 81 |
num_return_sequences: 1,
|
| 82 |
});
|
| 83 |
|
| 84 |
+
const text = output[0]?.generated_text?.trim() || '';
|
| 85 |
+
|
| 86 |
+
// TODO: compute input and output token counts for on-device inference (reported as undefined for now)
|
| 87 |
+
return {answer: text, stats: {input_tokens: undefined, output_tokens: undefined}};
|
| 88 |
}
|
| 89 |
|
| 90 |
/**
|