ggerganov committed on
Commit
04da8a6
·
unverified ·
1 Parent(s): 722327a

close #109 : add fetching of the model over HTTP (whisper.wasm)

Browse files
Files changed (1) hide show
  1. examples/whisper.wasm/index-tmpl.html +207 -11
examples/whisper.wasm/index-tmpl.html CHANGED
@@ -46,7 +46,12 @@
46
 
47
  <div id="model">
48
  Model:
49
- <input type="file" id="file" name="file" onchange="loadFile(event, 'ggml.bin')" />
 
 
 
 
 
50
  </div>
51
 
52
  <br>
@@ -258,6 +263,25 @@
258
  // load model
259
  //
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  function loadFile(event, fname) {
262
  var file = event.target.files[0] || null;
263
  if (file == null) {
@@ -270,19 +294,191 @@
270
  var reader = new FileReader();
271
  reader.onload = function(event) {
272
  var buf = new Uint8Array(reader.result);
 
 
 
 
273
 
274
- // write to WASM file using whisper.FS_createDataFile
275
- // if the file exists, delete it
276
- try {
277
- Module.FS_unlink(fname);
278
- } catch (e) {
 
 
 
 
 
 
279
  }
280
- Module.FS_createDataFile("/", fname, buf, true, true);
281
 
282
- model_fname = file.name;
283
- printTextarea('js: loaded model: ' + model_fname + ' size: ' + buf.length);
 
284
  }
285
- reader.readAsArrayBuffer(file);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
287
 
288
  //
@@ -446,7 +642,7 @@
446
 
447
  function onProcess(translate) {
448
  if (!instance) {
449
- instance = Module.init('ggml.bin');
450
 
451
  if (instance) {
452
  printTextarea("js: whisper initialized, instance: " + instance);
 
46
 
47
  <div id="model">
48
  Model:
49
+ <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
50
+ <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
51
+ <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
52
+ <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
53
+ <span id="fetch-whisper-progress"></span>
54
+ <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
55
  </div>
56
 
57
  <br>
 
263
  // load model
264
  //
265
 
266
// version passed to indexedDB.open() for the model cache database
let dbVersion = 1;
// name of the IndexedDB database that caches downloaded models
let dbName = 'whisper.ggerganov.com';
// IndexedDB factory, falling back to the vendor-prefixed variants
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB;
269
+
270
// Put a model buffer into the WASM filesystem as "/<fname>" and remember
// its name in model_fname.
//
// fname - file name to create in the WASM filesystem root
// buf   - model bytes (its .length is reported in the log line)
function storeFS(fname, buf) {
    // drop a previous copy first; a failing unlink (e.g. nothing to remove)
    // is deliberately ignored
    try {
        Module.FS_unlink(fname);
    } catch (_) {
        // ignore
    }

    Module.FS_createDataFile("/", fname, buf, true, true);
    model_fname = fname;

    printTextarea(`js: stored model: ${fname} size: ${buf.length}`);
}
284
+
285
  function loadFile(event, fname) {
286
  var file = event.target.files[0] || null;
287
  if (file == null) {
 
294
  var reader = new FileReader();
295
  reader.onload = function(event) {
296
  var buf = new Uint8Array(reader.result);
297
+ storeFS(fname, buf);
298
+ }
299
+ reader.readAsArrayBuffer(file);
300
+ }
301
 
302
// Fetch a remote file using the Fetch API, reporting download progress.
//
// url        - URL of the file to download
// elProgress - element whose text is set to the current percentage; may be
//              null/undefined, in which case only the log is updated
//
// Resolves to a Uint8Array holding the whole response body, or to undefined
// when the download fails (network error, non-OK HTTP status, or a read error
// in the middle of the stream). Failures are logged and never rejected, so
// callers only have to check the result for truthiness.
async function fetchRemote(url, elProgress) {
    printTextarea('js: downloading with fetch()...');

    let response;
    try {
        response = await fetch(
            url,
            {
                method: 'GET',
                headers: {
                    'Content-Type': 'application/octet-stream',
                },
            }
        );
    } catch (e) {
        // fetch() rejects on network-level failures (DNS, CORS, offline, ...)
        printTextarea('js: failed to fetch ' + url + ': ' + e);
        return;
    }

    if (!response.ok) {
        printTextarea('js: failed to fetch ' + url);
        return;
    }

    // no readable stream available: fall back to reading the body in one go
    if (!response.body) {
        try {
            return new Uint8Array(await response.arrayBuffer());
        } catch (e) {
            printTextarea('js: failed to fetch ' + url + ': ' + e);
            return;
        }
    }

    const total = parseInt(response.headers.get('content-length'), 10);
    // progress is only meaningful with a positive, numeric Content-Length;
    // a missing, malformed or zero value would yield NaN% / Infinity%
    const hasTotal = Number.isFinite(total) && total > 0;
    const reader = response.body.getReader();

    const chunks = [];
    let receivedLength = 0;
    let progressLast = -1;

    try {
        while (true) {
            const { done, value } = await reader.read();

            if (done) {
                break;
            }

            chunks.push(value);
            receivedLength += value.length;

            if (hasTotal) {
                // clamp: the decoded body can be larger than Content-Length
                // when the response is content-encoded
                const fraction = Math.min(receivedLength / total, 1);

                // update progress element with the new percentage
                if (elProgress) {
                    elProgress.textContent = Math.round(fraction * 100) + '%';
                }

                // log only when the progress crosses a new 10% step
                const progressCur = Math.round(fraction * 10);
                if (progressCur !== progressLast) {
                    printTextarea('js: fetching ' + 10*progressCur + '% ...');
                    progressLast = progressCur;
                }
            }
        }
    } catch (e) {
        printTextarea('js: failed to fetch ' + url + ': ' + e);
        return;
    }

    // concatenate the received chunks into a single buffer
    const chunksAll = new Uint8Array(receivedLength);
    let position = 0;
    for (const chunk of chunks) {
        chunksAll.set(chunk, position);
        position += chunk.length;
    }

    return chunksAll;
}
360
+
361
// load remote data
// - check if the data is already in the IndexedDB
// - if not, fetch it from the remote URL and store it in the IndexedDB
// - store it in WASM memory
//
// url        - remote URL of the data; also used as the key in the 'models' store
// dst        - file name handed to storeFS()
// elProgress - element handed to fetchRemote() for progress output
// size_mb    - download size shown in the confirmation dialog
//
// Returns nothing; all work happens in IndexedDB / fetch callbacks.
function loadRemote(url, dst, elProgress, size_mb) {
    // make the model buttons visible again so the user can choose again
    function showModelButtons() {
        var ids = ['fetch-whisper-tiny-en', 'fetch-whisper-tiny', 'fetch-whisper-base-en', 'fetch-whisper-base'];
        for (var id of ids) {
            var el = document.getElementById(id);
            if (el) el.style.display = 'inline-block';
        }
    }

    // Write freshly downloaded data to the cache, then hand it to storeFS().
    // A failed cache write (e.g. quota exceeded) only means the data has to be
    // downloaded again next time, so the model is loaded either way.
    function cacheAndLoad(db, data) {
        var loaded = false;

        // runs exactly once, whichever of success / error / abort comes first
        function finish(cached) {
            if (loaded) return;
            loaded = true;

            if (cached) {
                printTextarea('js: "' + url + '" stored in the IndexedDB');
            } else {
                printTextarea('js: failed to store "' + url + '" in the IndexedDB');
            }

            db.close();
            storeFS(dst, data);
        }

        try {
            var transaction = db.transaction(['models'], 'readwrite');
            var putRequest = transaction.objectStore('models').put(data, url);

            putRequest.onsuccess = function (event) { finish(true); };
            putRequest.onerror   = function (event) { finish(false); };
            transaction.onabort  = function (event) { finish(false); };
        } catch (e) {
            // e.g. the connection was closed while the download was running
            finish(false);
        }
    }

    // query the storage quota and print it
    // (purely informational; navigator.storage may be unavailable)
    if (navigator.storage && navigator.storage.estimate) {
        navigator.storage.estimate().then(function (estimate) {
            printTextarea('js: storage quota: ' + estimate.quota + ' bytes');
            printTextarea('js: storage usage: ' + estimate.usage + ' bytes');
        }).catch(function (e) {
            printTextarea('js: failed to query storage quota: ' + e);
        });
    }

    // check if the data is already in the IndexedDB
    var openRequest = indexedDB.open(dbName, dbVersion);

    openRequest.onupgradeneeded = function (event) {
        var db = event.target.result;
        if (db.version === 1) {
            db.createObjectStore('models', { autoIncrement: false });
            printTextarea('js: created IndexedDB ' + db.name + ' version ' + db.version);
        } else {
            // clear the database
            var objectStore = event.currentTarget.transaction.objectStore('models');
            objectStore.clear();
            printTextarea('js: cleared IndexedDB ' + db.name + ' version ' + db.version);
        }
    };

    openRequest.onsuccess = function (event) {
        var db = event.target.result;
        var transaction = db.transaction(['models'], 'readonly');
        var getRequest = transaction.objectStore('models').get(url);

        getRequest.onsuccess = function (event) {
            if (getRequest.result) {
                printTextarea('js: "' + url + '" is already in the IndexedDB');
                db.close();
                storeFS(dst, getRequest.result);
                return;
            }

            // data is not in the IndexedDB
            printTextarea('js: "' + url + '" is not in the IndexedDB');

            // alert and ask the user to confirm
            if (!confirm('You are about to download ' + size_mb + ' MB of data.\nThe model data will be cached in the browser for future use.\n\nPress OK to continue.')) {
                db.close();
                showModelButtons();
                return;
            }

            fetchRemote(url, elProgress).then(function (data) {
                if (!data) {
                    // download failed: let the user try again
                    db.close();
                    showModelButtons();
                    return;
                }

                cacheAndLoad(db, data);
            }).catch(function (e) {
                printTextarea('js: failed to load "' + url + '": ' + e);
                db.close();
                showModelButtons();
            });
        };

        getRequest.onerror = function (event) {
            printTextarea('js: failed to get data from the IndexedDB');
            db.close();
            showModelButtons();
        };
    };

    openRequest.onerror = function (event) {
        printTextarea('js: failed to open IndexedDB');
        showModelButtons();
    };

    openRequest.onblocked = function (event) {
        printTextarea('js: failed to open IndexedDB: blocked');
    };

    openRequest.onabort = function (event) {
        printTextarea('js: failed to open IndexedDB: abort');
    };
}
453
+
454
// Start loading one of the hosted Whisper models into 'whisper.bin'.
//
// model - one of 'tiny.en', 'tiny', 'base.en', 'base'; any other value is
//         logged and ignored
//
// Records the choice in model_whisper, hides the model buttons and hands the
// URL, destination name, progress element and size over to loadRemote().
function loadWhisper(model) {
    let urls = {
        'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
        'tiny':    'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
        'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
        'base':    'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
    };

    let sizes = {
        'tiny.en': 75,
        'tiny':    75,
        'base.en': 142,
        'base':    142,
    };

    // own-property check: a plain lookup would also "find" inherited keys
    // such as 'toString', and an unknown key would pass an undefined URL on
    if (!Object.prototype.hasOwnProperty.call(urls, model)) {
        printTextarea('js: unknown model: ' + model);
        return;
    }

    let url     = urls[model];
    let dst     = 'whisper.bin';
    let el      = document.getElementById('fetch-whisper-progress');
    let size_mb = sizes[model];

    model_whisper = model;

    // hide the model buttons while loading; a missing button is skipped
    let buttonIds = ['fetch-whisper-tiny-en', 'fetch-whisper-base-en', 'fetch-whisper-tiny', 'fetch-whisper-base'];
    for (let id of buttonIds) {
        let button = document.getElementById(id);
        if (button) button.style.display = 'none';
    }

    loadRemote(url, dst, el, size_mb);
}
483
 
484
  //
 
642
 
643
  function onProcess(translate) {
644
  if (!instance) {
645
+ instance = Module.init('whisper.bin');
646
 
647
  if (instance) {
648
  printTextarea("js: whisper initialized, instance: " + instance);