ggerganov commited on
Commit
936213e
·
unverified ·
1 Parent(s): d0b1d9e

stream.wasm : add web-based real-time transcription (#112)

Browse files
examples/CMakeLists.txt CHANGED
@@ -20,6 +20,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
20
 
21
  if (EMSCRIPTEN)
22
  add_subdirectory(whisper.wasm)
 
23
  add_subdirectory(talk.wasm)
24
  else()
25
  add_subdirectory(main)
 
20
 
21
  if (EMSCRIPTEN)
22
  add_subdirectory(whisper.wasm)
23
+ add_subdirectory(stream.wasm)
24
  add_subdirectory(talk.wasm)
25
  else()
26
  add_subdirectory(main)
examples/helpers.js CHANGED
@@ -19,6 +19,12 @@ var printTextarea = (function() {
19
  };
20
  })();
21
 
 
 
 
 
 
 
22
  // fetch a remote file from remote URL using the Fetch API
23
  async function fetchRemote(url, cbProgress, cbPrint) {
24
  cbPrint('fetchRemote: downloading with fetch()...');
 
19
  };
20
  })();
21
 
22
// Ask the user for confirmation, then drop the IndexedDB database that
// caches the downloaded model files.
async function clearCache() {
    if (!confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.')) {
        return;
    }

    indexedDB.deleteDatabase(dbName);
}
27
+
28
  // fetch a remote file from remote URL using the Fetch API
29
  async function fetchRemote(url, cbProgress, cbPrint) {
30
  cbPrint('fetchRemote: downloading with fetch()...');
examples/stream.wasm/CMakeLists.txt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # libstream
3
+ #
4
+
5
+ set(TARGET libstream)
6
+
7
+ add_executable(${TARGET}
8
+ emscripten.cpp
9
+ )
10
+
11
+ target_link_libraries(${TARGET} PRIVATE
12
+ whisper
13
+ )
14
+
15
+ unset(EXTRA_FLAGS)
16
+
17
+ if (WHISPER_WASM_SINGLE_FILE)
18
+ set(EXTRA_FLAGS "-s SINGLE_FILE=1")
19
+ message(STATUS "Embedding WASM inside stream.js")
20
+
21
+ add_custom_command(
22
+ TARGET ${TARGET} POST_BUILD
23
+ COMMAND ${CMAKE_COMMAND} -E copy
24
+ ${CMAKE_BINARY_DIR}/bin/libstream.js
25
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/stream.wasm/stream.js
26
+ )
27
+ endif()
28
+
29
+ set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
30
+ --bind \
31
+ -s USE_PTHREADS=1 \
32
+ -s PTHREAD_POOL_SIZE=8 \
33
+ -s INITIAL_MEMORY=1024MB \
34
+ -s TOTAL_MEMORY=1024MB \
35
+ -s FORCE_FILESYSTEM=1 \
36
+ -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
37
+ ${EXTRA_FLAGS} \
38
+ ")
39
+
40
+ #
41
+ # stream.wasm
42
+ #
43
+
44
+ set(TARGET stream.wasm)
45
+
46
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
47
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
examples/stream.wasm/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # stream.wasm
2
+
3
+ Real-time transcription in the browser using WebAssembly
4
+
5
+ Online demo: https://whisper.ggerganov.com/stream/
6
+
7
+ ## Build instructions
8
+
9
+ ```bash
10
+ # build using Emscripten (v3.1.2)
11
+ git clone https://github.com/ggerganov/whisper.cpp
12
+ cd whisper.cpp
13
+ mkdir build-em && cd build-em
14
+ emcmake cmake ..
15
+ make -j
16
+
17
+ # copy the produced page to your HTTP path
18
+ cp bin/stream.wasm/* /path/to/html/
19
+ cp bin/libstream.worker.js /path/to/html/
20
+ ```
examples/stream.wasm/emscripten.cpp ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#include "ggml.h"
#include "whisper.h"

#include <emscripten.h>
#include <emscripten/bind.h>

#include <atomic>
#include <cmath>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

// Thread count requested for whisper inference (capped at runtime by
// std::thread::hardware_concurrency in stream_main).
constexpr int N_THREAD = 8;

// Pool of whisper contexts; a slot is non-null while an instance is active.
std::vector<struct whisper_context *> g_contexts(4, nullptr);

// g_mutex protects all shared state below (status strings, transcribed
// text, and the incoming audio buffer).
std::mutex g_mutex;
// Background thread running the transcription loop (stream_main).
std::thread g_worker;

// Set to false (from the "free" binding) to request the worker loop to exit.
std::atomic<bool> g_running(false);

std::string g_status = "";        // status reported by the worker loop
std::string g_status_forced = ""; // status forced from JS via set_status; overrides g_status when non-empty
std::string g_transcribed = "";   // latest transcription, consumed by get_transcribed

// Incoming PCM float samples pushed from JS via set_audio.
std::vector<float> g_pcmf32;
28
+
29
+ void stream_set_status(const std::string & status) {
30
+ std::lock_guard<std::mutex> lock(g_mutex);
31
+ g_status = status;
32
+ }
33
+
34
+ void stream_main(size_t index) {
35
+ stream_set_status("loading data ...");
36
+
37
+ struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
38
+
39
+ wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
40
+ wparams.offset_ms = 0;
41
+ wparams.translate = false;
42
+ wparams.no_context = true;
43
+ wparams.single_segment = true;
44
+ wparams.print_realtime = false;
45
+ wparams.print_progress = false;
46
+ wparams.print_timestamps = true;
47
+ wparams.print_special = false;
48
+
49
+ wparams.max_tokens = 32;
50
+ wparams.audio_ctx = 768; // partial encoder context for better performance
51
+
52
+ wparams.language = "en";
53
+
54
+ printf("stream: using %d threads\n", N_THREAD);
55
+
56
+ std::vector<float> pcmf32;
57
+
58
+ // whisper context
59
+ auto & ctx = g_contexts[index];
60
+
61
+ // 5 seconds interval
62
+ const int64_t window_samples = 5*WHISPER_SAMPLE_RATE;
63
+
64
+ while (g_running) {
65
+ stream_set_status("waiting for audio ...");
66
+
67
+ {
68
+ std::unique_lock<std::mutex> lock(g_mutex);
69
+
70
+ if (g_pcmf32.size() < 1024) {
71
+ lock.unlock();
72
+
73
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
74
+
75
+ continue;
76
+ }
77
+
78
+ pcmf32 = std::vector<float>(g_pcmf32.end() - std::min((int64_t) g_pcmf32.size(), window_samples), g_pcmf32.end());
79
+ g_pcmf32.clear();
80
+ }
81
+
82
+ {
83
+ const auto t_start = std::chrono::high_resolution_clock::now();
84
+
85
+ stream_set_status("running whisper ...");
86
+
87
+ int ret = whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size());
88
+ if (ret != 0) {
89
+ printf("whisper_full() failed: %d\n", ret);
90
+ break;
91
+ }
92
+
93
+ const auto t_end = std::chrono::high_resolution_clock::now();
94
+
95
+ printf("stream: whisper_full() returned %d in %f seconds\n", ret, std::chrono::duration<double>(t_end - t_start).count());
96
+ }
97
+
98
+ {
99
+ std::string text_heard;
100
+
101
+ {
102
+ const int n_segments = whisper_full_n_segments(ctx);
103
+ for (int i = n_segments - 1; i < n_segments; ++i) {
104
+ const char * text = whisper_full_get_segment_text(ctx, i);
105
+
106
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
107
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
108
+
109
+ printf("transcribed: %s\n", text);
110
+
111
+ text_heard += text;
112
+ }
113
+ }
114
+
115
+ {
116
+ std::lock_guard<std::mutex> lock(g_mutex);
117
+ g_transcribed = text_heard;
118
+ }
119
+ }
120
+ }
121
+
122
+ if (index < g_contexts.size()) {
123
+ whisper_free(g_contexts[index]);
124
+ g_contexts[index] = nullptr;
125
+ }
126
+ }
127
+
128
// JS-visible API of the stream module: create/stop an instance, push audio
// samples, and poll for status / transcription results.
EMSCRIPTEN_BINDINGS(stream) {
    // init(path_model) -> 1-based instance id, or 0 on failure.
    // Loads the model from the Emscripten FS and spawns the worker thread.
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
                g_contexts[i] = whisper_init(path_model.c_str());
                if (g_contexts[i] != nullptr) {
                    g_running = true;
                    // join a previous worker (if any) before starting a new one
                    if (g_worker.joinable()) {
                        g_worker.join();
                    }
                    g_worker = std::thread([i]() {
                        stream_main(i);
                    });

                    // ids are 1-based so that 0 can signal failure
                    return i + 1;
                } else {
                    return (size_t) 0;
                }
            }
        }

        return (size_t) 0;
    }));

    // free(index): signals the worker loop to stop; the worker itself frees
    // the whisper context on exit (see end of stream_main).
    emscripten::function("free", emscripten::optional_override([](size_t index) {
        if (g_running) {
            g_running = false;
        }
    }));

    // set_audio(index, audio): copies a JS Float32Array into g_pcmf32.
    // Returns 0 on success, -1 for an invalid id, -2 if not initialized.
    emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
        --index; // convert 1-based instance id to 0-based slot

        if (index >= g_contexts.size()) {
            return -1;
        }

        if (g_contexts[index] == nullptr) {
            return -2;
        }

        {
            std::lock_guard<std::mutex> lock(g_mutex);
            const int n = audio["length"].as<int>();

            emscripten::val heap = emscripten::val::module_property("HEAPU8");
            emscripten::val memory = heap["buffer"];

            g_pcmf32.resize(n);

            // build a typed-array view over the WASM heap at g_pcmf32's
            // storage and copy the JS audio data straight into it
            emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
            memoryView.call<void>("set", audio);
        }

        return 0;
    }));

    // get_transcribed() -> latest transcription; moved out under the lock,
    // so each result is returned at most once.
    emscripten::function("get_transcribed", emscripten::optional_override([]() {
        std::string transcribed;

        {
            std::lock_guard<std::mutex> lock(g_mutex);
            transcribed = std::move(g_transcribed);
        }

        return transcribed;
    }));

    // get_status() -> current status string; a non-empty forced status
    // (set via set_status) takes precedence over the worker's own status.
    emscripten::function("get_status", emscripten::optional_override([]() {
        std::string status;

        {
            std::lock_guard<std::mutex> lock(g_mutex);
            status = g_status_forced.empty() ? g_status : g_status_forced;
        }

        return status;
    }));

    // set_status(status): force the status shown in the UI (empty clears
    // the override).
    emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
        {
            std::lock_guard<std::mutex> lock(g_mutex);
            g_status_forced = status;
        }
    }));
}
examples/stream.wasm/index-tmpl.html ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en-us">
3
+ <head>
4
+ <title>stream : Real-time Whisper transcription in WebAssembly</title>
5
+
6
+ <style>
7
+ #output {
8
+ width: 100%;
9
+ height: 100%;
10
+ margin: 0 auto;
11
+ margin-top: 10px;
12
+ border-left: 0px;
13
+ border-right: 0px;
14
+ padding-left: 0px;
15
+ padding-right: 0px;
16
+ display: block;
17
+ background-color: black;
18
+ color: white;
19
+ font-size: 10px;
20
+ font-family: 'Lucida Console', Monaco, monospace;
21
+ outline: none;
22
+ white-space: pre;
23
+ overflow-wrap: normal;
24
+ overflow-x: scroll;
25
+ }
26
+ </style>
27
+ </head>
28
+ <body>
29
+ <div id="main-container">
30
+ <b>stream : Real-time Whisper transcription in WebAssembly</b>
31
+
32
+ <br><br>
33
+
34
+ You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/stream.wasm">GitHub</a>.
35
+
36
+ <br><br>
37
+
38
+ <hr>
39
+
40
+ Select the model you would like to use, click the "Start" button and start speaking
41
+
42
+ <br><br>
43
+
44
+ <div id="model-whisper">
45
+ Whisper model: <span id="model-whisper-status"></span>
46
+ <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
47
+ <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
48
+ <span id="fetch-whisper-progress"></span>
49
+
50
+ <!--
51
+ <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
52
+ -->
53
+ </div>
54
+
55
+ <br>
56
+
57
+ <div id="input">
58
+ <button id="start" onclick="onStart()" disabled>Start</button>
59
+ <button id="stop" onclick="onStop()" disabled>Stop</button>
60
+ <button id="clear" onclick="clearCache()">Clear Cache</button>
61
+ </div>
62
+
63
+ <br>
64
+
65
+ <div id="state">
66
+ Status: <b><span id="state-status">not started</span></b>
67
+
68
+ <pre id="state-transcribed">[The transcribed text will be displayed here]</pre>
69
+ </div>
70
+
71
+ <hr>
72
+
73
+ Debug output:
74
+ <textarea id="output" rows="20"></textarea>
75
+
76
+ <br>
77
+
78
+ <b>Troubleshooting</b>
79
+
80
+ <br><br>
81
+
82
+ The page does some heavy computations, so make sure:
83
+
84
+ <ul>
85
+ <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
86
+ <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
87
+ <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
88
+ </ul>
89
+
90
+ <div class="cell-version">
91
+ <span>
92
+ |
93
+ Build time: <span class="nav-link">@GIT_DATE@</span> |
94
+ Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
95
+ Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
96
+ <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/stream.wasm">Source Code</a> |
97
+ </span>
98
+ </div>
99
+ </div>
100
+
101
+ <script type="text/javascript" src="helpers.js"></script>
102
+ <script type='text/javascript'>
103
+ const kRestartRecording_s = 15;
104
+ const kSampleRate = 16000;
105
+
106
+ window.AudioContext = window.AudioContext || window.webkitAudioContext;
107
+ window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
108
+
109
+ // web audio context
110
+ var context = null;
111
+
112
+ // audio data
113
+ var audio = null;
114
+ var audio0 = null;
115
+
116
+ // the stream instance
117
+ var instance = null;
118
+
119
+ // model name
120
+ var model_whisper = null;
121
+
122
+ var Module = {
123
+ print: printTextarea,
124
+ printErr: printTextarea,
125
+ setStatus: function(text) {
126
+ printTextarea('js: ' + text);
127
+ },
128
+ monitorRunDependencies: function(left) {
129
+ },
130
+ preRun: function() {
131
+ printTextarea('js: Preparing ...');
132
+ },
133
+ postRun: function() {
134
+ printTextarea('js: Initialized successfully!');
135
+ }
136
+ };
137
+
138
+ //
139
+ // fetch models
140
+ //
141
+
142
+ let dbVersion = 1
143
+ let dbName = 'whisper.ggerganov.com';
144
+ let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
145
+
146
// Store a downloaded model buffer into the Emscripten virtual filesystem
// under the given name, then enable the Start button.
// Called by loadRemote() (helpers.js) once the fetch completes.
function storeFS(fname, buf) {
    // write to WASM file using FS_createDataFile
    // if the file exists, delete it
    try {
        Module.FS_unlink(fname);
    } catch (e) {
        // ignore
    }

    Module.FS_createDataFile("/", fname, buf, true, true);

    printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);

    document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';

    // model_whisper is set by loadWhisper() before the download starts
    if (model_whisper != null) {
        document.getElementById('start').disabled = false;
        document.getElementById('stop' ).disabled = true;
    }
}
166
+
167
// Start downloading the selected Whisper model ('tiny.en' or 'base.en');
// the file is cached in IndexedDB by loadRemote() and handed to storeFS()
// when the download completes.
function loadWhisper(model) {
    let urls = {
        'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
        'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
    };

    let sizes = {
        'tiny.en': 75,
        'base.en': 142,
    };

    let url     = urls[model];
    let dst     = 'whisper.bin';
    let size_mb = sizes[model];

    model_whisper = model;

    document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
    document.getElementById('fetch-whisper-base-en').style.display = 'none';
    document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';

    // declared with `let` - the original assigned these without a
    // declaration, leaking implicit globals (a ReferenceError in strict mode)
    let cbProgress = function(p) {
        let el = document.getElementById('fetch-whisper-progress');
        el.innerHTML = Math.round(100*p) + '%';
    };

    // restore the model-selection buttons if the download is cancelled
    let cbCancel = function() {
        var el;
        el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
        el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
        el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
    };

    loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
}
202
+
203
+ //
204
+ // microphone
205
+ //
206
+
207
+ var mediaRecorder = null;
208
+ var doRecording = false;
209
+ var startTime = 0;
210
+
211
// Stop capturing: flag the recording loop to exit, report the paused state
// and drop the buffered audio and the audio context.
function stopRecording() {
    doRecording = false;

    Module.set_status("paused");

    audio = null;
    audio0 = null;
    context = null;
}
218
+
219
// Start capturing microphone audio. Audio is delivered by MediaRecorder in
// 5-second chunks, decoded and rendered to raw PCM via an
// OfflineAudioContext, and pushed to the WASM module with Module.set_audio().
// A 250 ms watchdog interval handles stop requests and periodic restarts.
function startRecording() {
    if (!context) {
        context = new AudioContext({
            sampleRate: 16000,
            channelCount: 1,
            echoCancellation: false,
            autoGainControl: true,
            noiseSuppression: true,
        });
    }

    Module.set_status("");

    document.getElementById('start').disabled = true;
    document.getElementById('stop').disabled = false;

    doRecording = true;
    startTime = Date.now();

    var chunks = [];
    var stream = null;

    navigator.mediaDevices.getUserMedia({audio: true, video: false})
        .then(function(s) {
            stream = s;
            mediaRecorder = new MediaRecorder(stream);
            mediaRecorder.ondataavailable = function(e) {
                chunks.push(e.data);

                // re-decode all chunks collected so far as one blob
                var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
                var reader = new FileReader();

                reader.onload = function(event) {
                    var buf = new Uint8Array(reader.result);

                    if (!context) {
                        // recording was stopped while this chunk was in flight
                        return;
                    }
                    context.decodeAudioData(buf.buffer, function(audioBuffer) {
                        // render through an offline context to obtain raw PCM samples
                        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                        var source = offlineContext.createBufferSource();
                        source.buffer = audioBuffer;
                        source.connect(offlineContext.destination);
                        source.start(0);

                        offlineContext.startRendering().then(function(renderedBuffer) {
                            audio = renderedBuffer.getChannelData(0);

                            //printTextarea('js: audio recorded, size: ' + audio.length + ', old size: ' + (audio0 == null ? 0 : audio0.length));

                            // prepend audio kept from the previous recorder
                            // session (audio0, saved on restart below)
                            var audioAll = new Float32Array(audio0 == null ? audio.length : audio0.length + audio.length);
                            if (audio0 != null) {
                                audioAll.set(audio0, 0);
                            }
                            audioAll.set(audio, audio0 == null ? 0 : audio0.length);

                            if (instance) {
                                Module.set_audio(instance, audioAll);
                            }
                        });
                    }, function(e) {
                        audio = null;
                    });
                }

                reader.readAsArrayBuffer(blob);
            };

            mediaRecorder.onstop = function(e) {
                // restart automatically unless the user pressed Stop
                if (doRecording) {
                    setTimeout(function() {
                        startRecording();
                    });
                }
            };

            mediaRecorder.start(5000);
        })
        .catch(function(err) {
            printTextarea('js: error getting audio stream: ' + err);
        });

    var interval = setInterval(function() {
        if (!doRecording) {
            // user pressed Stop - tear down the recorder and the media stream
            clearInterval(interval);
            mediaRecorder.stop();
            stream.getTracks().forEach(function(track) {
                track.stop();
            });

            document.getElementById('start').disabled = false;
            document.getElementById('stop').disabled = true;

            mediaRecorder = null;
        }

        // if audio length is more than kRestartRecording_s seconds, restart recording
        if (audio != null && audio.length > kSampleRate*kRestartRecording_s) {
            if (doRecording) {
                //printTextarea('js: restarting recording');

                clearInterval(interval);
                audio0 = audio;
                audio = null;
                mediaRecorder.stop();
                stream.getTracks().forEach(function(track) {
                    track.stop();
                });
            }
        }
    }, 250);
}
331
+
332
+ //
333
+ // main
334
+ //
335
+
336
+ var nLines = 0;
337
+ var intervalUpdate = null;
338
+ var transcribedAll = '';
339
+
340
// "Start" button handler: lazily initializes the whisper instance from the
// stored model file, starts microphone capture, and polls the WASM module
// every 100 ms for new status / transcription to render into the page.
function onStart() {
    if (!instance) {
        instance = Module.init('whisper.bin');

        if (instance) {
            printTextarea("js: whisper initialized, instance: " + instance);
        }
    }

    if (!instance) {
        printTextarea("js: failed to initialize whisper");
        return;
    }

    startRecording();

    intervalUpdate = setInterval(function() {
        var transcribed = Module.get_transcribed();

        if (transcribed != null && transcribed.length > 1) {
            transcribedAll += transcribed + '<br>';
            nLines++;

            // if more than 10 lines, remove the first line
            if (nLines > 10) {
                var i = transcribedAll.indexOf('<br>');
                if (i > 0) {
                    // drop everything up to and including the first '<br>'
                    transcribedAll = transcribedAll.substring(i + 4);
                    nLines--;
                }
            }
        }

        document.getElementById('state-status').innerHTML = Module.get_status();
        document.getElementById('state-transcribed').innerHTML = transcribedAll;
    }, 100);
}
377
+
378
// "Stop" button handler: delegates to stopRecording().
function onStop() {
    stopRecording();
}
381
+
382
+ </script>
383
+ <script type="text/javascript" src="stream.js"></script>
384
+ </body>
385
+ </html>
examples/stream/README.md CHANGED
@@ -21,3 +21,7 @@ brew install sdl2
21
 
22
  make stream
23
  ```
 
 
 
 
 
21
 
22
  make stream
23
  ```
24
+
25
+ ## Web version
26
+
27
+ This tool can also run in the browser: [examples/stream.wasm](/examples/stream.wasm)
examples/talk.wasm/emscripten.cpp CHANGED
@@ -61,10 +61,10 @@ void talk_main(size_t index) {
61
  wparams.print_timestamps = true;
62
  wparams.print_special = false;
63
 
64
- wparams.max_tokens = 32;
65
- wparams.audio_ctx = 768; // partial encoder context for better performance
66
 
67
- wparams.language = "en";
68
 
69
  g_gpt2 = gpt2_init("gpt-2.bin");
70
 
 
61
  wparams.print_timestamps = true;
62
  wparams.print_special = false;
63
 
64
+ wparams.max_tokens = 32;
65
+ wparams.audio_ctx = 768; // partial encoder context for better performance
66
 
67
+ wparams.language = "en";
68
 
69
  g_gpt2 = gpt2_init("gpt-2.bin");
70
 
examples/talk.wasm/index-tmpl.html CHANGED
@@ -504,12 +504,6 @@
504
  Module.force_speak(instance);
505
  }
506
 
507
- async function clearCache() {
508
- if (confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.')) {
509
- indexedDB.deleteDatabase(dbName);
510
- }
511
- }
512
-
513
  //
514
  // main
515
  //
 
504
  Module.force_speak(instance);
505
  }
506
 
 
 
 
 
 
 
507
  //
508
  // main
509
  //