Spaces:
Running
Running
ruby : follow audio library change (#2851)
Browse files
* Enable CPU
* Follow audio lib change
.github/workflows/bindings-ruby.yml
CHANGED
|
@@ -19,7 +19,12 @@ on:
|
|
| 19 |
- ggml/**/*.m
|
| 20 |
- ggml/**/*.metal
|
| 21 |
- scripts/get-flags.mk
|
| 22 |
-
- examples/dr_wav.h
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
pull_request:
|
| 24 |
paths:
|
| 25 |
- bindings/ruby/**
|
|
@@ -39,7 +44,12 @@ on:
|
|
| 39 |
- ggml/**/*.m
|
| 40 |
- ggml/**/*.metal
|
| 41 |
- scripts/get-flags.mk
|
| 42 |
-
- examples/dr_wav.h
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
jobs:
|
| 45 |
ubuntu-22:
|
|
|
|
| 19 |
- ggml/**/*.m
|
| 20 |
- ggml/**/*.metal
|
| 21 |
- scripts/get-flags.mk
|
| 22 |
+
- examples/common.h
|
| 23 |
+
- examples/common.cpp
|
| 24 |
+
- examples/common-whisper.h
|
| 25 |
+
- examples/common-whisper.cpp
|
| 26 |
+
- examples/stb_vorbis.c
|
| 27 |
+
- examples/miniaudio.h
|
| 28 |
pull_request:
|
| 29 |
paths:
|
| 30 |
- bindings/ruby/**
|
|
|
|
| 44 |
- ggml/**/*.m
|
| 45 |
- ggml/**/*.metal
|
| 46 |
- scripts/get-flags.mk
|
| 47 |
+
- examples/common.h
|
| 48 |
+
- examples/common.cpp
|
| 49 |
+
- examples/common-whisper.h
|
| 50 |
+
- examples/common-whisper.cpp
|
| 51 |
+
- examples/stb_vorbis.c
|
| 52 |
+
- examples/miniaudio.h
|
| 53 |
|
| 54 |
jobs:
|
| 55 |
ubuntu-22:
|
bindings/ruby/ext/extconf.rb
CHANGED
|
@@ -35,7 +35,7 @@ if $GGML_METAL
|
|
| 35 |
$GGML_METAL_EMBED_LIBRARY = true
|
| 36 |
end
|
| 37 |
|
| 38 |
-
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
|
| 39 |
$MK_CFLAGS = '-std=c11 -fPIC'
|
| 40 |
$MK_CXXFLAGS = '-std=c++17 -fPIC'
|
| 41 |
$MK_NVCCFLAGS = '-std=c++17'
|
|
@@ -171,7 +171,9 @@ $OBJ_GGML <<
|
|
| 171 |
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
|
| 172 |
|
| 173 |
$OBJ_WHISPER <<
|
| 174 |
-
'src/whisper.o'
|
|
|
|
|
|
|
| 175 |
|
| 176 |
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
| 177 |
$objs <<
|
|
|
|
| 35 |
$GGML_METAL_EMBED_LIBRARY = true
|
| 36 |
end
|
| 37 |
|
| 38 |
+
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
|
| 39 |
$MK_CFLAGS = '-std=c11 -fPIC'
|
| 40 |
$MK_CXXFLAGS = '-std=c++17 -fPIC'
|
| 41 |
$MK_NVCCFLAGS = '-std=c++17'
|
|
|
|
| 171 |
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
|
| 172 |
|
| 173 |
$OBJ_WHISPER <<
|
| 174 |
+
'src/whisper.o' <<
|
| 175 |
+
'examples/common.o' <<
|
| 176 |
+
'examples/common-whisper.o'
|
| 177 |
|
| 178 |
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
| 179 |
$objs <<
|
bindings/ruby/ext/ruby_whisper_transcribe.cpp
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
#include <ruby.h>
|
| 2 |
#include "ruby_whisper.h"
|
| 3 |
-
#define DR_WAV_IMPLEMENTATION
|
| 4 |
-
#include "dr_wav.h"
|
| 5 |
#include <string>
|
| 6 |
#include <vector>
|
| 7 |
|
|
@@ -47,84 +46,9 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
|
| 47 |
std::vector<float> pcmf32; // mono-channel F32 PCM
|
| 48 |
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
std::vector<uint8_t> wav_data; // used for pipe input from stdin
|
| 54 |
-
|
| 55 |
-
if (fname_inp == "-") {
|
| 56 |
-
{
|
| 57 |
-
uint8_t buf[1024];
|
| 58 |
-
while (true) {
|
| 59 |
-
const size_t n = fread(buf, 1, sizeof(buf), stdin);
|
| 60 |
-
if (n == 0) {
|
| 61 |
-
break;
|
| 62 |
-
}
|
| 63 |
-
wav_data.insert(wav_data.end(), buf, buf + n);
|
| 64 |
-
}
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
| 68 |
-
fprintf(stderr, "error: failed to open WAV file from stdin\n");
|
| 69 |
-
return self;
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
|
| 73 |
-
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
|
| 74 |
-
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
| 75 |
-
return self;
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
if (wav.channels != 1 && wav.channels != 2) {
|
| 79 |
-
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
|
| 80 |
-
return self;
|
| 81 |
-
}
|
| 82 |
-
|
| 83 |
-
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
|
| 84 |
-
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
|
| 85 |
-
return self;
|
| 86 |
-
}
|
| 87 |
-
|
| 88 |
-
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
|
| 89 |
-
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
|
| 90 |
-
return self;
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
if (wav.bitsPerSample != 16) {
|
| 94 |
-
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
|
| 95 |
-
return self;
|
| 96 |
-
}
|
| 97 |
-
|
| 98 |
-
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
|
| 99 |
-
|
| 100 |
-
std::vector<int16_t> pcm16;
|
| 101 |
-
pcm16.resize(n*wav.channels);
|
| 102 |
-
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
|
| 103 |
-
drwav_uninit(&wav);
|
| 104 |
-
|
| 105 |
-
// convert to mono, float
|
| 106 |
-
pcmf32.resize(n);
|
| 107 |
-
if (wav.channels == 1) {
|
| 108 |
-
for (uint64_t i = 0; i < n; i++) {
|
| 109 |
-
pcmf32[i] = float(pcm16[i])/32768.0f;
|
| 110 |
-
}
|
| 111 |
-
} else {
|
| 112 |
-
for (uint64_t i = 0; i < n; i++) {
|
| 113 |
-
pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
|
| 114 |
-
}
|
| 115 |
-
}
|
| 116 |
-
|
| 117 |
-
if (rwp->diarize) {
|
| 118 |
-
// convert to stereo, float
|
| 119 |
-
pcmf32s.resize(2);
|
| 120 |
-
|
| 121 |
-
pcmf32s[0].resize(n);
|
| 122 |
-
pcmf32s[1].resize(n);
|
| 123 |
-
for (uint64_t i = 0; i < n; i++) {
|
| 124 |
-
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
|
| 125 |
-
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
|
| 126 |
-
}
|
| 127 |
-
}
|
| 128 |
}
|
| 129 |
{
|
| 130 |
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
|
|
|
| 1 |
#include <ruby.h>
|
| 2 |
#include "ruby_whisper.h"
|
| 3 |
+
#include "common-whisper.h"
|
|
|
|
| 4 |
#include <string>
|
| 5 |
#include <vector>
|
| 6 |
|
|
|
|
| 46 |
std::vector<float> pcmf32; // mono-channel F32 PCM
|
| 47 |
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
| 48 |
|
| 49 |
+
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
|
| 50 |
+
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
| 51 |
+
return self;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
}
|
| 53 |
{
|
| 54 |
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|