KitaitiMakoto committed on
Commit
b94e7d3
·
unverified ·
1 Parent(s): 2e6437e

ruby : follow audio library change (#2851)

Browse files

* Enable CPU

* Follow audio lib change

.github/workflows/bindings-ruby.yml CHANGED
@@ -19,7 +19,12 @@ on:
19
  - ggml/**/*.m
20
  - ggml/**/*.metal
21
  - scripts/get-flags.mk
22
- - examples/dr_wav.h
 
 
 
 
 
23
  pull_request:
24
  paths:
25
  - bindings/ruby/**
@@ -39,7 +44,12 @@ on:
39
  - ggml/**/*.m
40
  - ggml/**/*.metal
41
  - scripts/get-flags.mk
42
- - examples/dr_wav.h
 
 
 
 
 
43
 
44
  jobs:
45
  ubuntu-22:
 
19
  - ggml/**/*.m
20
  - ggml/**/*.metal
21
  - scripts/get-flags.mk
22
+ - examples/common.h
23
+ - examples/common.cpp
24
+ - examples/common-whisper.h
25
+ - examples/common-whisper.cpp
26
+ - examples/stb_vorbis.c
27
+ - examples/miniaudio.h
28
  pull_request:
29
  paths:
30
  - bindings/ruby/**
 
44
  - ggml/**/*.m
45
  - ggml/**/*.metal
46
  - scripts/get-flags.mk
47
+ - examples/common.h
48
+ - examples/common.cpp
49
+ - examples/common-whisper.h
50
+ - examples/common-whisper.cpp
51
+ - examples/stb_vorbis.c
52
+ - examples/miniaudio.h
53
 
54
  jobs:
55
  ubuntu-22:
bindings/ruby/ext/extconf.rb CHANGED
@@ -35,7 +35,7 @@ if $GGML_METAL
35
  $GGML_METAL_EMBED_LIBRARY = true
36
  end
37
 
38
- $MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
39
  $MK_CFLAGS = '-std=c11 -fPIC'
40
  $MK_CXXFLAGS = '-std=c++17 -fPIC'
41
  $MK_NVCCFLAGS = '-std=c++17'
@@ -171,7 +171,9 @@ $OBJ_GGML <<
171
  'ggml/src/ggml-cpu/ggml-cpu-traits.o'
172
 
173
  $OBJ_WHISPER <<
174
- 'src/whisper.o'
 
 
175
 
176
  $objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
177
  $objs <<
 
35
  $GGML_METAL_EMBED_LIBRARY = true
36
  end
37
 
38
+ $MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
39
  $MK_CFLAGS = '-std=c11 -fPIC'
40
  $MK_CXXFLAGS = '-std=c++17 -fPIC'
41
  $MK_NVCCFLAGS = '-std=c++17'
 
171
  'ggml/src/ggml-cpu/ggml-cpu-traits.o'
172
 
173
  $OBJ_WHISPER <<
174
+ 'src/whisper.o' <<
175
+ 'examples/common.o' <<
176
+ 'examples/common-whisper.o'
177
 
178
  $objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
179
  $objs <<
bindings/ruby/ext/ruby_whisper_transcribe.cpp CHANGED
@@ -1,7 +1,6 @@
1
  #include <ruby.h>
2
  #include "ruby_whisper.h"
3
- #define DR_WAV_IMPLEMENTATION
4
- #include "dr_wav.h"
5
  #include <string>
6
  #include <vector>
7
 
@@ -47,84 +46,9 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
47
  std::vector<float> pcmf32; // mono-channel F32 PCM
48
  std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
49
 
50
- // WAV input - this is directly from main.cpp example
51
- {
52
- drwav wav;
53
- std::vector<uint8_t> wav_data; // used for pipe input from stdin
54
-
55
- if (fname_inp == "-") {
56
- {
57
- uint8_t buf[1024];
58
- while (true) {
59
- const size_t n = fread(buf, 1, sizeof(buf), stdin);
60
- if (n == 0) {
61
- break;
62
- }
63
- wav_data.insert(wav_data.end(), buf, buf + n);
64
- }
65
- }
66
-
67
- if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
68
- fprintf(stderr, "error: failed to open WAV file from stdin\n");
69
- return self;
70
- }
71
-
72
- fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
73
- } else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
74
- fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
75
- return self;
76
- }
77
-
78
- if (wav.channels != 1 && wav.channels != 2) {
79
- fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
80
- return self;
81
- }
82
-
83
- if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
84
- fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
85
- return self;
86
- }
87
-
88
- if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
89
- fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
90
- return self;
91
- }
92
-
93
- if (wav.bitsPerSample != 16) {
94
- fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
95
- return self;
96
- }
97
-
98
- const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
99
-
100
- std::vector<int16_t> pcm16;
101
- pcm16.resize(n*wav.channels);
102
- drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
103
- drwav_uninit(&wav);
104
-
105
- // convert to mono, float
106
- pcmf32.resize(n);
107
- if (wav.channels == 1) {
108
- for (uint64_t i = 0; i < n; i++) {
109
- pcmf32[i] = float(pcm16[i])/32768.0f;
110
- }
111
- } else {
112
- for (uint64_t i = 0; i < n; i++) {
113
- pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
114
- }
115
- }
116
-
117
- if (rwp->diarize) {
118
- // convert to stereo, float
119
- pcmf32s.resize(2);
120
-
121
- pcmf32s[0].resize(n);
122
- pcmf32s[1].resize(n);
123
- for (uint64_t i = 0; i < n; i++) {
124
- pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
125
- pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
126
- }
127
- }
128
  }
129
  {
130
  static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
 
1
  #include <ruby.h>
2
  #include "ruby_whisper.h"
3
+ #include "common-whisper.h"
 
4
  #include <string>
5
  #include <vector>
6
 
 
46
  std::vector<float> pcmf32; // mono-channel F32 PCM
47
  std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
48
 
49
+ if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
50
+ fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
51
+ return self;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
  {
54
  static bool is_aborted = false; // NOTE: this should be atomic to avoid data race