jhenhong ggerganov commited on
Commit
290abed
·
unverified ·
1 Parent(s): a570c92

whisper : add context param to disable gpu (#1293)

Browse files

* whisper : check state->ctx_metal not null

* whisper : add whisper_context_params { use_gpu }

* whisper : new API with params & deprecate old API

* examples : use no-gpu param && whisper_init_from_file_with_params

* whisper.objc : enable metal & disable on simulator

* whisper.swiftui, metal : enable metal & support load default.metallib

* whisper.android : use new API

* bindings : use new API

* addon.node : fix build & test

* bindings : update java binding

* bindings : add missing whisper_context_default_params_by_ref WHISPER_API for java

* metal : use SWIFTPM_MODULE_BUNDLE for GGML_SWIFT and reuse library load

* metal : move bundle var into block

* metal : use SWIFT_PACKAGE instead of GGML_SWIFT

* style : minor updates

---------

Co-authored-by: Georgi Gerganov <[email protected]>

bindings/go/whisper.go CHANGED
@@ -103,7 +103,7 @@ var (
103
  func Whisper_init(path string) *Context {
104
  cPath := C.CString(path)
105
  defer C.free(unsafe.Pointer(cPath))
106
- if ctx := C.whisper_init_from_file(cPath); ctx != nil {
107
  return (*Context)(ctx)
108
  } else {
109
  return nil
 
103
  func Whisper_init(path string) *Context {
104
  cPath := C.CString(path)
105
  defer C.free(unsafe.Pointer(cPath))
106
+ if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
107
  return (*Context)(ctx)
108
  } else {
109
  return nil
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java CHANGED
@@ -4,6 +4,7 @@ import com.sun.jna.Structure;
4
  import com.sun.jna.ptr.PointerByReference;
5
  import io.github.ggerganov.whispercpp.ggml.GgmlType;
6
  import io.github.ggerganov.whispercpp.WhisperModel;
 
7
 
8
  import java.util.List;
9
 
@@ -23,8 +24,9 @@ public class WhisperContext extends Structure {
23
  public PointerByReference vocab;
24
  public PointerByReference state;
25
 
26
- /** populated by whisper_init_from_file() */
27
  String path_model;
 
28
 
29
  // public static class ByReference extends WhisperContext implements Structure.ByReference {
30
  // }
 
4
  import com.sun.jna.ptr.PointerByReference;
5
  import io.github.ggerganov.whispercpp.ggml.GgmlType;
6
  import io.github.ggerganov.whispercpp.WhisperModel;
7
+ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
8
 
9
  import java.util.List;
10
 
 
24
  public PointerByReference vocab;
25
  public PointerByReference state;
26
 
27
+ /** populated by whisper_init_from_file_with_params() */
28
  String path_model;
29
+ WhisperContextParams params;
30
 
31
  // public static class ByReference extends WhisperContext implements Structure.ByReference {
32
  // }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java CHANGED
@@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp;
2
 
3
  import com.sun.jna.Native;
4
  import com.sun.jna.Pointer;
 
5
  import io.github.ggerganov.whispercpp.params.WhisperFullParams;
6
  import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
7
 
@@ -15,8 +16,9 @@ import java.io.IOException;
15
  public class WhisperCpp implements AutoCloseable {
16
  private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
17
  private Pointer ctx = null;
18
- private Pointer greedyPointer = null;
19
- private Pointer beamPointer = null;
 
20
 
21
  public File modelDir() {
22
  String modelDirPath = System.getenv("XDG_CACHE_HOME");
@@ -31,6 +33,18 @@ public class WhisperCpp implements AutoCloseable {
31
  * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
32
  */
33
  public void initContext(String modelPath) throws FileNotFoundException {
 
 
 
 
 
 
 
 
 
 
 
 
34
  if (ctx != null) {
35
  lib.whisper_free(ctx);
36
  }
@@ -43,13 +57,26 @@ public class WhisperCpp implements AutoCloseable {
43
  modelPath = new File(modelDir(), modelPath).getAbsolutePath();
44
  }
45
 
46
- ctx = lib.whisper_init_from_file(modelPath);
47
 
48
  if (ctx == null) {
49
  throw new FileNotFoundException(modelPath);
50
  }
51
  }
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  /**
54
  * Provides default params which can be used with `whisper_full()` etc.
55
  * Because this function allocates memory for the params, the caller must call either:
@@ -63,15 +90,15 @@ public class WhisperCpp implements AutoCloseable {
63
 
64
  // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
65
  if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
66
- if (greedyPointer == null) {
67
- greedyPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
68
  }
69
- pointer = greedyPointer;
70
  } else {
71
- if (beamPointer == null) {
72
- beamPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
73
  }
74
- pointer = beamPointer;
75
  }
76
 
77
  WhisperFullParams params = new WhisperFullParams(pointer);
@@ -93,13 +120,17 @@ public class WhisperCpp implements AutoCloseable {
93
  }
94
 
95
  private void freeParams() {
96
- if (greedyPointer != null) {
97
- Native.free(Pointer.nativeValue(greedyPointer));
98
- greedyPointer = null;
 
 
 
 
99
  }
100
- if (beamPointer != null) {
101
- Native.free(Pointer.nativeValue(beamPointer));
102
- beamPointer = null;
103
  }
104
  }
105
 
 
2
 
3
  import com.sun.jna.Native;
4
  import com.sun.jna.Pointer;
5
+ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
6
  import io.github.ggerganov.whispercpp.params.WhisperFullParams;
7
  import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
8
 
 
16
  public class WhisperCpp implements AutoCloseable {
17
  private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
18
  private Pointer ctx = null;
19
+ private Pointer paramsPointer = null;
20
+ private Pointer greedyParamsPointer = null;
21
+ private Pointer beamParamsPointer = null;
22
 
23
  public File modelDir() {
24
  String modelDirPath = System.getenv("XDG_CACHE_HOME");
 
33
  * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
34
  */
35
  public void initContext(String modelPath) throws FileNotFoundException {
36
+ initContextImpl(modelPath, getContextDefaultParams());
37
+ }
38
+
39
+ /**
40
+ * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
41
+ * @param params - params to use when initialising the context
42
+ */
43
+ public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
44
+ initContextImpl(modelPath, params);
45
+ }
46
+
47
+ private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
48
  if (ctx != null) {
49
  lib.whisper_free(ctx);
50
  }
 
57
  modelPath = new File(modelDir(), modelPath).getAbsolutePath();
58
  }
59
 
60
+ ctx = lib.whisper_init_from_file_with_params(modelPath, params);
61
 
62
  if (ctx == null) {
63
  throw new FileNotFoundException(modelPath);
64
  }
65
  }
66
 
67
+ /**
68
+ * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
69
+ * Because this function allocates memory for the params, the caller must call either:
70
+ * - call `whisper_free_context_params()`
71
+ * - `Native.free(Pointer.nativeValue(pointer));`
72
+ */
73
+ public WhisperContextParams getContextDefaultParams() {
74
+ paramsPointer = lib.whisper_context_default_params_by_ref();
75
+ WhisperContextParams params = new WhisperContextParams(paramsPointer);
76
+ params.read();
77
+ return params;
78
+ }
79
+
80
  /**
81
  * Provides default params which can be used with `whisper_full()` etc.
82
  * Because this function allocates memory for the params, the caller must call either:
 
90
 
91
  // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
92
  if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
93
+ if (greedyParamsPointer == null) {
94
+ greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
95
  }
96
+ pointer = greedyParamsPointer;
97
  } else {
98
+ if (beamParamsPointer == null) {
99
+ beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
100
  }
101
+ pointer = beamParamsPointer;
102
  }
103
 
104
  WhisperFullParams params = new WhisperFullParams(pointer);
 
120
  }
121
 
122
  private void freeParams() {
123
+ if (paramsPointer != null) {
124
+ Native.free(Pointer.nativeValue(paramsPointer));
125
+ paramsPointer = null;
126
+ }
127
+ if (greedyParamsPointer != null) {
128
+ Native.free(Pointer.nativeValue(greedyParamsPointer));
129
+ greedyParamsPointer = null;
130
  }
131
+ if (beamParamsPointer != null) {
132
+ Native.free(Pointer.nativeValue(beamParamsPointer));
133
+ beamParamsPointer = null;
134
  }
135
  }
136
 
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java CHANGED
@@ -5,6 +5,7 @@ import com.sun.jna.Native;
5
  import com.sun.jna.Pointer;
6
  import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
7
  import io.github.ggerganov.whispercpp.model.WhisperTokenData;
 
8
  import io.github.ggerganov.whispercpp.params.WhisperFullParams;
9
 
10
  public interface WhisperCppJnaLibrary extends Library {
@@ -13,12 +14,31 @@ public interface WhisperCppJnaLibrary extends Library {
13
  String whisper_print_system_info();
14
 
15
  /**
16
- * Allocate (almost) all memory needed for the model by loading from a file.
17
  *
18
  * @param path_model Path to the model file
19
  * @return Whisper context on success, null on failure
20
  */
21
  Pointer whisper_init_from_file(String path_model);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  /**
24
  * Allocate (almost) all memory needed for the model by loading from a buffer.
 
5
  import com.sun.jna.Pointer;
6
  import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
7
  import io.github.ggerganov.whispercpp.model.WhisperTokenData;
8
+ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
9
  import io.github.ggerganov.whispercpp.params.WhisperFullParams;
10
 
11
  public interface WhisperCppJnaLibrary extends Library {
 
14
  String whisper_print_system_info();
15
 
16
  /**
17
+ * DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
18
  *
19
  * @param path_model Path to the model file
20
  * @return Whisper context on success, null on failure
21
  */
22
  Pointer whisper_init_from_file(String path_model);
23
+
24
+ /**
25
+ * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
26
+ * Because this function allocates memory for the params, the caller must call either:
27
+ * - call `whisper_free_context_params()`
28
+ * - `Native.free(Pointer.nativeValue(pointer));`
29
+ */
30
+ Pointer whisper_context_default_params_by_ref();
31
+
32
+ void whisper_free_context_params(Pointer params);
33
+
34
+ /**
35
+ * Allocate (almost) all memory needed for the model by loading from a file.
36
+ *
37
+ * @param path_model Path to the model file
38
+ * @param params Pointer to whisper_context_params
39
+ * @return Whisper context on success, null on failure
40
+ */
41
+ Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
42
 
43
  /**
44
  * Allocate (almost) all memory needed for the model by loading from a buffer.
bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.params;
2
+
3
+ import com.sun.jna.*;
4
+
5
+ import java.util.Arrays;
6
+ import java.util.List;
7
+
8
+ /**
9
+ * Parameters for the whisper_init_from_file_with_params() function.
10
+ * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
11
+ * whisper_context_default_params()
12
+ */
13
+ public class WhisperContextParams extends Structure {
14
+
15
+ public WhisperContextParams(Pointer p) {
16
+ super(p);
17
+ }
18
+
19
+ /** Use GPU for inference Number (default = true) */
20
+ public CBool use_gpu;
21
+
22
+ /** Use GPU for inference Number (default = true) */
23
+ public void useGpu(boolean enable) {
24
+ use_gpu = enable ? CBool.TRUE : CBool.FALSE;
25
+ }
26
+
27
+ @Override
28
+ protected List<String> getFieldOrder() {
29
+ return Arrays.asList("use_gpu");
30
+ }
31
+ }
bindings/javascript/emscripten.cpp CHANGED
@@ -20,7 +20,7 @@ struct whisper_context * g_context;
20
  EMSCRIPTEN_BINDINGS(whisper) {
21
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
22
  if (g_context == nullptr) {
23
- g_context = whisper_init_from_file(path_model.c_str());
24
  if (g_context != nullptr) {
25
  return true;
26
  } else {
 
20
  EMSCRIPTEN_BINDINGS(whisper) {
21
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
22
  if (g_context == nullptr) {
23
+ g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
24
  if (g_context != nullptr) {
25
  return true;
26
  } else {
bindings/ruby/ext/ruby_whisper.cpp CHANGED
@@ -87,7 +87,7 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
87
  if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
88
  rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
89
  }
90
- rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
91
  if (rw->context == nullptr) {
92
  rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
93
  }
 
87
  if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
88
  rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
89
  }
90
+ rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
91
  if (rw->context == nullptr) {
92
  rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
93
  }
examples/addon.node/__test__/whisper.spec.js CHANGED
@@ -11,6 +11,7 @@ const whisperParamsMock = {
11
  language: "en",
12
  model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
13
  fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
 
14
  };
15
 
16
  describe("Run whisper.node", () => {
 
11
  language: "en",
12
  model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
13
  fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
14
+ use_gpu: true,
15
  };
16
 
17
  describe("Run whisper.node", () => {
examples/addon.node/addon.cpp CHANGED
@@ -36,6 +36,7 @@ struct whisper_params {
36
  bool print_colors = false;
37
  bool print_progress = false;
38
  bool no_timestamps = false;
 
39
 
40
  std::string language = "en";
41
  std::string prompt;
@@ -153,7 +154,9 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
153
 
154
  // whisper init
155
 
156
- struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
 
 
157
 
158
  if (ctx == nullptr) {
159
  fprintf(stderr, "error: failed to initialize whisper context\n");
@@ -315,10 +318,12 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
315
  std::string language = whisper_params.Get("language").As<Napi::String>();
316
  std::string model = whisper_params.Get("model").As<Napi::String>();
317
  std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
 
318
 
319
  params.language = language;
320
  params.model = model;
321
  params.fname_inp.emplace_back(input);
 
322
 
323
  Napi::Function callback = info[1].As<Napi::Function>();
324
  Worker* worker = new Worker(callback, params);
 
36
  bool print_colors = false;
37
  bool print_progress = false;
38
  bool no_timestamps = false;
39
+ bool use_gpu = true;
40
 
41
  std::string language = "en";
42
  std::string prompt;
 
154
 
155
  // whisper init
156
 
157
+ struct whisper_context_params cparams;
158
+ cparams.use_gpu = params.use_gpu;
159
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
160
 
161
  if (ctx == nullptr) {
162
  fprintf(stderr, "error: failed to initialize whisper context\n");
 
318
  std::string language = whisper_params.Get("language").As<Napi::String>();
319
  std::string model = whisper_params.Get("model").As<Napi::String>();
320
  std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
321
+ bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
322
 
323
  params.language = language;
324
  params.model = model;
325
  params.fname_inp.emplace_back(input);
326
+ params.use_gpu = use_gpu;
327
 
328
  Napi::Function callback = info[1].As<Napi::Function>();
329
  Worker* worker = new Worker(callback, params);
examples/addon.node/index.js CHANGED
@@ -11,6 +11,7 @@ const whisperParams = {
11
  language: "en",
12
  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
13
  fname_inp: "../../samples/jfk.wav",
 
14
  };
15
 
16
  const arguments = process.argv.slice(2);
 
11
  language: "en",
12
  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
13
  fname_inp: "../../samples/jfk.wav",
14
+ use_gpu: true,
15
  };
16
 
17
  const arguments = process.argv.slice(2);
examples/bench.wasm/emscripten.cpp CHANGED
@@ -57,7 +57,7 @@ EMSCRIPTEN_BINDINGS(bench) {
57
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
58
  for (size_t i = 0; i < g_contexts.size(); ++i) {
59
  if (g_contexts[i] == nullptr) {
60
- g_contexts[i] = whisper_init_from_file(path_model.c_str());
61
  if (g_contexts[i] != nullptr) {
62
  if (g_worker.joinable()) {
63
  g_worker.join();
 
57
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
58
  for (size_t i = 0; i < g_contexts.size(); ++i) {
59
  if (g_contexts[i] == nullptr) {
60
+ g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
61
  if (g_contexts[i] != nullptr) {
62
  if (g_worker.joinable()) {
63
  g_worker.join();
examples/bench/bench.cpp CHANGED
@@ -11,6 +11,8 @@ struct whisper_params {
11
  int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
12
 
13
  std::string model = "models/ggml-base.en.bin";
 
 
14
  };
15
 
16
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -23,9 +25,10 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
23
  whisper_print_usage(argc, argv, params);
24
  exit(0);
25
  }
26
- else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
27
- else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
28
- else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
 
29
  else {
30
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
31
  whisper_print_usage(argc, argv, params);
@@ -45,6 +48,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
45
  fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
46
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
47
  fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
 
48
  fprintf(stderr, " %-7s 0 - whisper\n", "");
49
  fprintf(stderr, " %-7s 1 - memcpy\n", "");
50
  fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
@@ -54,7 +58,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
54
  int whisper_bench_full(const whisper_params & params) {
55
  // whisper init
56
 
57
- struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
 
 
 
58
 
59
  {
60
  fprintf(stderr, "\n");
 
11
  int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
12
 
13
  std::string model = "models/ggml-base.en.bin";
14
+
15
+ bool use_gpu = true;
16
  };
17
 
18
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
 
25
  whisper_print_usage(argc, argv, params);
26
  exit(0);
27
  }
28
+ else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
29
+ else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
30
+ else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
31
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
32
  else {
33
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
34
  whisper_print_usage(argc, argv, params);
 
48
  fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
49
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
50
  fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
51
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
52
  fprintf(stderr, " %-7s 0 - whisper\n", "");
53
  fprintf(stderr, " %-7s 1 - memcpy\n", "");
54
  fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
 
58
  int whisper_bench_full(const whisper_params & params) {
59
  // whisper init
60
 
61
+ struct whisper_context_params cparams;
62
+ cparams.use_gpu = params.use_gpu;
63
+
64
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
65
 
66
  {
67
  fprintf(stderr, "\n");
examples/command.wasm/emscripten.cpp CHANGED
@@ -243,7 +243,7 @@ EMSCRIPTEN_BINDINGS(command) {
243
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
244
  for (size_t i = 0; i < g_contexts.size(); ++i) {
245
  if (g_contexts[i] == nullptr) {
246
- g_contexts[i] = whisper_init_from_file(path_model.c_str());
247
  if (g_contexts[i] != nullptr) {
248
  g_running = true;
249
  if (g_worker.joinable()) {
 
243
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
244
  for (size_t i = 0; i < g_contexts.size(); ++i) {
245
  if (g_contexts[i] == nullptr) {
246
+ g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
247
  if (g_contexts[i] != nullptr) {
248
  g_running = true;
249
  if (g_worker.joinable()) {
examples/command/command.cpp CHANGED
@@ -38,6 +38,7 @@ struct whisper_params {
38
  bool print_special = false;
39
  bool print_energy = false;
40
  bool no_timestamps = true;
 
41
 
42
  std::string language = "en";
43
  std::string model = "models/ggml-base.en.bin";
@@ -68,6 +69,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
68
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
69
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
70
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
 
71
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
72
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
73
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
@@ -101,6 +103,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
101
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
102
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
103
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
 
104
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
105
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
106
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
@@ -610,7 +613,10 @@ int main(int argc, char ** argv) {
610
 
611
  // whisper init
612
 
613
- struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
 
 
 
614
 
615
  // print some info about the processing
616
  {
 
38
  bool print_special = false;
39
  bool print_energy = false;
40
  bool no_timestamps = true;
41
+ bool use_gpu = true;
42
 
43
  std::string language = "en";
44
  std::string model = "models/ggml-base.en.bin";
 
69
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
70
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
71
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
72
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
73
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
74
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
75
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
 
103
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
104
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
105
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
106
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
107
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
108
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
109
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
 
613
 
614
  // whisper init
615
 
616
+ struct whisper_context_params cparams;
617
+ cparams.use_gpu = params.use_gpu;
618
+
619
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
620
 
621
  // print some info about the processing
622
  {
examples/lsp/lsp.cpp CHANGED
@@ -30,6 +30,7 @@ struct whisper_params {
30
  bool translate = false;
31
  bool print_special = false;
32
  bool print_energy = false;
 
33
 
34
  std::string language = "en";
35
  std::string model = "models/ggml-base.en.bin";
@@ -72,6 +73,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
72
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
73
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
74
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
 
75
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
76
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
77
  else {
@@ -102,6 +104,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
102
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
103
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
104
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
 
105
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
106
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
107
  fprintf(stderr, "\n");
@@ -432,7 +435,9 @@ int main(int argc, char ** argv) {
432
  }
433
 
434
  // whisper init
435
- struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
 
 
436
  // init audio
437
 
438
  audio_async audio(30*1000);
 
30
  bool translate = false;
31
  bool print_special = false;
32
  bool print_energy = false;
33
+ bool use_gpu = true;
34
 
35
  std::string language = "en";
36
  std::string model = "models/ggml-base.en.bin";
 
73
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
74
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
75
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
76
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
77
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
78
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
79
  else {
 
104
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
105
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
106
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
107
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
108
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
109
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
110
  fprintf(stderr, "\n");
 
435
  }
436
 
437
  // whisper init
438
+ struct whisper_context_params cparams;
439
+ cparams.use_gpu = params.use_gpu;
440
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
441
  // init audio
442
 
443
  audio_async audio(30*1000);
examples/main/main.cpp CHANGED
@@ -90,6 +90,7 @@ struct whisper_params {
90
  bool print_progress = false;
91
  bool no_timestamps = false;
92
  bool log_score = false;
 
93
 
94
  std::string language = "en";
95
  std::string prompt;
@@ -165,6 +166,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
165
  else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
166
  else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
167
  else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
 
168
  else {
169
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
170
  whisper_print_usage(argc, argv, params);
@@ -221,6 +223,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
221
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
222
  fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
223
  fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
 
224
  fprintf(stderr, "\n");
225
  }
226
 
@@ -877,7 +880,10 @@ int main(int argc, char ** argv) {
877
 
878
  // whisper init
879
 
880
- struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
 
 
 
881
 
882
  if (ctx == nullptr) {
883
  fprintf(stderr, "error: failed to initialize whisper context\n");
 
90
  bool print_progress = false;
91
  bool no_timestamps = false;
92
  bool log_score = false;
93
+ bool use_gpu = true;
94
 
95
  std::string language = "en";
96
  std::string prompt;
 
166
  else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
167
  else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
168
  else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
169
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
170
  else {
171
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
172
  whisper_print_usage(argc, argv, params);
 
223
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
224
  fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
225
  fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
226
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
227
  fprintf(stderr, "\n");
228
  }
229
 
 
880
 
881
  // whisper init
882
 
883
+ struct whisper_context_params cparams;
884
+ cparams.use_gpu = params.use_gpu;
885
+
886
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
887
 
888
  if (ctx == nullptr) {
889
  fprintf(stderr, "error: failed to initialize whisper context\n");
examples/stream.wasm/emscripten.cpp CHANGED
@@ -132,7 +132,7 @@ EMSCRIPTEN_BINDINGS(stream) {
132
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
133
  for (size_t i = 0; i < g_contexts.size(); ++i) {
134
  if (g_contexts[i] == nullptr) {
135
- g_contexts[i] = whisper_init_from_file(path_model.c_str());
136
  if (g_contexts[i] != nullptr) {
137
  g_running = true;
138
  if (g_worker.joinable()) {
 
132
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
133
  for (size_t i = 0; i < g_contexts.size(); ++i) {
134
  if (g_contexts[i] == nullptr) {
135
+ g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
136
  if (g_contexts[i] != nullptr) {
137
  g_running = true;
138
  if (g_worker.joinable()) {
examples/stream/stream.cpp CHANGED
@@ -48,11 +48,12 @@ struct whisper_params {
48
  bool no_context = true;
49
  bool no_timestamps = false;
50
  bool tinydiarize = false;
 
 
51
 
52
  std::string language = "en";
53
  std::string model = "models/ggml-base.en.bin";
54
  std::string fname_out;
55
- bool save_audio = false; // save audio to wav file
56
  };
57
 
58
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -65,25 +66,26 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
65
  whisper_print_usage(argc, argv, params);
66
  exit(0);
67
  }
68
- else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
69
- else if ( arg == "--step") { params.step_ms = std::stoi(argv[++i]); }
70
- else if ( arg == "--length") { params.length_ms = std::stoi(argv[++i]); }
71
- else if ( arg == "--keep") { params.keep_ms = std::stoi(argv[++i]); }
72
- else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
73
- else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
74
- else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
75
- else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
76
- else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
77
- else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
78
- else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
79
- else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
80
- else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
81
- else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
82
- else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
83
- else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
84
- else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
85
- else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
86
- else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
 
87
 
88
  else {
89
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -118,8 +120,9 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
118
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
119
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
120
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
121
- fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
122
  fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
 
123
  fprintf(stderr, "\n");
124
  }
125
 
@@ -163,7 +166,10 @@ int main(int argc, char ** argv) {
163
  exit(0);
164
  }
165
 
166
- struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
 
 
 
167
 
168
  std::vector<float> pcmf32 (n_samples_30s, 0.0f);
169
  std::vector<float> pcmf32_old;
@@ -424,4 +430,4 @@ int main(int argc, char ** argv) {
424
  whisper_free(ctx);
425
 
426
  return 0;
427
- }
 
48
  bool no_context = true;
49
  bool no_timestamps = false;
50
  bool tinydiarize = false;
51
+ bool save_audio = false; // save audio to wav file
52
+ bool use_gpu = true;
53
 
54
  std::string language = "en";
55
  std::string model = "models/ggml-base.en.bin";
56
  std::string fname_out;
 
57
  };
58
 
59
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
 
66
  whisper_print_usage(argc, argv, params);
67
  exit(0);
68
  }
69
+ else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
70
+ else if ( arg == "--step") { params.step_ms = std::stoi(argv[++i]); }
71
+ else if ( arg == "--length") { params.length_ms = std::stoi(argv[++i]); }
72
+ else if ( arg == "--keep") { params.keep_ms = std::stoi(argv[++i]); }
73
+ else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
74
+ else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
75
+ else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
76
+ else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
77
+ else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
78
+ else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
79
+ else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
80
+ else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
81
+ else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
82
+ else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
83
+ else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
84
+ else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
85
+ else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
86
+ else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
87
+ else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
88
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
89
 
90
  else {
91
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
 
120
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
121
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
122
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
123
+ fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
124
  fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
125
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
126
  fprintf(stderr, "\n");
127
  }
128
 
 
166
  exit(0);
167
  }
168
 
169
+ struct whisper_context_params cparams;
170
+ cparams.use_gpu = params.use_gpu;
171
+
172
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
173
 
174
  std::vector<float> pcmf32 (n_samples_30s, 0.0f);
175
  std::vector<float> pcmf32_old;
 
430
  whisper_free(ctx);
431
 
432
  return 0;
433
+ }
examples/talk-llama/talk-llama.cpp CHANGED
@@ -63,6 +63,7 @@ struct whisper_params {
63
  bool print_energy = false;
64
  bool no_timestamps = true;
65
  bool verbose_prompt = false;
 
66
 
67
  std::string person = "Georgi";
68
  std::string language = "en";
@@ -84,25 +85,26 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
84
  whisper_print_usage(argc, argv, params);
85
  exit(0);
86
  }
87
- else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
88
- else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); }
89
- else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
90
- else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
91
- else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
92
- else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
93
- else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
94
- else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
95
- else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
96
- else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
97
- else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
98
- else if (arg == "--verbose-prompt") { params.verbose_prompt = true; }
99
- else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
100
- else if (arg == "--session") { params.path_session = argv[++i];}
101
- else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
102
- else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
103
- else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
104
- else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
105
- else if (arg == "--prompt-file") {
 
106
  std::ifstream file(argv[++i]);
107
  std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
108
  if (params.prompt.back() == '\n') {
@@ -110,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
110
  }
111
  }
112
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
 
113
  else {
114
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
115
  whisper_print_usage(argc, argv, params);
@@ -125,27 +128,28 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
125
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
126
  fprintf(stderr, "\n");
127
  fprintf(stderr, "options:\n");
128
- fprintf(stderr, " -h, --help [default] show this help message and exit\n");
129
- fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
130
- fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms);
131
- fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
132
- fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
133
- fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
134
- fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
135
- fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
136
- fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
137
- fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
138
- fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
139
- fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
140
- fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
141
- fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
142
- fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
143
- fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
144
- fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
145
- fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
146
- fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
147
- fprintf(stderr, " --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
148
- fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
 
149
  fprintf(stderr, "\n");
150
  }
151
 
@@ -252,7 +256,10 @@ int main(int argc, char ** argv) {
252
 
253
  // whisper init
254
 
255
- struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
 
 
 
256
 
257
  // llama init
258
 
@@ -269,6 +276,9 @@ int main(int argc, char ** argv) {
269
  lcparams.seed = 1;
270
  lcparams.f16_kv = true;
271
  lcparams.n_threads = params.n_threads;
 
 
 
272
 
273
  struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);
274
 
 
63
  bool print_energy = false;
64
  bool no_timestamps = true;
65
  bool verbose_prompt = false;
66
+ bool use_gpu = true;
67
 
68
  std::string person = "Georgi";
69
  std::string language = "en";
 
85
  whisper_print_usage(argc, argv, params);
86
  exit(0);
87
  }
88
+ else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
89
+ else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); }
90
+ else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
91
+ else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
92
+ else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
93
+ else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
94
+ else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
95
+ else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
96
+ else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
97
+ else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
98
+ else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
99
+ else if (arg == "-vp" || arg == "--verbose-prompt") { params.verbose_prompt = true; }
100
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
101
+ else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
102
+ else if (arg == "--session") { params.path_session = argv[++i];}
103
+ else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
104
+ else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
105
+ else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
106
+ else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
107
+ else if (arg == "--prompt-file") {
108
  std::ifstream file(argv[++i]);
109
  std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
110
  if (params.prompt.back() == '\n') {
 
112
  }
113
  }
114
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
115
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
116
  else {
117
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
118
  whisper_print_usage(argc, argv, params);
 
128
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
129
  fprintf(stderr, "\n");
130
  fprintf(stderr, "options:\n");
131
+ fprintf(stderr, " -h, --help [default] show this help message and exit\n");
132
+ fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
133
+ fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms);
134
+ fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
135
+ fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
136
+ fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
137
+ fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
138
+ fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
139
+ fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
140
+ fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
141
+ fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
142
+ fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
143
+ fprintf(stderr, " -vp, --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
144
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
145
+ fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
146
+ fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
147
+ fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
148
+ fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
149
+ fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
150
+ fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
151
+ fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
152
+ fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
153
  fprintf(stderr, "\n");
154
  }
155
 
 
256
 
257
  // whisper init
258
 
259
+ struct whisper_context_params cparams;
260
+ cparams.use_gpu = params.use_gpu;
261
+
262
+ struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
263
 
264
  // llama init
265
 
 
276
  lcparams.seed = 1;
277
  lcparams.f16_kv = true;
278
  lcparams.n_threads = params.n_threads;
279
+ if (!params.use_gpu) {
280
+ lcparams.n_gpu_layers = 0;
281
+ }
282
 
283
  struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);
284
 
examples/talk.wasm/emscripten.cpp CHANGED
@@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) {
271
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
272
  for (size_t i = 0; i < g_contexts.size(); ++i) {
273
  if (g_contexts[i] == nullptr) {
274
- g_contexts[i] = whisper_init_from_file(path_model.c_str());
275
  if (g_contexts[i] != nullptr) {
276
  g_running = true;
277
  if (g_worker.joinable()) {
 
271
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
272
  for (size_t i = 0; i < g_contexts.size(); ++i) {
273
  if (g_contexts[i] == nullptr) {
274
+ g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
275
  if (g_contexts[i] != nullptr) {
276
  g_running = true;
277
  if (g_worker.joinable()) {
examples/talk/talk.cpp CHANGED
@@ -31,6 +31,7 @@ struct whisper_params {
31
  bool print_special = false;
32
  bool print_energy = false;
33
  bool no_timestamps = true;
 
34
 
35
  std::string person = "Santa";
36
  std::string language = "en";
@@ -61,6 +62,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
61
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
62
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
63
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
 
64
  else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
65
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
66
  else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
@@ -94,6 +96,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
94
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
95
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
96
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
 
97
  fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
98
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
99
  fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
@@ -181,8 +184,10 @@ int main(int argc, char ** argv) {
181
  }
182
 
183
  // whisper init
 
 
184
 
185
- struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
186
 
187
  // gpt init
188
 
 
31
  bool print_special = false;
32
  bool print_energy = false;
33
  bool no_timestamps = true;
34
+ bool use_gpu = true;
35
 
36
  std::string person = "Santa";
37
  std::string language = "en";
 
62
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
63
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
64
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
65
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
66
  else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
67
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
68
  else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
 
96
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
97
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
98
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
99
+ fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
100
  fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
101
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
102
  fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
 
184
  }
185
 
186
  // whisper init
187
+ struct whisper_context_params cparams;
188
+ cparams.use_gpu = params.use_gpu;
189
 
190
+ struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
191
 
192
  // gpt init
193
 
examples/whisper.android/app/src/main/jni/whisper/jni.c CHANGED
@@ -127,7 +127,7 @@ static struct whisper_context *whisper_init_from_asset(
127
  .close = &asset_close
128
  };
129
 
130
- return whisper_init(&loader);
131
  }
132
 
133
  JNIEXPORT jlong JNICALL
@@ -147,7 +147,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
147
  UNUSED(thiz);
148
  struct whisper_context *context = NULL;
149
  const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
150
- context = whisper_init_from_file(model_path_chars);
151
  (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
152
  return (jlong) context;
153
  }
 
127
  .close = &asset_close
128
  };
129
 
130
+ return whisper_init_with_params(&loader, whisper_context_default_params());
131
  }
132
 
133
  JNIEXPORT jlong JNICALL
 
147
  UNUSED(thiz);
148
  struct whisper_context *context = NULL;
149
  const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
150
+ context = whisper_init_from_file_with_params(model_path_chars, whisper_context_default_params());
151
  (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
152
  return (jlong) context;
153
  }
examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj CHANGED
@@ -17,8 +17,8 @@
17
  18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
18
  18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
19
  18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
20
- 18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML"; }; };
21
- 18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE"; }; };
22
  18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
23
  18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
24
  18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
 
17
  18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
18
  18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
19
  18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
20
+ 18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
21
+ 18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
22
  18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
23
  18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
24
  18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
examples/whisper.objc/whisper.objc/ViewController.m CHANGED
@@ -61,7 +61,13 @@ void AudioInputCallback(void * inUserData,
61
  NSLog(@"Loading model from %@", modelPath);
62
 
63
  // create ggml context
64
- stateInp.ctx = whisper_init_from_file([modelPath UTF8String]);
 
 
 
 
 
 
65
 
66
  // check if the model was loaded successfully
67
  if (stateInp.ctx == NULL) {
 
61
  NSLog(@"Loading model from %@", modelPath);
62
 
63
  // create ggml context
64
+
65
+ struct whisper_context_params cparams = whisper_context_default_params();
66
+ #if TARGET_OS_SIMULATOR
67
+ cparams.use_gpu = false;
68
+ NSLog(@"Running on simulator, using CPU");
69
+ #endif
70
+ stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);
71
 
72
  // check if the model was loaded successfully
73
  if (stateInp.ctx == NULL) {
examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift CHANGED
@@ -55,7 +55,12 @@ actor WhisperContext {
55
  }
56
 
57
  static func createContext(path: String) throws -> WhisperContext {
58
- let context = whisper_init_from_file(path)
 
 
 
 
 
59
  if let context {
60
  return WhisperContext(context: context)
61
  } else {
 
55
  }
56
 
57
  static func createContext(path: String) throws -> WhisperContext {
58
+ var params = whisper_context_default_params()
59
+ #if targetEnvironment(simulator)
60
+ params.use_gpu = false
61
+ print("Running on the simulator, using CPU")
62
+ #endif
63
+ let context = whisper_init_from_file_with_params(path, params)
64
  if let context {
65
  return WhisperContext(context: context)
66
  } else {
examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj CHANGED
@@ -16,13 +16,15 @@
16
  0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
17
  0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
18
  0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
19
- 0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-Wno-shorten-64-to-32"; }; };
20
- 0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"; }; };
21
  0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
22
  0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
23
  18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
24
  18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
25
  18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
 
 
26
  /* End PBXBuildFile section */
27
 
28
  /* Begin PBXFileReference section */
@@ -52,6 +54,9 @@
52
  18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
53
  18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
54
  18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
 
 
 
55
  /* End PBXFileReference section */
56
 
57
  /* Begin PBXFrameworksBuildPhase section */
@@ -135,6 +140,9 @@
135
  0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
136
  isa = PBXGroup;
137
  children = (
 
 
 
138
  18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
139
  18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
140
  18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
@@ -258,10 +266,12 @@
258
  0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
259
  18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
260
  0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
 
261
  0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
262
  0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
263
  0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
264
  0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
 
265
  18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
266
  18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
267
  );
 
16
  0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
17
  0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
18
  0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
19
+ 0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
20
+ 0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
21
  0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
22
  0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
23
  18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
24
  18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
25
  18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
26
+ 7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
27
+ 7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */; };
28
  /* End PBXBuildFile section */
29
 
30
  /* Begin PBXFileReference section */
 
54
  18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
55
  18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
56
  18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
57
+ 7FCB081E2ACFA04400AF3530 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-metal.h"; sourceTree = "<group>"; };
58
+ 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "ggml-metal.m"; sourceTree = "<group>"; };
59
+ 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = "ggml-metal.metal"; sourceTree = "<group>"; };
60
  /* End PBXFileReference section */
61
 
62
  /* Begin PBXFrameworksBuildPhase section */
 
140
  0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
141
  isa = PBXGroup;
142
  children = (
143
+ 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */,
144
+ 7FCB081E2ACFA04400AF3530 /* ggml-metal.h */,
145
+ 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */,
146
  18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
147
  18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
148
  18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
 
266
  0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
267
  18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
268
  0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
269
+ 7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */,
270
  0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
271
  0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
272
  0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
273
  0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
274
+ 7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */,
275
  18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
276
  18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
277
  );
examples/whisper.wasm/emscripten.cpp CHANGED
@@ -24,7 +24,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
24
 
25
  for (size_t i = 0; i < g_contexts.size(); ++i) {
26
  if (g_contexts[i] == nullptr) {
27
- g_contexts[i] = whisper_init_from_file(path_model.c_str());
28
  if (g_contexts[i] != nullptr) {
29
  return i + 1;
30
  } else {
 
24
 
25
  for (size_t i = 0; i < g_contexts.size(); ++i) {
26
  if (g_contexts[i] == nullptr) {
27
+ g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
28
  if (g_contexts[i] != nullptr) {
29
  return i + 1;
30
  } else {
whisper.cpp CHANGED
@@ -736,7 +736,7 @@ struct whisper_state {
736
 
737
  int lang_id = 0; // english by default
738
 
739
- std::string path_model; // populated by whisper_init_from_file()
740
  #ifdef WHISPER_USE_COREML
741
  whisper_coreml_context * ctx_coreml = nullptr;
742
  #endif
@@ -770,7 +770,8 @@ struct whisper_context {
770
  whisper_vocab vocab;
771
  whisper_state * state = nullptr;
772
 
773
- std::string path_model; // populated by whisper_init_from_file()
 
774
  };
775
 
776
  static void whisper_default_log(const char * text) {
@@ -2930,59 +2931,64 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
2930
  }
2931
 
2932
  #ifdef GGML_USE_METAL
2933
- state->ctx_metal = ggml_metal_init(1);
2934
- if (!state->ctx_metal) {
2935
- log("%s: ggml_metal_init() failed\n", __func__);
2936
- delete state;
2937
- return nullptr;
 
 
2938
  }
2939
 
2940
- log("%s: Metal context initialized\n", __func__);
 
2941
 
2942
- // this allocates all Metal resources and memory buffers
2943
 
2944
- void * data_ptr = NULL;
2945
- size_t data_size = 0;
2946
 
2947
- // TODO: add mmap support
2948
- //if (params.use_mmap) {
2949
- // data_ptr = ctx->model.mapping->addr;
2950
- // data_size = ctx->model.mapping->size;
2951
- //} else {
2952
- // data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
2953
- // data_size = ggml_get_mem_size (ctx->model.ctx);
2954
- //}
2955
 
2956
- data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
2957
- data_size = ggml_get_mem_size (ctx->model.ctx);
2958
 
2959
- const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx);
2960
 
2961
- log("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0);
2962
 
2963
  #define WHISPER_METAL_CHECK_BUF(result) \
2964
- if (!(result)) { \
2965
- log("%s: failed to add metal buffer\n", __func__); \
2966
- delete state; \
2967
- return nullptr; \
2968
- }
2969
 
2970
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data", data_ptr, data_size, max_size));
2971
 
2972
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_conv", state->alloc_conv.meta.data(), state->alloc_conv.meta.size(), 0));
2973
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_encode", state->alloc_encode.meta.data(), state->alloc_encode.meta.size(), 0));
2974
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_cross", state->alloc_cross.meta.data(), state->alloc_cross.meta.size(), 0));
2975
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_decode", state->alloc_decode.meta.data(), state->alloc_decode.meta.size(), 0));
2976
 
2977
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_conv", state->alloc_conv.data.data(), state->alloc_conv.data.size(), 0));
2978
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_encode", state->alloc_encode.data.data(), state->alloc_encode.data.size(), 0));
2979
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_cross", state->alloc_cross.data.data(), state->alloc_cross.data.size(), 0));
2980
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_decode", state->alloc_decode.data.data(), state->alloc_decode.data.size(), 0));
2981
 
2982
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "kv_cross", state->kv_cross.buf.data(), state->kv_cross.buf.size(), 0));
2983
 
2984
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "kv_self_0", state->decoders[0].kv_self.buf.data(), state->decoders[0].kv_self.buf.size(), 0));
2985
  #undef WHISPER_METAL_CHECK_BUF
 
 
2986
  #endif
2987
 
2988
  state->rng = std::mt19937(0);
@@ -3039,7 +3045,14 @@ int whisper_ctx_init_openvino_encoder(
3039
  #endif
3040
  }
3041
 
3042
- struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
 
 
 
 
 
 
 
3043
  log("%s: loading model from '%s'\n", __func__, path_model);
3044
 
3045
  auto fin = std::ifstream(path_model, std::ios::binary);
@@ -3068,7 +3081,7 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
3068
  fin->close();
3069
  };
3070
 
3071
- auto ctx = whisper_init_no_state(&loader);
3072
 
3073
  if (ctx) {
3074
  ctx->path_model = path_model;
@@ -3077,7 +3090,7 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
3077
  return ctx;
3078
  }
3079
 
3080
- struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size) {
3081
  struct buf_context {
3082
  uint8_t* buffer;
3083
  size_t size;
@@ -3111,13 +3124,14 @@ struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t
3111
 
3112
  loader.close = [](void * /*ctx*/) { };
3113
 
3114
- return whisper_init_no_state(&loader);
3115
  }
3116
 
3117
- struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader) {
3118
  ggml_time_init();
3119
 
3120
  whisper_context * ctx = new whisper_context;
 
3121
 
3122
  if (!whisper_model_load(loader, *ctx)) {
3123
  loader->close(loader->context);
@@ -3131,8 +3145,8 @@ struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loa
3131
  return ctx;
3132
  }
3133
 
3134
- struct whisper_context * whisper_init_from_file(const char * path_model) {
3135
- whisper_context * ctx = whisper_init_from_file_no_state(path_model);
3136
  if (!ctx) {
3137
  return nullptr;
3138
  }
@@ -3146,8 +3160,8 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
3146
  return ctx;
3147
  }
3148
 
3149
- struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
3150
- whisper_context * ctx = whisper_init_from_buffer_no_state(buffer, buffer_size);
3151
  if (!ctx) {
3152
  return nullptr;
3153
  }
@@ -3161,8 +3175,8 @@ struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_s
3161
  return ctx;
3162
  }
3163
 
3164
- struct whisper_context * whisper_init(struct whisper_model_loader * loader) {
3165
- whisper_context * ctx = whisper_init_no_state(loader);
3166
  if (!ctx) {
3167
  return nullptr;
3168
  }
@@ -3176,6 +3190,30 @@ struct whisper_context * whisper_init(struct whisper_model_loader * loader) {
3176
  return ctx;
3177
  }
3178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3179
  void whisper_free_state(struct whisper_state * state)
3180
  {
3181
  if (state) {
@@ -3230,6 +3268,12 @@ void whisper_free(struct whisper_context * ctx) {
3230
  }
3231
  }
3232
 
 
 
 
 
 
 
3233
  void whisper_free_params(struct whisper_full_params * params) {
3234
  if (params) {
3235
  delete params;
@@ -3698,6 +3742,14 @@ const char * whisper_print_system_info(void) {
3698
 
3699
  ////////////////////////////////////////////////////////////////////////////
3700
 
 
 
 
 
 
 
 
 
3701
  struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy) {
3702
  struct whisper_full_params params = whisper_full_default_params(strategy);
3703
 
@@ -4507,17 +4559,19 @@ int whisper_full_with_state(
4507
 
4508
  // TODO: not very clean - look for a better way and potentially merging with the init of decoder 0
4509
  #ifdef GGML_USE_METAL
 
4510
  #define WHISPER_METAL_CHECK_BUF(result) \
4511
- if (!(result)) { \
4512
- log("%s: failed to add metal buffer\n", __func__); \
4513
- return 0; \
4514
- }
4515
 
4516
- const std::string kv_name = "kv_self_" + std::to_string(j);
4517
- auto & kv_self = decoder.kv_self;
4518
 
4519
- WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, kv_name.c_str(), kv_self.buf.data(), kv_self.buf.size(), 0));
4520
  #undef WHISPER_METAL_CHECK_BUF
 
4521
  #endif
4522
  }
4523
  }
 
736
 
737
  int lang_id = 0; // english by default
738
 
739
+ std::string path_model; // populated by whisper_init_from_file_with_params()
740
  #ifdef WHISPER_USE_COREML
741
  whisper_coreml_context * ctx_coreml = nullptr;
742
  #endif
 
770
  whisper_vocab vocab;
771
  whisper_state * state = nullptr;
772
 
773
+ std::string path_model; // populated by whisper_init_from_file_with_params()
774
+ whisper_context_params params;
775
  };
776
 
777
  static void whisper_default_log(const char * text) {
 
2931
  }
2932
 
2933
  #ifdef GGML_USE_METAL
2934
+ if (ctx->params.use_gpu) {
2935
+ state->ctx_metal = ggml_metal_init(1);
2936
+ if (!state->ctx_metal) {
2937
+ log("%s: ggml_metal_init() failed\n", __func__);
2938
+ delete state;
2939
+ return nullptr;
2940
+ }
2941
  }
2942
 
2943
+ if (state->ctx_metal) {
2944
+ log("%s: Metal context initialized\n", __func__);
2945
 
2946
+ // this allocates all Metal resources and memory buffers
2947
 
2948
+ void * data_ptr = NULL;
2949
+ size_t data_size = 0;
2950
 
2951
+ // TODO: add mmap support
2952
+ //if (params.use_mmap) {
2953
+ // data_ptr = ctx->model.mapping->addr;
2954
+ // data_size = ctx->model.mapping->size;
2955
+ //} else {
2956
+ // data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
2957
+ // data_size = ggml_get_mem_size (ctx->model.ctx);
2958
+ //}
2959
 
2960
+ data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
2961
+ data_size = ggml_get_mem_size (ctx->model.ctx);
2962
 
2963
+ const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx);
2964
 
2965
+ log("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0);
2966
 
2967
  #define WHISPER_METAL_CHECK_BUF(result) \
2968
+ if (!(result)) { \
2969
+ log("%s: failed to add metal buffer\n", __func__); \
2970
+ delete state; \
2971
+ return nullptr; \
2972
+ }
2973
 
2974
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data", data_ptr, data_size, max_size));
2975
 
2976
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_conv", state->alloc_conv.meta.data(), state->alloc_conv.meta.size(), 0));
2977
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_encode", state->alloc_encode.meta.data(), state->alloc_encode.meta.size(), 0));
2978
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_cross", state->alloc_cross.meta.data(), state->alloc_cross.meta.size(), 0));
2979
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_decode", state->alloc_decode.meta.data(), state->alloc_decode.meta.size(), 0));
2980
 
2981
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_conv", state->alloc_conv.data.data(), state->alloc_conv.data.size(), 0));
2982
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_encode", state->alloc_encode.data.data(), state->alloc_encode.data.size(), 0));
2983
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_cross", state->alloc_cross.data.data(), state->alloc_cross.data.size(), 0));
2984
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_decode", state->alloc_decode.data.data(), state->alloc_decode.data.size(), 0));
2985
 
2986
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "kv_cross", state->kv_cross.buf.data(), state->kv_cross.buf.size(), 0));
2987
 
2988
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "kv_self_0", state->decoders[0].kv_self.buf.data(), state->decoders[0].kv_self.buf.size(), 0));
2989
  #undef WHISPER_METAL_CHECK_BUF
2990
+
2991
+ }
2992
  #endif
2993
 
2994
  state->rng = std::mt19937(0);
 
3045
  #endif
3046
  }
3047
 
3048
+ struct whisper_context_params whisper_context_default_params() {
3049
+ struct whisper_context_params result = {
3050
+ /*.use_gpu =*/ true,
3051
+ };
3052
+ return result;
3053
+ }
3054
+
3055
+ struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params) {
3056
  log("%s: loading model from '%s'\n", __func__, path_model);
3057
 
3058
  auto fin = std::ifstream(path_model, std::ios::binary);
 
3081
  fin->close();
3082
  };
3083
 
3084
+ auto ctx = whisper_init_with_params_no_state(&loader, params);
3085
 
3086
  if (ctx) {
3087
  ctx->path_model = path_model;
 
3090
  return ctx;
3091
  }
3092
 
3093
+ struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params) {
3094
  struct buf_context {
3095
  uint8_t* buffer;
3096
  size_t size;
 
3124
 
3125
  loader.close = [](void * /*ctx*/) { };
3126
 
3127
+ return whisper_init_with_params_no_state(&loader, params);
3128
  }
3129
 
3130
+ struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params) {
3131
  ggml_time_init();
3132
 
3133
  whisper_context * ctx = new whisper_context;
3134
+ ctx->params = params;
3135
 
3136
  if (!whisper_model_load(loader, *ctx)) {
3137
  loader->close(loader->context);
 
3145
  return ctx;
3146
  }
3147
 
3148
+ struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params) {
3149
+ whisper_context * ctx = whisper_init_from_file_with_params_no_state(path_model, params);
3150
  if (!ctx) {
3151
  return nullptr;
3152
  }
 
3160
  return ctx;
3161
  }
3162
 
3163
+ struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params) {
3164
+ whisper_context * ctx = whisper_init_from_buffer_with_params_no_state(buffer, buffer_size, params);
3165
  if (!ctx) {
3166
  return nullptr;
3167
  }
 
3175
  return ctx;
3176
  }
3177
 
3178
+ struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params) {
3179
+ whisper_context * ctx = whisper_init_with_params_no_state(loader, params);
3180
  if (!ctx) {
3181
  return nullptr;
3182
  }
 
3190
  return ctx;
3191
  }
3192
 
3193
+ struct whisper_context * whisper_init_from_file(const char * path_model) {
3194
+ return whisper_init_from_file_with_params(path_model, whisper_context_default_params());
3195
+ }
3196
+
3197
+ struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
3198
+ return whisper_init_from_buffer_with_params(buffer, buffer_size, whisper_context_default_params());
3199
+ }
3200
+
3201
+ struct whisper_context * whisper_init(struct whisper_model_loader * loader) {
3202
+ return whisper_init_with_params(loader, whisper_context_default_params());
3203
+ }
3204
+
3205
+ struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
3206
+ return whisper_init_from_file_with_params_no_state(path_model, whisper_context_default_params());
3207
+ }
3208
+
3209
+ struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size) {
3210
+ return whisper_init_from_buffer_with_params_no_state(buffer, buffer_size, whisper_context_default_params());
3211
+ }
3212
+
3213
+ struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader) {
3214
+ return whisper_init_with_params_no_state(loader, whisper_context_default_params());
3215
+ }
3216
+
3217
  void whisper_free_state(struct whisper_state * state)
3218
  {
3219
  if (state) {
 
3268
  }
3269
  }
3270
 
3271
+ void whisper_free_context_params(struct whisper_context_params * params) {
3272
+ if (params) {
3273
+ delete params;
3274
+ }
3275
+ }
3276
+
3277
  void whisper_free_params(struct whisper_full_params * params) {
3278
  if (params) {
3279
  delete params;
 
3742
 
3743
  ////////////////////////////////////////////////////////////////////////////
3744
 
3745
+ struct whisper_context_params * whisper_context_default_params_by_ref() {
3746
+ struct whisper_context_params params = whisper_context_default_params();
3747
+
3748
+ struct whisper_context_params* result = new whisper_context_params();
3749
+ *result = params;
3750
+ return result;
3751
+ }
3752
+
3753
  struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy) {
3754
  struct whisper_full_params params = whisper_full_default_params(strategy);
3755
 
 
4559
 
4560
  // TODO: not very clean - look for a better way and potentially merging with the init of decoder 0
4561
  #ifdef GGML_USE_METAL
4562
+ if (state->ctx_metal) {
4563
  #define WHISPER_METAL_CHECK_BUF(result) \
4564
+ if (!(result)) { \
4565
+ log("%s: failed to add metal buffer\n", __func__); \
4566
+ return 0; \
4567
+ }
4568
 
4569
+ const std::string kv_name = "kv_self_" + std::to_string(j);
4570
+ auto & kv_self = decoder.kv_self;
4571
 
4572
+ WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, kv_name.c_str(), kv_self.buf.data(), kv_self.buf.size(), 0));
4573
  #undef WHISPER_METAL_CHECK_BUF
4574
+ }
4575
  #endif
4576
  }
4577
  }
whisper.h CHANGED
@@ -5,6 +5,14 @@
5
  #include <stdint.h>
6
  #include <stdbool.h>
7
 
 
 
 
 
 
 
 
 
8
  #ifdef WHISPER_SHARED
9
  # ifdef _WIN32
10
  # ifdef WHISPER_BUILD
@@ -71,6 +79,10 @@ extern "C" {
71
 
72
  typedef int whisper_token;
73
 
 
 
 
 
74
  typedef struct whisper_token_data {
75
  whisper_token id; // token id
76
  whisper_token tid; // forced timestamp token id
@@ -99,15 +111,40 @@ extern "C" {
99
  // Various functions for loading a ggml whisper model.
100
  // Allocate (almost) all memory needed for the model.
101
  // Return NULL on failure
102
- WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model);
103
- WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
104
- WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
105
 
106
  // These are the same as the above, but the internal state of the context is not allocated automatically
107
  // It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
108
- WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model);
109
- WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size);
110
- WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
113
 
@@ -132,6 +169,7 @@ extern "C" {
132
  WHISPER_API void whisper_free (struct whisper_context * ctx);
133
  WHISPER_API void whisper_free_state(struct whisper_state * state);
134
  WHISPER_API void whisper_free_params(struct whisper_full_params * params);
 
135
 
136
  // Convert RAW PCM audio to log mel spectrogram.
137
  // The resulting spectrogram is stored inside the default state of the provided whisper context.
@@ -442,7 +480,9 @@ extern "C" {
442
  void * logits_filter_callback_user_data;
443
  };
444
 
445
- // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_params()
 
 
446
  WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
447
  WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
448
 
 
5
  #include <stdint.h>
6
  #include <stdbool.h>
7
 
8
+ #ifdef __GNUC__
9
+ # define WHISPER_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
10
+ #elif defined(_MSC_VER)
11
+ # define WHISPER_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
12
+ #else
13
+ # define WHISPER_DEPRECATED(func, hint) func
14
+ #endif
15
+
16
  #ifdef WHISPER_SHARED
17
  # ifdef _WIN32
18
  # ifdef WHISPER_BUILD
 
79
 
80
  typedef int whisper_token;
81
 
82
+ struct whisper_context_params {
83
+ bool use_gpu;
84
+ };
85
+
86
  typedef struct whisper_token_data {
87
  whisper_token id; // token id
88
  whisper_token tid; // forced timestamp token id
 
111
  // Various functions for loading a ggml whisper model.
112
  // Allocate (almost) all memory needed for the model.
113
  // Return NULL on failure
114
+ WHISPER_API struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params);
115
+ WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
116
+ WHISPER_API struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params);
117
 
118
  // These are the same as the above, but the internal state of the context is not allocated automatically
119
  // It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
120
+ WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params);
121
+ WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
122
+ WHISPER_API struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params);
123
+
124
+ WHISPER_DEPRECATED(
125
+ WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
126
+ "use whisper_init_from_file_with_params instead"
127
+ );
128
+ WHISPER_DEPRECATED(
129
+ WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size),
130
+ "use whisper_init_from_buffer_with_params instead"
131
+ );
132
+ WHISPER_DEPRECATED(
133
+ WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader),
134
+ "use whisper_init_with_params instead"
135
+ );
136
+ WHISPER_DEPRECATED(
137
+ WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model),
138
+ "use whisper_init_from_file_with_params_no_state instead"
139
+ );
140
+ WHISPER_DEPRECATED(
141
+ WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size),
142
+ "use whisper_init_from_buffer_with_params_no_state instead"
143
+ );
144
+ WHISPER_DEPRECATED(
145
+ WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader),
146
+ "use whisper_init_with_params_no_state instead"
147
+ );
148
 
149
  WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
150
 
 
169
  WHISPER_API void whisper_free (struct whisper_context * ctx);
170
  WHISPER_API void whisper_free_state(struct whisper_state * state);
171
  WHISPER_API void whisper_free_params(struct whisper_full_params * params);
172
+ WHISPER_API void whisper_free_context_params(struct whisper_context_params * params);
173
 
174
  // Convert RAW PCM audio to log mel spectrogram.
175
  // The resulting spectrogram is stored inside the default state of the provided whisper context.
 
480
  void * logits_filter_callback_user_data;
481
  };
482
 
483
+ // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
484
+ WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
485
+ WHISPER_API struct whisper_context_params whisper_context_default_params(void);
486
  WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
487
  WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
488