whisper : add context param to disable gpu (#1293)
* whisper : check state->ctx_metal not null
* whisper : add whisper_context_params { use_gpu }
* whisper : new API with params & deprecate old API
* examples : use no-gpu param && whisper_init_from_file_with_params
* whisper.objc : enable metal & disable on simulator
* whisper.swiftui, metal : enable metal & support load default.metallib
* whisper.android : use new API
* bindings : use new API
* addon.node : fix build & test
* bindings : update java binding
* bindings : add missing whisper_context_default_params_by_ref WHISPER_API for java
* metal : use SWIFTPM_MODULE_BUNDLE for GGML_SWIFT and reuse library load
* metal : move bundle var into block
* metal : use SWIFT_PACKAGE instead of GGML_SWIFT
* style : minor updates
---------
Co-authored-by: Georgi Gerganov <[email protected]>
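
In short: callers now fill a whisper_context_params struct and pass it at init time, instead of calling the old path-only initializers. A minimal C++ sketch of the intended usage (the model path and error handling here are illustrative only, not part of this commit):

    #include "whisper.h"

    int main() {
        // start from the library defaults, then opt out of the GPU explicitly
        struct whisper_context_params cparams = whisper_context_default_params();
        cparams.use_gpu = false;

        // new API: the context params are passed at context-creation time
        struct whisper_context * ctx =
            whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
        if (ctx == nullptr) {
            return 1;
        }

        // ... run whisper_full() etc. as before ...

        whisper_free(ctx);
        return 0;
    }
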
- bindings/go/whisper.go +1 -1
- bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java +3 -1
- bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java +46 -15
- bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java +21 -1
- bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java +31 -0
- bindings/javascript/emscripten.cpp +1 -1
- bindings/ruby/ext/ruby_whisper.cpp +1 -1
- examples/addon.node/__test__/whisper.spec.js +1 -0
- examples/addon.node/addon.cpp +6 -1
- examples/addon.node/index.js +1 -0
- examples/bench.wasm/emscripten.cpp +1 -1
- examples/bench/bench.cpp +11 -4
- examples/command.wasm/emscripten.cpp +1 -1
- examples/command/command.cpp +7 -1
- examples/lsp/lsp.cpp +6 -1
- examples/main/main.cpp +7 -1
- examples/stream.wasm/emscripten.cpp +1 -1
- examples/stream/stream.cpp +29 -23
- examples/talk-llama/talk-llama.cpp +51 -41
- examples/talk.wasm/emscripten.cpp +1 -1
- examples/talk/talk.cpp +6 -1
- examples/whisper.android/app/src/main/jni/whisper/jni.c +2 -2
- examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj +2 -2
- examples/whisper.objc/whisper.objc/ViewController.m +7 -1
- examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift +6 -1
- examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj +12 -2
- examples/whisper.wasm/emscripten.cpp +1 -1
- whisper.cpp +111 -57
- whisper.h +47 -7

bindings/go/whisper.go
@@ -103,7 +103,7 @@ var (
 func Whisper_init(path string) *Context {
     cPath := C.CString(path)
     defer C.free(unsafe.Pointer(cPath))
-    if ctx := C.whisper_init_from_file(cPath); ctx != nil {
+    if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
         return (*Context)(ctx)
     } else {
         return nil

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
@@ -4,6 +4,7 @@ import com.sun.jna.Structure;
 import com.sun.jna.ptr.PointerByReference;
 import io.github.ggerganov.whispercpp.ggml.GgmlType;
 import io.github.ggerganov.whispercpp.WhisperModel;
+import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 
 import java.util.List;
 
@@ -23,8 +24,9 @@ public class WhisperContext extends Structure {
     public PointerByReference vocab;
     public PointerByReference state;
 
-    /** populated by whisper_init_from_file() */
+    /** populated by whisper_init_from_file_with_params() */
     String path_model;
+    WhisperContextParams params;
 
     // public static class ByReference extends WhisperContext implements Structure.ByReference {
     // }

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
@@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp;
 
 import com.sun.jna.Native;
 import com.sun.jna.Pointer;
+import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 import io.github.ggerganov.whispercpp.params.WhisperFullParams;
 import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
@@ -15,8 +16,9 @@ import java.io.IOException;
 public class WhisperCpp implements AutoCloseable {
     private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
     private Pointer ctx = null;
+    private Pointer paramsPointer = null;
+    private Pointer greedyParamsPointer = null;
+    private Pointer beamParamsPointer = null;
 
     public File modelDir() {
         String modelDirPath = System.getenv("XDG_CACHE_HOME");
@@ -31,6 +33,18 @@ public class WhisperCpp implements AutoCloseable {
      * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
      */
     public void initContext(String modelPath) throws FileNotFoundException {
+        initContextImpl(modelPath, getContextDefaultParams());
+    }
+
+    /**
+     * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
+     * @param params - params to use when initialising the context
+     */
+    public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
+        initContextImpl(modelPath, params);
+    }
+
+    private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
         if (ctx != null) {
             lib.whisper_free(ctx);
         }
@@ -43,13 +57,26 @@ public class WhisperCpp implements AutoCloseable {
             modelPath = new File(modelDir(), modelPath).getAbsolutePath();
         }
 
-        ctx = lib.whisper_init_from_file(modelPath);
+        ctx = lib.whisper_init_from_file_with_params(modelPath, params);
 
         if (ctx == null) {
             throw new FileNotFoundException(modelPath);
         }
     }
 
+    /**
+     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
+     * Because this function allocates memory for the params, the caller must call either:
+     * - call `whisper_free_context_params()`
+     * - `Native.free(Pointer.nativeValue(pointer));`
+     */
+    public WhisperContextParams getContextDefaultParams() {
+        paramsPointer = lib.whisper_context_default_params_by_ref();
+        WhisperContextParams params = new WhisperContextParams(paramsPointer);
+        params.read();
+        return params;
+    }
+
     /**
      * Provides default params which can be used with `whisper_full()` etc.
      * Because this function allocates memory for the params, the caller must call either:
@@ -63,15 +90,15 @@ public class WhisperCpp implements AutoCloseable {
 
         // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
         if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
+            if (greedyParamsPointer == null) {
+                greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
             }
+            pointer = greedyParamsPointer;
         } else {
+            if (beamParamsPointer == null) {
+                beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
             }
+            pointer = beamParamsPointer;
         }
 
         WhisperFullParams params = new WhisperFullParams(pointer);
@@ -93,13 +120,17 @@ public class WhisperCpp implements AutoCloseable {
     }
 
     private void freeParams() {
+        if (paramsPointer != null) {
+            Native.free(Pointer.nativeValue(paramsPointer));
+            paramsPointer = null;
+        }
+        if (greedyParamsPointer != null) {
+            Native.free(Pointer.nativeValue(greedyParamsPointer));
+            greedyParamsPointer = null;
         }
+        if (beamParamsPointer != null) {
+            Native.free(Pointer.nativeValue(beamParamsPointer));
+            beamParamsPointer = null;
         }
     }

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
@@ -5,6 +5,7 @@ import com.sun.jna.Native;
 import com.sun.jna.Pointer;
 import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
 import io.github.ggerganov.whispercpp.model.WhisperTokenData;
+import io.github.ggerganov.whispercpp.params.WhisperContextParams;
 import io.github.ggerganov.whispercpp.params.WhisperFullParams;
 
 public interface WhisperCppJnaLibrary extends Library {
@@ -13,12 +14,31 @@ public interface WhisperCppJnaLibrary extends Library {
     String whisper_print_system_info();
 
     /**
-     * Allocate (almost) all memory needed for the model by loading from a file.
+     * DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
      *
      * @param path_model Path to the model file
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_file(String path_model);
+
+    /**
+     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
+     * Because this function allocates memory for the params, the caller must call either:
+     * - call `whisper_free_context_params()`
+     * - `Native.free(Pointer.nativeValue(pointer));`
+     */
+    Pointer whisper_context_default_params_by_ref();
+
+    void whisper_free_context_params(Pointer params);
+
+    /**
+     * Allocate (almost) all memory needed for the model by loading from a file.
+     *
+     * @param path_model Path to the model file
+     * @param params Pointer to whisper_context_params
+     * @return Whisper context on success, null on failure
+     */
+    Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
 
     /**
      * Allocate (almost) all memory needed for the model by loading from a buffer.
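
The by_ref native above mirrors the heap-allocating helpers added to the C API; a minimal sketch of the ownership pattern they wrap, assuming the whisper.h declarations introduced by this commit (model path illustrative):

    // whisper_context_default_params_by_ref() allocates the params on the heap,
    // so the caller is responsible for releasing them
    struct whisper_context_params * cparams = whisper_context_default_params_by_ref();
    cparams->use_gpu = false;

    // the struct is passed by value, so it is copied at init time ...
    struct whisper_context * ctx =
        whisper_init_from_file_with_params("models/ggml-base.en.bin", *cparams);

    // ... which means the heap copy can be freed right away
    whisper_free_context_params(cparams);
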

bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java (new file)
@@ -0,0 +1,31 @@
+package io.github.ggerganov.whispercpp.params;
+
+import com.sun.jna.*;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Parameters for the whisper_init_from_file_with_params() function.
+ * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
+ * whisper_context_default_params()
+ */
+public class WhisperContextParams extends Structure {
+
+    public WhisperContextParams(Pointer p) {
+        super(p);
+    }
+
+    /** Use GPU for inference Number (default = true) */
+    public CBool use_gpu;
+
+    /** Use GPU for inference Number (default = true) */
+    public void useGpu(boolean enable) {
+        use_gpu = enable ? CBool.TRUE : CBool.FALSE;
+    }
+
+    @Override
+    protected List<String> getFieldOrder() {
+        return Arrays.asList("use_gpu");
+    }
+}

bindings/javascript/emscripten.cpp
@@ -20,7 +20,7 @@ struct whisper_context * g_context;
 EMSCRIPTEN_BINDINGS(whisper) {
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
         if (g_context == nullptr) {
-            g_context = whisper_init_from_file(path_model.c_str());
+            g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
             if (g_context != nullptr) {
                 return true;
             } else {

bindings/ruby/ext/ruby_whisper.cpp
@@ -87,7 +87,7 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
   if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
     rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
   }
-  rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
+  rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
   if (rw->context == nullptr) {
     rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
   }

examples/addon.node/__test__/whisper.spec.js
@@ -11,6 +11,7 @@ const whisperParamsMock = {
   language: "en",
   model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
   fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
+  use_gpu: true,
 };
 
 describe("Run whisper.node", () => {

examples/addon.node/addon.cpp
@@ -36,6 +36,7 @@ struct whisper_params {
     bool print_colors = false;
     bool print_progress = false;
     bool no_timestamps = false;
+    bool use_gpu = true;
 
     std::string language = "en";
     std::string prompt;
@@ -153,7 +154,9 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
 
     // whisper init
 
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
 
     if (ctx == nullptr) {
         fprintf(stderr, "error: failed to initialize whisper context\n");
@@ -315,10 +318,12 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
   std::string language = whisper_params.Get("language").As<Napi::String>();
   std::string model = whisper_params.Get("model").As<Napi::String>();
   std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
+  bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
 
   params.language = language;
   params.model = model;
   params.fname_inp.emplace_back(input);
+  params.use_gpu = use_gpu;
 
   Napi::Function callback = info[1].As<Napi::Function>();
   Worker* worker = new Worker(callback, params);

examples/addon.node/index.js
@@ -11,6 +11,7 @@ const whisperParams = {
   language: "en",
   model: path.join(__dirname, "../../models/ggml-base.en.bin"),
   fname_inp: "../../samples/jfk.wav",
+  use_gpu: true,
 };
 
 const arguments = process.argv.slice(2);

examples/bench.wasm/emscripten.cpp
@@ -57,7 +57,7 @@ EMSCRIPTEN_BINDINGS(bench) {
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
         for (size_t i = 0; i < g_contexts.size(); ++i) {
             if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                 if (g_contexts[i] != nullptr) {
                     if (g_worker.joinable()) {
                         g_worker.join();

examples/bench/bench.cpp
@@ -11,6 +11,8 @@ struct whisper_params {
     int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
 
     std::string model = "models/ggml-base.en.bin";
+
+    bool use_gpu = true;
 };
 
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -23,9 +25,10 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
             whisper_print_usage(argc, argv, params);
             exit(0);
         }
+        else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
+        else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
+        else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
+        else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
@@ -45,6 +48,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
     fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
     fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
+    fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
     fprintf(stderr, " %-7s 0 - whisper\n", "");
     fprintf(stderr, " %-7s 1 - memcpy\n", "");
     fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
@@ -54,7 +58,10 @@ int whisper_bench_full(const whisper_params & params) {
 int whisper_bench_full(const whisper_params & params) {
     // whisper init
 
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
 
     {
         fprintf(stderr, "\n");

examples/command.wasm/emscripten.cpp
@@ -243,7 +243,7 @@ EMSCRIPTEN_BINDINGS(command) {
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
         for (size_t i = 0; i < g_contexts.size(); ++i) {
             if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                 if (g_contexts[i] != nullptr) {
                     g_running = true;
                     if (g_worker.joinable()) {

examples/command/command.cpp
@@ -38,6 +38,7 @@ struct whisper_params {
     bool print_special = false;
     bool print_energy = false;
     bool no_timestamps = true;
+    bool use_gpu = true;
 
     std::string language = "en";
     std::string model = "models/ggml-base.en.bin";
@@ -68,6 +69,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
         else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
         else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
+        else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
         else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
         else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
         else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
@@ -101,6 +103,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
     fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
     fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
+    fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
     fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
     fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
     fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
@@ -610,7 +613,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
 
     // print some info about the processing
     {

examples/lsp/lsp.cpp
@@ -30,6 +30,7 @@ struct whisper_params {
     bool translate = false;
     bool print_special = false;
     bool print_energy = false;
+    bool use_gpu = true;
 
     std::string language = "en";
     std::string model = "models/ggml-base.en.bin";
@@ -72,6 +73,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
         else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
         else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
+        else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
         else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
         else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
         else {
@@ -102,6 +104,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
     fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
     fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
+    fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
     fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
     fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
     fprintf(stderr, "\n");
@@ -432,7 +435,9 @@ int main(int argc, char ** argv) {
     }
 
     // whisper init
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
     // init audio
 
     audio_async audio(30*1000);

examples/main/main.cpp
@@ -90,6 +90,7 @@ struct whisper_params {
     bool print_progress = false;
     bool no_timestamps = false;
     bool log_score = false;
+    bool use_gpu = true;
 
     std::string language = "en";
     std::string prompt;
@@ -165,6 +166,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
         else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
         else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
+        else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
@@ -221,6 +223,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
     fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
     fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
+    fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
     fprintf(stderr, "\n");
 }
 
@@ -877,7 +880,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
 
     if (ctx == nullptr) {
         fprintf(stderr, "error: failed to initialize whisper context\n");

examples/stream.wasm/emscripten.cpp
@@ -132,7 +132,7 @@ EMSCRIPTEN_BINDINGS(stream) {
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
         for (size_t i = 0; i < g_contexts.size(); ++i) {
             if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                 if (g_contexts[i] != nullptr) {
                     g_running = true;
                     if (g_worker.joinable()) {

examples/stream/stream.cpp
@@ -48,11 +48,12 @@ struct whisper_params {
     bool no_context = true;
     bool no_timestamps = false;
     bool tinydiarize = false;
+    bool save_audio = false; // save audio to wav file
+    bool use_gpu = true;
 
     std::string language = "en";
     std::string model = "models/ggml-base.en.bin";
     std::string fname_out;
-    bool save_audio = false; // save audio to wav file
 };
 
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -65,25 +66,26 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
             whisper_print_usage(argc, argv, params);
             exit(0);
         }
+        else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
+        else if ( arg == "--step") { params.step_ms = std::stoi(argv[++i]); }
+        else if ( arg == "--length") { params.length_ms = std::stoi(argv[++i]); }
+        else if ( arg == "--keep") { params.keep_ms = std::stoi(argv[++i]); }
+        else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
+        else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
+        else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
+        else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
+        else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
+        else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
+        else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
+        else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
+        else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
+        else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
+        else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
+        else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
+        else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
+        else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
+        else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
+        else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
 
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -118,8 +120,9 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
     fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
     fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
+    fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
     fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
+    fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
     fprintf(stderr, "\n");
 }
 
@@ -163,7 +166,10 @@ int main(int argc, char ** argv) {
         exit(0);
     }
 
-    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+
+    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
 
     std::vector<float> pcmf32 (n_samples_30s, 0.0f);
     std::vector<float> pcmf32_old;
@@ -424,4 +430,4 @@ int main(int argc, char ** argv) {
     whisper_free(ctx);
 
     return 0;
-}
+}

examples/talk-llama/talk-llama.cpp
@@ -63,6 +63,7 @@ struct whisper_params {
     bool print_energy = false;
     bool no_timestamps = true;
     bool verbose_prompt = false;
+    bool use_gpu = true;
 
     std::string person = "Georgi";
     std::string language = "en";
@@ -84,25 +85,26 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         whisper_print_usage(argc, argv, params);
         exit(0);
     }
+    else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
+    else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); }
+    else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
+    else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
+    else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
+    else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
+    else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
+    else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
+    else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
+    else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
+    else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
+    else if (arg == "-vp" || arg == "--verbose-prompt") { params.verbose_prompt = true; }
+    else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
+    else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
+    else if (arg == "--session") { params.path_session = argv[++i];}
+    else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
+    else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
+    else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
+    else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
+    else if (arg == "--prompt-file") {
         std::ifstream file(argv[++i]);
         std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
         if (params.prompt.back() == '\n') {
@@ -110,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         }
     }
     else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
+    else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
     else {
         fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
         whisper_print_usage(argc, argv, params);
@@ -125,27 +128,28 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
+    fprintf(stderr, " -h, --help [default] show this help message and exit\n");
+    fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
+    fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms);
+    fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
+    fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
+    fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
+    fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
+    fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
+    fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
+    fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
+    fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
+    fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
+    fprintf(stderr, " -vp, --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
+    fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
+    fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
+    fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
+    fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
+    fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
+    fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
+    fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
+    fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
+    fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
     fprintf(stderr, "\n");
 }
 
@@ -252,7 +256,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
-    struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
+    struct whisper_context_params cparams;
+    cparams.use_gpu = params.use_gpu;
+
+    struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
 
     // llama init
 
@@ -269,6 +276,9 @@ int main(int argc, char ** argv) {
     lcparams.seed = 1;
     lcparams.f16_kv = true;
     lcparams.n_threads = params.n_threads;
+    if (!params.use_gpu) {
+        lcparams.n_gpu_layers = 0;
+    }
 
     struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);

examples/talk.wasm/emscripten.cpp
@@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) {
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
         for (size_t i = 0; i < g_contexts.size(); ++i) {
             if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init_from_file(path_model.c_str());
+                g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                 if (g_contexts[i] != nullptr) {
                     g_running = true;
                     if (g_worker.joinable()) {
|
@@ -31,6 +31,7 @@ struct whisper_params {
|
|
| 31 |
bool print_special = false;
|
| 32 |
bool print_energy = false;
|
| 33 |
bool no_timestamps = true;
|
|
|
|
| 34 |
|
| 35 |
std::string person = "Santa";
|
| 36 |
std::string language = "en";
|
|
@@ -61,6 +62,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 61 |
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
| 62 |
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
| 63 |
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
|
|
|
| 64 |
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
|
| 65 |
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
| 66 |
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
|
|
@@ -94,6 +96,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
| 94 |
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
| 95 |
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
| 96 |
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
|
|
|
| 97 |
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
|
| 98 |
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
| 99 |
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
|
|
@@ -181,8 +184,10 @@ int main(int argc, char ** argv) {
|
|
| 181 |
}
|
| 182 |
|
| 183 |
// whisper init
|
|
|
|
|
|
|
| 184 |
|
| 185 |
-
struct whisper_context * ctx_wsp =
|
| 186 |
|
| 187 |
// gpt init
|
| 188 |
|
|
|
|
| 31 |
bool print_special = false;
|
| 32 |
bool print_energy = false;
|
| 33 |
bool no_timestamps = true;
|
| 34 |
+
bool use_gpu = true;
|
| 35 |
|
| 36 |
std::string person = "Santa";
|
| 37 |
std::string language = "en";
|
|
|
|
| 62 |
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
| 63 |
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
| 64 |
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
| 65 |
+
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
| 66 |
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
|
| 67 |
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
| 68 |
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
|
|
|
|
| 96 |
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
| 97 |
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
| 98 |
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
| 99 |
+
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
| 100 |
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
|
| 101 |
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
| 102 |
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
|
|
|
|
| 184 |
}
|
| 185 |
|
| 186 |
// whisper init
|
| 187 |
+
struct whisper_context_params cparams;
|
| 188 |
+
cparams.use_gpu = params.use_gpu;
|
| 189 |
|
| 190 |
+
struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
|
| 191 |
|
| 192 |
// gpt init
|
| 193 |
|
|
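The talk example above reads a -ng/--no-gpu flag into its whisper_params and forwards it through whisper_context_params before loading the model. A minimal, self-contained sketch of that wiring, reduced to the parts that matter for GPU selection; the my_params struct and the argument loop are illustrative, not code from this commit:

    #include "whisper.h"
    #include <cstring>

    struct my_params {
        bool         use_gpu = true;                      // flipped by -ng / --no-gpu
        const char * model   = "models/ggml-base.en.bin"; // illustrative default
    };

    int main(int argc, char ** argv) {
        my_params params;
        for (int i = 1; i < argc; ++i) {
            if (std::strcmp(argv[i], "-ng") == 0 || std::strcmp(argv[i], "--no-gpu") == 0) {
                params.use_gpu = false;
            } else if (std::strcmp(argv[i], "-mw") == 0 && i + 1 < argc) {
                params.model = argv[++i];
            }
        }

        struct whisper_context_params cparams = whisper_context_default_params();
        cparams.use_gpu = params.use_gpu;

        struct whisper_context * ctx = whisper_init_from_file_with_params(params.model, cparams);
        if (ctx == nullptr) {
            return 1;
        }
        // ... run whisper_full() here as the examples do ...
        whisper_free(ctx);
        return 0;
    }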
@@ -127,7 +127,7 @@ static struct whisper_context *whisper_init_from_asset(
|
|
| 127 |
.close = &asset_close
|
| 128 |
};
|
| 129 |
|
| 130 |
-
return
|
| 131 |
}
|
| 132 |
|
| 133 |
JNIEXPORT jlong JNICALL
|
|
@@ -147,7 +147,7 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
|
|
| 147 |
UNUSED(thiz);
|
| 148 |
struct whisper_context *context = NULL;
|
| 149 |
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
|
| 150 |
-
context =
|
| 151 |
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
|
| 152 |
return (jlong) context;
|
| 153 |
}
|
|
|
|
| 127 |
.close = &asset_close
|
| 128 |
};
|
| 129 |
|
| 130 |
+
return whisper_init_with_params(&loader, whisper_context_default_params());
|
| 131 |
}
|
| 132 |
|
| 133 |
JNIEXPORT jlong JNICALL
|
|
|
|
| 147 |
UNUSED(thiz);
|
| 148 |
struct whisper_context *context = NULL;
|
| 149 |
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
|
| 150 |
+
context = whisper_init_from_file_with_params(model_path_chars, whisper_context_default_params());
|
| 151 |
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
|
| 152 |
return (jlong) context;
|
| 153 |
}
|
|
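The Android JNI layer above initializes either from an asset loader (whisper_init_with_params) or from a file path (whisper_init_from_file_with_params), always passing whisper_context_default_params(). The header also exposes a buffer variant; a hedged sketch of reading a model image into memory and using it is below. The helper name and file handling are illustrative, not code from this commit:

    #include "whisper.h"
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static struct whisper_context * init_from_memory(const char * path, bool use_gpu) {
        FILE * f = std::fopen(path, "rb");
        if (!f) {
            return nullptr;
        }
        std::fseek(f, 0, SEEK_END);
        const long size = std::ftell(f);
        std::fseek(f, 0, SEEK_SET);

        std::vector<uint8_t> buf(size > 0 ? (size_t) size : 0);
        const bool ok = std::fread(buf.data(), 1, buf.size(), f) == buf.size();
        std::fclose(f);
        if (!ok || buf.empty()) {
            return nullptr;
        }

        struct whisper_context_params cparams = whisper_context_default_params();
        cparams.use_gpu = use_gpu;

        // assumes the model is parsed out of the buffer during init, as the existing
        // buffer-based bindings do, so buf can be released after this call returns
        return whisper_init_from_buffer_with_params(buf.data(), buf.size(), cparams);
    }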
@@ -17,8 +17,8 @@
|
|
| 17 |
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
|
| 18 |
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
|
| 19 |
18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
|
| 20 |
-
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML"; }; };
|
| 21 |
-
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE"; }; };
|
| 22 |
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
| 23 |
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
|
| 24 |
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
|
|
|
| 17 |
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
|
| 18 |
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
|
| 19 |
18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
|
| 20 |
+
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
|
| 21 |
+
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
|
| 22 |
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
| 23 |
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
|
| 24 |
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
|
@@ -61,7 +61,13 @@ void AudioInputCallback(void * inUserData,
|
|
| 61 |
NSLog(@"Loading model from %@", modelPath);
|
| 62 |
|
| 63 |
// create ggml context
|
| 64 |
-
|
|
|
| 65 |
|
| 66 |
// check if the model was loaded successfully
|
| 67 |
if (stateInp.ctx == NULL) {
|
|
|
|
| 61 |
NSLog(@"Loading model from %@", modelPath);
|
| 62 |
|
| 63 |
// create ggml context
|
| 64 |
+
|
| 65 |
+
struct whisper_context_params cparams = whisper_context_default_params();
|
| 66 |
+
#if TARGET_OS_SIMULATOR
|
| 67 |
+
cparams.use_gpu = false;
|
| 68 |
+
NSLog(@"Running on simulator, using CPU");
|
| 69 |
+
#endif
|
| 70 |
+
stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);
|
| 71 |
|
| 72 |
// check if the model was loaded successfully
|
| 73 |
if (stateInp.ctx == NULL) {
|
|
@@ -55,7 +55,12 @@ actor WhisperContext {
|
|
| 55 |
}
|
| 56 |
|
| 57 |
static func createContext(path: String) throws -> WhisperContext {
|
| 58 |
-
|
|
|
| 59 |
if let context {
|
| 60 |
return WhisperContext(context: context)
|
| 61 |
} else {
|
|
|
|
| 55 |
}
|
| 56 |
|
| 57 |
static func createContext(path: String) throws -> WhisperContext {
|
| 58 |
+
var params = whisper_context_default_params()
|
| 59 |
+
#if targetEnvironment(simulator)
|
| 60 |
+
params.use_gpu = false
|
| 61 |
+
print("Running on the simulator, using CPU")
|
| 62 |
+
#endif
|
| 63 |
+
let context = whisper_init_from_file_with_params(path, params)
|
| 64 |
if let context {
|
| 65 |
return WhisperContext(context: context)
|
| 66 |
} else {
|
|
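Both Apple examples above turn the GPU off when built for the Simulator, where Metal is not usable, and otherwise keep the default params. A small C++ sketch of the same decision as a reusable helper; the WHISPER_NO_GPU environment variable is an illustrative extra, not part of this change:

    #include "whisper.h"
    #include <cstdlib>

    #if defined(__APPLE__)
    #include <TargetConditionals.h>
    #endif

    static struct whisper_context_params pick_context_params() {
        struct whisper_context_params cparams = whisper_context_default_params();

    #if defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
        // the Simulator exposes no usable Metal device, so fall back to the CPU path
        cparams.use_gpu = false;
    #endif

        if (std::getenv("WHISPER_NO_GPU") != nullptr) {
            cparams.use_gpu = false; // manual override, e.g. for debugging
        }

        return cparams;
    }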
@@ -16,13 +16,15 @@
|
|
| 16 |
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
|
| 17 |
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
| 18 |
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
|
| 19 |
-
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-Wno-shorten-64-to-32"; }; };
|
| 20 |
-
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"; }; };
|
| 21 |
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
| 22 |
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
| 23 |
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
|
| 24 |
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
|
| 25 |
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
|
|
|
|
|
|
|
| 26 |
/* End PBXBuildFile section */
|
| 27 |
|
| 28 |
/* Begin PBXFileReference section */
|
|
@@ -52,6 +54,9 @@
|
|
| 52 |
18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
|
| 53 |
18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
|
| 54 |
18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
|
|
|
|
|
|
|
|
|
|
| 55 |
/* End PBXFileReference section */
|
| 56 |
|
| 57 |
/* Begin PBXFrameworksBuildPhase section */
|
|
@@ -135,6 +140,9 @@
|
|
| 135 |
0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
|
| 136 |
isa = PBXGroup;
|
| 137 |
children = (
|
|
|
|
|
|
|
|
|
|
| 138 |
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
|
| 139 |
18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
|
| 140 |
18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
|
|
@@ -258,10 +266,12 @@
|
|
| 258 |
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
|
| 259 |
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
|
| 260 |
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
|
|
|
| 261 |
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
| 262 |
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
| 263 |
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
|
| 264 |
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
|
|
|
| 265 |
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
|
| 266 |
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
|
| 267 |
);
|
|
|
|
| 16 |
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
|
| 17 |
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
| 18 |
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
|
| 19 |
+
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
|
| 20 |
+
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
|
| 21 |
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
| 22 |
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
| 23 |
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
|
| 24 |
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
|
| 25 |
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
|
| 26 |
+
7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
|
| 27 |
+
7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */; };
|
| 28 |
/* End PBXBuildFile section */
|
| 29 |
|
| 30 |
/* Begin PBXFileReference section */
|
|
|
|
| 54 |
18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
|
| 55 |
18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
|
| 56 |
18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
|
| 57 |
+
7FCB081E2ACFA04400AF3530 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-metal.h"; sourceTree = "<group>"; };
|
| 58 |
+
7FCB08252ACFA3A400AF3530 /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "ggml-metal.m"; sourceTree = "<group>"; };
|
| 59 |
+
7FCB08272ACFA48500AF3530 /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = "ggml-metal.metal"; sourceTree = "<group>"; };
|
| 60 |
/* End PBXFileReference section */
|
| 61 |
|
| 62 |
/* Begin PBXFrameworksBuildPhase section */
|
|
|
|
| 140 |
0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
|
| 141 |
isa = PBXGroup;
|
| 142 |
children = (
|
| 143 |
+
7FCB08272ACFA48500AF3530 /* ggml-metal.metal */,
|
| 144 |
+
7FCB081E2ACFA04400AF3530 /* ggml-metal.h */,
|
| 145 |
+
7FCB08252ACFA3A400AF3530 /* ggml-metal.m */,
|
| 146 |
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
|
| 147 |
18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
|
| 148 |
18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
|
|
|
|
| 266 |
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
|
| 267 |
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
|
| 268 |
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
| 269 |
+
7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */,
|
| 270 |
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
| 271 |
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
| 272 |
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
|
| 273 |
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
| 274 |
+
7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */,
|
| 275 |
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
|
| 276 |
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
|
| 277 |
);
|
|
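The Xcode project changes above compile ggml.c and whisper.cpp with -DGGML_USE_METAL and add ggml-metal.m / ggml-metal.metal to the target, so the Metal path exists in the binary and the new use_gpu flag decides at runtime whether it is used. A tiny compile-time probe of that flag, purely illustrative; without GGML_USE_METAL the use_gpu field is simply ignored by the CPU-only path:

    #include <cstdio>

    int main() {
    #ifdef GGML_USE_METAL
        std::printf("built with Metal support; whisper_context_params.use_gpu decides at runtime\n");
    #else
        std::printf("built without Metal; whisper_context_params.use_gpu has no effect\n");
    #endif
        return 0;
    }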
@@ -24,7 +24,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
|
|
| 24 |
|
| 25 |
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
| 26 |
if (g_contexts[i] == nullptr) {
|
| 27 |
-
g_contexts[i] =
|
| 28 |
if (g_contexts[i] != nullptr) {
|
| 29 |
return i + 1;
|
| 30 |
} else {
|
|
|
|
| 24 |
|
| 25 |
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
| 26 |
if (g_contexts[i] == nullptr) {
|
| 27 |
+
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
| 28 |
if (g_contexts[i] != nullptr) {
|
| 29 |
return i + 1;
|
| 30 |
} else {
|
|
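The wasm binding above stores contexts in a fixed-size pool (g_contexts) and hands a 1-based index back to JavaScript, with 0 signalling failure. A reduced sketch of that handle pattern on top of the new init call; names and the pool size are illustrative:

    #include "whisper.h"
    #include <array>
    #include <string>

    static std::array<whisper_context *, 4> g_pool = {};

    static size_t pool_init(const std::string & path_model) {
        for (size_t i = 0; i < g_pool.size(); ++i) {
            if (g_pool[i] == nullptr) {
                g_pool[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                return g_pool[i] != nullptr ? i + 1 : 0;
            }
        }
        return 0; // pool exhausted
    }

    static void pool_free(size_t handle) {
        if (handle == 0 || handle > g_pool.size()) {
            return;
        }
        whisper_free(g_pool[handle - 1]);
        g_pool[handle - 1] = nullptr;
    }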
@@ -736,7 +736,7 @@ struct whisper_state {
|
|
| 736 |
|
| 737 |
int lang_id = 0; // english by default
|
| 738 |
|
| 739 |
-
std::string path_model; // populated by
|
| 740 |
#ifdef WHISPER_USE_COREML
|
| 741 |
whisper_coreml_context * ctx_coreml = nullptr;
|
| 742 |
#endif
|
|
@@ -770,7 +770,8 @@ struct whisper_context {
|
|
| 770 |
whisper_vocab vocab;
|
| 771 |
whisper_state * state = nullptr;
|
| 772 |
|
| 773 |
-
std::string path_model; // populated by
|
|
|
|
| 774 |
};
|
| 775 |
|
| 776 |
static void whisper_default_log(const char * text) {
|
|
@@ -2930,59 +2931,64 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
|
|
| 2930 |
}
|
| 2931 |
|
| 2932 |
#ifdef GGML_USE_METAL
|
| 2933 |
-
|
| 2934 |
-
|
| 2935 |
-
|
| 2936 |
-
|
| 2937 |
-
|
|
|
|
|
|
|
| 2938 |
}
|
| 2939 |
|
| 2940 |
-
|
|
|
|
| 2941 |
|
| 2942 |
-
|
| 2943 |
|
| 2944 |
-
|
| 2945 |
-
|
| 2946 |
|
| 2947 |
-
|
| 2948 |
-
|
| 2949 |
-
|
| 2950 |
-
|
| 2951 |
-
|
| 2952 |
-
|
| 2953 |
-
|
| 2954 |
-
|
| 2955 |
|
| 2956 |
-
|
| 2957 |
-
|
| 2958 |
|
| 2959 |
-
|
| 2960 |
|
| 2961 |
-
|
| 2962 |
|
| 2963 |
#define WHISPER_METAL_CHECK_BUF(result) \
|
| 2964 |
-
|
| 2965 |
-
|
| 2966 |
-
|
| 2967 |
-
|
| 2968 |
-
|
| 2969 |
|
| 2970 |
-
|
| 2971 |
|
| 2972 |
-
|
| 2973 |
-
|
| 2974 |
-
|
| 2975 |
-
|
| 2976 |
|
| 2977 |
-
|
| 2978 |
-
|
| 2979 |
-
|
| 2980 |
-
|
| 2981 |
|
| 2982 |
-
|
| 2983 |
|
| 2984 |
-
|
| 2985 |
#undef WHISPER_METAL_CHECK_BUF
|
|
|
| 2986 |
#endif
|
| 2987 |
|
| 2988 |
state->rng = std::mt19937(0);
|
|
@@ -3039,7 +3045,14 @@ int whisper_ctx_init_openvino_encoder(
|
|
| 3039 |
#endif
|
| 3040 |
}
|
| 3041 |
|
| 3042 |
-
struct
|
|
|
| 3043 |
log("%s: loading model from '%s'\n", __func__, path_model);
|
| 3044 |
|
| 3045 |
auto fin = std::ifstream(path_model, std::ios::binary);
|
|
@@ -3068,7 +3081,7 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
|
|
| 3068 |
fin->close();
|
| 3069 |
};
|
| 3070 |
|
| 3071 |
-
auto ctx =
|
| 3072 |
|
| 3073 |
if (ctx) {
|
| 3074 |
ctx->path_model = path_model;
|
|
@@ -3077,7 +3090,7 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
|
|
| 3077 |
return ctx;
|
| 3078 |
}
|
| 3079 |
|
| 3080 |
-
struct whisper_context *
|
| 3081 |
struct buf_context {
|
| 3082 |
uint8_t* buffer;
|
| 3083 |
size_t size;
|
|
@@ -3111,13 +3124,14 @@ struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t
|
|
| 3111 |
|
| 3112 |
loader.close = [](void * /*ctx*/) { };
|
| 3113 |
|
| 3114 |
-
return
|
| 3115 |
}
|
| 3116 |
|
| 3117 |
-
struct whisper_context *
|
| 3118 |
ggml_time_init();
|
| 3119 |
|
| 3120 |
whisper_context * ctx = new whisper_context;
|
|
|
|
| 3121 |
|
| 3122 |
if (!whisper_model_load(loader, *ctx)) {
|
| 3123 |
loader->close(loader->context);
|
|
@@ -3131,8 +3145,8 @@ struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loa
|
|
| 3131 |
return ctx;
|
| 3132 |
}
|
| 3133 |
|
| 3134 |
-
struct whisper_context *
|
| 3135 |
-
whisper_context * ctx =
|
| 3136 |
if (!ctx) {
|
| 3137 |
return nullptr;
|
| 3138 |
}
|
|
@@ -3146,8 +3160,8 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
|
|
| 3146 |
return ctx;
|
| 3147 |
}
|
| 3148 |
|
| 3149 |
-
struct whisper_context *
|
| 3150 |
-
whisper_context * ctx =
|
| 3151 |
if (!ctx) {
|
| 3152 |
return nullptr;
|
| 3153 |
}
|
|
@@ -3161,8 +3175,8 @@ struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_s
|
|
| 3161 |
return ctx;
|
| 3162 |
}
|
| 3163 |
|
| 3164 |
-
struct whisper_context *
|
| 3165 |
-
whisper_context * ctx =
|
| 3166 |
if (!ctx) {
|
| 3167 |
return nullptr;
|
| 3168 |
}
|
|
@@ -3176,6 +3190,30 @@ struct whisper_context * whisper_init(struct whisper_model_loader * loader) {
|
|
| 3176 |
return ctx;
|
| 3177 |
}
|
| 3178 |
|
|
|
| 3179 |
void whisper_free_state(struct whisper_state * state)
|
| 3180 |
{
|
| 3181 |
if (state) {
|
|
@@ -3230,6 +3268,12 @@ void whisper_free(struct whisper_context * ctx) {
|
|
| 3230 |
}
|
| 3231 |
}
|
| 3232 |
|
|
|
| 3233 |
void whisper_free_params(struct whisper_full_params * params) {
|
| 3234 |
if (params) {
|
| 3235 |
delete params;
|
|
@@ -3698,6 +3742,14 @@ const char * whisper_print_system_info(void) {
|
|
| 3698 |
|
| 3699 |
////////////////////////////////////////////////////////////////////////////
|
| 3700 |
|
|
|
| 3701 |
struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy) {
|
| 3702 |
struct whisper_full_params params = whisper_full_default_params(strategy);
|
| 3703 |
|
|
@@ -4507,17 +4559,19 @@ int whisper_full_with_state(
|
|
| 4507 |
|
| 4508 |
// TODO: not very clean - look for a better way and potentially merging with the init of decoder 0
|
| 4509 |
#ifdef GGML_USE_METAL
|
|
|
|
| 4510 |
#define WHISPER_METAL_CHECK_BUF(result) \
|
| 4511 |
-
|
| 4512 |
-
|
| 4513 |
-
|
| 4514 |
-
|
| 4515 |
|
| 4516 |
-
|
| 4517 |
-
|
| 4518 |
|
| 4519 |
-
|
| 4520 |
#undef WHISPER_METAL_CHECK_BUF
|
|
|
|
| 4521 |
#endif
|
| 4522 |
}
|
| 4523 |
}
|
|
|
|
| 736 |
|
| 737 |
int lang_id = 0; // english by default
|
| 738 |
|
| 739 |
+
std::string path_model; // populated by whisper_init_from_file_with_params()
|
| 740 |
#ifdef WHISPER_USE_COREML
|
| 741 |
whisper_coreml_context * ctx_coreml = nullptr;
|
| 742 |
#endif
|
|
|
|
| 770 |
whisper_vocab vocab;
|
| 771 |
whisper_state * state = nullptr;
|
| 772 |
|
| 773 |
+
std::string path_model; // populated by whisper_init_from_file_with_params()
|
| 774 |
+
whisper_context_params params;
|
| 775 |
};
|
| 776 |
|
| 777 |
static void whisper_default_log(const char * text) {
|
|
|
|
| 2931 |
}
|
| 2932 |
|
| 2933 |
#ifdef GGML_USE_METAL
|
| 2934 |
+
if (ctx->params.use_gpu) {
|
| 2935 |
+
state->ctx_metal = ggml_metal_init(1);
|
| 2936 |
+
if (!state->ctx_metal) {
|
| 2937 |
+
log("%s: ggml_metal_init() failed\n", __func__);
|
| 2938 |
+
delete state;
|
| 2939 |
+
return nullptr;
|
| 2940 |
+
}
|
| 2941 |
}
|
| 2942 |
|
| 2943 |
+
if (state->ctx_metal) {
|
| 2944 |
+
log("%s: Metal context initialized\n", __func__);
|
| 2945 |
|
| 2946 |
+
// this allocates all Metal resources and memory buffers
|
| 2947 |
|
| 2948 |
+
void * data_ptr = NULL;
|
| 2949 |
+
size_t data_size = 0;
|
| 2950 |
|
| 2951 |
+
// TODO: add mmap support
|
| 2952 |
+
//if (params.use_mmap) {
|
| 2953 |
+
// data_ptr = ctx->model.mapping->addr;
|
| 2954 |
+
// data_size = ctx->model.mapping->size;
|
| 2955 |
+
//} else {
|
| 2956 |
+
// data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
|
| 2957 |
+
// data_size = ggml_get_mem_size (ctx->model.ctx);
|
| 2958 |
+
//}
|
| 2959 |
|
| 2960 |
+
data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
|
| 2961 |
+
data_size = ggml_get_mem_size (ctx->model.ctx);
|
| 2962 |
|
| 2963 |
+
const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx);
|
| 2964 |
|
| 2965 |
+
log("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0);
|
| 2966 |
|
| 2967 |
#define WHISPER_METAL_CHECK_BUF(result) \
|
| 2968 |
+
if (!(result)) { \
|
| 2969 |
+
log("%s: failed to add metal buffer\n", __func__); \
|
| 2970 |
+
delete state; \
|
| 2971 |
+
return nullptr; \
|
| 2972 |
+
}
|
| 2973 |
|
| 2974 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data", data_ptr, data_size, max_size));
|
| 2975 |
|
| 2976 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_conv", state->alloc_conv.meta.data(), state->alloc_conv.meta.size(), 0));
|
| 2977 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_encode", state->alloc_encode.meta.data(), state->alloc_encode.meta.size(), 0));
|
| 2978 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_cross", state->alloc_cross.meta.data(), state->alloc_cross.meta.size(), 0));
|
| 2979 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "meta_decode", state->alloc_decode.meta.data(), state->alloc_decode.meta.size(), 0));
|
| 2980 |
|
| 2981 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_conv", state->alloc_conv.data.data(), state->alloc_conv.data.size(), 0));
|
| 2982 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_encode", state->alloc_encode.data.data(), state->alloc_encode.data.size(), 0));
|
| 2983 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_cross", state->alloc_cross.data.data(), state->alloc_cross.data.size(), 0));
|
| 2984 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "data_decode", state->alloc_decode.data.data(), state->alloc_decode.data.size(), 0));
|
| 2985 |
|
| 2986 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "kv_cross", state->kv_cross.buf.data(), state->kv_cross.buf.size(), 0));
|
| 2987 |
|
| 2988 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, "kv_self_0", state->decoders[0].kv_self.buf.data(), state->decoders[0].kv_self.buf.size(), 0));
|
| 2989 |
#undef WHISPER_METAL_CHECK_BUF
|
| 2990 |
+
|
| 2991 |
+
}
|
| 2992 |
#endif
|
| 2993 |
|
| 2994 |
state->rng = std::mt19937(0);
|
|
|
|
| 3045 |
#endif
|
| 3046 |
}
|
| 3047 |
|
| 3048 |
+
struct whisper_context_params whisper_context_default_params() {
|
| 3049 |
+
struct whisper_context_params result = {
|
| 3050 |
+
/*.use_gpu =*/ true,
|
| 3051 |
+
};
|
| 3052 |
+
return result;
|
| 3053 |
+
}
|
| 3054 |
+
|
| 3055 |
+
struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params) {
|
| 3056 |
log("%s: loading model from '%s'\n", __func__, path_model);
|
| 3057 |
|
| 3058 |
auto fin = std::ifstream(path_model, std::ios::binary);
|
|
|
|
| 3081 |
fin->close();
|
| 3082 |
};
|
| 3083 |
|
| 3084 |
+
auto ctx = whisper_init_with_params_no_state(&loader, params);
|
| 3085 |
|
| 3086 |
if (ctx) {
|
| 3087 |
ctx->path_model = path_model;
|
|
|
|
| 3090 |
return ctx;
|
| 3091 |
}
|
| 3092 |
|
| 3093 |
+
struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params) {
|
| 3094 |
struct buf_context {
|
| 3095 |
uint8_t* buffer;
|
| 3096 |
size_t size;
|
|
|
|
| 3124 |
|
| 3125 |
loader.close = [](void * /*ctx*/) { };
|
| 3126 |
|
| 3127 |
+
return whisper_init_with_params_no_state(&loader, params);
|
| 3128 |
}
|
| 3129 |
|
| 3130 |
+
struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params) {
|
| 3131 |
ggml_time_init();
|
| 3132 |
|
| 3133 |
whisper_context * ctx = new whisper_context;
|
| 3134 |
+
ctx->params = params;
|
| 3135 |
|
| 3136 |
if (!whisper_model_load(loader, *ctx)) {
|
| 3137 |
loader->close(loader->context);
|
|
|
|
| 3145 |
return ctx;
|
| 3146 |
}
|
| 3147 |
|
| 3148 |
+
struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params) {
|
| 3149 |
+
whisper_context * ctx = whisper_init_from_file_with_params_no_state(path_model, params);
|
| 3150 |
if (!ctx) {
|
| 3151 |
return nullptr;
|
| 3152 |
}
|
|
|
|
| 3160 |
return ctx;
|
| 3161 |
}
|
| 3162 |
|
| 3163 |
+
struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params) {
|
| 3164 |
+
whisper_context * ctx = whisper_init_from_buffer_with_params_no_state(buffer, buffer_size, params);
|
| 3165 |
if (!ctx) {
|
| 3166 |
return nullptr;
|
| 3167 |
}
|
|
|
|
| 3175 |
return ctx;
|
| 3176 |
}
|
| 3177 |
|
| 3178 |
+
struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params) {
|
| 3179 |
+
whisper_context * ctx = whisper_init_with_params_no_state(loader, params);
|
| 3180 |
if (!ctx) {
|
| 3181 |
return nullptr;
|
| 3182 |
}
|
|
|
|
| 3190 |
return ctx;
|
| 3191 |
}
|
| 3192 |
|
| 3193 |
+
struct whisper_context * whisper_init_from_file(const char * path_model) {
|
| 3194 |
+
return whisper_init_from_file_with_params(path_model, whisper_context_default_params());
|
| 3195 |
+
}
|
| 3196 |
+
|
| 3197 |
+
struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
|
| 3198 |
+
return whisper_init_from_buffer_with_params(buffer, buffer_size, whisper_context_default_params());
|
| 3199 |
+
}
|
| 3200 |
+
|
| 3201 |
+
struct whisper_context * whisper_init(struct whisper_model_loader * loader) {
|
| 3202 |
+
return whisper_init_with_params(loader, whisper_context_default_params());
|
| 3203 |
+
}
|
| 3204 |
+
|
| 3205 |
+
struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
|
| 3206 |
+
return whisper_init_from_file_with_params_no_state(path_model, whisper_context_default_params());
|
| 3207 |
+
}
|
| 3208 |
+
|
| 3209 |
+
struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size) {
|
| 3210 |
+
return whisper_init_from_buffer_with_params_no_state(buffer, buffer_size, whisper_context_default_params());
|
| 3211 |
+
}
|
| 3212 |
+
|
| 3213 |
+
struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader) {
|
| 3214 |
+
return whisper_init_with_params_no_state(loader, whisper_context_default_params());
|
| 3215 |
+
}
|
| 3216 |
+
|
| 3217 |
void whisper_free_state(struct whisper_state * state)
|
| 3218 |
{
|
| 3219 |
if (state) {
|
|
|
|
| 3268 |
}
|
| 3269 |
}
|
| 3270 |
|
| 3271 |
+
void whisper_free_context_params(struct whisper_context_params * params) {
|
| 3272 |
+
if (params) {
|
| 3273 |
+
delete params;
|
| 3274 |
+
}
|
| 3275 |
+
}
|
| 3276 |
+
|
| 3277 |
void whisper_free_params(struct whisper_full_params * params) {
|
| 3278 |
if (params) {
|
| 3279 |
delete params;
|
|
|
|
| 3742 |
|
| 3743 |
////////////////////////////////////////////////////////////////////////////
|
| 3744 |
|
| 3745 |
+
struct whisper_context_params * whisper_context_default_params_by_ref() {
|
| 3746 |
+
struct whisper_context_params params = whisper_context_default_params();
|
| 3747 |
+
|
| 3748 |
+
struct whisper_context_params* result = new whisper_context_params();
|
| 3749 |
+
*result = params;
|
| 3750 |
+
return result;
|
| 3751 |
+
}
|
| 3752 |
+
|
| 3753 |
struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy) {
|
| 3754 |
struct whisper_full_params params = whisper_full_default_params(strategy);
|
| 3755 |
|
|
|
|
| 4559 |
|
| 4560 |
// TODO: not very clean - look for a better way and potentially merging with the init of decoder 0
|
| 4561 |
#ifdef GGML_USE_METAL
|
| 4562 |
+
if (state->ctx_metal) {
|
| 4563 |
#define WHISPER_METAL_CHECK_BUF(result) \
|
| 4564 |
+
if (!(result)) { \
|
| 4565 |
+
log("%s: failed to add metal buffer\n", __func__); \
|
| 4566 |
+
return 0; \
|
| 4567 |
+
}
|
| 4568 |
|
| 4569 |
+
const std::string kv_name = "kv_self_" + std::to_string(j);
|
| 4570 |
+
auto & kv_self = decoder.kv_self;
|
| 4571 |
|
| 4572 |
+
WHISPER_METAL_CHECK_BUF(ggml_metal_add_buffer(state->ctx_metal, kv_name.c_str(), kv_self.buf.data(), kv_self.buf.size(), 0));
|
| 4573 |
#undef WHISPER_METAL_CHECK_BUF
|
| 4574 |
+
}
|
| 4575 |
#endif
|
| 4576 |
}
|
| 4577 |
}
|
|
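Summing up the whisper.cpp side: the context now stores its whisper_context_params, ggml_metal_init() is only attempted when use_gpu is true, the later ggml_metal_add_buffer() calls are guarded by a state->ctx_metal check, and the legacy init entry points forward to the *_with_params variants with whisper_context_default_params(). A caller-side usage sketch of the resulting API; the model path is illustrative and error handling is trimmed:

    #include "whisper.h"

    int main() {
        // value-based flow, the common case for C and C++ callers
        struct whisper_context_params cparams = whisper_context_default_params();
        cparams.use_gpu = false; // force CPU-only inference

        struct whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
        if (ctx == nullptr) {
            return 1;
        }
        whisper_free(ctx);

        // pointer-based flow, aimed at FFI bindings that cannot pass structs by value
        struct whisper_context_params * pparams = whisper_context_default_params_by_ref();
        pparams->use_gpu = true;
        ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", *pparams);
        whisper_free_context_params(pparams);
        if (ctx != nullptr) {
            whisper_free(ctx);
        }
        return 0;
    }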
@@ -5,6 +5,14 @@
|
|
| 5 |
#include <stdint.h>
|
| 6 |
#include <stdbool.h>
|
| 7 |
|
|
|
| 8 |
#ifdef WHISPER_SHARED
|
| 9 |
# ifdef _WIN32
|
| 10 |
# ifdef WHISPER_BUILD
|
|
@@ -71,6 +79,10 @@ extern "C" {
|
|
| 71 |
|
| 72 |
typedef int whisper_token;
|
| 73 |
|
|
|
| 74 |
typedef struct whisper_token_data {
|
| 75 |
whisper_token id; // token id
|
| 76 |
whisper_token tid; // forced timestamp token id
|
|
@@ -99,15 +111,40 @@ extern "C" {
|
|
| 99 |
// Various functions for loading a ggml whisper model.
|
| 100 |
// Allocate (almost) all memory needed for the model.
|
| 101 |
// Return NULL on failure
|
| 102 |
-
WHISPER_API struct whisper_context *
|
| 103 |
-
WHISPER_API struct whisper_context *
|
| 104 |
-
WHISPER_API struct whisper_context *
|
| 105 |
|
| 106 |
// These are the same as the above, but the internal state of the context is not allocated automatically
|
| 107 |
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
| 108 |
-
WHISPER_API struct whisper_context *
|
| 109 |
-
WHISPER_API struct whisper_context *
|
| 110 |
-
WHISPER_API struct whisper_context *
|
|
|
| 111 |
|
| 112 |
WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
|
| 113 |
|
|
@@ -132,6 +169,7 @@ extern "C" {
|
|
| 132 |
WHISPER_API void whisper_free (struct whisper_context * ctx);
|
| 133 |
WHISPER_API void whisper_free_state(struct whisper_state * state);
|
| 134 |
WHISPER_API void whisper_free_params(struct whisper_full_params * params);
|
|
|
|
| 135 |
|
| 136 |
// Convert RAW PCM audio to log mel spectrogram.
|
| 137 |
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
|
@@ -442,7 +480,9 @@ extern "C" {
|
|
| 442 |
void * logits_filter_callback_user_data;
|
| 443 |
};
|
| 444 |
|
| 445 |
-
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_params()
|
|
|
|
|
|
|
| 446 |
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
| 447 |
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
| 448 |
|
|
|
|
| 5 |
#include <stdint.h>
|
| 6 |
#include <stdbool.h>
|
| 7 |
|
| 8 |
+
#ifdef __GNUC__
|
| 9 |
+
# define WHISPER_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
|
| 10 |
+
#elif defined(_MSC_VER)
|
| 11 |
+
# define WHISPER_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
|
| 12 |
+
#else
|
| 13 |
+
# define WHISPER_DEPRECATED(func, hint) func
|
| 14 |
+
#endif
|
| 15 |
+
|
| 16 |
#ifdef WHISPER_SHARED
|
| 17 |
# ifdef _WIN32
|
| 18 |
# ifdef WHISPER_BUILD
|
|
|
|
| 79 |
|
| 80 |
typedef int whisper_token;
|
| 81 |
|
| 82 |
+
struct whisper_context_params {
|
| 83 |
+
bool use_gpu;
|
| 84 |
+
};
|
| 85 |
+
|
| 86 |
typedef struct whisper_token_data {
|
| 87 |
whisper_token id; // token id
|
| 88 |
whisper_token tid; // forced timestamp token id
|
|
|
|
| 111 |
// Various functions for loading a ggml whisper model.
|
| 112 |
// Allocate (almost) all memory needed for the model.
|
| 113 |
// Return NULL on failure
|
| 114 |
+
WHISPER_API struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params);
|
| 115 |
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
| 116 |
+
WHISPER_API struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params);
|
| 117 |
|
| 118 |
// These are the same as the above, but the internal state of the context is not allocated automatically
|
| 119 |
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
| 120 |
+
WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params);
|
| 121 |
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
| 122 |
+
WHISPER_API struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params);
|
| 123 |
+
|
| 124 |
+
WHISPER_DEPRECATED(
|
| 125 |
+
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
|
| 126 |
+
"use whisper_init_from_file_with_params instead"
|
| 127 |
+
);
|
| 128 |
+
WHISPER_DEPRECATED(
|
| 129 |
+
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size),
|
| 130 |
+
"use whisper_init_from_buffer_with_params instead"
|
| 131 |
+
);
|
| 132 |
+
WHISPER_DEPRECATED(
|
| 133 |
+
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader),
|
| 134 |
+
"use whisper_init_with_params instead"
|
| 135 |
+
);
|
| 136 |
+
WHISPER_DEPRECATED(
|
| 137 |
+
WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model),
|
| 138 |
+
"use whisper_init_from_file_with_params_no_state instead"
|
| 139 |
+
);
|
| 140 |
+
WHISPER_DEPRECATED(
|
| 141 |
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size),
|
| 142 |
+
"use whisper_init_from_buffer_with_params_no_state instead"
|
| 143 |
+
);
|
| 144 |
+
WHISPER_DEPRECATED(
|
| 145 |
+
WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader),
|
| 146 |
+
"use whisper_init_with_params_no_state instead"
|
| 147 |
+
);
|
| 148 |
|
| 149 |
WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
|
| 150 |
|
|
|
|
| 169 |
WHISPER_API void whisper_free (struct whisper_context * ctx);
|
| 170 |
WHISPER_API void whisper_free_state(struct whisper_state * state);
|
| 171 |
WHISPER_API void whisper_free_params(struct whisper_full_params * params);
|
| 172 |
+
WHISPER_API void whisper_free_context_params(struct whisper_context_params * params);
|
| 173 |
|
| 174 |
// Convert RAW PCM audio to log mel spectrogram.
|
| 175 |
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
|
|
|
| 480 |
void * logits_filter_callback_user_data;
|
| 481 |
};
|
| 482 |
|
| 483 |
+
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
|
| 484 |
+
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
|
| 485 |
+
WHISPER_API struct whisper_context_params whisper_context_default_params(void);
|
| 486 |
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
| 487 |
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
| 488 |
|
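With the declarations above, the legacy init functions stay in the header but are wrapped in WHISPER_DEPRECATED, so existing callers keep building and only pick up a compiler warning; migrating is a one-line change per call site. A sketch of both spellings side by side; the helper names and path handling are illustrative:

    #include "whisper.h"

    // still compiles, but GCC/Clang/MSVC now report the deprecation hint
    struct whisper_context * load_old(const char * path) {
        return whisper_init_from_file(path);
    }

    // the replacement spelling, with explicit control over GPU usage
    struct whisper_context * load_new(const char * path, bool use_gpu) {
        struct whisper_context_params cparams = whisper_context_default_params();
        cparams.use_gpu = use_gpu;
        return whisper_init_from_file_with_params(path, cparams);
    }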