ggerganov commited on
Commit
f3283ba
·
unverified ·
1 Parent(s): 050ba38

whisper : add large-v3-turbo (#2440)

Browse files
.gitignore CHANGED
@@ -3,6 +3,7 @@
3
  .cache/
4
  .coreml/
5
  .test/
 
6
  .vs/
7
  .vscode/
8
  .DS_Store
 
3
  .cache/
4
  .coreml/
5
  .test/
6
+ .venv/
7
  .vs/
8
  .vscode/
9
  .DS_Store
Makefile CHANGED
@@ -1145,8 +1145,9 @@ samples:
1145
  .PHONY: large-v1
1146
  .PHONY: large-v2
1147
  .PHONY: large-v3
 
1148
 
1149
- tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
1150
  bash ./models/download-ggml-model.sh $@
1151
  @echo ""
1152
  @echo "==============================================="
 
1145
  .PHONY: large-v1
1146
  .PHONY: large-v2
1147
  .PHONY: large-v3
1148
+ .PHONY: large-v3-turbo
1149
 
1150
+ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo: main
1151
  bash ./models/download-ggml-model.sh $@
1152
  @echo ""
1153
  @echo "==============================================="
README.md CHANGED
@@ -236,6 +236,7 @@ make medium
236
  make large-v1
237
  make large-v2
238
  make large-v3
 
239
  ```
240
 
241
  ## Memory usage
 
236
  make large-v1
237
  make large-v2
238
  make large-v3
239
+ make large-v3-turbo
240
  ```
241
 
242
  ## Memory usage
bindings/go/examples/go-model-download/main.go CHANGED
@@ -24,7 +24,7 @@ const (
24
 
25
  var (
26
  // The models which will be downloaded, if no model is specified as an argument
27
- modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
28
  )
29
 
30
  var (
 
24
 
25
  var (
26
  // The models which will be downloaded, if no model is specified as an argument
27
+ modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "ggml-large-v3-turbo"}
28
  )
29
 
30
  var (
examples/livestream.sh CHANGED
@@ -48,7 +48,7 @@ if [ -n "$3" ]; then
48
  fi
49
 
50
  # Whisper models
51
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
52
 
53
  # list available models
54
  function list_models {
 
48
  fi
49
 
50
  # Whisper models
51
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
52
 
53
  # list available models
54
  function list_models {
examples/twitch.sh CHANGED
@@ -21,7 +21,7 @@ help()
21
  echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
22
  echo "options:"
23
  echo "-s Step in seconds (default is $step)."
24
- echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
25
  echo "-t Number of threads to use."
26
  echo "-h Print this help page."
27
  echo
 
21
  echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
22
  echo "options:"
23
  echo "-s Step in seconds (default is $step)."
24
+ echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' 'large-v3-turbo' (default is '$model')."
25
  echo "-t Number of threads to use."
26
  echo "-h Print this help page."
27
  echo
models/README.md CHANGED
@@ -42,22 +42,24 @@ rmdir models/whisper-medium
42
 
43
  ## Available models
44
 
45
- | Model | Disk | SHA |
46
- | ------------- | ------- | ------------------------------------------ |
47
- | tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
48
- | tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
49
- | base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
50
- | base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
51
- | small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
52
- | small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
53
- | small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` |
54
- | medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
55
- | medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
56
- | large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
57
- | large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
58
- | large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` |
59
- | large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
60
- | large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` |
 
 
61
 
62
  Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere.
63
 
 
42
 
43
  ## Available models
44
 
45
+ | Model | Disk | SHA |
46
+ | ------------------- | ------- | ------------------------------------------ |
47
+ | tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
48
+ | tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
49
+ | base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
50
+ | base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
51
+ | small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
52
+ | small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
53
+ | small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` |
54
+ | medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
55
+ | medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
56
+ | large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
57
+ | large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
58
+ | large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` |
59
+ | large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
60
+ | large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` |
61
+ | large-v3-turbo | 1.5 GiB | `4af2b29d7ec73d781377bfd1758ca957a807e941` |
62
+ | large-v3-turbo-q5_0 | 547 MiB | `e050f7970618a659205450ad97eb95a18d69c9ee` |
63
 
64
  Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere.
65
 
models/convert-h5-to-coreml.py CHANGED
@@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
78
  # Ported from models/convert-whisper-to-coreml.py
79
  if __name__ == "__main__":
80
  parser = argparse.ArgumentParser()
81
- parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
82
  parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
83
  parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
84
  parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
85
  parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
86
  args = parser.parse_args()
87
 
88
- if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
89
  raise ValueError("Invalid model name")
90
 
91
  pt_target_path = f"models/hf-{args.model_name}.pt"
 
78
  # Ported from models/convert-whisper-to-coreml.py
79
  if __name__ == "__main__":
80
  parser = argparse.ArgumentParser()
81
+ parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True)
82
  parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
83
  parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
84
  parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
85
  parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
86
  args = parser.parse_args()
87
 
88
+ if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
89
  raise ValueError("Invalid model name")
90
 
91
  pt_target_path = f"models/hf-{args.model_name}.pt"
models/convert-whisper-to-coreml.py CHANGED
@@ -283,13 +283,13 @@ def convert_decoder(hparams, model, quantize=False):
283
 
284
  if __name__ == "__main__":
285
  parser = argparse.ArgumentParser()
286
- parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
287
  parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
288
  parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
289
  parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
290
  args = parser.parse_args()
291
 
292
- if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
293
  raise ValueError("Invalid model name")
294
 
295
  whisper = load_model(args.model).cpu()
 
283
 
284
  if __name__ == "__main__":
285
  parser = argparse.ArgumentParser()
286
+ parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True)
287
  parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
288
  parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
289
  parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
290
  args = parser.parse_args()
291
 
292
+ if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
293
  raise ValueError("Invalid model name")
294
 
295
  whisper = load_model(args.model).cpu()
models/convert-whisper-to-openvino.py CHANGED
@@ -45,10 +45,10 @@ def convert_encoder(hparams, encoder, mname):
45
 
46
  if __name__ == "__main__":
47
  parser = argparse.ArgumentParser()
48
- parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
49
  args = parser.parse_args()
50
 
51
- if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
52
  raise ValueError("Invalid model name")
53
 
54
  whisper = load_model(args.model).cpu()
 
45
 
46
  if __name__ == "__main__":
47
  parser = argparse.ArgumentParser()
48
+ parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True)
49
  args = parser.parse_args()
50
 
51
+ if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
52
  raise ValueError("Invalid model name")
53
 
54
  whisper = load_model(args.model).cpu()
models/download-coreml-model.sh CHANGED
@@ -22,7 +22,7 @@ get_script_path() {
22
  models_path="$(get_script_path)"
23
 
24
  # Whisper models
25
- models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"
26
 
27
  # list available models
28
  list_models() {
 
22
  models_path="$(get_script_path)"
23
 
24
  # Whisper models
25
+ models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo"
26
 
27
  # list available models
28
  list_models() {
models/download-ggml-model.cmd CHANGED
@@ -8,7 +8,7 @@ popd
8
  set argc=0
9
  for %%x in (%*) do set /A argc+=1
10
 
11
- set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3
12
 
13
  if %argc% neq 1 (
14
  echo.
 
8
  set argc=0
9
  for %%x in (%*) do set /A argc+=1
10
 
11
+ set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo
12
 
13
  if %argc% neq 1 (
14
  echo.
models/download-ggml-model.sh CHANGED
@@ -46,7 +46,9 @@ large-v1
46
  large-v2
47
  large-v2-q5_0
48
  large-v3
49
- large-v3-q5_0"
 
 
50
 
51
  # list available models
52
  list_models() {
 
46
  large-v2
47
  large-v2-q5_0
48
  large-v3
49
+ large-v3-q5_0
50
+ large-v3-turbo
51
+ large-v3-turbo-q5_0"
52
 
53
  # list available models
54
  list_models() {
scripts/bench.py CHANGED
@@ -64,6 +64,7 @@ models = [
64
  "ggml-large-v1.bin",
65
  "ggml-large-v2.bin",
66
  "ggml-large-v3.bin",
 
67
  ]
68
 
69
 
 
64
  "ggml-large-v1.bin",
65
  "ggml-large-v2.bin",
66
  "ggml-large-v3.bin",
67
+ "ggml-large-v3-turbo.bin",
68
  ]
69
 
70
 
scripts/convert-all.sh CHANGED
@@ -1,6 +1,6 @@
1
  #!/bin/bash
2
 
3
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
4
 
5
  for model in "${models[@]}"; do
6
  python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
 
1
  #!/bin/bash
2
 
3
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
4
 
5
  for model in "${models[@]}"; do
6
  python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
tests/run-tests.sh CHANGED
@@ -19,7 +19,7 @@
19
  cd `dirname $0`
20
 
21
  # Whisper models
22
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
23
 
24
  # list available models
25
  function list_models {
 
19
  cd `dirname $0`
20
 
21
  # Whisper models
22
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" )
23
 
24
  # list available models
25
  function list_models {