sonphantrung committed · Commit f7aef3e (unverified) · 1 Parent(s): d5836c9

models : make all scripts to be POSIX Compliant (#1725)

* download-coreml-model: make it POSIX-compliant

* download-ggml-model: posix compliant (2nd)

* minor edit

* forgot to add newline

* generate-coreml-interface: far more straightforward

* generate-coreml-model: done with the posix thingy

* typo

* Update download-ggml-model.sh

* fix

* fix typo

* another fix

* Update download-coreml-model.sh

* Update download-ggml-model.sh

* Update download-coreml-model.sh
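
The conversions above reduce to a few recurring bash-to-POSIX substitutions: "function name {" becomes "name() {", bash arrays become whitespace-separated word lists iterated via field splitting, and [[ ... =~ ... ]] membership tests become grep -w checks on that list. A minimal, self-contained sketch of the pattern (the short model list here is illustrative only, not the scripts' full list):

#!/bin/sh

# word list instead of a bash array
models="tiny.en tiny base.en base"

# POSIX function definition instead of "function list_models {"
list_models() {
    for model in $models; do    # left unquoted on purpose: field splitting yields one word per model
        printf " %s" "$model"
    done
    printf "\n"
}

# word match via grep instead of [[ " ${models[@]} " =~ " ${model} " ]]
model="base.en"
if ! echo "$models" | grep -q -w "$model"; then
    printf "Invalid model: %s\n" "$model"
    exit 1
fi

list_models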

models/download-coreml-model.sh CHANGED
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 # This script downloads Whisper model files that have already been converted to Core ML format.
 # This way you don't have to convert them yourself.
@@ -7,32 +7,32 @@ src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
 pfx="resolve/main/ggml"
 
 # get the path of this script
-function get_script_path() {
+get_script_path() {
     if [ -x "$(command -v realpath)" ]; then
-        echo "$(dirname $(realpath $0))"
+        dirname "$(realpath "$0")"
     else
-        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
-        echo "$ret"
+        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
+        echo "$_ret"
     fi
 }
 
 models_path="$(get_script_path)"
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
+models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"
 
 # list available models
-function list_models {
-    printf "\n"
-    printf " Available models:"
-    for model in "${models[@]}"; do
-        printf " $model"
-    done
-    printf "\n\n"
+list_models() {
+    printf "\n"
+    printf " Available models:"
+    for model in $models; do
+        printf " %s" "$models"
+    done
+    printf "\n\n"
 }
 
 if [ "$#" -ne 1 ]; then
-    printf "Usage: $0 <model>\n"
+    printf "Usage: %s <model>\n" "$0"
     list_models
 
     exit 1
@@ -40,8 +40,8 @@ fi
 
 model=$1
 
-if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
-    printf "Invalid model: $model\n"
+if ! echo "$models" | grep -q -w "$model"; then
+    printf "Invalid model: %s\n" "$model"
     list_models
 
     exit 1
@@ -49,19 +49,19 @@ fi
 
 # download Core ML model
 
-printf "Downloading Core ML model $model from '$src' ...\n"
+printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"
 
-cd $models_path
+cd "$models_path" || exit
 
 if [ -f "ggml-$model.mlmodel" ]; then
-    printf "Model $model already exists. Skipping download.\n"
+    printf "Model %s already exists. Skipping download.\n" "$model"
     exit 0
 fi
 
 if [ -x "$(command -v wget)" ]; then
-    wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+    wget --quiet --show-progress -O ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
 elif [ -x "$(command -v curl)" ]; then
-    curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+    curl -L --output ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
 else
     printf "Either wget or curl is required to download models.\n"
     exit 1
@@ -69,14 +69,14 @@ fi
 
 
 if [ $? -ne 0 ]; then
-    printf "Failed to download Core ML model $model \n"
+    printf "Failed to download Core ML model %s \n" "$model"
     printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
     exit 1
 fi
 
-printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
+printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
 printf "Run the following command to compile it:\n\n"
-printf " $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
+printf " $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
 printf "You can now use it like this:\n\n"
-printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
+printf " $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
 printf "\n"

models/download-ggml-model.sh CHANGED
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 # This script downloads Whisper model files that have already been converted to ggml format.
 # This way you don't have to convert them yourself.
@@ -10,54 +10,52 @@ src="https://huggingface.co/ggerganov/whisper.cpp"
 pfx="resolve/main/ggml"
 
 # get the path of this script
-function get_script_path() {
+get_script_path() {
     if [ -x "$(command -v realpath)" ]; then
-        echo "$(dirname "$(realpath "$0")")"
+        dirname "$(realpath "$0")"
     else
-        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
-        echo "$ret"
+        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
+        echo "$_ret"
    fi
 }
 
 models_path="${2:-$(get_script_path)}"
 
 # Whisper models
-models=(
-    "tiny.en"
-    "tiny"
-    "tiny-q5_1"
-    "tiny.en-q5_1"
-    "base.en"
-    "base"
-    "base-q5_1"
-    "base.en-q5_1"
-    "small.en"
-    "small.en-tdrz"
-    "small"
-    "small-q5_1"
-    "small.en-q5_1"
-    "medium"
-    "medium.en"
-    "medium-q5_0"
-    "medium.en-q5_0"
-    "large-v1"
-    "large-v2"
-    "large-v3"
-    "large-v3-q5_0"
-)
+models="tiny.en
+tiny
+tiny-q5_1
+tiny.en-q5_1
+base.en
+base
+base-q5_1
+base.en-q5_1
+small.en
+small.en-tdrz
+small
+small-q5_1
+small.en-q5_1
+medium
+medium.en
+medium-q5_0
+medium.en-q5_0
+large-v1
+large-v2
+large-v3
+large-v3-q5_0"
 
 # list available models
-function list_models {
+list_models() {
     printf "\n"
     printf " Available models:"
-    for model in "${models[@]}"; do
-        printf " $model"
+    for model in $models; do
+        printf " %s" "$model"
     done
     printf "\n\n"
 }
 
 if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
-    printf "Usage: $0 <model> [models_path]\n"
+    printf "Usage: %s <model> [models_path]\n" "$0"
     list_models
 
     exit 1
@@ -65,34 +63,36 @@ fi
 
 model=$1
 
-if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
-    printf "Invalid model: $model\n"
+if ! echo "$models" | grep -q -w "$model"; then
+    printf "Invalid model: %s\n" "$model"
     list_models
 
     exit 1
 fi
 
 # check if model contains `tdrz` and update the src and pfx accordingly
-if [[ $model == *"tdrz"* ]]; then
+if echo "$model" | grep -q "tdrz"; then
     src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
     pfx="resolve/main/ggml"
 fi
 
+echo "$model" | grep -q '^"tdrz"*$'
+
 # download ggml model
 
-printf "Downloading ggml model $model from '$src' ...\n"
+printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"
 
-cd "$models_path"
+cd "$models_path" || exit
 
 if [ -f "ggml-$model.bin" ]; then
-    printf "Model $model already exists. Skipping download.\n"
+    printf "Model %s already exists. Skipping download.\n" "$model"
     exit 0
 fi
 
 if [ -x "$(command -v wget)" ]; then
-    wget --no-config --quiet --show-progress -O ggml-$model.bin $src/$pfx-$model.bin
+    wget --no-config --quiet --show-progress -O ggml-"$model".bin $src/$pfx-"$model".bin
 elif [ -x "$(command -v curl)" ]; then
-    curl -L --output ggml-$model.bin $src/$pfx-$model.bin
+    curl -L --output ggml-"$model".bin $src/$pfx-"$model".bin
 else
     printf "Either wget or curl is required to download models.\n"
     exit 1
@@ -100,12 +100,13 @@ fi
 
 
 if [ $? -ne 0 ]; then
-    printf "Failed to download ggml model $model \n"
+    printf "Failed to download ggml model %s \n" "$model"
     printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
     exit 1
 fi
 
-printf "Done! Model '$model' saved in '$models_path/ggml-$model.bin'\n"
+
+printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
 printf "You can now use it like this:\n\n"
-printf " $ ./main -m $models_path/ggml-$model.bin -f samples/jfk.wav\n"
+printf " $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model"
 printf "\n"
models/generate-coreml-interface.sh CHANGED
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 #
 # This generates:
 # - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
@@ -6,7 +6,7 @@
 #
 
 wd=$(dirname "$0")
-cd "$wd/../"
+cd "$wd/../" || exit
 
 python3 models/convert-whisper-to-coreml.py --model tiny.en
 

models/generate-coreml-model.sh CHANGED
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 # Usage: ./generate-coreml-model.sh <model-name>
 if [ $# -eq 0 ]; then
@@ -6,7 +6,7 @@ if [ $# -eq 0 ]; then
     echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>"
     echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
     exit 1
-elif [[ "$1" == "-h5" && $# != 3 ]]; then
+elif [ "$1" = "-h5" ] && [ $# != 3 ]; then
     echo "No model name and model path supplied for a HuggingFace model"
     echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
     exit 1
@@ -15,20 +15,20 @@ fi
 mname="$1"
 
 wd=$(dirname "$0")
-cd "$wd/../"
+cd "$wd/../" || exit
 
-if [[ $mname == "-h5" ]]; then
+if [ "$mname" = "-h5" ]; then
     mname="$2"
     mpath="$3"
-    echo $mpath
-    python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
+    echo "$mpath"
+    python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True
 else
-    python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True
+    python3 models/convert-whisper-to-coreml.py --model "$mname" --encoder-only True --optimize-ane True
 fi
 
-xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/
-rm -rf models/ggml-${mname}-encoder.mlmodelc
-mv -v models/coreml-encoder-${mname}.mlmodelc models/ggml-${mname}-encoder.mlmodelc
+xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/
+rm -rf models/ggml-"${mname}"-encoder.mlmodelc
+mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc
 
 # TODO: decoder (sometime in the future maybe)
 #xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
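
Since the point of the change is portability, the scripts can be sanity-checked against a strictly POSIX shell; a quick check along these lines (shellcheck is an external tool and not part of this commit):

    # parse each script with a POSIX sh without executing it
    for f in models/download-ggml-model.sh models/download-coreml-model.sh \
             models/generate-coreml-interface.sh models/generate-coreml-model.sh; do
        sh -n "$f" || echo "syntax error in $f"
    done

    # optional: lint for remaining bashisms
    shellcheck -s sh models/*.sh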