Spaces:
Running
Running
models : make all scripts to be POSIX Compliant (#1725)
* download-coreml-model: make it POSIX-compliant
* download-ggml-model: posix compliant (2nd)
* minor edit
* forgot to add newline
* generate-coreml-interface: far more straightforward
* generate-coreml-model: done with the posix thingy
* typo
* Update download-ggml-model.sh
* fix
* fix typo
* another fix
* Update download-coreml-model.sh
* Update download-ggml-model.sh
* Update download-coreml-model.sh
- models/download-coreml-model.sh +25 -25
- models/download-ggml-model.sh +44 -43
- models/generate-coreml-interface.sh +2 -2
- models/generate-coreml-model.sh +10 -10
models/download-coreml-model.sh
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#!/bin/
|
| 2 |
|
| 3 |
# This script downloads Whisper model files that have already been converted to Core ML format.
|
| 4 |
# This way you don't have to convert them yourself.
|
|
@@ -7,32 +7,32 @@ src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
|
|
| 7 |
pfx="resolve/main/ggml"
|
| 8 |
|
| 9 |
# get the path of this script
|
| 10 |
-
|
| 11 |
if [ -x "$(command -v realpath)" ]; then
|
| 12 |
-
|
| 13 |
else
|
| 14 |
-
|
| 15 |
-
echo "$
|
| 16 |
fi
|
| 17 |
}
|
| 18 |
|
| 19 |
models_path="$(get_script_path)"
|
| 20 |
|
| 21 |
# Whisper models
|
| 22 |
-
models=
|
| 23 |
|
| 24 |
# list available models
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
}
|
| 33 |
|
| 34 |
if [ "$#" -ne 1 ]; then
|
| 35 |
-
printf "Usage:
|
| 36 |
list_models
|
| 37 |
|
| 38 |
exit 1
|
|
@@ -40,8 +40,8 @@ fi
|
|
| 40 |
|
| 41 |
model=$1
|
| 42 |
|
| 43 |
-
if
|
| 44 |
-
printf "Invalid model:
|
| 45 |
list_models
|
| 46 |
|
| 47 |
exit 1
|
|
@@ -49,19 +49,19 @@ fi
|
|
| 49 |
|
| 50 |
# download Core ML model
|
| 51 |
|
| 52 |
-
printf "Downloading Core ML model
|
| 53 |
|
| 54 |
-
cd $models_path
|
| 55 |
|
| 56 |
if [ -f "ggml-$model.mlmodel" ]; then
|
| 57 |
-
printf "Model
|
| 58 |
exit 0
|
| 59 |
fi
|
| 60 |
|
| 61 |
if [ -x "$(command -v wget)" ]; then
|
| 62 |
-
wget --quiet --show-progress -O ggml
|
| 63 |
elif [ -x "$(command -v curl)" ]; then
|
| 64 |
-
curl -L --output ggml
|
| 65 |
else
|
| 66 |
printf "Either wget or curl is required to download models.\n"
|
| 67 |
exit 1
|
|
@@ -69,14 +69,14 @@ fi
|
|
| 69 |
|
| 70 |
|
| 71 |
if [ $? -ne 0 ]; then
|
| 72 |
-
printf "Failed to download Core ML model
|
| 73 |
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
|
| 74 |
exit 1
|
| 75 |
fi
|
| 76 |
|
| 77 |
-
printf "Done! Model '
|
| 78 |
printf "Run the following command to compile it:\n\n"
|
| 79 |
-
printf " $ xcrun coremlc compile ./models/ggml
|
| 80 |
printf "You can now use it like this:\n\n"
|
| 81 |
-
printf " $ ./main -m models/ggml
|
| 82 |
printf "\n"
|
|
|
|
#!/bin/sh

# This script downloads Whisper model files that have already been converted to Core ML format.
# This way you don't have to convert them yourself.

src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
pfx="resolve/main/ggml"

# get the path of this script (POSIX-safe: falls back when realpath is absent)
get_script_path() {
    if [ -x "$(command -v realpath)" ]; then
        dirname "$(realpath "$0")"
    else
        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
        echo "$_ret"
    fi
}

models_path="$(get_script_path)"

# Whisper models
models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"

# list available models
list_models() {
    printf "\n"
    printf "  Available models:"
    for model in $models; do
        # fix: print the current model, not the whole list each iteration
        printf " %s" "$model"
    done
    printf "\n\n"
}

if [ "$#" -ne 1 ]; then
    printf "Usage: %s <model>\n" "$0"
    list_models
    exit 1
fi

model=$1

# -w matches the model name as a whole word within the list
if ! echo "$models" | grep -q -w "$model"; then
    printf "Invalid model: %s\n" "$model"
    list_models
    exit 1
fi

# download Core ML model

printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"

cd "$models_path" || exit

if [ -f "ggml-$model.mlmodel" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

if [ -x "$(command -v wget)" ]; then
    wget --quiet --show-progress -O ggml-"$model".mlmodel "$src/$pfx-$model.mlmodel"
elif [ -x "$(command -v curl)" ]; then
    curl -L --output ggml-"$model".mlmodel "$src/$pfx-$model.mlmodel"
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi

if [ $? -ne 0 ]; then
    printf "Failed to download Core ML model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
printf "Run the following command to compile it:\n\n"
printf "  $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
printf "You can now use it like this:\n\n"
printf "  $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
printf "\n"
|
models/download-ggml-model.sh
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#!/bin/
|
| 2 |
|
| 3 |
# This script downloads Whisper model files that have already been converted to ggml format.
|
| 4 |
# This way you don't have to convert them yourself.
|
|
@@ -10,54 +10,52 @@ src="https://huggingface.co/ggerganov/whisper.cpp"
|
|
| 10 |
pfx="resolve/main/ggml"
|
| 11 |
|
| 12 |
# get the path of this script
|
| 13 |
-
|
| 14 |
if [ -x "$(command -v realpath)" ]; then
|
| 15 |
-
|
| 16 |
else
|
| 17 |
-
|
| 18 |
-
echo "$
|
| 19 |
fi
|
| 20 |
}
|
| 21 |
|
| 22 |
models_path="${2:-$(get_script_path)}"
|
| 23 |
|
| 24 |
# Whisper models
|
| 25 |
-
models=
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
"large-v3-q5_0"
|
| 47 |
-
)
|
| 48 |
|
| 49 |
# list available models
|
| 50 |
-
|
| 51 |
printf "\n"
|
| 52 |
printf " Available models:"
|
| 53 |
-
for model in
|
| 54 |
-
printf " $model"
|
| 55 |
done
|
| 56 |
printf "\n\n"
|
| 57 |
}
|
| 58 |
|
| 59 |
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
|
| 60 |
-
printf "Usage:
|
| 61 |
list_models
|
| 62 |
|
| 63 |
exit 1
|
|
@@ -65,34 +63,36 @@ fi
|
|
| 65 |
|
| 66 |
model=$1
|
| 67 |
|
| 68 |
-
if
|
| 69 |
-
printf "Invalid model:
|
| 70 |
list_models
|
| 71 |
|
| 72 |
exit 1
|
| 73 |
fi
|
| 74 |
|
| 75 |
# check if model contains `tdrz` and update the src and pfx accordingly
|
| 76 |
-
if
|
| 77 |
src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
|
| 78 |
pfx="resolve/main/ggml"
|
| 79 |
fi
|
| 80 |
|
|
|
|
|
|
|
| 81 |
# download ggml model
|
| 82 |
|
| 83 |
-
printf "Downloading ggml model
|
| 84 |
|
| 85 |
-
cd "$models_path"
|
| 86 |
|
| 87 |
if [ -f "ggml-$model.bin" ]; then
|
| 88 |
-
printf "Model
|
| 89 |
exit 0
|
| 90 |
fi
|
| 91 |
|
| 92 |
if [ -x "$(command -v wget)" ]; then
|
| 93 |
-
wget --no-config --quiet --show-progress -O ggml
|
| 94 |
elif [ -x "$(command -v curl)" ]; then
|
| 95 |
-
curl -L --output ggml
|
| 96 |
else
|
| 97 |
printf "Either wget or curl is required to download models.\n"
|
| 98 |
exit 1
|
|
@@ -100,12 +100,13 @@ fi
|
|
| 100 |
|
| 101 |
|
| 102 |
if [ $? -ne 0 ]; then
|
| 103 |
-
printf "Failed to download ggml model
|
| 104 |
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
|
| 105 |
exit 1
|
| 106 |
fi
|
| 107 |
|
| 108 |
-
|
|
|
|
| 109 |
printf "You can now use it like this:\n\n"
|
| 110 |
-
printf " $ ./main -m
|
| 111 |
printf "\n"
|
|
|
|
#!/bin/sh

# This script downloads Whisper model files that have already been converted to ggml format.
# This way you don't have to convert them yourself.

src="https://huggingface.co/ggerganov/whisper.cpp"
pfx="resolve/main/ggml"

# get the path of this script (POSIX-safe: falls back when realpath is absent)
get_script_path() {
    if [ -x "$(command -v realpath)" ]; then
        dirname "$(realpath "$0")"
    else
        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
        echo "$_ret"
    fi
}

# optional second argument overrides the destination directory
models_path="${2:-$(get_script_path)}"

# Whisper models
models="tiny.en
tiny
tiny-q5_1
tiny.en-q5_1
base.en
base
base-q5_1
base.en-q5_1
small.en
small.en-tdrz
small
small-q5_1
small.en-q5_1
medium
medium.en
medium-q5_0
medium.en-q5_0
large-v1
large-v2
large-v3
large-v3-q5_0"

# list available models
list_models() {
    printf "\n"
    printf "  Available models:"
    for model in $models; do
        printf " %s" "$model"
    done
    printf "\n\n"
}

if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
    printf "Usage: %s <model> [models_path]\n" "$0"
    list_models
    exit 1
fi

model=$1

# -w matches the model name as a whole word within the list
if ! echo "$models" | grep -q -w "$model"; then
    printf "Invalid model: %s\n" "$model"
    list_models
    exit 1
fi

# check if model contains `tdrz` and update the src and pfx accordingly
if echo "$model" | grep -q "tdrz"; then
    src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
    pfx="resolve/main/ggml"
fi

# download ggml model

printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"

cd "$models_path" || exit

if [ -f "ggml-$model.bin" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

if [ -x "$(command -v wget)" ]; then
    wget --no-config --quiet --show-progress -O ggml-"$model".bin "$src/$pfx-$model.bin"
elif [ -x "$(command -v curl)" ]; then
    curl -L --output ggml-"$model".bin "$src/$pfx-$model.bin"
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi

if [ $? -ne 0 ]; then
    printf "Failed to download ggml model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
printf "You can now use it like this:\n\n"
printf "  $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model"
printf "\n"
|
models/generate-coreml-interface.sh
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#!/bin/
|
| 2 |
#
|
| 3 |
# This generates:
|
| 4 |
# - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
|
|
@@ -6,7 +6,7 @@
|
|
| 6 |
#
|
| 7 |
|
| 8 |
wd=$(dirname "$0")
|
| 9 |
-
cd "$wd/../"
|
| 10 |
|
| 11 |
python3 models/convert-whisper-to-coreml.py --model tiny.en
|
| 12 |
|
|
|
|
#!/bin/sh
#
# This generates:
#   - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
#   (NOTE(review): one header-comment line was hidden in the diff view — confirm against upstream)
#

wd=$(dirname "$0")
cd "$wd/../" || exit

python3 models/convert-whisper-to-coreml.py --model tiny.en
models/generate-coreml-model.sh
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#!/bin/
|
| 2 |
|
| 3 |
# Usage: ./generate-coreml-model.sh <model-name>
|
| 4 |
if [ $# -eq 0 ]; then
|
|
@@ -6,7 +6,7 @@ if [ $# -eq 0 ]; then
|
|
| 6 |
echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>"
|
| 7 |
echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
|
| 8 |
exit 1
|
| 9 |
-
elif [
|
| 10 |
echo "No model name and model path supplied for a HuggingFace model"
|
| 11 |
echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
|
| 12 |
exit 1
|
|
@@ -15,20 +15,20 @@ fi
|
|
| 15 |
mname="$1"
|
| 16 |
|
| 17 |
wd=$(dirname "$0")
|
| 18 |
-
cd "$wd/../"
|
| 19 |
|
| 20 |
-
if [
|
| 21 |
mname="$2"
|
| 22 |
mpath="$3"
|
| 23 |
-
echo $mpath
|
| 24 |
-
python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
|
| 25 |
else
|
| 26 |
-
python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True
|
| 27 |
fi
|
| 28 |
|
| 29 |
-
xcrun coremlc compile models/coreml-encoder
|
| 30 |
-
rm -rf models/ggml
|
| 31 |
-
mv -v models/coreml-encoder
|
| 32 |
|
| 33 |
# TODO: decoder (sometime in the future maybe)
|
| 34 |
#xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
|
|
|
|
#!/bin/sh

# Usage: ./generate-coreml-model.sh <model-name>
if [ $# -eq 0 ]; then
    # NOTE(review): this message line was hidden in the diff view — confirm wording against upstream
    echo "No model name supplied"
    echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>"
    echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
    exit 1
elif [ "$1" = "-h5" ] && [ $# != 3 ]; then
    echo "No model name and model path supplied for a HuggingFace model"
    echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
    exit 1
fi

mname="$1"

wd=$(dirname "$0")
cd "$wd/../" || exit

# -h5 mode: convert a HuggingFace model given its name and path
if [ "$mname" = "-h5" ]; then
    mname="$2"
    mpath="$3"
    echo "$mpath"
    python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True
else
    python3 models/convert-whisper-to-coreml.py --model "$mname" --encoder-only True --optimize-ane True
fi

# compile the encoder mlpackage and install it under the ggml naming scheme
xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/
rm -rf models/ggml-"${mname}"-encoder.mlmodelc
mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc

# TODO: decoder (sometime in the future maybe)
#xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
|