Spaces:
Running
Running
whisper : adapt to new ggml (wip)
Browse files- .gitignore +4 -0
- Makefile +101 -156
- Package.swift +11 -5
- examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt +6 -1
- examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj +35 -3
- examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift +0 -2
- ggml/ggml_vk_generate_shaders.py +0 -220
- src/ggml-cpu-impl.h +0 -614
- src/whisper.cpp +1 -4
.gitignore
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
*.o
|
| 2 |
*.a
|
|
|
|
| 3 |
.cache/
|
| 4 |
.coreml/
|
| 5 |
.test/
|
|
@@ -19,6 +20,9 @@ build-*/
|
|
| 19 |
.swiftpm
|
| 20 |
*.metallib
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
/main
|
| 23 |
/stream
|
| 24 |
/command
|
|
|
|
| 1 |
*.o
|
| 2 |
*.a
|
| 3 |
+
*.d
|
| 4 |
.cache/
|
| 5 |
.coreml/
|
| 6 |
.test/
|
|
|
|
| 20 |
.swiftpm
|
| 21 |
*.metallib
|
| 22 |
|
| 23 |
+
ggml-metal-embed.metal
|
| 24 |
+
ggml-metal-embed.metal.tmp
|
| 25 |
+
|
| 26 |
/main
|
| 27 |
/stream
|
| 28 |
/command
|
Makefile
CHANGED
|
@@ -444,17 +444,17 @@ endif
|
|
| 444 |
else
|
| 445 |
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
|
| 446 |
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
|
| 447 |
-
endif
|
| 448 |
|
| 449 |
ifndef GGML_NO_ACCELERATE
|
| 450 |
# Mac OS - include Accelerate framework.
|
| 451 |
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
| 452 |
ifeq ($(UNAME_S),Darwin)
|
| 453 |
-
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
| 454 |
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
| 455 |
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
| 456 |
MK_LDFLAGS += -framework Accelerate
|
| 457 |
-
OBJ_GGML += ggml/src/ggml-blas.o
|
| 458 |
endif
|
| 459 |
endif # GGML_NO_ACCELERATE
|
| 460 |
|
|
@@ -464,29 +464,38 @@ ifndef GGML_NO_OPENMP
|
|
| 464 |
MK_CXXFLAGS += -fopenmp
|
| 465 |
endif # GGML_NO_OPENMP
|
| 466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
ifdef GGML_OPENBLAS
|
| 468 |
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
| 469 |
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
| 470 |
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
| 471 |
-
OBJ_GGML += ggml/src/ggml-blas.o
|
| 472 |
endif # GGML_OPENBLAS
|
| 473 |
|
| 474 |
ifdef GGML_OPENBLAS64
|
| 475 |
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
| 476 |
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
| 477 |
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
| 478 |
-
OBJ_GGML += ggml/src/ggml-blas.o
|
| 479 |
endif # GGML_OPENBLAS64
|
| 480 |
|
| 481 |
ifdef GGML_BLIS
|
| 482 |
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
| 483 |
MK_LDFLAGS += -lblis -L/usr/local/lib
|
| 484 |
-
OBJ_GGML += ggml/src/ggml-blas.o
|
| 485 |
endif # GGML_BLIS
|
| 486 |
|
| 487 |
ifdef GGML_RPC
|
| 488 |
MK_CPPFLAGS += -DGGML_USE_RPC
|
| 489 |
-
OBJ_GGML += ggml/src/ggml-rpc.o
|
| 490 |
endif # GGML_RPC
|
| 491 |
|
| 492 |
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
|
|
@@ -513,7 +522,7 @@ ifdef GGML_CUDA
|
|
| 513 |
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
| 514 |
MK_NVCCFLAGS += -use_fast_math
|
| 515 |
|
| 516 |
-
OBJ_GGML += ggml/src/ggml-cuda.o
|
| 517 |
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
| 518 |
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
| 519 |
ifdef WHISPER_FATAL_WARNINGS
|
|
@@ -615,11 +624,11 @@ ggml/src/ggml-cuda/%.o: \
|
|
| 615 |
ggml/src/ggml-cuda/common.cuh
|
| 616 |
$(NVCC_COMPILE)
|
| 617 |
|
| 618 |
-
ggml/src/ggml-cuda.o: \
|
| 619 |
-
ggml/src/ggml-cuda.cu \
|
|
|
|
| 620 |
ggml/include/ggml.h \
|
| 621 |
ggml/include/ggml-backend.h \
|
| 622 |
-
ggml/include/ggml-cuda.h \
|
| 623 |
ggml/src/ggml-backend-impl.h \
|
| 624 |
ggml/src/ggml-common.h \
|
| 625 |
$(wildcard ggml/src/ggml-cuda/*.cuh)
|
|
@@ -742,50 +751,43 @@ endif # GGML_HIPBLAS
|
|
| 742 |
ifdef GGML_METAL
|
| 743 |
MK_CPPFLAGS += -DGGML_USE_METAL
|
| 744 |
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
| 745 |
-
OBJ_GGML += ggml/src/ggml-metal.o
|
| 746 |
ifdef GGML_METAL_NDEBUG
|
| 747 |
MK_CPPFLAGS += -DGGML_METAL_NDEBUG
|
| 748 |
endif
|
| 749 |
|
| 750 |
ifdef GGML_METAL_EMBED_LIBRARY
|
| 751 |
MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
|
| 752 |
-
OBJ_GGML += ggml/src/ggml-metal-embed.o
|
| 753 |
endif
|
| 754 |
endif # GGML_METAL
|
| 755 |
|
| 756 |
-
ifdef WHISPER_COREML
|
| 757 |
-
MK_CXXFLAGS += -DWHISPER_USE_COREML
|
| 758 |
-
LDFLAGS += -framework Foundation -framework CoreML
|
| 759 |
-
|
| 760 |
-
ifdef WHISPER_COREML_ALLOW_FALLBACK
|
| 761 |
-
MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
|
| 762 |
-
endif
|
| 763 |
-
endif
|
| 764 |
-
|
| 765 |
-
# ===
|
| 766 |
-
|
| 767 |
ifdef GGML_METAL
|
| 768 |
-
ggml/src/ggml-metal.o: \
|
| 769 |
-
ggml/src/ggml-metal.m \
|
|
|
|
| 770 |
ggml/include/ggml-metal.h \
|
| 771 |
ggml/include/ggml.h
|
| 772 |
$(CC) $(CFLAGS) -c $< -o $@
|
| 773 |
|
| 774 |
ifdef GGML_METAL_EMBED_LIBRARY
|
| 775 |
-
ggml/src/ggml-metal-embed.o: \
|
| 776 |
-
ggml/src/ggml-metal.metal \
|
|
|
|
| 777 |
ggml/src/ggml-common.h
|
| 778 |
@echo "Embedding Metal library"
|
| 779 |
-
@sed -e '
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
@echo ".
|
| 783 |
-
@echo "_ggml_metallib_start
|
| 784 |
-
@echo "
|
| 785 |
-
@echo ".
|
| 786 |
-
@echo "_ggml_metallib_end
|
| 787 |
-
|
| 788 |
-
|
|
|
|
|
|
|
| 789 |
endif
|
| 790 |
endif # GGML_METAL
|
| 791 |
|
|
@@ -801,11 +803,17 @@ endif
|
|
| 801 |
|
| 802 |
OBJ_GGML += \
|
| 803 |
ggml/src/ggml.o \
|
| 804 |
-
ggml/src/ggml-
|
| 805 |
ggml/src/ggml-alloc.o \
|
| 806 |
ggml/src/ggml-backend.o \
|
|
|
|
|
|
|
| 807 |
ggml/src/ggml-quants.o \
|
| 808 |
-
ggml/src/ggml-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
|
| 810 |
OBJ_WHISPER += \
|
| 811 |
src/whisper.o
|
|
@@ -910,114 +918,64 @@ endif
|
|
| 910 |
# Build libraries
|
| 911 |
#
|
| 912 |
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
ggml/src/ggml.o: \
|
| 916 |
-
ggml/src/ggml.c \
|
| 917 |
-
ggml/include/ggml.h
|
| 918 |
-
$(CC) $(CFLAGS) -c $< -o $@
|
| 919 |
-
|
| 920 |
-
ggml/src/ggml-cpu.o: \
|
| 921 |
-
ggml/src/ggml-cpu.c \
|
| 922 |
-
ggml/include/ggml.h \
|
| 923 |
-
ggml/src/ggml-common.h
|
| 924 |
-
$(CC) $(CFLAGS) -c $< -o $@
|
| 925 |
-
|
| 926 |
-
ggml/src/ggml-alloc.o: \
|
| 927 |
-
ggml/src/ggml-alloc.c \
|
| 928 |
-
ggml/include/ggml.h \
|
| 929 |
-
ggml/include/ggml-alloc.h
|
| 930 |
-
$(CC) $(CFLAGS) -c $< -o $@
|
| 931 |
-
|
| 932 |
-
ggml/src/ggml-backend.o: \
|
| 933 |
-
ggml/src/ggml-backend.cpp \
|
| 934 |
-
ggml/include/ggml.h \
|
| 935 |
-
ggml/include/ggml-backend.h
|
| 936 |
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
| 937 |
-
|
| 938 |
-
ggml/src/ggml-quants.o: \
|
| 939 |
-
ggml/src/ggml-quants.c \
|
| 940 |
-
ggml/include/ggml.h \
|
| 941 |
-
ggml/src/ggml-quants.h \
|
| 942 |
-
ggml/src/ggml-common.h
|
| 943 |
-
$(CC) $(CFLAGS) -c $< -o $@
|
| 944 |
-
|
| 945 |
-
ggml/src/ggml-aarch64.o: \
|
| 946 |
-
ggml/src/ggml-aarch64.c \
|
| 947 |
-
ggml/include/ggml.h \
|
| 948 |
-
ggml/src/ggml-aarch64.h \
|
| 949 |
-
ggml/src/ggml-common.h
|
| 950 |
-
$(CC) $(CFLAGS) -c $< -o $@
|
| 951 |
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
ggml/include/ggml-blas.h
|
| 955 |
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
| 956 |
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
ggml/src/sgemm.cpp \
|
| 960 |
-
ggml/src/sgemm.h \
|
| 961 |
-
ggml/include/ggml.h
|
| 962 |
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
| 963 |
-
endif # GGML_LLAMAFILE
|
| 964 |
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
ggml/src/ggml-rpc.cpp \
|
| 968 |
-
ggml/include/ggml-rpc.h
|
| 969 |
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
| 970 |
-
endif # GGML_RPC
|
| 971 |
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 975 |
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
ar rcs $(LIB_GGML_S) $^
|
| 979 |
|
| 980 |
-
#
|
|
|
|
| 981 |
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
|
|
|
|
|
|
| 985 |
ggml/include/ggml.h \
|
| 986 |
ggml/include/ggml-alloc.h \
|
| 987 |
-
ggml/
|
| 988 |
-
ggml/include/ggml-
|
| 989 |
-
ggml/
|
| 990 |
-
$(CXX) $(CXXFLAGS)
|
| 991 |
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
$(
|
| 995 |
-
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 996 |
|
| 997 |
-
|
| 998 |
-
$(
|
| 999 |
-
$(OBJ_GGML)
|
| 1000 |
-
ar rcs $(LIB_WHISPER_S) $^
|
| 1001 |
|
| 1002 |
-
|
|
|
|
| 1003 |
|
| 1004 |
-
examples
|
| 1005 |
-
|
| 1006 |
-
examples/common.h
|
| 1007 |
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
| 1008 |
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
|
|
|
|
|
|
| 1013 |
|
| 1014 |
-
$(
|
| 1015 |
-
$(OBJ_COMMON)
|
| 1016 |
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 1017 |
|
| 1018 |
-
$(
|
| 1019 |
-
$(
|
| 1020 |
-
ar rcs $(LIB_COMMON_S) $^
|
| 1021 |
|
| 1022 |
# common-sdl
|
| 1023 |
|
|
@@ -1029,34 +987,21 @@ examples/common-sdl.o: \
|
|
| 1029 |
examples/common-sdl.h
|
| 1030 |
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
|
| 1031 |
|
| 1032 |
-
$(
|
| 1033 |
-
$(
|
| 1034 |
-
|
|
|
|
|
|
|
| 1035 |
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
ar rcs $(LIB_COMMON_SDL_S) $^
|
| 1039 |
|
|
|
|
| 1040 |
clean:
|
| 1041 |
-
rm -vrf
|
| 1042 |
-
rm -rvf
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
rm -rvf examples/*.o
|
| 1046 |
-
rm -rvf *.a
|
| 1047 |
-
rm -rvf *.dll
|
| 1048 |
-
rm -rvf *.so
|
| 1049 |
-
rm -rvf *.dot
|
| 1050 |
-
rm -rvf ggml/*.a
|
| 1051 |
-
rm -rvf ggml/*.dll
|
| 1052 |
-
rm -rvf ggml/*.so
|
| 1053 |
-
rm -vrf ggml/src/*.o
|
| 1054 |
-
rm -vrf ggml/src/ggml-metal-embed.metal
|
| 1055 |
-
rm -vrf ggml/src/ggml-cuda/*.o
|
| 1056 |
-
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
|
| 1057 |
-
rm -rvf $(BUILD_TARGETS)
|
| 1058 |
-
rm -rvf $(TEST_TARGETS)
|
| 1059 |
-
find examples -type f -name "*.o" -delete
|
| 1060 |
|
| 1061 |
#
|
| 1062 |
# Examples
|
|
|
|
| 444 |
else
|
| 445 |
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
|
| 446 |
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
|
| 447 |
+
endif # RISCV
|
| 448 |
|
| 449 |
ifndef GGML_NO_ACCELERATE
|
| 450 |
# Mac OS - include Accelerate framework.
|
| 451 |
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
| 452 |
ifeq ($(UNAME_S),Darwin)
|
| 453 |
+
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
|
| 454 |
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
| 455 |
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
| 456 |
MK_LDFLAGS += -framework Accelerate
|
| 457 |
+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
| 458 |
endif
|
| 459 |
endif # GGML_NO_ACCELERATE
|
| 460 |
|
|
|
|
| 464 |
MK_CXXFLAGS += -fopenmp
|
| 465 |
endif # GGML_NO_OPENMP
|
| 466 |
|
| 467 |
+
ifdef WHISPER_COREML
|
| 468 |
+
MK_CXXFLAGS += -DWHISPER_USE_COREML
|
| 469 |
+
LDFLAGS += -framework Foundation -framework CoreML
|
| 470 |
+
|
| 471 |
+
ifdef WHISPER_COREML_ALLOW_FALLBACK
|
| 472 |
+
MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
|
| 473 |
+
endif
|
| 474 |
+
endif # WHISPER_COREML
|
| 475 |
+
|
| 476 |
ifdef GGML_OPENBLAS
|
| 477 |
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
| 478 |
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
| 479 |
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
| 480 |
+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
| 481 |
endif # GGML_OPENBLAS
|
| 482 |
|
| 483 |
ifdef GGML_OPENBLAS64
|
| 484 |
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
| 485 |
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
| 486 |
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
| 487 |
+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
| 488 |
endif # GGML_OPENBLAS64
|
| 489 |
|
| 490 |
ifdef GGML_BLIS
|
| 491 |
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
| 492 |
MK_LDFLAGS += -lblis -L/usr/local/lib
|
| 493 |
+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
| 494 |
endif # GGML_BLIS
|
| 495 |
|
| 496 |
ifdef GGML_RPC
|
| 497 |
MK_CPPFLAGS += -DGGML_USE_RPC
|
| 498 |
+
OBJ_GGML += ggml/src/ggml-rpc/ggml-rpc.o
|
| 499 |
endif # GGML_RPC
|
| 500 |
|
| 501 |
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
|
|
|
|
| 522 |
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
| 523 |
MK_NVCCFLAGS += -use_fast_math
|
| 524 |
|
| 525 |
+
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
|
| 526 |
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
| 527 |
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
| 528 |
ifdef WHISPER_FATAL_WARNINGS
|
|
|
|
| 624 |
ggml/src/ggml-cuda/common.cuh
|
| 625 |
$(NVCC_COMPILE)
|
| 626 |
|
| 627 |
+
ggml/src/ggml-cuda/ggml-cuda.o: \
|
| 628 |
+
ggml/src/ggml-cuda/ggml-cuda.cu \
|
| 629 |
+
ggml/include/ggml-cuda.h \
|
| 630 |
ggml/include/ggml.h \
|
| 631 |
ggml/include/ggml-backend.h \
|
|
|
|
| 632 |
ggml/src/ggml-backend-impl.h \
|
| 633 |
ggml/src/ggml-common.h \
|
| 634 |
$(wildcard ggml/src/ggml-cuda/*.cuh)
|
|
|
|
| 751 |
ifdef GGML_METAL
|
| 752 |
MK_CPPFLAGS += -DGGML_USE_METAL
|
| 753 |
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
| 754 |
+
OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o
|
| 755 |
ifdef GGML_METAL_NDEBUG
|
| 756 |
MK_CPPFLAGS += -DGGML_METAL_NDEBUG
|
| 757 |
endif
|
| 758 |
|
| 759 |
ifdef GGML_METAL_EMBED_LIBRARY
|
| 760 |
MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
|
| 761 |
+
OBJ_GGML += ggml/src/ggml-metal/ggml-metal-embed.o
|
| 762 |
endif
|
| 763 |
endif # GGML_METAL
|
| 764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 765 |
ifdef GGML_METAL
|
| 766 |
+
ggml/src/ggml-metal/ggml-metal.o: \
|
| 767 |
+
ggml/src/ggml-metal/ggml-metal.m \
|
| 768 |
+
ggml/src/ggml-metal/ggml-metal-impl.h \
|
| 769 |
ggml/include/ggml-metal.h \
|
| 770 |
ggml/include/ggml.h
|
| 771 |
$(CC) $(CFLAGS) -c $< -o $@
|
| 772 |
|
| 773 |
ifdef GGML_METAL_EMBED_LIBRARY
|
| 774 |
+
ggml/src/ggml-metal/ggml-metal-embed.o: \
|
| 775 |
+
ggml/src/ggml-metal/ggml-metal.metal \
|
| 776 |
+
ggml/src/ggml-metal/ggml-metal-impl.h \
|
| 777 |
ggml/src/ggml-common.h
|
| 778 |
@echo "Embedding Metal library"
|
| 779 |
+
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
|
| 780 |
+
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
|
| 781 |
+
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
| 782 |
+
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
| 783 |
+
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
| 784 |
+
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
| 785 |
+
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
| 786 |
+
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
| 787 |
+
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
| 788 |
+
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
|
| 789 |
+
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
|
| 790 |
+
@rmdir ${TEMP_ASSEMBLY}
|
| 791 |
endif
|
| 792 |
endif # GGML_METAL
|
| 793 |
|
|
|
|
| 803 |
|
| 804 |
OBJ_GGML += \
|
| 805 |
ggml/src/ggml.o \
|
| 806 |
+
ggml/src/ggml-aarch64.o \
|
| 807 |
ggml/src/ggml-alloc.o \
|
| 808 |
ggml/src/ggml-backend.o \
|
| 809 |
+
ggml/src/ggml-backend-reg.o \
|
| 810 |
+
ggml/src/ggml-opt.o \
|
| 811 |
ggml/src/ggml-quants.o \
|
| 812 |
+
ggml/src/ggml-threading.o \
|
| 813 |
+
ggml/src/ggml-cpu/ggml-cpu.o \
|
| 814 |
+
ggml/src/ggml-cpu/ggml-cpu-cpp.o \
|
| 815 |
+
ggml/src/ggml-cpu/ggml-cpu-aarch64.o \
|
| 816 |
+
ggml/src/ggml-cpu/ggml-cpu-quants.o
|
| 817 |
|
| 818 |
OBJ_WHISPER += \
|
| 819 |
src/whisper.o
|
|
|
|
| 918 |
# Build libraries
|
| 919 |
#
|
| 920 |
|
| 921 |
+
LIB_GGML = libggml.so
|
| 922 |
+
LIB_GGML_S = libggml.a
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 923 |
|
| 924 |
+
LIB_LLAMA = libllama.so
|
| 925 |
+
LIB_LLAMA_S = libllama.a
|
|
|
|
|
|
|
| 926 |
|
| 927 |
+
LIB_COMMON = libcommon.so
|
| 928 |
+
LIB_COMMON_S = libcommon.a
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
|
| 930 |
+
LIB_COMMON_SDL = libcommon-sdl.so
|
| 931 |
+
LIB_COMMON_SDL_S = libcommon-sdl.a
|
|
|
|
|
|
|
|
|
|
|
|
|
| 932 |
|
| 933 |
+
# Targets
|
| 934 |
+
BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)
|
|
|
|
| 935 |
|
| 936 |
+
# Dependency files
|
| 937 |
+
DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
|
|
|
|
| 938 |
|
| 939 |
+
# Default target
|
| 940 |
+
all: $(BUILD_TARGETS)
|
| 941 |
|
| 942 |
+
# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
|
| 943 |
+
# g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
|
| 944 |
+
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
|
| 945 |
+
ggml/src/ggml-cpu/ggml-cpu.cpp \
|
| 946 |
+
ggml/include/ggml-backend.h \
|
| 947 |
ggml/include/ggml.h \
|
| 948 |
ggml/include/ggml-alloc.h \
|
| 949 |
+
ggml/src/ggml-backend-impl.h \
|
| 950 |
+
ggml/include/ggml-cpu.h \
|
| 951 |
+
ggml/src/ggml-impl.h
|
| 952 |
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
| 953 |
|
| 954 |
+
# Rules for building object files
|
| 955 |
+
ggml/%.o: ggml/%.c
|
| 956 |
+
$(CC) $(CFLAGS) -MMD -c $< -o $@
|
|
|
|
| 957 |
|
| 958 |
+
ggml/%.o: ggml/%.cpp
|
| 959 |
+
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
|
|
|
|
|
|
|
| 960 |
|
| 961 |
+
src/%.o: src/%.cpp
|
| 962 |
+
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
|
| 963 |
|
| 964 |
+
examples/%.o: examples/%.cpp
|
| 965 |
+
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
|
|
|
|
|
|
|
| 966 |
|
| 967 |
+
# Rules for building libraries
|
| 968 |
+
$(LIB_GGML): $(OBJ_GGML)
|
| 969 |
+
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 970 |
+
|
| 971 |
+
$(LIB_GGML_S): $(OBJ_GGML)
|
| 972 |
+
ar rcs $(LIB_GGML_S) $^
|
| 973 |
|
| 974 |
+
$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
|
|
|
|
| 975 |
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 976 |
|
| 977 |
+
$(LIB_LLAMA_S): $(OBJ_LLAMA)
|
| 978 |
+
ar rcs $(LIB_LLAMA_S) $^
|
|
|
|
| 979 |
|
| 980 |
# common-sdl
|
| 981 |
|
|
|
|
| 987 |
examples/common-sdl.h
|
| 988 |
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
|
| 989 |
|
| 990 |
+
$(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML)
|
| 991 |
+
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 992 |
+
|
| 993 |
+
$(LIB_COMMON_S): $(OBJ_COMMON)
|
| 994 |
+
ar rcs $(LIB_COMMON_S) $^
|
| 995 |
|
| 996 |
+
# Include dependency files
|
| 997 |
+
-include $(DEP_FILES)
|
|
|
|
| 998 |
|
| 999 |
+
# Clean rule
|
| 1000 |
clean:
|
| 1001 |
+
rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS)
|
| 1002 |
+
rm -rvf *.a *.dll *.so *.dot
|
| 1003 |
+
find ggml src tests examples -type f -name "*.o" -delete
|
| 1004 |
+
find ggml src tests examples -type f -name "*.d" -delete
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1005 |
|
| 1006 |
#
|
| 1007 |
# Examples
|
Package.swift
CHANGED
|
@@ -28,7 +28,7 @@ let package = Package(
|
|
| 28 |
"tests",
|
| 29 |
"CMakeLists.txt",
|
| 30 |
"Makefile",
|
| 31 |
-
"ggml/src/ggml-metal-embed.metal"
|
| 32 |
],
|
| 33 |
sources: [
|
| 34 |
"ggml/src/ggml.c",
|
|
@@ -36,16 +36,22 @@ let package = Package(
|
|
| 36 |
"ggml/src/ggml-aarch64.c",
|
| 37 |
"ggml/src/ggml-alloc.c",
|
| 38 |
"ggml/src/ggml-backend.cpp",
|
| 39 |
-
"ggml/src/ggml-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
"ggml/src/ggml-quants.c",
|
| 41 |
-
"ggml/src/ggml-
|
|
|
|
| 42 |
],
|
| 43 |
-
resources: [.process("ggml/src/ggml-metal.metal")],
|
| 44 |
publicHeadersPath: "spm-headers",
|
| 45 |
cSettings: [
|
| 46 |
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
| 47 |
-
.define("GGML_USE_ACCELERATE"),
|
| 48 |
.unsafeFlags(["-fno-objc-arc"]),
|
|
|
|
|
|
|
| 49 |
.define("GGML_USE_METAL")
|
| 50 |
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
| 51 |
// We should consider add this in the future when we drop support for iOS 14
|
|
|
|
| 28 |
"tests",
|
| 29 |
"CMakeLists.txt",
|
| 30 |
"Makefile",
|
| 31 |
+
"ggml/src/ggml-metal/ggml-metal-embed.metal"
|
| 32 |
],
|
| 33 |
sources: [
|
| 34 |
"ggml/src/ggml.c",
|
|
|
|
| 36 |
"ggml/src/ggml-aarch64.c",
|
| 37 |
"ggml/src/ggml-alloc.c",
|
| 38 |
"ggml/src/ggml-backend.cpp",
|
| 39 |
+
"ggml/src/ggml-backend-reg.cpp",
|
| 40 |
+
"ggml/src/ggml-cpu/ggml-cpu.c",
|
| 41 |
+
"ggml/src/ggml-cpu/ggml-cpu.cpp",
|
| 42 |
+
"ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
|
| 43 |
+
"ggml/src/ggml-cpu/ggml-cpu-quants.c",
|
| 44 |
"ggml/src/ggml-quants.c",
|
| 45 |
+
"ggml/src/ggml-threading.cpp",
|
| 46 |
+
"ggml/src/ggml-metal/ggml-metal.m"
|
| 47 |
],
|
| 48 |
+
resources: [.process("ggml/src/ggml-metal/ggml-metal.metal")],
|
| 49 |
publicHeadersPath: "spm-headers",
|
| 50 |
cSettings: [
|
| 51 |
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
|
|
|
| 52 |
.unsafeFlags(["-fno-objc-arc"]),
|
| 53 |
+
.headerSearchPath("ggml/src"),
|
| 54 |
+
.define("GGML_USE_ACCELERATE"),
|
| 55 |
.define("GGML_USE_METAL")
|
| 56 |
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
| 57 |
// We should consider add this in the future when we drop support for iOS 14
|
examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
CHANGED
|
@@ -19,11 +19,16 @@ if (NOT GGML_HOME)
|
|
| 19 |
SOURCE_FILES
|
| 20 |
${SOURCE_FILES}
|
| 21 |
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
| 22 |
-
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu.c
|
| 23 |
${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
|
| 24 |
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
| 25 |
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
|
|
|
|
| 26 |
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
)
|
| 28 |
endif()
|
| 29 |
|
|
|
|
| 19 |
SOURCE_FILES
|
| 20 |
${SOURCE_FILES}
|
| 21 |
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
|
|
|
| 22 |
${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
|
| 23 |
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
| 24 |
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
|
| 25 |
+
${WHISPER_LIB_DIR}/ggml/src/ggml-backend-reg.cpp
|
| 26 |
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
| 27 |
+
${WHISPER_LIB_DIR}/ggml/src/ggml-threading.cpp
|
| 28 |
+
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.c
|
| 29 |
+
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.cpp
|
| 30 |
+
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-aarch64.c
|
| 31 |
+
${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
|
| 32 |
)
|
| 33 |
endif()
|
| 34 |
|
examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
CHANGED
|
@@ -25,6 +25,11 @@
|
|
| 25 |
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
|
| 26 |
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
| 27 |
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
|
| 29 |
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
|
| 30 |
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
|
|
@@ -50,8 +55,8 @@
|
|
| 50 |
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-aarch64.c"; path = "../../../ggml/src/ggml-aarch64.c"; sourceTree = "<group>"; };
|
| 51 |
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
|
| 52 |
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
|
| 53 |
-
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal.m"; sourceTree = "<group>"; };
|
| 54 |
-
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal.metal"; sourceTree = "<group>"; };
|
| 55 |
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
| 56 |
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
|
| 57 |
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
|
|
@@ -77,8 +82,17 @@
|
|
| 77 |
18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
|
| 78 |
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
|
| 79 |
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
|
| 80 |
-
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu.c"; sourceTree = "<group>"; };
|
| 81 |
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
|
| 83 |
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
|
| 84 |
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
|
|
@@ -118,6 +132,15 @@
|
|
| 118 |
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
|
| 119 |
isa = PBXGroup;
|
| 120 |
children = (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
|
| 122 |
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
|
| 123 |
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */,
|
|
@@ -252,11 +275,16 @@
|
|
| 252 |
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
|
| 253 |
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
|
| 254 |
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
|
|
|
|
|
|
|
| 255 |
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
|
|
|
|
|
|
|
| 256 |
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
|
| 257 |
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
|
| 258 |
18627C8C29052BE000BD2A04 /* main.m in Sources */,
|
| 259 |
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
|
|
|
|
| 260 |
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
|
| 261 |
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
|
| 262 |
);
|
|
@@ -335,6 +363,7 @@
|
|
| 335 |
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
| 336 |
GCC_WARN_UNUSED_FUNCTION = YES;
|
| 337 |
GCC_WARN_UNUSED_VARIABLE = YES;
|
|
|
|
| 338 |
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
| 339 |
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
| 340 |
MTL_FAST_MATH = YES;
|
|
@@ -388,6 +417,7 @@
|
|
| 388 |
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
| 389 |
GCC_WARN_UNUSED_FUNCTION = YES;
|
| 390 |
GCC_WARN_UNUSED_VARIABLE = YES;
|
|
|
|
| 391 |
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
| 392 |
MTL_ENABLE_DEBUG_INFO = NO;
|
| 393 |
MTL_FAST_MATH = YES;
|
|
@@ -410,6 +440,7 @@
|
|
| 410 |
DEVELOPMENT_TEAM = P8JZH34X63;
|
| 411 |
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
|
| 412 |
GENERATE_INFOPLIST_FILE = YES;
|
|
|
|
| 413 |
INFOPLIST_FILE = whisper.objc/Info.plist;
|
| 414 |
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
| 415 |
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
|
|
@@ -439,6 +470,7 @@
|
|
| 439 |
DEVELOPMENT_TEAM = P8JZH34X63;
|
| 440 |
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
|
| 441 |
GENERATE_INFOPLIST_FILE = YES;
|
|
|
|
| 442 |
INFOPLIST_FILE = whisper.objc/Info.plist;
|
| 443 |
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
| 444 |
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
|
|
|
|
| 25 |
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
|
| 26 |
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
| 27 |
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
|
| 28 |
+
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */; };
|
| 29 |
+
18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */; };
|
| 30 |
+
18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */; };
|
| 31 |
+
18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */; };
|
| 32 |
+
18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */; };
|
| 33 |
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
|
| 34 |
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
|
| 35 |
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
|
|
|
|
| 55 |
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-aarch64.c"; path = "../../../ggml/src/ggml-aarch64.c"; sourceTree = "<group>"; };
|
| 56 |
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
|
| 57 |
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
|
| 58 |
+
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
|
| 59 |
+
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
|
| 60 |
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
| 61 |
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
|
| 62 |
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
|
|
|
|
| 82 |
18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
|
| 83 |
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
|
| 84 |
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
|
| 85 |
+
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.c"; sourceTree = "<group>"; };
|
| 86 |
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
|
| 87 |
+
18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-threading.h"; path = "../../../ggml/src/ggml-threading.h"; sourceTree = "<group>"; };
|
| 88 |
+
18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-threading.cpp"; path = "../../../ggml/src/ggml-threading.cpp"; sourceTree = "<group>"; };
|
| 89 |
+
18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.cpp"; sourceTree = "<group>"; };
|
| 90 |
+
18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-aarch64.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.h"; sourceTree = "<group>"; };
|
| 91 |
+
18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-aarch64.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.c"; sourceTree = "<group>"; };
|
| 92 |
+
18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-impl.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-impl.h"; sourceTree = "<group>"; };
|
| 93 |
+
18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-quants.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.h"; sourceTree = "<group>"; };
|
| 94 |
+
18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-quants.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.c"; sourceTree = "<group>"; };
|
| 95 |
+
18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-backend-reg.cpp"; path = "../../../ggml/src/ggml-backend-reg.cpp"; sourceTree = "<group>"; };
|
| 96 |
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
|
| 97 |
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
|
| 98 |
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
|
|
|
|
| 132 |
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
|
| 133 |
isa = PBXGroup;
|
| 134 |
children = (
|
| 135 |
+
18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */,
|
| 136 |
+
18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */,
|
| 137 |
+
18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */,
|
| 138 |
+
18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */,
|
| 139 |
+
18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */,
|
| 140 |
+
18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */,
|
| 141 |
+
18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */,
|
| 142 |
+
18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */,
|
| 143 |
+
18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */,
|
| 144 |
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
|
| 145 |
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
|
| 146 |
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */,
|
|
|
|
| 275 |
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
|
| 276 |
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
|
| 277 |
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
|
| 278 |
+
18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */,
|
| 279 |
+
18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */,
|
| 280 |
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
|
| 281 |
+
18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */,
|
| 282 |
+
18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */,
|
| 283 |
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
|
| 284 |
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
|
| 285 |
18627C8C29052BE000BD2A04 /* main.m in Sources */,
|
| 286 |
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
|
| 287 |
+
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
|
| 288 |
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
|
| 289 |
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
|
| 290 |
);
|
|
|
|
| 363 |
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
| 364 |
GCC_WARN_UNUSED_FUNCTION = YES;
|
| 365 |
GCC_WARN_UNUSED_VARIABLE = YES;
|
| 366 |
+
HEADER_SEARCH_PATHS = "";
|
| 367 |
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
| 368 |
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
| 369 |
MTL_FAST_MATH = YES;
|
|
|
|
| 417 |
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
| 418 |
GCC_WARN_UNUSED_FUNCTION = YES;
|
| 419 |
GCC_WARN_UNUSED_VARIABLE = YES;
|
| 420 |
+
HEADER_SEARCH_PATHS = "";
|
| 421 |
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
| 422 |
MTL_ENABLE_DEBUG_INFO = NO;
|
| 423 |
MTL_FAST_MATH = YES;
|
|
|
|
| 440 |
DEVELOPMENT_TEAM = P8JZH34X63;
|
| 441 |
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
|
| 442 |
GENERATE_INFOPLIST_FILE = YES;
|
| 443 |
+
HEADER_SEARCH_PATHS = ../../../ggml/src/;
|
| 444 |
INFOPLIST_FILE = whisper.objc/Info.plist;
|
| 445 |
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
| 446 |
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
|
|
|
|
| 470 |
DEVELOPMENT_TEAM = P8JZH34X63;
|
| 471 |
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
|
| 472 |
GENERATE_INFOPLIST_FILE = YES;
|
| 473 |
+
HEADER_SEARCH_PATHS = ../../../ggml/src/;
|
| 474 |
INFOPLIST_FILE = whisper.objc/Info.plist;
|
| 475 |
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
| 476 |
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
|
examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
CHANGED
|
@@ -67,8 +67,6 @@ actor WhisperContext {
|
|
| 67 |
private func systemInfo() -> String {
|
| 68 |
var info = ""
|
| 69 |
if (ggml_cpu_has_neon() != 0) { info += "NEON " }
|
| 70 |
-
if (ggml_cpu_has_metal() != 0) { info += "METAL " }
|
| 71 |
-
if (ggml_cpu_has_blas() != 0) { info += "BLAS " }
|
| 72 |
return String(info.dropLast())
|
| 73 |
}
|
| 74 |
|
|
|
|
| 67 |
private func systemInfo() -> String {
|
| 68 |
var info = ""
|
| 69 |
if (ggml_cpu_has_neon() != 0) { info += "NEON " }
|
|
|
|
|
|
|
| 70 |
return String(info.dropLast())
|
| 71 |
}
|
| 72 |
|
ggml/ggml_vk_generate_shaders.py
DELETED
|
@@ -1,220 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python
|
| 2 |
-
|
| 3 |
-
import logging
|
| 4 |
-
import argparse
|
| 5 |
-
import asyncio
|
| 6 |
-
import os
|
| 7 |
-
from tempfile import gettempdir
|
| 8 |
-
|
| 9 |
-
logger = logging.getLogger("ggml-vk-generate-shaders")
|
| 10 |
-
|
| 11 |
-
GLSLC = "glslc"
|
| 12 |
-
|
| 13 |
-
type_names = [
|
| 14 |
-
"f32",
|
| 15 |
-
"f16",
|
| 16 |
-
"q4_0",
|
| 17 |
-
"q4_1",
|
| 18 |
-
"q5_0",
|
| 19 |
-
"q5_1",
|
| 20 |
-
"q8_0",
|
| 21 |
-
"q2_k",
|
| 22 |
-
"q3_k",
|
| 23 |
-
"q4_k",
|
| 24 |
-
"q5_k",
|
| 25 |
-
"q6_k",
|
| 26 |
-
]
|
| 27 |
-
|
| 28 |
-
ASYNCIO_CONCURRENCY = 64
|
| 29 |
-
|
| 30 |
-
input_dir = "vulkan-shaders"
|
| 31 |
-
output_dir = gettempdir()
|
| 32 |
-
|
| 33 |
-
lock = asyncio.Lock()
|
| 34 |
-
shader_fnames = []
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
async def string_to_spv(name, in_fname, defines, fp16=True):
|
| 38 |
-
name = f"{name}{'_fp32' if not fp16 else ''}"
|
| 39 |
-
out_fname = os.path.join(output_dir, f"{name}.spv")
|
| 40 |
-
|
| 41 |
-
in_path = os.path.join(input_dir, in_fname)
|
| 42 |
-
|
| 43 |
-
cmd = [GLSLC, "-fshader-stage=compute", "--target-env=vulkan1.2", "-O", in_path, "-o", out_fname]
|
| 44 |
-
|
| 45 |
-
cmd.extend([f"-D{key}={value}" for key, value in defines.items()])
|
| 46 |
-
|
| 47 |
-
proc = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
|
| 48 |
-
|
| 49 |
-
stdout, stderr = await proc.communicate()
|
| 50 |
-
|
| 51 |
-
stdout = stdout.decode()
|
| 52 |
-
error = stderr.decode()
|
| 53 |
-
|
| 54 |
-
if proc.returncode:
|
| 55 |
-
cmd = " ".join(cmd)
|
| 56 |
-
logger.error(f"cannot compile {name}\n\n{cmd}\n\n{error}")
|
| 57 |
-
return
|
| 58 |
-
|
| 59 |
-
async with lock:
|
| 60 |
-
shader_fnames.append((name, out_fname))
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
def matmul_shaders(tasks, fp16, matmul_id):
|
| 64 |
-
if fp16:
|
| 65 |
-
load_vec = "8"
|
| 66 |
-
aligned_b_type_f32 = "mat2x4"
|
| 67 |
-
aligned_b_type_f16 = "f16mat2x4"
|
| 68 |
-
else:
|
| 69 |
-
load_vec = "4"
|
| 70 |
-
aligned_b_type_f32 = "vec4"
|
| 71 |
-
aligned_b_type_f16 = "f16vec4"
|
| 72 |
-
|
| 73 |
-
base_dict = {"FLOAT_TYPE": "float" if not fp16 else "float16_t"}
|
| 74 |
-
shader_name = "matmul"
|
| 75 |
-
|
| 76 |
-
if matmul_id:
|
| 77 |
-
base_dict["MUL_MAT_ID"] = "1"
|
| 78 |
-
shader_name = "matmul_id"
|
| 79 |
-
|
| 80 |
-
if fp16:
|
| 81 |
-
base_dict["FLOAT16"] = "1"
|
| 82 |
-
|
| 83 |
-
# Shaders with f16 B_TYPE
|
| 84 |
-
tasks.append(string_to_spv(f"{shader_name}_f32_f16", "mul_mm.comp", base_dict | {"DATA_A_F32": "1", "B_TYPE": "float16_t", "D_TYPE": "float"}, fp16))
|
| 85 |
-
tasks.append(string_to_spv(f"{shader_name}_f32_f16_aligned", "mul_mm.comp", base_dict | {"DATA_A_F32": "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"}, fp16))
|
| 86 |
-
|
| 87 |
-
tasks.append(string_to_spv(f"{shader_name}_f16", "mul_mm.comp", base_dict | {"DATA_A_F16": "1", "B_TYPE": "float16_t", "D_TYPE": "float"}, fp16))
|
| 88 |
-
tasks.append(string_to_spv(f"{shader_name}_f16_aligned", "mul_mm.comp", base_dict | {"DATA_A_F16": "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"}, fp16))
|
| 89 |
-
|
| 90 |
-
for tname in type_names:
|
| 91 |
-
data_a_key = f"DATA_A_{tname.upper()}"
|
| 92 |
-
load_vec_a = load_vec if tname in ("f32", "f16") else "2"
|
| 93 |
-
tasks.append(string_to_spv(f"{shader_name}_{tname}_f32", "mul_mm.comp", base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
| 94 |
-
tasks.append(string_to_spv(f"{shader_name}_{tname}_f32_aligned", "mul_mm.comp", base_dict | {data_a_key: "2", "LOAD_VEC_A": load_vec_a, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f32, "D_TYPE": "float"}, fp16))
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
async def main():
|
| 98 |
-
logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V")
|
| 99 |
-
|
| 100 |
-
tasks = []
|
| 101 |
-
|
| 102 |
-
for fp16 in (False, True):
|
| 103 |
-
# MUL_MAT
|
| 104 |
-
matmul_shaders(tasks, fp16, False)
|
| 105 |
-
# MUL_MAT_ID
|
| 106 |
-
matmul_shaders(tasks, fp16, True)
|
| 107 |
-
|
| 108 |
-
for tname in type_names:
|
| 109 |
-
base_dict = {"FLOAT_TYPE": "float"}
|
| 110 |
-
|
| 111 |
-
# mul mat vec
|
| 112 |
-
data_a_key = f"DATA_A_{tname.upper()}"
|
| 113 |
-
shader = f"mul_mat_vec_{tname}.comp" if tname.endswith("_k") else "mul_mat_vec.comp"
|
| 114 |
-
|
| 115 |
-
tasks.append(string_to_spv(f"mul_mat_vec_{tname}_f32_f32", shader, base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}))
|
| 116 |
-
tasks.append(string_to_spv(f"mul_mat_vec_{tname}_f16_f32", shader, base_dict | {data_a_key: "1", "B_TYPE": "float16_t", "D_TYPE": "float"}))
|
| 117 |
-
|
| 118 |
-
tasks.append(string_to_spv(f"mul_mat_vec_id_{tname}_f32", shader, base_dict | {"MUL_MAT_ID": "1", data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}))
|
| 119 |
-
|
| 120 |
-
# Dequant shaders
|
| 121 |
-
if tname != "f16":
|
| 122 |
-
tasks.append(string_to_spv(f"dequant_{tname}", f"dequant_{tname}.comp", base_dict | {data_a_key: "1", "D_TYPE": "float16_t"}))
|
| 123 |
-
|
| 124 |
-
# get_rows
|
| 125 |
-
if not tname.endswith("_k"):
|
| 126 |
-
shader = "get_rows.comp" if tname in ("f32", "f16") else "get_rows_quant.comp"
|
| 127 |
-
|
| 128 |
-
if tname == "f16":
|
| 129 |
-
tasks.append(string_to_spv(f"get_rows_{tname}", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
|
| 130 |
-
else:
|
| 131 |
-
tasks.append(string_to_spv(f"get_rows_{tname}", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t"}))
|
| 132 |
-
tasks.append(string_to_spv(f"get_rows_{tname}_f32", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float"}))
|
| 133 |
-
|
| 134 |
-
tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", "mul_mat_vec_p021.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
|
| 135 |
-
tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", "mul_mat_vec_nc.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
|
| 136 |
-
|
| 137 |
-
# Norms
|
| 138 |
-
tasks.append(string_to_spv("norm_f32", "norm.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 139 |
-
tasks.append(string_to_spv("rms_norm_f32", "rms_norm.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 140 |
-
|
| 141 |
-
tasks.append(string_to_spv("cpy_f32_f32", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 142 |
-
tasks.append(string_to_spv("cpy_f32_f16", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
|
| 143 |
-
tasks.append(string_to_spv("cpy_f16_f16", "copy.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
|
| 144 |
-
|
| 145 |
-
tasks.append(string_to_spv("add_f32", "add.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
| 146 |
-
|
| 147 |
-
tasks.append(string_to_spv("split_k_reduce", "mul_mat_split_k_reduce.comp", {}))
|
| 148 |
-
|
| 149 |
-
tasks.append(string_to_spv("mul_f32", "mul.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
| 150 |
-
|
| 151 |
-
tasks.append(string_to_spv("div_f32", "div.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
| 152 |
-
|
| 153 |
-
tasks.append(string_to_spv("scale_f32", "scale.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
| 154 |
-
|
| 155 |
-
tasks.append(string_to_spv("sqr_f32", "square.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
| 156 |
-
|
| 157 |
-
tasks.append(string_to_spv("clamp_f32", "clamp.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
| 158 |
-
|
| 159 |
-
tasks.append(string_to_spv("gelu_f32", "gelu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 160 |
-
tasks.append(string_to_spv("silu_f32", "silu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 161 |
-
tasks.append(string_to_spv("relu_f32", "relu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 162 |
-
|
| 163 |
-
tasks.append(string_to_spv("diag_mask_inf_f32", "diag_mask_inf.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 164 |
-
|
| 165 |
-
tasks.append(string_to_spv("soft_max_f32", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}))
|
| 166 |
-
tasks.append(string_to_spv("soft_max_f32_f16", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float16_t", "D_TYPE": "float"}))
|
| 167 |
-
|
| 168 |
-
tasks.append(string_to_spv("rope_norm_f32", "rope_norm.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 169 |
-
tasks.append(string_to_spv("rope_norm_f16", "rope_norm.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
|
| 170 |
-
|
| 171 |
-
tasks.append(string_to_spv("rope_neox_f32", "rope_neox.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 172 |
-
tasks.append(string_to_spv("rope_neox_f16", "rope_neox.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
|
| 173 |
-
|
| 174 |
-
tasks.append(string_to_spv("argsort_f32", "argsort.comp", {"A_TYPE": "float"}))
|
| 175 |
-
|
| 176 |
-
tasks.append(string_to_spv("sum_rows_f32", "sum_rows.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
|
| 177 |
-
|
| 178 |
-
# Helper to decorate tasks with semaphore acquisition.
|
| 179 |
-
async def withSemaphore(sem, task):
|
| 180 |
-
async with sem:
|
| 181 |
-
return await task
|
| 182 |
-
|
| 183 |
-
# Run tasks concurrently guarded by a concurrency limit.
|
| 184 |
-
sem = asyncio.Semaphore(ASYNCIO_CONCURRENCY)
|
| 185 |
-
await asyncio.gather(*(withSemaphore(sem, task) for task in tasks))
|
| 186 |
-
|
| 187 |
-
with open("ggml-vulkan-shaders.hpp", "w") as f:
|
| 188 |
-
f.write("#include <cstdint>\n\n")
|
| 189 |
-
for name, path in sorted(shader_fnames):
|
| 190 |
-
|
| 191 |
-
with open(path, "rb") as spv:
|
| 192 |
-
counter = 0
|
| 193 |
-
newline_counter = 0
|
| 194 |
-
f.write(f"unsigned char {name}_data[] = {{\n")
|
| 195 |
-
for val in spv.read():
|
| 196 |
-
f.write(f"0x{val:02x},")
|
| 197 |
-
newline_counter += 1
|
| 198 |
-
counter += 1
|
| 199 |
-
if newline_counter >= 12:
|
| 200 |
-
newline_counter = 0
|
| 201 |
-
f.write("\n")
|
| 202 |
-
f.write("\n};\n")
|
| 203 |
-
f.write(f"const uint64_t {name}_len = {counter};\n\n")
|
| 204 |
-
os.remove(path)
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
if __name__ == "__main__":
|
| 208 |
-
parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator")
|
| 209 |
-
|
| 210 |
-
parser.add_argument("--glslc", help="Path to glslc")
|
| 211 |
-
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
|
| 212 |
-
|
| 213 |
-
args = parser.parse_args()
|
| 214 |
-
|
| 215 |
-
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
|
| 216 |
-
|
| 217 |
-
if args.glslc:
|
| 218 |
-
GLSLC = args.glslc
|
| 219 |
-
|
| 220 |
-
asyncio.run(main())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ggml-cpu-impl.h
DELETED
|
@@ -1,614 +0,0 @@
|
|
| 1 |
-
#pragma once
|
| 2 |
-
|
| 3 |
-
// GGML CPU internal header
|
| 4 |
-
|
| 5 |
-
#include "ggml.h"
|
| 6 |
-
#include "ggml-impl.h"
|
| 7 |
-
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
|
| 8 |
-
//#include <stddef.h>
|
| 9 |
-
#include <stdbool.h>
|
| 10 |
-
#include <string.h> // memcpy
|
| 11 |
-
#include <math.h> // fabsf
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
#ifdef __cplusplus
|
| 15 |
-
extern "C" {
|
| 16 |
-
#endif
|
| 17 |
-
|
| 18 |
-
#if defined(_MSC_VER)
|
| 19 |
-
|
| 20 |
-
#define m512bh(p) p
|
| 21 |
-
#define m512i(p) p
|
| 22 |
-
|
| 23 |
-
#else
|
| 24 |
-
|
| 25 |
-
#define m512bh(p) (__m512bh)(p)
|
| 26 |
-
#define m512i(p) (__m512i)(p)
|
| 27 |
-
|
| 28 |
-
#endif
|
| 29 |
-
|
| 30 |
-
/**
|
| 31 |
-
* Converts brain16 to float32.
|
| 32 |
-
*
|
| 33 |
-
* The bfloat16 floating point format has the following structure:
|
| 34 |
-
*
|
| 35 |
-
* ┌sign
|
| 36 |
-
* │
|
| 37 |
-
* │ ┌exponent
|
| 38 |
-
* │ │
|
| 39 |
-
* │ │ ┌mantissa
|
| 40 |
-
* │ │ │
|
| 41 |
-
* │┌──┴───┐┌─┴───┐
|
| 42 |
-
* 0b0000000000000000 brain16
|
| 43 |
-
*
|
| 44 |
-
* Since bf16 has the same number of exponent bits as a 32bit float,
|
| 45 |
-
* encoding and decoding numbers becomes relatively straightforward.
|
| 46 |
-
*
|
| 47 |
-
* ┌sign
|
| 48 |
-
* │
|
| 49 |
-
* │ ┌exponent
|
| 50 |
-
* │ │
|
| 51 |
-
* │ │ ┌mantissa
|
| 52 |
-
* │ │ │
|
| 53 |
-
* │┌──┴───┐┌─┴───────────────────┐
|
| 54 |
-
* 0b00000000000000000000000000000000 IEEE binary32
|
| 55 |
-
*
|
| 56 |
-
* For comparison, the standard fp16 format has fewer exponent bits.
|
| 57 |
-
*
|
| 58 |
-
* ┌sign
|
| 59 |
-
* │
|
| 60 |
-
* │ ┌exponent
|
| 61 |
-
* │ │
|
| 62 |
-
* │ │ ┌mantissa
|
| 63 |
-
* │ │ │
|
| 64 |
-
* │┌─┴─┐┌─┴──────┐
|
| 65 |
-
* 0b0000000000000000 IEEE binary16
|
| 66 |
-
*
|
| 67 |
-
* @see IEEE 754-2008
|
| 68 |
-
*/
|
| 69 |
-
static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
|
| 70 |
-
union {
|
| 71 |
-
float f;
|
| 72 |
-
uint32_t i;
|
| 73 |
-
} u;
|
| 74 |
-
u.i = (uint32_t)h.bits << 16;
|
| 75 |
-
return u.f;
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
/**
|
| 79 |
-
* Converts float32 to brain16.
|
| 80 |
-
*
|
| 81 |
-
* This is binary identical with Google Brain float conversion.
|
| 82 |
-
* Floats shall round to nearest even, and NANs shall be quiet.
|
| 83 |
-
* Subnormals aren't flushed to zero, except perhaps when used.
|
| 84 |
-
* This code should vectorize nicely if using modern compilers.
|
| 85 |
-
*/
|
| 86 |
-
static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
|
| 87 |
-
ggml_bf16_t h;
|
| 88 |
-
union {
|
| 89 |
-
float f;
|
| 90 |
-
uint32_t i;
|
| 91 |
-
} u;
|
| 92 |
-
u.f = s;
|
| 93 |
-
if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
|
| 94 |
-
h.bits = (u.i >> 16) | 64; /* force to quiet */
|
| 95 |
-
return h;
|
| 96 |
-
}
|
| 97 |
-
h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
|
| 98 |
-
return h;
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
-
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
|
| 102 |
-
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
|
| 103 |
-
|
| 104 |
-
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
| 105 |
-
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
|
| 106 |
-
#ifndef __FMA__
|
| 107 |
-
#define __FMA__
|
| 108 |
-
#endif
|
| 109 |
-
#ifndef __F16C__
|
| 110 |
-
#define __F16C__
|
| 111 |
-
#endif
|
| 112 |
-
#endif
|
| 113 |
-
|
| 114 |
-
// __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
|
| 115 |
-
#if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
|
| 116 |
-
#ifndef __SSE3__
|
| 117 |
-
#define __SSE3__
|
| 118 |
-
#endif
|
| 119 |
-
#ifndef __SSSE3__
|
| 120 |
-
#define __SSSE3__
|
| 121 |
-
#endif
|
| 122 |
-
#endif
|
| 123 |
-
|
| 124 |
-
#if defined(__ARM_FEATURE_SVE)
|
| 125 |
-
#include <arm_sve.h>
|
| 126 |
-
#include <sys/prctl.h>
|
| 127 |
-
#endif
|
| 128 |
-
|
| 129 |
-
// 16-bit float
|
| 130 |
-
// on Arm, we use __fp16
|
| 131 |
-
// on x86, we use uint16_t
|
| 132 |
-
#if defined(__ARM_NEON)
|
| 133 |
-
|
| 134 |
-
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
| 135 |
-
//
|
| 136 |
-
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
| 137 |
-
//
|
| 138 |
-
#include <arm_neon.h>
|
| 139 |
-
|
| 140 |
-
#ifdef _MSC_VER
|
| 141 |
-
|
| 142 |
-
typedef uint16_t ggml_fp16_internal_t;
|
| 143 |
-
|
| 144 |
-
#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
|
| 145 |
-
|
| 146 |
-
#else
|
| 147 |
-
|
| 148 |
-
typedef __fp16 ggml_fp16_internal_t;
|
| 149 |
-
|
| 150 |
-
#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
|
| 151 |
-
|
| 152 |
-
#endif // _MSC_VER
|
| 153 |
-
|
| 154 |
-
#if !defined(__aarch64__)
|
| 155 |
-
|
| 156 |
-
// 32-bit ARM compatibility
|
| 157 |
-
|
| 158 |
-
// vaddlvq_s16
|
| 159 |
-
// vpaddq_s16
|
| 160 |
-
// vpaddq_s32
|
| 161 |
-
// vaddvq_s32
|
| 162 |
-
// vaddvq_f32
|
| 163 |
-
// vmaxvq_f32
|
| 164 |
-
// vcvtnq_s32_f32
|
| 165 |
-
// vzip1_u8
|
| 166 |
-
// vzip2_u8
|
| 167 |
-
|
| 168 |
-
inline static int32_t vaddlvq_s16(int16x8_t v) {
|
| 169 |
-
int32x4_t v0 = vreinterpretq_s32_s64(vpaddlq_s32(vpaddlq_s16(v)));
|
| 170 |
-
return vgetq_lane_s32(v0, 0) + vgetq_lane_s32(v0, 2);
|
| 171 |
-
}
|
| 172 |
-
|
| 173 |
-
inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
|
| 174 |
-
int16x4_t a0 = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
|
| 175 |
-
int16x4_t b0 = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
|
| 176 |
-
return vcombine_s16(a0, b0);
|
| 177 |
-
}
|
| 178 |
-
|
| 179 |
-
inline static int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
|
| 180 |
-
int32x2_t a0 = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
|
| 181 |
-
int32x2_t b0 = vpadd_s32(vget_low_s32(b), vget_high_s32(b));
|
| 182 |
-
return vcombine_s32(a0, b0);
|
| 183 |
-
}
|
| 184 |
-
|
| 185 |
-
inline static int32_t vaddvq_s32(int32x4_t v) {
|
| 186 |
-
return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
|
| 187 |
-
}
|
| 188 |
-
|
| 189 |
-
inline static float vaddvq_f32(float32x4_t v) {
|
| 190 |
-
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
| 191 |
-
}
|
| 192 |
-
|
| 193 |
-
inline static float vmaxvq_f32(float32x4_t v) {
|
| 194 |
-
return
|
| 195 |
-
MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
|
| 196 |
-
MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
|
| 197 |
-
}
|
| 198 |
-
|
| 199 |
-
inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
|
| 200 |
-
int32x4_t res;
|
| 201 |
-
|
| 202 |
-
res[0] = roundf(vgetq_lane_f32(v, 0));
|
| 203 |
-
res[1] = roundf(vgetq_lane_f32(v, 1));
|
| 204 |
-
res[2] = roundf(vgetq_lane_f32(v, 2));
|
| 205 |
-
res[3] = roundf(vgetq_lane_f32(v, 3));
|
| 206 |
-
|
| 207 |
-
return res;
|
| 208 |
-
}
|
| 209 |
-
|
| 210 |
-
inline static uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
|
| 211 |
-
uint8x8_t res;
|
| 212 |
-
|
| 213 |
-
res[0] = a[0]; res[1] = b[0];
|
| 214 |
-
res[2] = a[1]; res[3] = b[1];
|
| 215 |
-
res[4] = a[2]; res[5] = b[2];
|
| 216 |
-
res[6] = a[3]; res[7] = b[3];
|
| 217 |
-
|
| 218 |
-
return res;
|
| 219 |
-
}
|
| 220 |
-
|
| 221 |
-
inline static uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
|
| 222 |
-
uint8x8_t res;
|
| 223 |
-
|
| 224 |
-
res[0] = a[4]; res[1] = b[4];
|
| 225 |
-
res[2] = a[5]; res[3] = b[5];
|
| 226 |
-
res[4] = a[6]; res[5] = b[6];
|
| 227 |
-
res[6] = a[7]; res[7] = b[7];
|
| 228 |
-
|
| 229 |
-
return res;
|
| 230 |
-
}
|
| 231 |
-
|
| 232 |
-
// vld1q_s16_x2
|
| 233 |
-
// vld1q_u8_x2
|
| 234 |
-
// vld1q_u8_x4
|
| 235 |
-
// vld1q_s8_x2
|
| 236 |
-
// vld1q_s8_x4
|
| 237 |
-
// TODO: double-check these work correctly
|
| 238 |
-
|
| 239 |
-
typedef struct ggml_int16x8x2_t {
|
| 240 |
-
int16x8_t val[2];
|
| 241 |
-
} ggml_int16x8x2_t;
|
| 242 |
-
|
| 243 |
-
inline static ggml_int16x8x2_t ggml_vld1q_s16_x2(const int16_t * ptr) {
|
| 244 |
-
ggml_int16x8x2_t res;
|
| 245 |
-
|
| 246 |
-
res.val[0] = vld1q_s16(ptr + 0);
|
| 247 |
-
res.val[1] = vld1q_s16(ptr + 8);
|
| 248 |
-
|
| 249 |
-
return res;
|
| 250 |
-
}
|
| 251 |
-
|
| 252 |
-
typedef struct ggml_uint8x16x2_t {
|
| 253 |
-
uint8x16_t val[2];
|
| 254 |
-
} ggml_uint8x16x2_t;
|
| 255 |
-
|
| 256 |
-
inline static ggml_uint8x16x2_t ggml_vld1q_u8_x2(const uint8_t * ptr) {
|
| 257 |
-
ggml_uint8x16x2_t res;
|
| 258 |
-
|
| 259 |
-
res.val[0] = vld1q_u8(ptr + 0);
|
| 260 |
-
res.val[1] = vld1q_u8(ptr + 16);
|
| 261 |
-
|
| 262 |
-
return res;
|
| 263 |
-
}
|
| 264 |
-
|
| 265 |
-
typedef struct ggml_uint8x16x4_t {
|
| 266 |
-
uint8x16_t val[4];
|
| 267 |
-
} ggml_uint8x16x4_t;
|
| 268 |
-
|
| 269 |
-
inline static ggml_uint8x16x4_t ggml_vld1q_u8_x4(const uint8_t * ptr) {
|
| 270 |
-
ggml_uint8x16x4_t res;
|
| 271 |
-
|
| 272 |
-
res.val[0] = vld1q_u8(ptr + 0);
|
| 273 |
-
res.val[1] = vld1q_u8(ptr + 16);
|
| 274 |
-
res.val[2] = vld1q_u8(ptr + 32);
|
| 275 |
-
res.val[3] = vld1q_u8(ptr + 48);
|
| 276 |
-
|
| 277 |
-
return res;
|
| 278 |
-
}
|
| 279 |
-
|
| 280 |
-
typedef struct ggml_int8x16x2_t {
|
| 281 |
-
int8x16_t val[2];
|
| 282 |
-
} ggml_int8x16x2_t;
|
| 283 |
-
|
| 284 |
-
inline static ggml_int8x16x2_t ggml_vld1q_s8_x2(const int8_t * ptr) {
|
| 285 |
-
ggml_int8x16x2_t res;
|
| 286 |
-
|
| 287 |
-
res.val[0] = vld1q_s8(ptr + 0);
|
| 288 |
-
res.val[1] = vld1q_s8(ptr + 16);
|
| 289 |
-
|
| 290 |
-
return res;
|
| 291 |
-
}
|
| 292 |
-
|
| 293 |
-
typedef struct ggml_int8x16x4_t {
|
| 294 |
-
int8x16_t val[4];
|
| 295 |
-
} ggml_int8x16x4_t;
|
| 296 |
-
|
| 297 |
-
inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
|
| 298 |
-
ggml_int8x16x4_t res;
|
| 299 |
-
|
| 300 |
-
res.val[0] = vld1q_s8(ptr + 0);
|
| 301 |
-
res.val[1] = vld1q_s8(ptr + 16);
|
| 302 |
-
res.val[2] = vld1q_s8(ptr + 32);
|
| 303 |
-
res.val[3] = vld1q_s8(ptr + 48);
|
| 304 |
-
|
| 305 |
-
return res;
|
| 306 |
-
}
|
| 307 |
-
|
| 308 |
-
// NOTE: not tested
|
| 309 |
-
inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
|
| 310 |
-
int8x16_t res;
|
| 311 |
-
|
| 312 |
-
res[ 0] = a[b[ 0]];
|
| 313 |
-
res[ 1] = a[b[ 1]];
|
| 314 |
-
res[ 2] = a[b[ 2]];
|
| 315 |
-
res[ 3] = a[b[ 3]];
|
| 316 |
-
res[ 4] = a[b[ 4]];
|
| 317 |
-
res[ 5] = a[b[ 5]];
|
| 318 |
-
res[ 6] = a[b[ 6]];
|
| 319 |
-
res[ 7] = a[b[ 7]];
|
| 320 |
-
res[ 8] = a[b[ 8]];
|
| 321 |
-
res[ 9] = a[b[ 9]];
|
| 322 |
-
res[10] = a[b[10]];
|
| 323 |
-
res[11] = a[b[11]];
|
| 324 |
-
res[12] = a[b[12]];
|
| 325 |
-
res[13] = a[b[13]];
|
| 326 |
-
res[14] = a[b[14]];
|
| 327 |
-
res[15] = a[b[15]];
|
| 328 |
-
|
| 329 |
-
return res;
|
| 330 |
-
}
|
| 331 |
-
|
| 332 |
-
// NOTE: not tested
|
| 333 |
-
inline static uint8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
|
| 334 |
-
uint8x16_t res;
|
| 335 |
-
|
| 336 |
-
res[ 0] = a[b[ 0]];
|
| 337 |
-
res[ 1] = a[b[ 1]];
|
| 338 |
-
res[ 2] = a[b[ 2]];
|
| 339 |
-
res[ 3] = a[b[ 3]];
|
| 340 |
-
res[ 4] = a[b[ 4]];
|
| 341 |
-
res[ 5] = a[b[ 5]];
|
| 342 |
-
res[ 6] = a[b[ 6]];
|
| 343 |
-
res[ 7] = a[b[ 7]];
|
| 344 |
-
res[ 8] = a[b[ 8]];
|
| 345 |
-
res[ 9] = a[b[ 9]];
|
| 346 |
-
res[10] = a[b[10]];
|
| 347 |
-
res[11] = a[b[11]];
|
| 348 |
-
res[12] = a[b[12]];
|
| 349 |
-
res[13] = a[b[13]];
|
| 350 |
-
res[14] = a[b[14]];
|
| 351 |
-
res[15] = a[b[15]];
|
| 352 |
-
|
| 353 |
-
return res;
|
| 354 |
-
}
|
| 355 |
-
|
| 356 |
-
#else
|
| 357 |
-
|
| 358 |
-
#define ggml_int16x8x2_t int16x8x2_t
|
| 359 |
-
#define ggml_uint8x16x2_t uint8x16x2_t
|
| 360 |
-
#define ggml_uint8x16x4_t uint8x16x4_t
|
| 361 |
-
#define ggml_int8x16x2_t int8x16x2_t
|
| 362 |
-
#define ggml_int8x16x4_t int8x16x4_t
|
| 363 |
-
|
| 364 |
-
#define ggml_vld1q_s16_x2 vld1q_s16_x2
|
| 365 |
-
#define ggml_vld1q_u8_x2 vld1q_u8_x2
|
| 366 |
-
#define ggml_vld1q_u8_x4 vld1q_u8_x4
|
| 367 |
-
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
| 368 |
-
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
| 369 |
-
#define ggml_vqtbl1q_s8 vqtbl1q_s8
|
| 370 |
-
#define ggml_vqtbl1q_u8 vqtbl1q_u8
|
| 371 |
-
|
| 372 |
-
#endif // !defined(__aarch64__)
|
| 373 |
-
|
| 374 |
-
#if !defined(__ARM_FEATURE_DOTPROD)
|
| 375 |
-
|
| 376 |
-
inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) {
|
| 377 |
-
const int16x8_t p0 = vmull_s8(vget_low_s8 (a), vget_low_s8 (b));
|
| 378 |
-
const int16x8_t p1 = vmull_s8(vget_high_s8(a), vget_high_s8(b));
|
| 379 |
-
|
| 380 |
-
return vaddq_s32(acc, vaddq_s32(vpaddlq_s16(p0), vpaddlq_s16(p1)));
|
| 381 |
-
}
|
| 382 |
-
|
| 383 |
-
#else
|
| 384 |
-
|
| 385 |
-
#define ggml_vdotq_s32(a, b, c) vdotq_s32(a, b, c)
|
| 386 |
-
|
| 387 |
-
#endif // !defined(__ARM_FEATURE_DOTPROD)
|
| 388 |
-
|
| 389 |
-
#endif // defined(__ARM_NEON)
|
| 390 |
-
|
| 391 |
-
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
| 392 |
-
|
| 393 |
-
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
| 394 |
-
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
| 395 |
-
|
| 396 |
-
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
| 397 |
-
|
| 398 |
-
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
| 399 |
-
ggml_fp16_internal_t tmp;
|
| 400 |
-
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
| 401 |
-
return (float)tmp;
|
| 402 |
-
}
|
| 403 |
-
|
| 404 |
-
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
| 405 |
-
ggml_fp16_t res;
|
| 406 |
-
ggml_fp16_internal_t tmp = f;
|
| 407 |
-
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
| 408 |
-
return res;
|
| 409 |
-
}
|
| 410 |
-
|
| 411 |
-
#else
|
| 412 |
-
|
| 413 |
-
#ifdef __wasm_simd128__
|
| 414 |
-
#include <wasm_simd128.h>
|
| 415 |
-
#else
|
| 416 |
-
#ifdef __POWER9_VECTOR__
|
| 417 |
-
#include <altivec.h>
|
| 418 |
-
#undef bool
|
| 419 |
-
#define bool _Bool
|
| 420 |
-
#else
|
| 421 |
-
#if defined(_MSC_VER) || defined(__MINGW32__)
|
| 422 |
-
#include <intrin.h>
|
| 423 |
-
#else
|
| 424 |
-
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
|
| 425 |
-
#if !defined(__riscv)
|
| 426 |
-
#include <immintrin.h>
|
| 427 |
-
#endif
|
| 428 |
-
#endif
|
| 429 |
-
#endif
|
| 430 |
-
#endif
|
| 431 |
-
#endif
|
| 432 |
-
|
| 433 |
-
#ifdef __riscv_v_intrinsic
|
| 434 |
-
#include <riscv_vector.h>
|
| 435 |
-
#endif
|
| 436 |
-
|
| 437 |
-
#if defined(__loongarch64)
|
| 438 |
-
#if defined(__loongarch_asx)
|
| 439 |
-
#include <lasxintrin.h>
|
| 440 |
-
#endif
|
| 441 |
-
#if defined(__loongarch_sx)
|
| 442 |
-
#include <lsxintrin.h>
|
| 443 |
-
#endif
|
| 444 |
-
#endif
|
| 445 |
-
|
| 446 |
-
#if defined(__loongarch_asx)
|
| 447 |
-
|
| 448 |
-
typedef union {
|
| 449 |
-
int32_t i;
|
| 450 |
-
float f;
|
| 451 |
-
} ft_union;
|
| 452 |
-
|
| 453 |
-
/* float type data load instructions */
|
| 454 |
-
static __m128 __lsx_vreplfr2vr_s(float val) {
|
| 455 |
-
ft_union fi_tmpval = {.f = val};
|
| 456 |
-
return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
|
| 457 |
-
}
|
| 458 |
-
|
| 459 |
-
static __m256 __lasx_xvreplfr2vr_s(float val) {
|
| 460 |
-
ft_union fi_tmpval = {.f = val};
|
| 461 |
-
return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
|
| 462 |
-
}
|
| 463 |
-
#endif
|
| 464 |
-
|
| 465 |
-
#ifdef __F16C__
|
| 466 |
-
|
| 467 |
-
#ifdef _MSC_VER
|
| 468 |
-
#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
|
| 469 |
-
#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
|
| 470 |
-
#else
|
| 471 |
-
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
|
| 472 |
-
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
|
| 473 |
-
#endif
|
| 474 |
-
|
| 475 |
-
#elif defined(__POWER9_VECTOR__)
|
| 476 |
-
|
| 477 |
-
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
| 478 |
-
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
| 479 |
-
/* the inline asm below is about 12% faster than the lookup method */
|
| 480 |
-
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
| 481 |
-
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
| 482 |
-
|
| 483 |
-
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
| 484 |
-
register float f;
|
| 485 |
-
register double d;
|
| 486 |
-
__asm__(
|
| 487 |
-
"mtfprd %0,%2\n"
|
| 488 |
-
"xscvhpdp %0,%0\n"
|
| 489 |
-
"frsp %1,%0\n" :
|
| 490 |
-
/* temp */ "=d"(d),
|
| 491 |
-
/* out */ "=f"(f):
|
| 492 |
-
/* in */ "r"(h));
|
| 493 |
-
return f;
|
| 494 |
-
}
|
| 495 |
-
|
| 496 |
-
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
| 497 |
-
register double d;
|
| 498 |
-
register ggml_fp16_t r;
|
| 499 |
-
__asm__( /* xscvdphp can work on double or single precision */
|
| 500 |
-
"xscvdphp %0,%2\n"
|
| 501 |
-
"mffprd %1,%0\n" :
|
| 502 |
-
/* temp */ "=d"(d),
|
| 503 |
-
/* out */ "=r"(r):
|
| 504 |
-
/* in */ "f"(f));
|
| 505 |
-
return r;
|
| 506 |
-
}
|
| 507 |
-
|
| 508 |
-
#else
|
| 509 |
-
|
| 510 |
-
// FP16 <-> FP32
|
| 511 |
-
// ref: https://github.com/Maratyszcza/FP16
|
| 512 |
-
|
| 513 |
-
static inline float fp32_from_bits(uint32_t w) {
|
| 514 |
-
union {
|
| 515 |
-
uint32_t as_bits;
|
| 516 |
-
float as_value;
|
| 517 |
-
} fp32;
|
| 518 |
-
fp32.as_bits = w;
|
| 519 |
-
return fp32.as_value;
|
| 520 |
-
}
|
| 521 |
-
|
| 522 |
-
static inline uint32_t fp32_to_bits(float f) {
|
| 523 |
-
union {
|
| 524 |
-
float as_value;
|
| 525 |
-
uint32_t as_bits;
|
| 526 |
-
} fp32;
|
| 527 |
-
fp32.as_value = f;
|
| 528 |
-
return fp32.as_bits;
|
| 529 |
-
}
|
| 530 |
-
|
| 531 |
-
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
| 532 |
-
const uint32_t w = (uint32_t) h << 16;
|
| 533 |
-
const uint32_t sign = w & UINT32_C(0x80000000);
|
| 534 |
-
const uint32_t two_w = w + w;
|
| 535 |
-
|
| 536 |
-
const uint32_t exp_offset = UINT32_C(0xE0) << 23;
|
| 537 |
-
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
| 538 |
-
const float exp_scale = 0x1.0p-112f;
|
| 539 |
-
#else
|
| 540 |
-
const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
|
| 541 |
-
#endif
|
| 542 |
-
const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
|
| 543 |
-
|
| 544 |
-
const uint32_t magic_mask = UINT32_C(126) << 23;
|
| 545 |
-
const float magic_bias = 0.5f;
|
| 546 |
-
const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
|
| 547 |
-
|
| 548 |
-
const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
|
| 549 |
-
const uint32_t result = sign |
|
| 550 |
-
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
|
| 551 |
-
return fp32_from_bits(result);
|
| 552 |
-
}
|
| 553 |
-
|
| 554 |
-
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
| 555 |
-
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
| 556 |
-
const float scale_to_inf = 0x1.0p+112f;
|
| 557 |
-
const float scale_to_zero = 0x1.0p-110f;
|
| 558 |
-
#else
|
| 559 |
-
const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
|
| 560 |
-
const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
|
| 561 |
-
#endif
|
| 562 |
-
float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
|
| 563 |
-
|
| 564 |
-
const uint32_t w = fp32_to_bits(f);
|
| 565 |
-
const uint32_t shl1_w = w + w;
|
| 566 |
-
const uint32_t sign = w & UINT32_C(0x80000000);
|
| 567 |
-
uint32_t bias = shl1_w & UINT32_C(0xFF000000);
|
| 568 |
-
if (bias < UINT32_C(0x71000000)) {
|
| 569 |
-
bias = UINT32_C(0x71000000);
|
| 570 |
-
}
|
| 571 |
-
|
| 572 |
-
base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
|
| 573 |
-
const uint32_t bits = fp32_to_bits(base);
|
| 574 |
-
const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
|
| 575 |
-
const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
|
| 576 |
-
const uint32_t nonsign = exp_bits + mantissa_bits;
|
| 577 |
-
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
|
| 578 |
-
}
|
| 579 |
-
|
| 580 |
-
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
| 581 |
-
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
| 582 |
-
|
| 583 |
-
#endif // __F16C__
|
| 584 |
-
|
| 585 |
-
#endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
|
| 586 |
-
|
| 587 |
-
#ifdef __ARM_FEATURE_SVE
|
| 588 |
-
#include <arm_sve.h>
|
| 589 |
-
#endif // __ARM_FEATURE_SVE
|
| 590 |
-
|
| 591 |
-
// precomputed f32 table for f16 (256 KB)
|
| 592 |
-
// defined in ggml.c, initialized in ggml_init()
|
| 593 |
-
extern float ggml_table_f32_f16[1 << 16];
|
| 594 |
-
|
| 595 |
-
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
| 596 |
-
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
| 597 |
-
// This is also true for POWER9.
|
| 598 |
-
#if !defined(GGML_FP16_TO_FP32)
|
| 599 |
-
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
| 600 |
-
uint16_t s;
|
| 601 |
-
memcpy(&s, &f, sizeof(uint16_t));
|
| 602 |
-
return ggml_table_f32_f16[s];
|
| 603 |
-
}
|
| 604 |
-
|
| 605 |
-
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
| 606 |
-
#endif
|
| 607 |
-
|
| 608 |
-
#if !defined(GGML_FP32_TO_FP16)
|
| 609 |
-
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
| 610 |
-
#endif
|
| 611 |
-
|
| 612 |
-
#ifdef __cplusplus
|
| 613 |
-
}
|
| 614 |
-
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/whisper.cpp
CHANGED
|
@@ -4268,18 +4268,15 @@ const char * whisper_print_system_info(void) {
|
|
| 4268 |
s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
|
| 4269 |
s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
|
| 4270 |
s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
|
| 4271 |
-
s += "METAL = " + std::to_string(ggml_cpu_has_metal()) + " | ";
|
| 4272 |
s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
|
| 4273 |
s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
|
| 4274 |
s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
|
| 4275 |
-
s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
|
| 4276 |
s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
|
| 4277 |
s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
|
| 4278 |
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
|
| 4279 |
-
s += "CUDA = " + std::to_string(ggml_cpu_has_cuda()) + " | ";
|
| 4280 |
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
|
| 4281 |
s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
|
| 4282 |
-
|
| 4283 |
return s.c_str();
|
| 4284 |
}
|
| 4285 |
|
|
|
|
| 4268 |
s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
|
| 4269 |
s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
|
| 4270 |
s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
|
|
|
|
| 4271 |
s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
|
| 4272 |
s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
|
| 4273 |
s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
|
|
|
|
| 4274 |
s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
|
| 4275 |
s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
|
| 4276 |
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
|
|
|
|
| 4277 |
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
|
| 4278 |
s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
|
| 4279 |
+
|
| 4280 |
return s.c_str();
|
| 4281 |
}
|
| 4282 |
|