ggerganov committed on
Commit
ec6f374
·
1 Parent(s): 1568fc8

whisper : adapt to new ggml (wip)

Browse files
.gitignore CHANGED
@@ -1,5 +1,6 @@
1
  *.o
2
  *.a
 
3
  .cache/
4
  .coreml/
5
  .test/
@@ -19,6 +20,9 @@ build-*/
19
  .swiftpm
20
  *.metallib
21
 
 
 
 
22
  /main
23
  /stream
24
  /command
 
1
  *.o
2
  *.a
3
+ *.d
4
  .cache/
5
  .coreml/
6
  .test/
 
20
  .swiftpm
21
  *.metallib
22
 
23
+ ggml-metal-embed.metal
24
+ ggml-metal-embed.metal.tmp
25
+
26
  /main
27
  /stream
28
  /command
Makefile CHANGED
@@ -444,17 +444,17 @@ endif
444
  else
445
  MK_CFLAGS += -march=rv64gcv -mabi=lp64d
446
  MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
447
- endif
448
 
449
  ifndef GGML_NO_ACCELERATE
450
  # Mac OS - include Accelerate framework.
451
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
452
  ifeq ($(UNAME_S),Darwin)
453
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
454
  MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
455
  MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
456
  MK_LDFLAGS += -framework Accelerate
457
- OBJ_GGML += ggml/src/ggml-blas.o
458
  endif
459
  endif # GGML_NO_ACCELERATE
460
 
@@ -464,29 +464,38 @@ ifndef GGML_NO_OPENMP
464
  MK_CXXFLAGS += -fopenmp
465
  endif # GGML_NO_OPENMP
466
 
 
 
 
 
 
 
 
 
 
467
  ifdef GGML_OPENBLAS
468
  MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
469
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
470
  MK_LDFLAGS += $(shell pkg-config --libs openblas)
471
- OBJ_GGML += ggml/src/ggml-blas.o
472
  endif # GGML_OPENBLAS
473
 
474
  ifdef GGML_OPENBLAS64
475
  MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
476
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
477
  MK_LDFLAGS += $(shell pkg-config --libs openblas64)
478
- OBJ_GGML += ggml/src/ggml-blas.o
479
  endif # GGML_OPENBLAS64
480
 
481
  ifdef GGML_BLIS
482
  MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
483
  MK_LDFLAGS += -lblis -L/usr/local/lib
484
- OBJ_GGML += ggml/src/ggml-blas.o
485
  endif # GGML_BLIS
486
 
487
  ifdef GGML_RPC
488
  MK_CPPFLAGS += -DGGML_USE_RPC
489
- OBJ_GGML += ggml/src/ggml-rpc.o
490
  endif # GGML_RPC
491
 
492
  OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
@@ -513,7 +522,7 @@ ifdef GGML_CUDA
513
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
514
  MK_NVCCFLAGS += -use_fast_math
515
 
516
- OBJ_GGML += ggml/src/ggml-cuda.o
517
  OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
518
  OBJ_GGML += $(OBJ_CUDA_TMPL)
519
  ifdef WHISPER_FATAL_WARNINGS
@@ -615,11 +624,11 @@ ggml/src/ggml-cuda/%.o: \
615
  ggml/src/ggml-cuda/common.cuh
616
  $(NVCC_COMPILE)
617
 
618
- ggml/src/ggml-cuda.o: \
619
- ggml/src/ggml-cuda.cu \
 
620
  ggml/include/ggml.h \
621
  ggml/include/ggml-backend.h \
622
- ggml/include/ggml-cuda.h \
623
  ggml/src/ggml-backend-impl.h \
624
  ggml/src/ggml-common.h \
625
  $(wildcard ggml/src/ggml-cuda/*.cuh)
@@ -742,50 +751,43 @@ endif # GGML_HIPBLAS
742
  ifdef GGML_METAL
743
  MK_CPPFLAGS += -DGGML_USE_METAL
744
  MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
745
- OBJ_GGML += ggml/src/ggml-metal.o
746
  ifdef GGML_METAL_NDEBUG
747
  MK_CPPFLAGS += -DGGML_METAL_NDEBUG
748
  endif
749
 
750
  ifdef GGML_METAL_EMBED_LIBRARY
751
  MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
752
- OBJ_GGML += ggml/src/ggml-metal-embed.o
753
  endif
754
  endif # GGML_METAL
755
 
756
- ifdef WHISPER_COREML
757
- MK_CXXFLAGS += -DWHISPER_USE_COREML
758
- LDFLAGS += -framework Foundation -framework CoreML
759
-
760
- ifdef WHISPER_COREML_ALLOW_FALLBACK
761
- MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
762
- endif
763
- endif
764
-
765
- # ===
766
-
767
  ifdef GGML_METAL
768
- ggml/src/ggml-metal.o: \
769
- ggml/src/ggml-metal.m \
 
770
  ggml/include/ggml-metal.h \
771
  ggml/include/ggml.h
772
  $(CC) $(CFLAGS) -c $< -o $@
773
 
774
  ifdef GGML_METAL_EMBED_LIBRARY
775
- ggml/src/ggml-metal-embed.o: \
776
- ggml/src/ggml-metal.metal \
 
777
  ggml/src/ggml-common.h
778
  @echo "Embedding Metal library"
779
- @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
780
- $(eval TEMP_ASSEMBLY=$(shell mktemp))
781
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
782
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
783
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
784
- @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
785
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
786
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
787
- @$(AS) $(TEMP_ASSEMBLY) -o $@
788
- @rm -f ${TEMP_ASSEMBLY}
 
 
789
  endif
790
  endif # GGML_METAL
791
 
@@ -801,11 +803,17 @@ endif
801
 
802
  OBJ_GGML += \
803
  ggml/src/ggml.o \
804
- ggml/src/ggml-cpu.o \
805
  ggml/src/ggml-alloc.o \
806
  ggml/src/ggml-backend.o \
 
 
807
  ggml/src/ggml-quants.o \
808
- ggml/src/ggml-aarch64.o
 
 
 
 
809
 
810
  OBJ_WHISPER += \
811
  src/whisper.o
@@ -910,114 +918,64 @@ endif
910
  # Build libraries
911
  #
912
 
913
- # ggml
914
-
915
- ggml/src/ggml.o: \
916
- ggml/src/ggml.c \
917
- ggml/include/ggml.h
918
- $(CC) $(CFLAGS) -c $< -o $@
919
-
920
- ggml/src/ggml-cpu.o: \
921
- ggml/src/ggml-cpu.c \
922
- ggml/include/ggml.h \
923
- ggml/src/ggml-common.h
924
- $(CC) $(CFLAGS) -c $< -o $@
925
-
926
- ggml/src/ggml-alloc.o: \
927
- ggml/src/ggml-alloc.c \
928
- ggml/include/ggml.h \
929
- ggml/include/ggml-alloc.h
930
- $(CC) $(CFLAGS) -c $< -o $@
931
-
932
- ggml/src/ggml-backend.o: \
933
- ggml/src/ggml-backend.cpp \
934
- ggml/include/ggml.h \
935
- ggml/include/ggml-backend.h
936
- $(CXX) $(CXXFLAGS) -c $< -o $@
937
-
938
- ggml/src/ggml-quants.o: \
939
- ggml/src/ggml-quants.c \
940
- ggml/include/ggml.h \
941
- ggml/src/ggml-quants.h \
942
- ggml/src/ggml-common.h
943
- $(CC) $(CFLAGS) -c $< -o $@
944
-
945
- ggml/src/ggml-aarch64.o: \
946
- ggml/src/ggml-aarch64.c \
947
- ggml/include/ggml.h \
948
- ggml/src/ggml-aarch64.h \
949
- ggml/src/ggml-common.h
950
- $(CC) $(CFLAGS) -c $< -o $@
951
 
952
- ggml/src/ggml-blas.o: \
953
- ggml/src/ggml-blas.cpp \
954
- ggml/include/ggml-blas.h
955
- $(CXX) $(CXXFLAGS) -c $< -o $@
956
 
957
- ifdef GGML_LLAMAFILE
958
- ggml/src/sgemm.o: \
959
- ggml/src/sgemm.cpp \
960
- ggml/src/sgemm.h \
961
- ggml/include/ggml.h
962
- $(CXX) $(CXXFLAGS) -c $< -o $@
963
- endif # GGML_LLAMAFILE
964
 
965
- ifdef GGML_RPC
966
- ggml/src/ggml-rpc.o: \
967
- ggml/src/ggml-rpc.cpp \
968
- ggml/include/ggml-rpc.h
969
- $(CXX) $(CXXFLAGS) -c $< -o $@
970
- endif # GGML_RPC
971
 
972
- $(LIB_GGML): \
973
- $(OBJ_GGML)
974
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
975
 
976
- $(LIB_GGML_S): \
977
- $(OBJ_GGML)
978
- ar rcs $(LIB_GGML_S) $^
979
 
980
- # whisper
 
981
 
982
- src/whisper.o: \
983
- src/whisper.cpp \
984
- include/whisper.h \
 
 
985
  ggml/include/ggml.h \
986
  ggml/include/ggml-alloc.h \
987
- ggml/include/ggml-backend.h \
988
- ggml/include/ggml-cuda.h \
989
- ggml/include/ggml-metal.h
990
- $(CXX) $(CXXFLAGS) -c $< -o $@
991
 
992
- $(LIB_WHISPER): \
993
- $(OBJ_WHISPER) \
994
- $(LIB_GGML)
995
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
996
 
997
- $(LIB_WHISPER_S): \
998
- $(OBJ_WHISPER) \
999
- $(OBJ_GGML)
1000
- ar rcs $(LIB_WHISPER_S) $^
1001
 
1002
- # common
 
1003
 
1004
- examples/common.o: \
1005
- examples/common.cpp \
1006
- examples/common.h
1007
- $(CXX) $(CXXFLAGS) -c $< -o $@
1008
 
1009
- examples/common-ggml.o: \
1010
- examples/common-ggml.cpp \
1011
- examples/common-ggml.h
1012
- $(CXX) $(CXXFLAGS) -c $< -o $@
 
 
1013
 
1014
- $(LIB_COMMON): \
1015
- $(OBJ_COMMON)
1016
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1017
 
1018
- $(LIB_COMMON_S): \
1019
- $(OBJ_COMMON)
1020
- ar rcs $(LIB_COMMON_S) $^
1021
 
1022
  # common-sdl
1023
 
@@ -1029,34 +987,21 @@ examples/common-sdl.o: \
1029
  examples/common-sdl.h
1030
  $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
1031
 
1032
- $(LIB_COMMON_SDL): \
1033
- $(OBJ_SDL)
1034
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) $(LDFLAGS_SDL)
 
 
1035
 
1036
- $(LIB_COMMON_SDL_S): \
1037
- $(OBJ_SDL)
1038
- ar rcs $(LIB_COMMON_SDL_S) $^
1039
 
 
1040
  clean:
1041
- rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS)
1042
- rm -rvf src/*.o
1043
- rm -rvf src/coreml/*.o
1044
- rm -rvf tests/*.o
1045
- rm -rvf examples/*.o
1046
- rm -rvf *.a
1047
- rm -rvf *.dll
1048
- rm -rvf *.so
1049
- rm -rvf *.dot
1050
- rm -rvf ggml/*.a
1051
- rm -rvf ggml/*.dll
1052
- rm -rvf ggml/*.so
1053
- rm -vrf ggml/src/*.o
1054
- rm -vrf ggml/src/ggml-metal-embed.metal
1055
- rm -vrf ggml/src/ggml-cuda/*.o
1056
- rm -vrf ggml/src/ggml-cuda/template-instances/*.o
1057
- rm -rvf $(BUILD_TARGETS)
1058
- rm -rvf $(TEST_TARGETS)
1059
- find examples -type f -name "*.o" -delete
1060
 
1061
  #
1062
  # Examples
 
444
  else
445
  MK_CFLAGS += -march=rv64gcv -mabi=lp64d
446
  MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
447
+ endif # RISCV
448
 
449
  ifndef GGML_NO_ACCELERATE
450
  # Mac OS - include Accelerate framework.
451
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
452
  ifeq ($(UNAME_S),Darwin)
453
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
454
  MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
455
  MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
456
  MK_LDFLAGS += -framework Accelerate
457
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
458
  endif
459
  endif # GGML_NO_ACCELERATE
460
 
 
464
  MK_CXXFLAGS += -fopenmp
465
  endif # GGML_NO_OPENMP
466
 
467
+ ifdef WHISPER_COREML
468
+ MK_CXXFLAGS += -DWHISPER_USE_COREML
469
+ LDFLAGS += -framework Foundation -framework CoreML
470
+
471
+ ifdef WHISPER_COREML_ALLOW_FALLBACK
472
+ MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
473
+ endif
474
+ endif # WHISPER_COREML
475
+
476
  ifdef GGML_OPENBLAS
477
  MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
478
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
479
  MK_LDFLAGS += $(shell pkg-config --libs openblas)
480
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
481
  endif # GGML_OPENBLAS
482
 
483
  ifdef GGML_OPENBLAS64
484
  MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
485
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
486
  MK_LDFLAGS += $(shell pkg-config --libs openblas64)
487
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
488
  endif # GGML_OPENBLAS64
489
 
490
  ifdef GGML_BLIS
491
  MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
492
  MK_LDFLAGS += -lblis -L/usr/local/lib
493
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
494
  endif # GGML_BLIS
495
 
496
  ifdef GGML_RPC
497
  MK_CPPFLAGS += -DGGML_USE_RPC
498
+ OBJ_GGML += ggml/src/ggml-rpc/ggml-rpc.o
499
  endif # GGML_RPC
500
 
501
  OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
 
522
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
523
  MK_NVCCFLAGS += -use_fast_math
524
 
525
+ OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
526
  OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
527
  OBJ_GGML += $(OBJ_CUDA_TMPL)
528
  ifdef WHISPER_FATAL_WARNINGS
 
624
  ggml/src/ggml-cuda/common.cuh
625
  $(NVCC_COMPILE)
626
 
627
+ ggml/src/ggml-cuda/ggml-cuda.o: \
628
+ ggml/src/ggml-cuda/ggml-cuda.cu \
629
+ ggml/include/ggml-cuda.h \
630
  ggml/include/ggml.h \
631
  ggml/include/ggml-backend.h \
 
632
  ggml/src/ggml-backend-impl.h \
633
  ggml/src/ggml-common.h \
634
  $(wildcard ggml/src/ggml-cuda/*.cuh)
 
751
  ifdef GGML_METAL
752
  MK_CPPFLAGS += -DGGML_USE_METAL
753
  MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
754
+ OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o
755
  ifdef GGML_METAL_NDEBUG
756
  MK_CPPFLAGS += -DGGML_METAL_NDEBUG
757
  endif
758
 
759
  ifdef GGML_METAL_EMBED_LIBRARY
760
  MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
761
+ OBJ_GGML += ggml/src/ggml-metal/ggml-metal-embed.o
762
  endif
763
  endif # GGML_METAL
764
 
 
 
 
 
 
 
 
 
 
 
 
765
  ifdef GGML_METAL
766
+ ggml/src/ggml-metal/ggml-metal.o: \
767
+ ggml/src/ggml-metal/ggml-metal.m \
768
+ ggml/src/ggml-metal/ggml-metal-impl.h \
769
  ggml/include/ggml-metal.h \
770
  ggml/include/ggml.h
771
  $(CC) $(CFLAGS) -c $< -o $@
772
 
773
  ifdef GGML_METAL_EMBED_LIBRARY
774
+ ggml/src/ggml-metal/ggml-metal-embed.o: \
775
+ ggml/src/ggml-metal/ggml-metal.metal \
776
+ ggml/src/ggml-metal/ggml-metal-impl.h \
777
  ggml/src/ggml-common.h
778
  @echo "Embedding Metal library"
779
+ @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
780
+ @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
781
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
782
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
783
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
784
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
785
+ @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
786
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
787
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
788
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
789
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
790
+ @rmdir ${TEMP_ASSEMBLY}
791
  endif
792
  endif # GGML_METAL
793
 
 
803
 
804
  OBJ_GGML += \
805
  ggml/src/ggml.o \
806
+ ggml/src/ggml-aarch64.o \
807
  ggml/src/ggml-alloc.o \
808
  ggml/src/ggml-backend.o \
809
+ ggml/src/ggml-backend-reg.o \
810
+ ggml/src/ggml-opt.o \
811
  ggml/src/ggml-quants.o \
812
+ ggml/src/ggml-threading.o \
813
+ ggml/src/ggml-cpu/ggml-cpu.o \
814
+ ggml/src/ggml-cpu/ggml-cpu-cpp.o \
815
+ ggml/src/ggml-cpu/ggml-cpu-aarch64.o \
816
+ ggml/src/ggml-cpu/ggml-cpu-quants.o
817
 
818
  OBJ_WHISPER += \
819
  src/whisper.o
 
918
  # Build libraries
919
  #
920
 
921
+ LIB_GGML = libggml.so
922
+ LIB_GGML_S = libggml.a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
923
 
924
+ LIB_LLAMA = libllama.so
925
+ LIB_LLAMA_S = libllama.a
 
 
926
 
927
+ LIB_COMMON = libcommon.so
928
+ LIB_COMMON_S = libcommon.a
 
 
 
 
 
929
 
930
+ LIB_COMMON_SDL = libcommon-sdl.so
931
+ LIB_COMMON_SDL_S = libcommon-sdl.a
 
 
 
 
932
 
933
+ # Targets
934
+ BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)
 
935
 
936
+ # Dependency files
937
+ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
 
938
 
939
+ # Default target
940
+ all: $(BUILD_TARGETS)
941
 
942
+ # Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
943
+ # g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
944
+ ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
945
+ ggml/src/ggml-cpu/ggml-cpu.cpp \
946
+ ggml/include/ggml-backend.h \
947
  ggml/include/ggml.h \
948
  ggml/include/ggml-alloc.h \
949
+ ggml/src/ggml-backend-impl.h \
950
+ ggml/include/ggml-cpu.h \
951
+ ggml/src/ggml-impl.h
952
+ $(CXX) $(CXXFLAGS) -c $< -o $@
953
 
954
+ # Rules for building object files
955
+ ggml/%.o: ggml/%.c
956
+ $(CC) $(CFLAGS) -MMD -c $< -o $@
 
957
 
958
+ ggml/%.o: ggml/%.cpp
959
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
 
 
960
 
961
+ src/%.o: src/%.cpp
962
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
963
 
964
+ examples/%.o: examples/%.cpp
965
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
 
 
966
 
967
+ # Rules for building libraries
968
+ $(LIB_GGML): $(OBJ_GGML)
969
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
970
+
971
+ $(LIB_GGML_S): $(OBJ_GGML)
972
+ ar rcs $(LIB_GGML_S) $^
973
 
974
+ $(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
 
975
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
976
 
977
+ $(LIB_LLAMA_S): $(OBJ_LLAMA)
978
+ ar rcs $(LIB_LLAMA_S) $^
 
979
 
980
  # common-sdl
981
 
 
987
  examples/common-sdl.h
988
  $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
989
 
990
+ $(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML)
991
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
992
+
993
+ $(LIB_COMMON_S): $(OBJ_COMMON)
994
+ ar rcs $(LIB_COMMON_S) $^
995
 
996
+ # Include dependency files
997
+ -include $(DEP_FILES)
 
998
 
999
+ # Clean rule
1000
  clean:
1001
+ rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS)
1002
+ rm -rvf *.a *.dll *.so *.dot
1003
+ find ggml src tests examples -type f -name "*.o" -delete
1004
+ find ggml src tests examples -type f -name "*.d" -delete
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1005
 
1006
  #
1007
  # Examples
Package.swift CHANGED
@@ -28,7 +28,7 @@ let package = Package(
28
  "tests",
29
  "CMakeLists.txt",
30
  "Makefile",
31
- "ggml/src/ggml-metal-embed.metal"
32
  ],
33
  sources: [
34
  "ggml/src/ggml.c",
@@ -36,16 +36,22 @@ let package = Package(
36
  "ggml/src/ggml-aarch64.c",
37
  "ggml/src/ggml-alloc.c",
38
  "ggml/src/ggml-backend.cpp",
39
- "ggml/src/ggml-cpu.c",
 
 
 
 
40
  "ggml/src/ggml-quants.c",
41
- "ggml/src/ggml-metal.m"
 
42
  ],
43
- resources: [.process("ggml/src/ggml-metal.metal")],
44
  publicHeadersPath: "spm-headers",
45
  cSettings: [
46
  .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
47
- .define("GGML_USE_ACCELERATE"),
48
  .unsafeFlags(["-fno-objc-arc"]),
 
 
49
  .define("GGML_USE_METAL")
50
  // NOTE: NEW_LAPACK will required iOS version 16.4+
51
  // We should consider add this in the future when we drop support for iOS 14
 
28
  "tests",
29
  "CMakeLists.txt",
30
  "Makefile",
31
+ "ggml/src/ggml-metal/ggml-metal-embed.metal"
32
  ],
33
  sources: [
34
  "ggml/src/ggml.c",
 
36
  "ggml/src/ggml-aarch64.c",
37
  "ggml/src/ggml-alloc.c",
38
  "ggml/src/ggml-backend.cpp",
39
+ "ggml/src/ggml-backend-reg.cpp",
40
+ "ggml/src/ggml-cpu/ggml-cpu.c",
41
+ "ggml/src/ggml-cpu/ggml-cpu.cpp",
42
+ "ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
43
+ "ggml/src/ggml-cpu/ggml-cpu-quants.c",
44
  "ggml/src/ggml-quants.c",
45
+ "ggml/src/ggml-threading.cpp",
46
+ "ggml/src/ggml-metal/ggml-metal.m"
47
  ],
48
+ resources: [.process("ggml/src/ggml-metal/ggml-metal.metal")],
49
  publicHeadersPath: "spm-headers",
50
  cSettings: [
51
  .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
 
52
  .unsafeFlags(["-fno-objc-arc"]),
53
+ .headerSearchPath("ggml/src"),
54
+ .define("GGML_USE_ACCELERATE"),
55
  .define("GGML_USE_METAL")
56
  // NOTE: NEW_LAPACK will required iOS version 16.4+
57
  // We should consider add this in the future when we drop support for iOS 14
examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt CHANGED
@@ -19,11 +19,16 @@ if (NOT GGML_HOME)
19
  SOURCE_FILES
20
  ${SOURCE_FILES}
21
  ${WHISPER_LIB_DIR}/ggml/src/ggml.c
22
- ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu.c
23
  ${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
24
  ${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
25
  ${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
 
26
  ${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
 
 
 
 
 
27
  )
28
  endif()
29
 
 
19
  SOURCE_FILES
20
  ${SOURCE_FILES}
21
  ${WHISPER_LIB_DIR}/ggml/src/ggml.c
 
22
  ${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
23
  ${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
24
  ${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
25
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-backend-reg.cpp
26
  ${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
27
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-threading.cpp
28
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.c
29
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.cpp
30
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-aarch64.c
31
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
32
  )
33
  endif()
34
 
examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj CHANGED
@@ -25,6 +25,11 @@
25
  18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
26
  18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
27
  18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
 
 
 
 
 
28
  7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
29
  7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
30
  7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
@@ -50,8 +55,8 @@
50
  18133C7F2C64E342005CEAAC /* ggml-aarch64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-aarch64.c"; path = "../../../ggml/src/ggml-aarch64.c"; sourceTree = "<group>"; };
51
  184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
52
  184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
53
- 1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal.m"; sourceTree = "<group>"; };
54
- 1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal.metal"; sourceTree = "<group>"; };
55
  18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
56
  18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
57
  18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
@@ -77,8 +82,17 @@
77
  18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
78
  18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
79
  18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
80
- 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu.c"; sourceTree = "<group>"; };
81
  18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
 
 
 
 
 
 
 
 
 
82
  7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
83
  7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
84
  7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
@@ -118,6 +132,15 @@
118
  18627C7829052BDF00BD2A04 /* whisper.objc */ = {
119
  isa = PBXGroup;
120
  children = (
 
 
 
 
 
 
 
 
 
121
  18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
122
  18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
123
  18133C7F2C64E342005CEAAC /* ggml-aarch64.c */,
@@ -252,11 +275,16 @@
252
  18627C9629052C5800BD2A04 /* ggml.c in Sources */,
253
  18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
254
  7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
 
 
255
  1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
 
 
256
  18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
257
  18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
258
  18627C8C29052BE000BD2A04 /* main.m in Sources */,
259
  18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
 
260
  1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
261
  7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
262
  );
@@ -335,6 +363,7 @@
335
  GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
336
  GCC_WARN_UNUSED_FUNCTION = YES;
337
  GCC_WARN_UNUSED_VARIABLE = YES;
 
338
  IPHONEOS_DEPLOYMENT_TARGET = 16.0;
339
  MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
340
  MTL_FAST_MATH = YES;
@@ -388,6 +417,7 @@
388
  GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
389
  GCC_WARN_UNUSED_FUNCTION = YES;
390
  GCC_WARN_UNUSED_VARIABLE = YES;
 
391
  IPHONEOS_DEPLOYMENT_TARGET = 16.0;
392
  MTL_ENABLE_DEBUG_INFO = NO;
393
  MTL_FAST_MATH = YES;
@@ -410,6 +440,7 @@
410
  DEVELOPMENT_TEAM = P8JZH34X63;
411
  GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
412
  GENERATE_INFOPLIST_FILE = YES;
 
413
  INFOPLIST_FILE = whisper.objc/Info.plist;
414
  INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
415
  INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
@@ -439,6 +470,7 @@
439
  DEVELOPMENT_TEAM = P8JZH34X63;
440
  GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
441
  GENERATE_INFOPLIST_FILE = YES;
 
442
  INFOPLIST_FILE = whisper.objc/Info.plist;
443
  INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
444
  INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
 
25
  18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
26
  18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
27
  18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
28
+ 18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */; };
29
+ 18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */; };
30
+ 18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */; };
31
+ 18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */; };
32
+ 18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */; };
33
  7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
34
  7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
35
  7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
 
55
  18133C7F2C64E342005CEAAC /* ggml-aarch64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-aarch64.c"; path = "../../../ggml/src/ggml-aarch64.c"; sourceTree = "<group>"; };
56
  184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
57
  184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
58
+ 1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
59
+ 1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
60
  18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
61
  18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
62
  18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
 
82
  18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
83
  18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
84
  18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
85
+ 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.c"; sourceTree = "<group>"; };
86
  18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
87
+ 18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-threading.h"; path = "../../../ggml/src/ggml-threading.h"; sourceTree = "<group>"; };
88
+ 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-threading.cpp"; path = "../../../ggml/src/ggml-threading.cpp"; sourceTree = "<group>"; };
89
+ 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.cpp"; sourceTree = "<group>"; };
90
+ 18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-aarch64.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.h"; sourceTree = "<group>"; };
91
+ 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-aarch64.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.c"; sourceTree = "<group>"; };
92
+ 18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-impl.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-impl.h"; sourceTree = "<group>"; };
93
+ 18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-quants.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.h"; sourceTree = "<group>"; };
94
+ 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-quants.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.c"; sourceTree = "<group>"; };
95
+ 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-backend-reg.cpp"; path = "../../../ggml/src/ggml-backend-reg.cpp"; sourceTree = "<group>"; };
96
  7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
97
  7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
98
  7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
 
132
  18627C7829052BDF00BD2A04 /* whisper.objc */ = {
133
  isa = PBXGroup;
134
  children = (
135
+ 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */,
136
+ 18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */,
137
+ 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */,
138
+ 18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */,
139
+ 18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */,
140
+ 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */,
141
+ 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */,
142
+ 18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */,
143
+ 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */,
144
  18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
145
  18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
146
  18133C7F2C64E342005CEAAC /* ggml-aarch64.c */,
 
275
  18627C9629052C5800BD2A04 /* ggml.c in Sources */,
276
  18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
277
  7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
278
+ 18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */,
279
+ 18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */,
280
  1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
281
+ 18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */,
282
+ 18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */,
283
  18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
284
  18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
285
  18627C8C29052BE000BD2A04 /* main.m in Sources */,
286
  18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
287
+ 18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
288
  1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
289
  7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
290
  );
 
363
  GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
364
  GCC_WARN_UNUSED_FUNCTION = YES;
365
  GCC_WARN_UNUSED_VARIABLE = YES;
366
+ HEADER_SEARCH_PATHS = "";
367
  IPHONEOS_DEPLOYMENT_TARGET = 16.0;
368
  MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
369
  MTL_FAST_MATH = YES;
 
417
  GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
418
  GCC_WARN_UNUSED_FUNCTION = YES;
419
  GCC_WARN_UNUSED_VARIABLE = YES;
420
+ HEADER_SEARCH_PATHS = "";
421
  IPHONEOS_DEPLOYMENT_TARGET = 16.0;
422
  MTL_ENABLE_DEBUG_INFO = NO;
423
  MTL_FAST_MATH = YES;
 
440
  DEVELOPMENT_TEAM = P8JZH34X63;
441
  GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
442
  GENERATE_INFOPLIST_FILE = YES;
443
+ HEADER_SEARCH_PATHS = ../../../ggml/src/;
444
  INFOPLIST_FILE = whisper.objc/Info.plist;
445
  INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
446
  INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
 
470
  DEVELOPMENT_TEAM = P8JZH34X63;
471
  GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
472
  GENERATE_INFOPLIST_FILE = YES;
473
+ HEADER_SEARCH_PATHS = ../../../ggml/src/;
474
  INFOPLIST_FILE = whisper.objc/Info.plist;
475
  INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
476
  INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift CHANGED
@@ -67,8 +67,6 @@ actor WhisperContext {
67
  private func systemInfo() -> String {
68
  var info = ""
69
  if (ggml_cpu_has_neon() != 0) { info += "NEON " }
70
- if (ggml_cpu_has_metal() != 0) { info += "METAL " }
71
- if (ggml_cpu_has_blas() != 0) { info += "BLAS " }
72
  return String(info.dropLast())
73
  }
74
 
 
67
  private func systemInfo() -> String {
68
  var info = ""
69
  if (ggml_cpu_has_neon() != 0) { info += "NEON " }
 
 
70
  return String(info.dropLast())
71
  }
72
 
ggml/ggml_vk_generate_shaders.py DELETED
@@ -1,220 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import logging
4
- import argparse
5
- import asyncio
6
- import os
7
- from tempfile import gettempdir
8
-
9
- logger = logging.getLogger("ggml-vk-generate-shaders")
10
-
11
- GLSLC = "glslc"
12
-
13
- type_names = [
14
- "f32",
15
- "f16",
16
- "q4_0",
17
- "q4_1",
18
- "q5_0",
19
- "q5_1",
20
- "q8_0",
21
- "q2_k",
22
- "q3_k",
23
- "q4_k",
24
- "q5_k",
25
- "q6_k",
26
- ]
27
-
28
- ASYNCIO_CONCURRENCY = 64
29
-
30
- input_dir = "vulkan-shaders"
31
- output_dir = gettempdir()
32
-
33
- lock = asyncio.Lock()
34
- shader_fnames = []
35
-
36
-
37
- async def string_to_spv(name, in_fname, defines, fp16=True):
38
- name = f"{name}{'_fp32' if not fp16 else ''}"
39
- out_fname = os.path.join(output_dir, f"{name}.spv")
40
-
41
- in_path = os.path.join(input_dir, in_fname)
42
-
43
- cmd = [GLSLC, "-fshader-stage=compute", "--target-env=vulkan1.2", "-O", in_path, "-o", out_fname]
44
-
45
- cmd.extend([f"-D{key}={value}" for key, value in defines.items()])
46
-
47
- proc = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
48
-
49
- stdout, stderr = await proc.communicate()
50
-
51
- stdout = stdout.decode()
52
- error = stderr.decode()
53
-
54
- if proc.returncode:
55
- cmd = " ".join(cmd)
56
- logger.error(f"cannot compile {name}\n\n{cmd}\n\n{error}")
57
- return
58
-
59
- async with lock:
60
- shader_fnames.append((name, out_fname))
61
-
62
-
63
- def matmul_shaders(tasks, fp16, matmul_id):
64
- if fp16:
65
- load_vec = "8"
66
- aligned_b_type_f32 = "mat2x4"
67
- aligned_b_type_f16 = "f16mat2x4"
68
- else:
69
- load_vec = "4"
70
- aligned_b_type_f32 = "vec4"
71
- aligned_b_type_f16 = "f16vec4"
72
-
73
- base_dict = {"FLOAT_TYPE": "float" if not fp16 else "float16_t"}
74
- shader_name = "matmul"
75
-
76
- if matmul_id:
77
- base_dict["MUL_MAT_ID"] = "1"
78
- shader_name = "matmul_id"
79
-
80
- if fp16:
81
- base_dict["FLOAT16"] = "1"
82
-
83
- # Shaders with f16 B_TYPE
84
- tasks.append(string_to_spv(f"{shader_name}_f32_f16", "mul_mm.comp", base_dict | {"DATA_A_F32": "1", "B_TYPE": "float16_t", "D_TYPE": "float"}, fp16))
85
- tasks.append(string_to_spv(f"{shader_name}_f32_f16_aligned", "mul_mm.comp", base_dict | {"DATA_A_F32": "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"}, fp16))
86
-
87
- tasks.append(string_to_spv(f"{shader_name}_f16", "mul_mm.comp", base_dict | {"DATA_A_F16": "1", "B_TYPE": "float16_t", "D_TYPE": "float"}, fp16))
88
- tasks.append(string_to_spv(f"{shader_name}_f16_aligned", "mul_mm.comp", base_dict | {"DATA_A_F16": "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"}, fp16))
89
-
90
- for tname in type_names:
91
- data_a_key = f"DATA_A_{tname.upper()}"
92
- load_vec_a = load_vec if tname in ("f32", "f16") else "2"
93
- tasks.append(string_to_spv(f"{shader_name}_{tname}_f32", "mul_mm.comp", base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
94
- tasks.append(string_to_spv(f"{shader_name}_{tname}_f32_aligned", "mul_mm.comp", base_dict | {data_a_key: "2", "LOAD_VEC_A": load_vec_a, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f32, "D_TYPE": "float"}, fp16))
95
-
96
-
97
- async def main():
98
- logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V")
99
-
100
- tasks = []
101
-
102
- for fp16 in (False, True):
103
- # MUL_MAT
104
- matmul_shaders(tasks, fp16, False)
105
- # MUL_MAT_ID
106
- matmul_shaders(tasks, fp16, True)
107
-
108
- for tname in type_names:
109
- base_dict = {"FLOAT_TYPE": "float"}
110
-
111
- # mul mat vec
112
- data_a_key = f"DATA_A_{tname.upper()}"
113
- shader = f"mul_mat_vec_{tname}.comp" if tname.endswith("_k") else "mul_mat_vec.comp"
114
-
115
- tasks.append(string_to_spv(f"mul_mat_vec_{tname}_f32_f32", shader, base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}))
116
- tasks.append(string_to_spv(f"mul_mat_vec_{tname}_f16_f32", shader, base_dict | {data_a_key: "1", "B_TYPE": "float16_t", "D_TYPE": "float"}))
117
-
118
- tasks.append(string_to_spv(f"mul_mat_vec_id_{tname}_f32", shader, base_dict | {"MUL_MAT_ID": "1", data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}))
119
-
120
- # Dequant shaders
121
- if tname != "f16":
122
- tasks.append(string_to_spv(f"dequant_{tname}", f"dequant_{tname}.comp", base_dict | {data_a_key: "1", "D_TYPE": "float16_t"}))
123
-
124
- # get_rows
125
- if not tname.endswith("_k"):
126
- shader = "get_rows.comp" if tname in ("f32", "f16") else "get_rows_quant.comp"
127
-
128
- if tname == "f16":
129
- tasks.append(string_to_spv(f"get_rows_{tname}", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
130
- else:
131
- tasks.append(string_to_spv(f"get_rows_{tname}", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t"}))
132
- tasks.append(string_to_spv(f"get_rows_{tname}_f32", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float"}))
133
-
134
- tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", "mul_mat_vec_p021.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
135
- tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", "mul_mat_vec_nc.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
136
-
137
- # Norms
138
- tasks.append(string_to_spv("norm_f32", "norm.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
139
- tasks.append(string_to_spv("rms_norm_f32", "rms_norm.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
140
-
141
- tasks.append(string_to_spv("cpy_f32_f32", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
142
- tasks.append(string_to_spv("cpy_f32_f16", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
143
- tasks.append(string_to_spv("cpy_f16_f16", "copy.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
144
-
145
- tasks.append(string_to_spv("add_f32", "add.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
146
-
147
- tasks.append(string_to_spv("split_k_reduce", "mul_mat_split_k_reduce.comp", {}))
148
-
149
- tasks.append(string_to_spv("mul_f32", "mul.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
150
-
151
- tasks.append(string_to_spv("div_f32", "div.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
152
-
153
- tasks.append(string_to_spv("scale_f32", "scale.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
154
-
155
- tasks.append(string_to_spv("sqr_f32", "square.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
156
-
157
- tasks.append(string_to_spv("clamp_f32", "clamp.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
158
-
159
- tasks.append(string_to_spv("gelu_f32", "gelu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
160
- tasks.append(string_to_spv("silu_f32", "silu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
161
- tasks.append(string_to_spv("relu_f32", "relu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
162
-
163
- tasks.append(string_to_spv("diag_mask_inf_f32", "diag_mask_inf.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
164
-
165
- tasks.append(string_to_spv("soft_max_f32", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}))
166
- tasks.append(string_to_spv("soft_max_f32_f16", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float16_t", "D_TYPE": "float"}))
167
-
168
- tasks.append(string_to_spv("rope_norm_f32", "rope_norm.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
169
- tasks.append(string_to_spv("rope_norm_f16", "rope_norm.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
170
-
171
- tasks.append(string_to_spv("rope_neox_f32", "rope_neox.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
172
- tasks.append(string_to_spv("rope_neox_f16", "rope_neox.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
173
-
174
- tasks.append(string_to_spv("argsort_f32", "argsort.comp", {"A_TYPE": "float"}))
175
-
176
- tasks.append(string_to_spv("sum_rows_f32", "sum_rows.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
177
-
178
- # Helper to decorate tasks with semaphore acquisition.
179
- async def withSemaphore(sem, task):
180
- async with sem:
181
- return await task
182
-
183
- # Run tasks concurrently guarded by a concurrency limit.
184
- sem = asyncio.Semaphore(ASYNCIO_CONCURRENCY)
185
- await asyncio.gather(*(withSemaphore(sem, task) for task in tasks))
186
-
187
- with open("ggml-vulkan-shaders.hpp", "w") as f:
188
- f.write("#include <cstdint>\n\n")
189
- for name, path in sorted(shader_fnames):
190
-
191
- with open(path, "rb") as spv:
192
- counter = 0
193
- newline_counter = 0
194
- f.write(f"unsigned char {name}_data[] = {{\n")
195
- for val in spv.read():
196
- f.write(f"0x{val:02x},")
197
- newline_counter += 1
198
- counter += 1
199
- if newline_counter >= 12:
200
- newline_counter = 0
201
- f.write("\n")
202
- f.write("\n};\n")
203
- f.write(f"const uint64_t {name}_len = {counter};\n\n")
204
- os.remove(path)
205
-
206
-
207
- if __name__ == "__main__":
208
- parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator")
209
-
210
- parser.add_argument("--glslc", help="Path to glslc")
211
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
212
-
213
- args = parser.parse_args()
214
-
215
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
216
-
217
- if args.glslc:
218
- GLSLC = args.glslc
219
-
220
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ggml-cpu-impl.h DELETED
@@ -1,614 +0,0 @@
1
- #pragma once
2
-
3
- // GGML CPU internal header
4
-
5
- #include "ggml.h"
6
- #include "ggml-impl.h"
7
- #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
8
- //#include <stddef.h>
9
- #include <stdbool.h>
10
- #include <string.h> // memcpy
11
- #include <math.h> // fabsf
12
-
13
-
14
- #ifdef __cplusplus
15
- extern "C" {
16
- #endif
17
-
18
- #if defined(_MSC_VER)
19
-
20
- #define m512bh(p) p
21
- #define m512i(p) p
22
-
23
- #else
24
-
25
- #define m512bh(p) (__m512bh)(p)
26
- #define m512i(p) (__m512i)(p)
27
-
28
- #endif
29
-
30
- /**
31
- * Converts brain16 to float32.
32
- *
33
- * The bfloat16 floating point format has the following structure:
34
- *
35
- * ┌sign
36
- * │
37
- * │ ┌exponent
38
- * │ │
39
- * │ │ ┌mantissa
40
- * │ │ │
41
- * │┌──┴───┐┌─┴───┐
42
- * 0b0000000000000000 brain16
43
- *
44
- * Since bf16 has the same number of exponent bits as a 32bit float,
45
- * encoding and decoding numbers becomes relatively straightforward.
46
- *
47
- * ┌sign
48
- * │
49
- * │ ┌exponent
50
- * │ │
51
- * │ │ ┌mantissa
52
- * │ │ │
53
- * │┌──┴───┐┌─┴───────────────────┐
54
- * 0b00000000000000000000000000000000 IEEE binary32
55
- *
56
- * For comparison, the standard fp16 format has fewer exponent bits.
57
- *
58
- * ┌sign
59
- * │
60
- * │ ┌exponent
61
- * │ │
62
- * │ │ ┌mantissa
63
- * │ │ │
64
- * │┌─┴─┐┌─┴──────┐
65
- * 0b0000000000000000 IEEE binary16
66
- *
67
- * @see IEEE 754-2008
68
- */
69
- static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
70
- union {
71
- float f;
72
- uint32_t i;
73
- } u;
74
- u.i = (uint32_t)h.bits << 16;
75
- return u.f;
76
- }
77
-
78
- /**
79
- * Converts float32 to brain16.
80
- *
81
- * This is binary identical with Google Brain float conversion.
82
- * Floats shall round to nearest even, and NANs shall be quiet.
83
- * Subnormals aren't flushed to zero, except perhaps when used.
84
- * This code should vectorize nicely if using modern compilers.
85
- */
86
- static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
87
- ggml_bf16_t h;
88
- union {
89
- float f;
90
- uint32_t i;
91
- } u;
92
- u.f = s;
93
- if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
94
- h.bits = (u.i >> 16) | 64; /* force to quiet */
95
- return h;
96
- }
97
- h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
98
- return h;
99
- }
100
-
101
- #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
102
- #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
103
-
104
- // __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
105
- #if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
106
- #ifndef __FMA__
107
- #define __FMA__
108
- #endif
109
- #ifndef __F16C__
110
- #define __F16C__
111
- #endif
112
- #endif
113
-
114
- // __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
115
- #if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
116
- #ifndef __SSE3__
117
- #define __SSE3__
118
- #endif
119
- #ifndef __SSSE3__
120
- #define __SSSE3__
121
- #endif
122
- #endif
123
-
124
- #if defined(__ARM_FEATURE_SVE)
125
- #include <arm_sve.h>
126
- #include <sys/prctl.h>
127
- #endif
128
-
129
- // 16-bit float
130
- // on Arm, we use __fp16
131
- // on x86, we use uint16_t
132
- #if defined(__ARM_NEON)
133
-
134
- // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
135
- //
136
- // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
137
- //
138
- #include <arm_neon.h>
139
-
140
- #ifdef _MSC_VER
141
-
142
- typedef uint16_t ggml_fp16_internal_t;
143
-
144
- #define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
145
-
146
- #else
147
-
148
- typedef __fp16 ggml_fp16_internal_t;
149
-
150
- #define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
151
-
152
- #endif // _MSC_VER
153
-
154
- #if !defined(__aarch64__)
155
-
156
- // 32-bit ARM compatibility
157
-
158
- // vaddlvq_s16
159
- // vpaddq_s16
160
- // vpaddq_s32
161
- // vaddvq_s32
162
- // vaddvq_f32
163
- // vmaxvq_f32
164
- // vcvtnq_s32_f32
165
- // vzip1_u8
166
- // vzip2_u8
167
-
168
- inline static int32_t vaddlvq_s16(int16x8_t v) {
169
- int32x4_t v0 = vreinterpretq_s32_s64(vpaddlq_s32(vpaddlq_s16(v)));
170
- return vgetq_lane_s32(v0, 0) + vgetq_lane_s32(v0, 2);
171
- }
172
-
173
- inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
174
- int16x4_t a0 = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
175
- int16x4_t b0 = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
176
- return vcombine_s16(a0, b0);
177
- }
178
-
179
- inline static int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
180
- int32x2_t a0 = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
181
- int32x2_t b0 = vpadd_s32(vget_low_s32(b), vget_high_s32(b));
182
- return vcombine_s32(a0, b0);
183
- }
184
-
185
- inline static int32_t vaddvq_s32(int32x4_t v) {
186
- return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
187
- }
188
-
189
- inline static float vaddvq_f32(float32x4_t v) {
190
- return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
191
- }
192
-
193
- inline static float vmaxvq_f32(float32x4_t v) {
194
- return
195
- MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
196
- MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
197
- }
198
-
199
- inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
200
- int32x4_t res;
201
-
202
- res[0] = roundf(vgetq_lane_f32(v, 0));
203
- res[1] = roundf(vgetq_lane_f32(v, 1));
204
- res[2] = roundf(vgetq_lane_f32(v, 2));
205
- res[3] = roundf(vgetq_lane_f32(v, 3));
206
-
207
- return res;
208
- }
209
-
210
- inline static uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
211
- uint8x8_t res;
212
-
213
- res[0] = a[0]; res[1] = b[0];
214
- res[2] = a[1]; res[3] = b[1];
215
- res[4] = a[2]; res[5] = b[2];
216
- res[6] = a[3]; res[7] = b[3];
217
-
218
- return res;
219
- }
220
-
221
- inline static uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
222
- uint8x8_t res;
223
-
224
- res[0] = a[4]; res[1] = b[4];
225
- res[2] = a[5]; res[3] = b[5];
226
- res[4] = a[6]; res[5] = b[6];
227
- res[6] = a[7]; res[7] = b[7];
228
-
229
- return res;
230
- }
231
-
232
- // vld1q_s16_x2
233
- // vld1q_u8_x2
234
- // vld1q_u8_x4
235
- // vld1q_s8_x2
236
- // vld1q_s8_x4
237
- // TODO: double-check these work correctly
238
-
239
- typedef struct ggml_int16x8x2_t {
240
- int16x8_t val[2];
241
- } ggml_int16x8x2_t;
242
-
243
- inline static ggml_int16x8x2_t ggml_vld1q_s16_x2(const int16_t * ptr) {
244
- ggml_int16x8x2_t res;
245
-
246
- res.val[0] = vld1q_s16(ptr + 0);
247
- res.val[1] = vld1q_s16(ptr + 8);
248
-
249
- return res;
250
- }
251
-
252
- typedef struct ggml_uint8x16x2_t {
253
- uint8x16_t val[2];
254
- } ggml_uint8x16x2_t;
255
-
256
- inline static ggml_uint8x16x2_t ggml_vld1q_u8_x2(const uint8_t * ptr) {
257
- ggml_uint8x16x2_t res;
258
-
259
- res.val[0] = vld1q_u8(ptr + 0);
260
- res.val[1] = vld1q_u8(ptr + 16);
261
-
262
- return res;
263
- }
264
-
265
- typedef struct ggml_uint8x16x4_t {
266
- uint8x16_t val[4];
267
- } ggml_uint8x16x4_t;
268
-
269
- inline static ggml_uint8x16x4_t ggml_vld1q_u8_x4(const uint8_t * ptr) {
270
- ggml_uint8x16x4_t res;
271
-
272
- res.val[0] = vld1q_u8(ptr + 0);
273
- res.val[1] = vld1q_u8(ptr + 16);
274
- res.val[2] = vld1q_u8(ptr + 32);
275
- res.val[3] = vld1q_u8(ptr + 48);
276
-
277
- return res;
278
- }
279
-
280
- typedef struct ggml_int8x16x2_t {
281
- int8x16_t val[2];
282
- } ggml_int8x16x2_t;
283
-
284
- inline static ggml_int8x16x2_t ggml_vld1q_s8_x2(const int8_t * ptr) {
285
- ggml_int8x16x2_t res;
286
-
287
- res.val[0] = vld1q_s8(ptr + 0);
288
- res.val[1] = vld1q_s8(ptr + 16);
289
-
290
- return res;
291
- }
292
-
293
- typedef struct ggml_int8x16x4_t {
294
- int8x16_t val[4];
295
- } ggml_int8x16x4_t;
296
-
297
- inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
298
- ggml_int8x16x4_t res;
299
-
300
- res.val[0] = vld1q_s8(ptr + 0);
301
- res.val[1] = vld1q_s8(ptr + 16);
302
- res.val[2] = vld1q_s8(ptr + 32);
303
- res.val[3] = vld1q_s8(ptr + 48);
304
-
305
- return res;
306
- }
307
-
308
- // NOTE: not tested
309
- inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
310
- int8x16_t res;
311
-
312
- res[ 0] = a[b[ 0]];
313
- res[ 1] = a[b[ 1]];
314
- res[ 2] = a[b[ 2]];
315
- res[ 3] = a[b[ 3]];
316
- res[ 4] = a[b[ 4]];
317
- res[ 5] = a[b[ 5]];
318
- res[ 6] = a[b[ 6]];
319
- res[ 7] = a[b[ 7]];
320
- res[ 8] = a[b[ 8]];
321
- res[ 9] = a[b[ 9]];
322
- res[10] = a[b[10]];
323
- res[11] = a[b[11]];
324
- res[12] = a[b[12]];
325
- res[13] = a[b[13]];
326
- res[14] = a[b[14]];
327
- res[15] = a[b[15]];
328
-
329
- return res;
330
- }
331
-
332
- // NOTE: not tested
333
- inline static uint8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
334
- uint8x16_t res;
335
-
336
- res[ 0] = a[b[ 0]];
337
- res[ 1] = a[b[ 1]];
338
- res[ 2] = a[b[ 2]];
339
- res[ 3] = a[b[ 3]];
340
- res[ 4] = a[b[ 4]];
341
- res[ 5] = a[b[ 5]];
342
- res[ 6] = a[b[ 6]];
343
- res[ 7] = a[b[ 7]];
344
- res[ 8] = a[b[ 8]];
345
- res[ 9] = a[b[ 9]];
346
- res[10] = a[b[10]];
347
- res[11] = a[b[11]];
348
- res[12] = a[b[12]];
349
- res[13] = a[b[13]];
350
- res[14] = a[b[14]];
351
- res[15] = a[b[15]];
352
-
353
- return res;
354
- }
355
-
356
- #else
357
-
358
- #define ggml_int16x8x2_t int16x8x2_t
359
- #define ggml_uint8x16x2_t uint8x16x2_t
360
- #define ggml_uint8x16x4_t uint8x16x4_t
361
- #define ggml_int8x16x2_t int8x16x2_t
362
- #define ggml_int8x16x4_t int8x16x4_t
363
-
364
- #define ggml_vld1q_s16_x2 vld1q_s16_x2
365
- #define ggml_vld1q_u8_x2 vld1q_u8_x2
366
- #define ggml_vld1q_u8_x4 vld1q_u8_x4
367
- #define ggml_vld1q_s8_x2 vld1q_s8_x2
368
- #define ggml_vld1q_s8_x4 vld1q_s8_x4
369
- #define ggml_vqtbl1q_s8 vqtbl1q_s8
370
- #define ggml_vqtbl1q_u8 vqtbl1q_u8
371
-
372
- #endif // !defined(__aarch64__)
373
-
374
- #if !defined(__ARM_FEATURE_DOTPROD)
375
-
376
- inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) {
377
- const int16x8_t p0 = vmull_s8(vget_low_s8 (a), vget_low_s8 (b));
378
- const int16x8_t p1 = vmull_s8(vget_high_s8(a), vget_high_s8(b));
379
-
380
- return vaddq_s32(acc, vaddq_s32(vpaddlq_s16(p0), vpaddlq_s16(p1)));
381
- }
382
-
383
- #else
384
-
385
- #define ggml_vdotq_s32(a, b, c) vdotq_s32(a, b, c)
386
-
387
- #endif // !defined(__ARM_FEATURE_DOTPROD)
388
-
389
- #endif // defined(__ARM_NEON)
390
-
391
- #if defined(__ARM_NEON) && !defined(_MSC_VER)
392
-
393
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
394
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
395
-
396
- #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
397
-
398
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
399
- ggml_fp16_internal_t tmp;
400
- memcpy(&tmp, &h, sizeof(ggml_fp16_t));
401
- return (float)tmp;
402
- }
403
-
404
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
405
- ggml_fp16_t res;
406
- ggml_fp16_internal_t tmp = f;
407
- memcpy(&res, &tmp, sizeof(ggml_fp16_t));
408
- return res;
409
- }
410
-
411
- #else
412
-
413
- #ifdef __wasm_simd128__
414
- #include <wasm_simd128.h>
415
- #else
416
- #ifdef __POWER9_VECTOR__
417
- #include <altivec.h>
418
- #undef bool
419
- #define bool _Bool
420
- #else
421
- #if defined(_MSC_VER) || defined(__MINGW32__)
422
- #include <intrin.h>
423
- #else
424
- #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
425
- #if !defined(__riscv)
426
- #include <immintrin.h>
427
- #endif
428
- #endif
429
- #endif
430
- #endif
431
- #endif
432
-
433
- #ifdef __riscv_v_intrinsic
434
- #include <riscv_vector.h>
435
- #endif
436
-
437
- #if defined(__loongarch64)
438
- #if defined(__loongarch_asx)
439
- #include <lasxintrin.h>
440
- #endif
441
- #if defined(__loongarch_sx)
442
- #include <lsxintrin.h>
443
- #endif
444
- #endif
445
-
446
- #if defined(__loongarch_asx)
447
-
448
- typedef union {
449
- int32_t i;
450
- float f;
451
- } ft_union;
452
-
453
- /* float type data load instructions */
454
- static __m128 __lsx_vreplfr2vr_s(float val) {
455
- ft_union fi_tmpval = {.f = val};
456
- return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
457
- }
458
-
459
- static __m256 __lasx_xvreplfr2vr_s(float val) {
460
- ft_union fi_tmpval = {.f = val};
461
- return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
462
- }
463
- #endif
464
-
465
- #ifdef __F16C__
466
-
467
- #ifdef _MSC_VER
468
- #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
469
- #define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
470
- #else
471
- #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
472
- #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
473
- #endif
474
-
475
- #elif defined(__POWER9_VECTOR__)
476
-
477
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
478
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
479
- /* the inline asm below is about 12% faster than the lookup method */
480
- #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
481
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
482
-
483
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
484
- register float f;
485
- register double d;
486
- __asm__(
487
- "mtfprd %0,%2\n"
488
- "xscvhpdp %0,%0\n"
489
- "frsp %1,%0\n" :
490
- /* temp */ "=d"(d),
491
- /* out */ "=f"(f):
492
- /* in */ "r"(h));
493
- return f;
494
- }
495
-
496
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
497
- register double d;
498
- register ggml_fp16_t r;
499
- __asm__( /* xscvdphp can work on double or single precision */
500
- "xscvdphp %0,%2\n"
501
- "mffprd %1,%0\n" :
502
- /* temp */ "=d"(d),
503
- /* out */ "=r"(r):
504
- /* in */ "f"(f));
505
- return r;
506
- }
507
-
508
- #else
509
-
510
- // FP16 <-> FP32
511
- // ref: https://github.com/Maratyszcza/FP16
512
-
513
- static inline float fp32_from_bits(uint32_t w) {
514
- union {
515
- uint32_t as_bits;
516
- float as_value;
517
- } fp32;
518
- fp32.as_bits = w;
519
- return fp32.as_value;
520
- }
521
-
522
- static inline uint32_t fp32_to_bits(float f) {
523
- union {
524
- float as_value;
525
- uint32_t as_bits;
526
- } fp32;
527
- fp32.as_value = f;
528
- return fp32.as_bits;
529
- }
530
-
531
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
532
- const uint32_t w = (uint32_t) h << 16;
533
- const uint32_t sign = w & UINT32_C(0x80000000);
534
- const uint32_t two_w = w + w;
535
-
536
- const uint32_t exp_offset = UINT32_C(0xE0) << 23;
537
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
538
- const float exp_scale = 0x1.0p-112f;
539
- #else
540
- const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
541
- #endif
542
- const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
543
-
544
- const uint32_t magic_mask = UINT32_C(126) << 23;
545
- const float magic_bias = 0.5f;
546
- const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
547
-
548
- const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
549
- const uint32_t result = sign |
550
- (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
551
- return fp32_from_bits(result);
552
- }
553
-
554
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
555
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
556
- const float scale_to_inf = 0x1.0p+112f;
557
- const float scale_to_zero = 0x1.0p-110f;
558
- #else
559
- const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
560
- const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
561
- #endif
562
- float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
563
-
564
- const uint32_t w = fp32_to_bits(f);
565
- const uint32_t shl1_w = w + w;
566
- const uint32_t sign = w & UINT32_C(0x80000000);
567
- uint32_t bias = shl1_w & UINT32_C(0xFF000000);
568
- if (bias < UINT32_C(0x71000000)) {
569
- bias = UINT32_C(0x71000000);
570
- }
571
-
572
- base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
573
- const uint32_t bits = fp32_to_bits(base);
574
- const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
575
- const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
576
- const uint32_t nonsign = exp_bits + mantissa_bits;
577
- return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
578
- }
579
-
580
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
581
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
582
-
583
- #endif // __F16C__
584
-
585
- #endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
586
-
587
- #ifdef __ARM_FEATURE_SVE
588
- #include <arm_sve.h>
589
- #endif // __ARM_FEATURE_SVE
590
-
591
- // precomputed f32 table for f16 (256 KB)
592
- // defined in ggml.c, initialized in ggml_init()
593
- extern float ggml_table_f32_f16[1 << 16];
594
-
595
- // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
596
- // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
597
- // This is also true for POWER9.
598
- #if !defined(GGML_FP16_TO_FP32)
599
- inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
600
- uint16_t s;
601
- memcpy(&s, &f, sizeof(uint16_t));
602
- return ggml_table_f32_f16[s];
603
- }
604
-
605
- #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
606
- #endif
607
-
608
- #if !defined(GGML_FP32_TO_FP16)
609
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
610
- #endif
611
-
612
- #ifdef __cplusplus
613
- }
614
- #endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/whisper.cpp CHANGED
@@ -4268,18 +4268,15 @@ const char * whisper_print_system_info(void) {
4268
  s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
4269
  s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
4270
  s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
4271
- s += "METAL = " + std::to_string(ggml_cpu_has_metal()) + " | ";
4272
  s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
4273
  s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
4274
  s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
4275
- s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
4276
  s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
4277
  s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
4278
  s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
4279
- s += "CUDA = " + std::to_string(ggml_cpu_has_cuda()) + " | ";
4280
  s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
4281
  s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
4282
- s += "CANN = " + std::to_string(ggml_cpu_has_cann()) ;
4283
  return s.c_str();
4284
  }
4285
 
 
4268
  s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
4269
  s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
4270
  s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
 
4271
  s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
4272
  s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
4273
  s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
 
4274
  s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
4275
  s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
4276
  s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
 
4277
  s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
4278
  s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
4279
+
4280
  return s.c_str();
4281
  }
4282