Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
ba20d5c
1
Parent(s):
68520c4
ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (llama/10167)
Browse files
- ggml/src/ggml-cpu.c +2 -3
- ggml/src/ggml.c +6 -3
ggml/src/ggml-cpu.c
CHANGED
|
@@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|
| 304 |
.nrows = 1,
|
| 305 |
},
|
| 306 |
[GGML_TYPE_Q8_0] = {
|
|
|
|
| 307 |
.vec_dot = ggml_vec_dot_q8_0_q8_0,
|
| 308 |
.vec_dot_type = GGML_TYPE_Q8_0,
|
| 309 |
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
|
@@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) {
|
|
| 13692 |
uint16_t u16;
|
| 13693 |
ggml_fp16_t fp16;
|
| 13694 |
} u = {i};
|
| 13695 |
-
|
| 13696 |
-
// current code depends on ggml_init initializing this table
|
| 13697 |
-
float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
|
| 13698 |
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
|
| 13699 |
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
| 13700 |
}
|
|
|
|
| 304 |
.nrows = 1,
|
| 305 |
},
|
| 306 |
[GGML_TYPE_Q8_0] = {
|
| 307 |
+
.from_float_to_mat = quantize_mat_q8_0,
|
| 308 |
.vec_dot = ggml_vec_dot_q8_0_q8_0,
|
| 309 |
.vec_dot_type = GGML_TYPE_Q8_0,
|
| 310 |
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
|
|
|
| 13693 |
uint16_t u16;
|
| 13694 |
ggml_fp16_t fp16;
|
| 13695 |
} u = {i};
|
| 13696 |
+
float f = GGML_FP16_TO_FP32(u.fp16);
|
|
|
|
|
|
|
| 13697 |
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
|
| 13698 |
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
| 13699 |
}
|
ggml/src/ggml.c
CHANGED
|
@@ -220,8 +220,10 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
|
|
| 220 |
|
| 221 |
|
| 222 |
void * ggml_aligned_malloc(size_t size) {
|
|
|
|
|
|
|
| 223 |
#if defined(_MSC_VER) || defined(__MINGW32__)
|
| 224 |
-
return _aligned_malloc(size,
|
| 225 |
#else
|
| 226 |
if (size == 0) {
|
| 227 |
GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
|
|
@@ -229,8 +231,9 @@ void * ggml_aligned_malloc(size_t size) {
|
|
| 229 |
}
|
| 230 |
void * aligned_memory = NULL;
|
| 231 |
#ifdef GGML_USE_CPU_HBM
|
| 232 |
-
int result = hbw_posix_memalign(&aligned_memory,
|
| 233 |
#elif TARGET_OS_OSX
|
|
|
|
| 234 |
kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
|
| 235 |
int result = EFAULT;
|
| 236 |
switch (alloc_status) {
|
|
@@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) {
|
|
| 248 |
break;
|
| 249 |
}
|
| 250 |
#else
|
| 251 |
-
int result = posix_memalign(&aligned_memory,
|
| 252 |
#endif
|
| 253 |
if (result != 0) {
|
| 254 |
// Handle allocation failure
|
|
|
|
| 220 |
|
| 221 |
|
| 222 |
void * ggml_aligned_malloc(size_t size) {
|
| 223 |
+
const int alignment = 64;
|
| 224 |
+
|
| 225 |
#if defined(_MSC_VER) || defined(__MINGW32__)
|
| 226 |
+
return _aligned_malloc(size, alignment);
|
| 227 |
#else
|
| 228 |
if (size == 0) {
|
| 229 |
GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
|
|
|
|
| 231 |
}
|
| 232 |
void * aligned_memory = NULL;
|
| 233 |
#ifdef GGML_USE_CPU_HBM
|
| 234 |
+
int result = hbw_posix_memalign(&aligned_memory, alignment, size);
|
| 235 |
#elif TARGET_OS_OSX
|
| 236 |
+
GGML_UNUSED(alignment);
|
| 237 |
kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
|
| 238 |
int result = EFAULT;
|
| 239 |
switch (alloc_status) {
|
|
|
|
| 251 |
break;
|
| 252 |
}
|
| 253 |
#else
|
| 254 |
+
int result = posix_memalign(&aligned_memory, alignment, size);
|
| 255 |
#endif
|
| 256 |
if (result != 0) {
|
| 257 |
// Handle allocation failure
|