Diego Devesa committed on
Commit
ba20d5c
·
1 Parent(s): 68520c4

ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (llama/10167)

Browse files
Files changed (2) hide show
  1. ggml/src/ggml-cpu.c +2 -3
  2. ggml/src/ggml.c +6 -3
ggml/src/ggml-cpu.c CHANGED
@@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
304
  .nrows = 1,
305
  },
306
  [GGML_TYPE_Q8_0] = {
 
307
  .vec_dot = ggml_vec_dot_q8_0_q8_0,
308
  .vec_dot_type = GGML_TYPE_Q8_0,
309
  #if defined (__ARM_FEATURE_MATMUL_INT8)
@@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) {
13692
  uint16_t u16;
13693
  ggml_fp16_t fp16;
13694
  } u = {i};
13695
- // FIXME: this table is used in conversion functions outside of compute
13696
- // current code depends on ggml_init initializing this table
13697
- float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
13698
  ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
13699
  ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
13700
  }
 
304
  .nrows = 1,
305
  },
306
  [GGML_TYPE_Q8_0] = {
307
+ .from_float_to_mat = quantize_mat_q8_0,
308
  .vec_dot = ggml_vec_dot_q8_0_q8_0,
309
  .vec_dot_type = GGML_TYPE_Q8_0,
310
  #if defined (__ARM_FEATURE_MATMUL_INT8)
 
13693
  uint16_t u16;
13694
  ggml_fp16_t fp16;
13695
  } u = {i};
13696
+ float f = GGML_FP16_TO_FP32(u.fp16);
 
 
13697
  ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
13698
  ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
13699
  }
ggml/src/ggml.c CHANGED
@@ -220,8 +220,10 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
220
 
221
 
222
  void * ggml_aligned_malloc(size_t size) {
 
 
223
  #if defined(_MSC_VER) || defined(__MINGW32__)
224
- return _aligned_malloc(size, TENSOR_ALIGNMENT);
225
  #else
226
  if (size == 0) {
227
  GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
@@ -229,8 +231,9 @@ void * ggml_aligned_malloc(size_t size) {
229
  }
230
  void * aligned_memory = NULL;
231
  #ifdef GGML_USE_CPU_HBM
232
- int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
233
  #elif TARGET_OS_OSX
 
234
  kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
235
  int result = EFAULT;
236
  switch (alloc_status) {
@@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) {
248
  break;
249
  }
250
  #else
251
- int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
252
  #endif
253
  if (result != 0) {
254
  // Handle allocation failure
 
220
 
221
 
222
  void * ggml_aligned_malloc(size_t size) {
223
+ const int alignment = 64;
224
+
225
  #if defined(_MSC_VER) || defined(__MINGW32__)
226
+ return _aligned_malloc(size, alignment);
227
  #else
228
  if (size == 0) {
229
  GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
 
231
  }
232
  void * aligned_memory = NULL;
233
  #ifdef GGML_USE_CPU_HBM
234
+ int result = hbw_posix_memalign(&aligned_memory, alignment, size);
235
  #elif TARGET_OS_OSX
236
+ GGML_UNUSED(alignment);
237
  kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
238
  int result = EFAULT;
239
  switch (alloc_status) {
 
251
  break;
252
  }
253
  #else
254
+ int result = posix_memalign(&aligned_memory, alignment, size);
255
  #endif
256
  if (result != 0) {
257
  // Handle allocation failure