Commit a67a8ec (1 parent: c90c972)
SYCL: Migrate away from deprecated ggml_tensor->backend (llama/10840)
* Migrate to tensor->buffer for checking backend buffer type: 1
* SYCL: common.cpp try to migrate away from tensor->backend
* SYCL: fix assertions and add proper comments
* SYCL: remove extra space
* SYCL: Add back static to ggml_backend_buffer_is_sycl_split function
* SYCL: Add pragma directive to suppress warning spam
* SYCL: Integrate debug logs with GGML_LOG and other fixes
* Revert "SYCL: Integrate debug logs with GGML_LOG and other fixes"
  This reverts commit 2607b7de0f0d2f4f1f690226f86fa861aa39cb97.
  Let's keep the current SYCL-specific logging mechanism for now.
* SYCL: Use GGML_SYCL_DEBUG after reverting
* SYCL: reg_get_proc_address func, update to the current func signature
* SYCL: Refactor SYCL buffer checks in ggml_sycl_cpy_tensor_2d
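
The pattern behind most of these hunks, as a minimal sketch (illustrative, not the verbatim upstream code): instead of reading the deprecated per-tensor `backend` enum, derive the same fact from the buffer the tensor is allocated in. The helper name below is hypothetical; `ggml_backend_buffer_is_sycl_split` is the predicate the diff itself relies on.

// Before/after shape of the migration (sketch only).
static bool tensor_uses_split_buffer(const ggml_tensor * t) {  // hypothetical helper
    // before (deprecated field, removed by this commit):
    //     return t->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
    // after: ask the buffer the tensor lives in instead of the tensor itself
    return ggml_backend_buffer_is_sycl_split(t->buffer);
}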
ggml/src/ggml-sycl/common.cpp
CHANGED
@@ -11,6 +11,8 @@
 //
 
 #include "common.hpp"
+
+#include "ggml-backend-impl.h"
 #include "ggml-impl.h"
 
 int get_current_device_id() {
@@ -65,9 +67,9 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
     const ggml_sycl_op_flatten_t op) try {
 
     const bool use_src1 = src1 != nullptr;
-
-    GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
-    GGML_ASSERT(              dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    if(use_src1)
+        GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
 
     // dd = data device
     float * src0_ddf = (float *) src0->data;
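Note the form of the new assertions in ggml_sycl_op_flatten: common.cpp compares the buffer type's name against GGML_SYCL_NAME "_Split" rather than calling a predicate, and reaching buft->iface is exactly why this file now adds #include "ggml-backend-impl.h" (the iface member is only visible in the internal header). Unrolled, the check reads roughly as follows; buffer_is_named_split is a hypothetical helper, not upstream code.

#include <cstring>  // strcmp

static bool buffer_is_named_split(ggml_backend_buffer_t buf) {  // hypothetical
    ggml_backend_buffer_type_t buft = buf->buft;
    const char * name = buft->iface.get_name(buft);  // the split type reports GGML_SYCL_NAME "_Split"
    return strcmp(name, GGML_SYCL_NAME "_Split") == 0;
}
// ggml_sycl_op_flatten asserts !buffer_is_named_split(dst->buffer),
// and the same for src1 when it is present.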
ggml/src/ggml-sycl/common.hpp
CHANGED
@@ -26,7 +26,11 @@
 
 #define GGML_COMMON_DECL_SYCL
 #define GGML_COMMON_IMPL_SYCL
+/* suppress warning spam */
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnested-anon-types"
 #include "ggml-common.h"
+#pragma clang diagnostic pop
 
 void* ggml_sycl_host_malloc(size_t size);
 void ggml_sycl_host_free(void* ptr);
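The push/ignore/pop pair silences clang's -Wnested-anon-types only around the ggml-common.h include. These pragmas are clang-specific; a compiler that warns on unknown pragmas could complain about them, so a more defensive variant (a sketch of an alternative, not what the commit does) would guard them:

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnested-anon-types"
#endif
#include "ggml-common.h"
#ifdef __clang__
#pragma clang diagnostic pop
#endif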
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
@@ -288,10 +288,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                      ggml_tensor *tensor) try {
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
 
-    if (tensor->view_src != NULL && tensor->view_offs == 0) {
+    if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        tensor->backend = tensor->view_src->backend;
-        tensor->extra = tensor->view_src->extra;
         return;
     }
 
@@ -539,7 +537,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
     auto dev_count = ggml_backend_sycl_get_device_count();
 
     if (device>=dev_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+        GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
               device, dev_count-1);
         GGML_ASSERT(device<dev_count);
     }
@@ -567,7 +565,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_context * ctx) {
 
     int device = ctx->device;
     if (device>=ggml_sycl_info().device_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+        GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
               device, ggml_sycl_info().device_count-1);
         GGML_ASSERT(device<ggml_sycl_info().device_count);
     }
@@ -746,7 +744,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
         size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
     }
 
-    // FIXME: do not crash if cudaMalloc fails
+    // FIXME: do not crash if SYCL Buffer alloc fails
     // currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
     ggml_sycl_set_device(i);
     const queue_ptr stream = ctx->streams[i];
@@ -788,7 +786,6 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
             CHECK_TRY_ERROR(extra->events[i][is] = new sycl::event()));
         }
     }
-    tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
     tensor->extra = extra;
 }
 catch (sycl::exception const &exc) {
@@ -2349,12 +2346,22 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
 
     dpct::memcpy_direction kind;
     char * src_ptr;
-    if (src->backend == GGML_BACKEND_TYPE_CPU) {
+    if (ggml_backend_buffer_is_host(src->buffer)) {
         kind = dpct::host_to_device;
+        //GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n", __func__);
         src_ptr = (char *) src->data;
         // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
-    } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
-        GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
+    } else if (ggml_backend_buffer_is_sycl(src->buffer)) {
+        // If buffer is a SYCL buffer
+        //GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__);
+        kind = dpct::device_to_device;
+        src_ptr = (char *) src->data;
+    } else if (ggml_backend_buffer_is_sycl_split(src->buffer)) {
+        /*
+        If buffer is a SYCL split buffer
+        */
+        //GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__);
+        GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
         kind = dpct::device_to_device;
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
         int id;
@@ -2857,8 +2864,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
     const int nb2 = dst->nb[2];
     const int nb3 = dst->nb[3];
 
-    GGML_ASSERT(dst->backend  != GGML_BACKEND_TYPE_GPU_SPLIT);
-    GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
     GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
 
     GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
@@ -2878,7 +2885,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
 
     int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
 
-    const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+    const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
     GGML_ASSERT(!(split && ne02 > 1));
     GGML_ASSERT(!(split && ne03 > 1));
     GGML_ASSERT(!(split && ne02 < ne12));
@@ -3198,7 +3205,7 @@ static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                        const ggml_tensor *src1,
                                        ggml_tensor *dst) try {
     GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
     GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
@@ -3231,7 +3238,7 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
     GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));
     GGML_ASSERT(!ggml_is_permuted(src0));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
    GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
 
@@ -3293,7 +3300,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
                                            ggml_tensor *dst) try {
     GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
 
     GGML_TENSOR_BINARY_OP_LOCALS
@@ -4638,10 +4645,9 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t reg, size_t index) {
 static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) {
     GGML_UNUSED(reg);
 
-    // TODO: update to the current function signature
-    //if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
-    //    return (void *)ggml_backend_sycl_split_buffer_type;
-    //}
+    if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
+        return (void *)ggml_backend_sycl_split_buffer_type;
+    }
 
     // SYCL doesn't support registering host memory, left here for reference
     // "ggml_backend_register_host_buffer"
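With the commented-out stub replaced by a live branch, the split buffer type is now reachable through the backend registry by name. A caller-side sketch, assuming ggml_backend_sycl_split_buffer_type takes a const float * tensor_split as in the other split-buffer backends (verify against the actual header before relying on it):

#include "ggml-backend.h"

// Hypothetical caller: resolve the SYCL split-buffer-type factory at runtime
// instead of linking the symbol directly.
typedef ggml_backend_buffer_type_t (*split_buft_fn_t)(const float * tensor_split);

static ggml_backend_buffer_type_t get_sycl_split_buft(const float * tensor_split) {
    ggml_backend_reg_t reg = ggml_backend_reg_by_name("SYCL");
    if (reg == nullptr) {
        return nullptr;  // SYCL backend not compiled in / not registered
    }
    auto fn = (split_buft_fn_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
    return fn != nullptr ? fn(tensor_split) : nullptr;
}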