Commit 4da3fb6 · Romain Biessy committed
Parent(s): 09c03ad

sycl: Add more debug prints (llama/13640)
Files changed:
- ggml/src/ggml-sycl/binbcast.cpp +5 -10
- ggml/src/ggml-sycl/common.hpp +87 -5
- ggml/src/ggml-sycl/concat.cpp +31 -33
- ggml/src/ggml-sycl/conv.cpp +1 -0
- ggml/src/ggml-sycl/cpy.cpp +4 -5
- ggml/src/ggml-sycl/dmmv.cpp +2 -0
- ggml/src/ggml-sycl/element_wise.cpp +24 -49
- ggml/src/ggml-sycl/getrows.cpp +1 -3
- ggml/src/ggml-sycl/ggml-sycl.cpp +84 -35
- ggml/src/ggml-sycl/gla.cpp +1 -0
- ggml/src/ggml-sycl/mmvq.cpp +2 -0
- ggml/src/ggml-sycl/outprod.cpp +1 -0
- ggml/src/ggml-sycl/rope.cpp +1 -2
- ggml/src/ggml-sycl/softmax.cpp +1 -4
- ggml/src/ggml-sycl/tsembd.cpp +2 -3
- ggml/src/ggml-sycl/wkv.cpp +2 -14
ggml/src/ggml-sycl/binbcast.cpp
CHANGED
@@ -319,32 +319,27 @@ inline void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor *dst)


 void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_add(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_sub(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_mul(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_div(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_repeat(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

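The scope_op_debug_print used above is an RAII helper declared in ggml-sycl/common.hpp (the next file in this diff): its constructor logs the call and its destructor logs completion, so the manual "call %s done" prints removed here are now emitted on every exit path automatically. A minimal stand-alone sketch of that pattern (scope_logger and example_op are illustrative stand-ins, not part of the commit):

#include <cstdio>

// Stand-in for scope_op_debug_print: log on entry, and log "done" from the
// destructor so every return path (including early returns) is covered.
struct scope_logger {
    const char * name;
    explicit scope_logger(const char * n) : name(n) { std::fprintf(stderr, "call %s\n", name); }
    ~scope_logger() { std::fprintf(stderr, "call %s done\n", name); }
};

void example_op(bool early_exit) {   // hypothetical wrapper, mirrors the functions above
    scope_logger dbg(__func__);      // replaces the manual entry/"done" print pair
    if (early_exit) {
        return;                      // "call example_op done" is still printed
    }
    // ... launch the SYCL kernel ...
}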
ggml/src/ggml-sycl/common.hpp
CHANGED
@@ -15,6 +15,7 @@

 #include <fstream>
 #include <iostream>
+#include <string>

 #include "dpct/helper.hpp"
 #include "ggml-sycl.h"

@@ -44,11 +45,20 @@ extern int g_ggml_sycl_debug;
 extern int g_ggml_sycl_disable_optimize;
 extern int g_ggml_sycl_prioritize_dmmv;

-#
-
-
-
-
+#if defined(__clang__) && __has_builtin(__builtin_expect)
+// Hint the optimizer to pipeline the more likely following instruction in branches
+#    define LIKELY(expr) __builtin_expect(expr, true)
+#    define UNLIKELY(expr) __builtin_expect(expr, false)
+#else
+#    define LIKELY(expr) (expr)
+#    define UNLIKELY(expr) (expr)
+#endif
+
+#define GGML_SYCL_DEBUG(...)              \
+    do {                                  \
+        if (UNLIKELY(g_ggml_sycl_debug))  \
+            fprintf(stderr, __VA_ARGS__); \
+    } while (0)

 #define CHECK_TRY_ERROR(expr) \
     [&]() { \

@@ -490,4 +500,76 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
 }

 bool gpu_has_xmx(sycl::device &dev);
+
+template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) {
+    if (LIKELY(!g_ggml_sycl_debug)) {
+        return;
+    }
+    std::stringstream ss;
+    ss << prefix << "=[";
+    for (std::size_t i = 0; i < N - 1; ++i) {
+        ss << array[i] << ", ";
+    }
+    if constexpr (N > 0) {
+        ss << array[N - 1];
+    }
+    ss << "]";
+    GGML_SYCL_DEBUG("%s", ss.str().c_str());
+}
+
+inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor,
+                               const std::string & suffix = "") {
+    if (LIKELY(!g_ggml_sycl_debug)) {
+        return;
+    }
+    GGML_SYCL_DEBUG("%s=", prefix.c_str());
+    if (tensor) {
+        GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
+        debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne);
+        debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb);
+        if (!ggml_is_contiguous(tensor)) {
+            GGML_SYCL_DEBUG(";strided");
+        }
+        if (ggml_is_permuted(tensor)) {
+            GGML_SYCL_DEBUG(";permuted");
+        }
+    } else {
+        GGML_SYCL_DEBUG("nullptr");
+    }
+    GGML_SYCL_DEBUG("%s", suffix.c_str());
+}
+
+// Use scope_op_debug_print to log operations coming from running a model
+struct scope_op_debug_print {
+    // Use string_views to avoid the cost of creating a string and concatenating them
+    // string_views must be alive for as long as the object is alive
+    // scope_op_debug_print are used with string literals in practice which are stored in constant space so always accessible
+    scope_op_debug_print(const std::string_view & func, const std::string_view & func_suffix, const ggml_tensor * dst,
+                         std::size_t num_src, const std::string_view & suffix = "") :
+        func(func),
+        func_suffix(func_suffix) {
+        if (LIKELY(!g_ggml_sycl_debug)) {
+            return;
+        }
+        GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
+        debug_print_tensor(" dst", dst);
+        if (dst) {
+            for (std::size_t i = 0; i < num_src; ++i) {
+                debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
+            }
+        }
+        GGML_SYCL_DEBUG("%s\n", suffix.data());
+    }
+
+    scope_op_debug_print(const std::string_view & func, const ggml_tensor * dst, std::size_t num_src,
+                         const std::string_view & suffix = "") :
+        scope_op_debug_print(func, "", dst, num_src, suffix) {}
+
+    ~scope_op_debug_print() { GGML_SYCL_DEBUG("[SYCL][OP] call %s%s done\n", func.data(), func_suffix.data()); }
+
+  private:
+    std::string_view func;
+    std::string_view func_suffix;
+};
+
 #endif // GGML_SYCL_COMMON_HPP
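A minimal usage sketch (not part of the commit) of the helpers above, written the way the operator wrappers in the .cpp files below use them. The function name and the values in the comments are illustrative; the output shape follows the format strings of scope_op_debug_print and debug_print_tensor, and is only emitted when g_ggml_sycl_debug is non-zero:

// Sketch only; assumes ggml-sycl/common.hpp is included.
void ggml_sycl_example_op(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    // On entry the constructor prints one line of the form
    //   [SYCL][OP] call ggml_sycl_example_op: dst='<name>':type=f32;ne=[...];nb=[...]\tsrc0=...\tsrc1=...
    // and the destructor prints, on every exit path,
    //   [SYCL][OP] call ggml_sycl_example_op done
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);

    (void) ctx;  // ... dispatch the SYCL kernel for this operator ...
}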
ggml/src/ggml-sycl/concat.cpp
CHANGED
@@ -159,39 +159,37 @@ static void concat_f32_sycl_non_cont(
 }

 void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-    …
-            stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
-    } else
-        concat_f32_sycl_non_cont(
-            stream, (const char *)src0->data, (const char *)src1->data,
-            (char *)dst->data, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
-            src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src1->ne[0],
-            src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1],
-            src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
-            dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    queue_ptr stream = ctx.stream();
+
+    const int32_t dim = ((int32_t *) dst->op_params)[0];
+
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
+        const float * src0_d = (const float *) src0->data;
+        const float * src1_d = (const float *) src1->data;
+
+        float * dst_d = (float *) dst->data;
+
+        if (dim != 3) {
+            for (int i3 = 0; i3 < dst->ne[3]; i3++) {
+                concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
+                                dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
+                                dst->ne[1], dst->ne[2], dim, stream);
+            }
+        } else {
+            const size_t size0 = ggml_nbytes(src0);
+            const size_t size1 = ggml_nbytes(src1);
+
+            SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
+            SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
+        }
     } else {
+        concat_f32_sycl_non_cont(stream, (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
+                                 src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1],
+                                 src0->nb[2], src0->nb[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
+                                 src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
+                                 dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
     }
 }
ggml/src/ggml-sycl/conv.cpp
CHANGED
@@ -72,6 +72,7 @@ static void conv_transpose_1d_f32_f32_sycl(
 }

 void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     const ggml_tensor *src0 = dst->src[0];
     const ggml_tensor *src1 = dst->src[1];
     const float * src0_d = (const float *)src0->data;
ggml/src/ggml-sycl/cpy.cpp
CHANGED
@@ -616,6 +616,9 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co
 }

 void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
+    // Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
+    scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0,
+                                         std::string(" src0 type=") + ggml_type_name(src0->type));
     const int64_t ne = ggml_nelements(src0);
     GGML_ASSERT(ne == ggml_nelements(src1));

@@ -629,8 +632,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co

     char * src0_ddc = (char *) src0->data;
     char * src1_ddc = (char *) src1->data;
-    GGML_SYCL_DEBUG("[SYCL] %s: Tensor supplied: %s to %s\n", __func__, ggml_type_name(src0->type),
-                    ggml_type_name(src1->type));

     if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
         ggml_cpy_f32_f32_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10,

@@ -694,8 +695,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
 }

 void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
-    GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_cpy(ctx, dst->src[0], dst);
-    GGML_SYCL_DEBUG("[SYCL] call %s done\n", __func__);
 }
ggml/src/ggml-sycl/dmmv.cpp
CHANGED
@@ -1092,6 +1092,8 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
         src0->type == GGML_TYPE_Q8_0 || src0->type == GGML_TYPE_F16;

     if (src1_convert_f16) {
+        scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
+                                             " : converting src1 to fp16");
         src1_dfloat = src1_dfloat_a.alloc(ne00);
         const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
         GGML_ASSERT(to_fp16_sycl != nullptr);
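The four-argument form used here relies on the func_suffix and suffix parameters of scope_op_debug_print (see common.hpp above): the suffix names the nested conversion step, so the entry and "done" lines can be matched per sub-step. A hedged sketch of the resulting log shape (tensor details and values are hypothetical, format taken from the header above):

// Sketch only; assumes ggml-sycl/common.hpp. With debug enabled this scope prints
//   [SYCL][OP] call ggml_sycl_op_dequantize_mul_mat_vec/to_fp16_sycl: dst=... src0=... src1=... : converting src1 to fp16
// and, when it is destroyed,
//   [SYCL][OP] call ggml_sycl_op_dequantize_mul_mat_vec/to_fp16_sycl done
scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
                                     " : converting src1 to fp16");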
ggml/src/ggml-sycl/element_wise.cpp
CHANGED
@@ -1391,146 +1391,121 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst)


 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_sqrt(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_sin(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_cos(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_acc(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_gelu(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_silu(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_gelu_quick(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_tanh(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_relu(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_sigmoid(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_hardsigmoid(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_hardswish(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

-
 void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_exp(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_log(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_neg(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_step(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_leaky_relu(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_sqr(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_upscale(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_pad(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_clamp(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_sgn(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_abs(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_elu(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
ggml/src/ggml-sycl/getrows.cpp
CHANGED
@@ -257,8 +257,7 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
     GGML_UNUSED(ctx);
 }

-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

@@ -308,4 +307,3 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
         GGML_ABORT("fatal error");
     }
 }
-
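The remaining file, ggml-sycl.cpp, also instruments backend entry points that are not graph operators (buffer init/set/get/memset/copy and async tensor transfers); those log the tensor and sizes directly with GGML_SYCL_DEBUG and debug_print_tensor rather than through the operator scope. A minimal sketch of that pattern, with a hypothetical function name:

// Sketch only; assumes ggml-sycl/common.hpp is included.
static void example_buffer_set_tensor(ggml_tensor * tensor, size_t offset, size_t size) {  // hypothetical
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    // ... perform the copy to the device buffer ...
}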
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
@@ -346,6 +346,8 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) {
 static enum ggml_status
 ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                      ggml_tensor *tensor) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor, "\n");
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;

     if (tensor->view_src != NULL) {

@@ -381,7 +383,9 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                                 ggml_tensor *tensor,
                                                 const void *data, size_t offset,
                                                 size_t size) try {
-
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
     ggml_sycl_set_device(ctx->device);
     auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());

@@ -407,7 +411,9 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                                 const ggml_tensor *tensor,
                                                 void *data, size_t offset,
                                                 size_t size) try {
-
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;

     ggml_sycl_set_device(ctx->device);

@@ -435,7 +441,12 @@ static bool
 ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
                                     const ggml_tensor *src,
                                     ggml_tensor *dst) try {
-
+    bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": dst=", dst);
+    debug_print_tensor(" src=", src);
+    GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
+    if (is_cpy_supported) {
     ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
     ggml_backend_sycl_buffer_context * dst_ctx = (ggml_backend_sycl_buffer_context *)dst->buffer->context;

@@ -492,7 +503,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,

 static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
                                            uint8_t value) try {
-
+    GGML_SYCL_DEBUG("[SYCL] call %s: size=%zu\n", __func__, buffer->size);
+    ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;

     ggml_sycl_set_device(ctx->device);
     queue_ptr stream = ctx->stream;

@@ -511,7 +523,9 @@ catch (sycl::exception const &exc) {

 static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
                                                    size_t offset, size_t size) {
-    GGML_SYCL_DEBUG("…
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
     SYCL_CHECK(ggml_sycl_set_device(ctx->device));
     auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());

@@ -789,6 +803,8 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff
 static enum ggml_status
 ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                            ggml_tensor *tensor) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor, "\n");
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported

     ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;

@@ -873,6 +889,9 @@ static void
 ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                           ggml_tensor *tensor, const void *data,
                                           size_t offset, size_t size) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     // split tensors must always be set in their entirety at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));

@@ -926,6 +945,9 @@ static void
 ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                           const ggml_tensor *tensor, void *data,
                                           size_t offset, size_t size) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     // split tensors must always be set in their entirety at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));

@@ -2015,12 +2037,12 @@ inline void ggml_sycl_op_mul_mat_sycl(
 #else
     bool use_fp16 = false;
 #endif
-    if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
-
-        dst->op_params[0] == GGML_PREC_DEFAULT) {
-        // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
+    if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && use_fp16 && ggml_is_contiguous(src0) &&
+        row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
         ggml_sycl_pool_alloc<sycl::half> src0_as_f16(ctx.pool());
         if (src0->type != GGML_TYPE_F16) {
+            scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
+                                                 " : converting src0 to fp16");
             const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type, dst);
             GGML_ASSERT(to_fp16_sycl != nullptr);
             size_t ne = row_diff*ne00;

@@ -2033,6 +2055,8 @@ inline void ggml_sycl_op_mul_mat_sycl(

     ggml_sycl_pool_alloc<sycl::half> src1_as_f16(ctx.pool());
     if (src1->type != GGML_TYPE_F16) {
+        scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
+                                             " : converting src1 to fp16");
         const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
         GGML_ASSERT(to_fp16_sycl != nullptr);
         size_t ne = src1_ncols*ne10;

@@ -2049,6 +2073,8 @@ inline void ggml_sycl_op_mul_mat_sycl(
         DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr,
                                   DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
                                   dst_f16.get(), DnnlGemmWrapper::to_dt<sycl::half>(), stream);
+        scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+                                             " : converting dst to fp32");
         const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
         to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff* src1_ncols, stream);
     }

@@ -2064,21 +2090,25 @@ inline void ggml_sycl_op_mul_mat_sycl(
                                                  src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
                                                  dst_f16.get(), dpct::library_data_t::real_half, ldc,
                                                  dpct::library_data_t::real_half)));
+        scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+                                             " : converting dst to fp32");
         const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
         to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
     }
-    }
-    else {
-        // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n");
+    } else {
         ggml_sycl_pool_alloc<float> src0_ddq_as_f32(ctx.pool());
         ggml_sycl_pool_alloc<float> src1_ddq_as_f32(ctx.pool());
         if (src0->type != GGML_TYPE_F32) {
+            scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+                                                 " : converting src0 to fp32");
             const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src0->type, dst);
             GGML_ASSERT(to_fp32_sycl != nullptr);
             src0_ddq_as_f32.alloc(row_diff*ne00);
             to_fp32_sycl(src0_dd_i, src0_ddq_as_f32.get(), row_diff*ne00, stream);
         }
         if (src1->type != GGML_TYPE_F32) {
+            scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+                                                 " : converting src1 to fp32");
             const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src1->type, dst);
             GGML_ASSERT(to_fp32_sycl != nullptr);
             src1_ddq_as_f32.alloc(src1_ncols*ne10);

@@ -2114,8 +2144,7 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }

-static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();

@@ -2167,8 +2196,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
     sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
 }

-inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();

@@ -2199,8 +2227,7 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor *
     argsort_f32_i32_sycl(src0_dd, (int *) dst_dd, ncols, nrows, order, main_stream);
 }

-inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_I32);

@@ -2215,8 +2242,7 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *ds
     argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
 }

-inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tensor *dst) {
-
+inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();

@@ -2233,8 +2259,7 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tens
     diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
 }

-inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();

@@ -2421,6 +2446,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
         dev[i].src1_ddq = dev[i].src1_ddq_alloc.alloc(ctx.pool(i), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);

         if (src1_on_device && src1_is_contiguous) {
+            scope_op_debug_print scope_dbg_print(__func__, "/quantize_row_q8_1_sycl", dst,
+                                                 /*num_src=*/2, " : converting src1 to Q8_1");
             quantize_row_q8_1_sycl(dev[i].src1_ddf, dev[i].src1_ddq, ne10, nrows1, src1_padded_col_size, stream);
             /*
             DPCT1010:90: SYCL uses exceptions to report errors and does not

@@ -2525,6 +2552,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
             }

             if (convert_src1_to_q8_1 && !src1_is_contiguous) {
+                scope_op_debug_print scope_dbg_print(__func__, "/quantize_row_q8_1_sycl", dst,
+                                                     /*num_src=*/2, " : converting src1 to Q8_1");
                 quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
                 /*
                 DPCT1010:92: SYCL uses exceptions to report errors and does

@@ -2619,33 +2648,28 @@ catch (sycl::exception const &exc) {


 static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_get_rows(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_norm(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_rms_norm(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_l2_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_l2_norm(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_group_norm(ctx, dst);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -2773,6 +2797,8 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons

     // convert src1 to fp16
     if (src1->type != GGML_TYPE_F16) {
+        scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_nc_sycl", dst, /*num_src=*/2,
+                                             " : converting src1 to fp16");
         const to_fp16_nc_sycl_t to_fp16_nc_sycl = get_to_fp16_nc_sycl(src1->type);
         GGML_ASSERT(to_fp16_nc_sycl != nullptr);
         const int64_t ne_src1 = ggml_nelements(src1);

@@ -3076,6 +3102,7 @@ static bool can_use_mul_mat_vec_q(const ggml_tensor * src0, const ggml_tensor *
 }

 static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
     int64_t min_compute_capability = INT_MAX;

@@ -3153,7 +3180,6 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
         constexpr bool convert_src1_to_q8_1 = false;
         ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_mul_mat_sycl, convert_src1_to_q8_1);
     }
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }


@@ -3224,6 +3250,7 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(

 static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
                                  ggml_tensor *dst) try {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/3);
     const ggml_tensor *src0 = dst->src[0];
     const ggml_tensor *src1 = dst->src[1];
     GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");

@@ -3392,37 +3419,45 @@
 }

 static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_scale(ctx, dst);
 }

 static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_diag_mask_inf(ctx, dst);
 }

 static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_pool2d(ctx, dst);
 }

 static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_im2col(ctx, dst);
 }

 static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
     ggml_sycl_op_sum(ctx, dst);
 }

 static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
     ggml_sycl_op_sum_rows(ctx, dst);
 }

 static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    …
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
     ggml_sycl_op_argsort(ctx, dst);
 }

 static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    …
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
     ggml_sycl_op_argmax(ctx, dst);
 }

@@ -3716,6 +3751,9 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
                                               ggml_tensor *tensor,
                                               const void *data, size_t offset,
                                               size_t size) try {
+    …
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;

@@ -3734,6 +3772,9 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
                                               const ggml_tensor *tensor,
                                               void *data, size_t offset,
                                               size_t size) try {
+    …
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;

@@ -3752,7 +3793,13 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
                                               const ggml_tensor *src,
                                               ggml_tensor *dst) try {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-
+    …
     /*
     DPCT1009:215: SYCL uses exceptions to report errors and does not use the
     error codes. The original code was commented out and a warning string

@@ -3773,6 +3820,7 @@ catch (sycl::exception const &exc) {
 }

 static void ggml_backend_sycl_synchronize(ggml_backend_t backend) try {
+    …
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
     SYCL_CHECK(CHECK_TRY_ERROR((stream)->wait()));

@@ -3906,7 +3954,7 @@ catch (sycl::exception const &exc)
 }

 static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
-
+    …
     sycl::event* sycl_event = static_cast<sycl::event*>(event->context);

     if (ggml_backend_is_sycl(backend)) {

@@ -4301,6 +4349,7 @@ static void ggml_backend_sycl_device_event_free(ggml_backend_dev_t dev, ggml_bac

 static void ggml_backend_sycl_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) try {
     GGML_UNUSED(dev);
+    …

     sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
     SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));
|
| 3454 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
| 3455 |
GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
|
| 3456 |
ggml_sycl_op_argsort(ctx, dst);
|
| 3457 |
}
|
| 3458 |
|
| 3459 |
static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 3460 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
| 3461 |
GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
|
| 3462 |
ggml_sycl_op_argmax(ctx, dst);
|
| 3463 |
}
|
|
|
|
| 3751 |
ggml_tensor *tensor,
|
| 3752 |
const void *data, size_t offset,
|
| 3753 |
size_t size) try {
|
| 3754 |
+
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3755 |
+
debug_print_tensor(": tensor=", tensor);
|
| 3756 |
+
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 3757 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3758 |
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
| 3759 |
|
|
|
|
| 3772 |
const ggml_tensor *tensor,
|
| 3773 |
void *data, size_t offset,
|
| 3774 |
size_t size) try {
|
| 3775 |
+
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3776 |
+
debug_print_tensor(": tensor=", tensor);
|
| 3777 |
+
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 3778 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3779 |
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
| 3780 |
|
|
|
|
| 3793 |
const ggml_tensor *src,
|
| 3794 |
ggml_tensor *dst) try {
|
| 3795 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3796 |
+
bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
|
| 3797 |
+
ggml_backend_buffer_is_sycl(src->buffer);
|
| 3798 |
+
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3799 |
+
debug_print_tensor(": dst=", dst);
|
| 3800 |
+
debug_print_tensor(" src=", src);
|
| 3801 |
+
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
|
| 3802 |
+
if (is_cpy_supported) {
|
| 3803 |
/*
|
| 3804 |
DPCT1009:215: SYCL uses exceptions to report errors and does not use the
|
| 3805 |
error codes. The original code was commented out and a warning string
|
|
|
|
| 3820 |
}
|
| 3821 |
|
| 3822 |
static void ggml_backend_sycl_synchronize(ggml_backend_t backend) try {
|
| 3823 |
+
GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
|
| 3824 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3825 |
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
|
| 3826 |
SYCL_CHECK(CHECK_TRY_ERROR((stream)->wait()));
|
|
|
|
| 3954 |
}
|
| 3955 |
|
| 3956 |
static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
|
| 3957 |
+
GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
|
| 3958 |
sycl::event* sycl_event = static_cast<sycl::event*>(event->context);
|
| 3959 |
|
| 3960 |
if (ggml_backend_is_sycl(backend)) {
|
|
|
|
| 4349 |
|
| 4350 |
static void ggml_backend_sycl_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) try {
|
| 4351 |
GGML_UNUSED(dev);
|
| 4352 |
+
GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
|
| 4353 |
|
| 4354 |
sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
|
| 4355 |
SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));
|
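Note: the ggml-sycl.cpp hunks above replace the old paired "call %s" / "call %s done" prints with a single scope_op_debug_print object constructed at the top of each operator. The real class is defined in common.hpp earlier in this commit and prints detailed tensor information (via debug_print_tensor); the sketch below only illustrates the RAII idea it relies on, using hypothetical names and a much simpler message format.

    // Minimal sketch of a scope-based debug printer (illustrative only; the
    // actual scope_op_debug_print takes the destination tensor and a source
    // count, and is presumably gated by the same runtime debug switch).
    #include <cstdio>
    #include <string>

    struct scope_debug_print_sketch {
        std::string name;
        explicit scope_debug_print_sketch(const char * func) : name(func) {
            std::fprintf(stderr, "[SYCL] call %s\n", name.c_str());        // printed on entry
        }
        ~scope_debug_print_sketch() {
            std::fprintf(stderr, "[SYCL] call %s done\n", name.c_str());   // printed on every exit path
        }
    };

    static void some_op() {
        scope_debug_print_sketch dbg(__func__);
        // ... kernel launches; early returns still run the destructor ...
    }

    int main() { some_op(); return 0; }

Unlike the removed explicit pairs, the closing message cannot be skipped by an early return from the operator.
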
ggml/src/ggml-sycl/gla.cpp
CHANGED
|
@@ -76,6 +76,7 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B,
|
|
| 76 |
}
|
| 77 |
|
| 78 |
void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
|
|
| 79 |
const float * k_d = static_cast<const float *>(dst->src[0]->data);
|
| 80 |
const float * v_d = static_cast<const float *>(dst->src[1]->data);
|
| 81 |
const float * r_d = static_cast<const float *>(dst->src[2]->data);
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 79 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/5);
|
| 80 |
const float * k_d = static_cast<const float *>(dst->src[0]->data);
|
| 81 |
const float * v_d = static_cast<const float *>(dst->src[1]->data);
|
| 82 |
const float * r_d = static_cast<const float *>(dst->src[2]->data);
|
ggml/src/ggml-sycl/mmvq.cpp
CHANGED
|
@@ -1059,8 +1059,10 @@ void ggml_sycl_op_mul_mat_vec_q(ggml_backend_sycl_context & ctx, const ggml_tens
|
|
| 1059 |
case GGML_TYPE_Q4_K:
|
| 1060 |
if ((ggml_tensor_extra_gpu *) dst->src[0]->extra &&
|
| 1061 |
((ggml_tensor_extra_gpu *) dst->src[0]->extra)->optimized_feature.reorder) {
|
|
|
|
| 1062 |
reorder_mul_mat_vec_q4_k_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
|
| 1063 |
} else {
|
|
|
|
| 1064 |
mul_mat_vec_q4_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
|
| 1065 |
}
|
| 1066 |
break;
|
|
|
|
| 1059 |
case GGML_TYPE_Q4_K:
|
| 1060 |
if ((ggml_tensor_extra_gpu *) dst->src[0]->extra &&
|
| 1061 |
((ggml_tensor_extra_gpu *) dst->src[0]->extra)->optimized_feature.reorder) {
|
| 1062 |
+
GGML_SYCL_DEBUG("Calling reorder_mul_mat_vec_q4_k_q8_1_sycl\n");
|
| 1063 |
reorder_mul_mat_vec_q4_k_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
|
| 1064 |
} else {
|
| 1065 |
+
GGML_SYCL_DEBUG("Calling mul_mat_vec_q4_K_q8_1_sycl\n");
|
| 1066 |
mul_mat_vec_q4_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
|
| 1067 |
}
|
| 1068 |
break;
|
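Note: the two GGML_SYCL_DEBUG calls added in mmvq.cpp make it visible at runtime whether the reordered Q4_K kernel or the standard one is taken. GGML_SYCL_DEBUG itself is declared in common.hpp; the sketch below is only a guess at how such a runtime-gated printf-style macro is commonly written (hypothetical names, environment-variable gating assumed).

    #include <cstdio>
    #include <cstdlib>

    // Sketch of a runtime-gated debug print macro (hypothetical; the real
    // GGML_SYCL_DEBUG definition in common.hpp may differ).
    static bool sycl_debug_enabled() {
        static const bool enabled = [] {
            const char * v = std::getenv("GGML_SYCL_DEBUG");
            return v != nullptr && *v != '0';
        }();
        return enabled;
    }

    #define SYCL_DEBUG_SKETCH(...)                  \
        do {                                        \
            if (sycl_debug_enabled()) {             \
                std::fprintf(stderr, __VA_ARGS__);  \
            }                                       \
        } while (0)

    int main() {
        SYCL_DEBUG_SKETCH("Calling %s\n", "reorder_mul_mat_vec_q4_k_q8_1_sycl");
        return 0;
    }

With a macro like this, the call sites above cost only a branch on a cached flag when debugging is disabled.
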
ggml/src/ggml-sycl/outprod.cpp
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
#include "outprod.hpp"
|
| 2 |
|
| 3 |
void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
|
|
| 4 |
const ggml_tensor *src0 = dst->src[0];
|
| 5 |
const ggml_tensor *src1 = dst->src[1];
|
| 6 |
|
|
|
|
| 1 |
#include "outprod.hpp"
|
| 2 |
|
| 3 |
void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
| 4 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
| 5 |
const ggml_tensor *src0 = dst->src[0];
|
| 6 |
const ggml_tensor *src1 = dst->src[1];
|
| 7 |
|
ggml/src/ggml-sycl/rope.cpp
CHANGED
|
@@ -355,8 +355,7 @@ inline void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
|
|
| 355 |
}
|
| 356 |
|
| 357 |
void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 358 |
-
GGML_SYCL_DEBUG("call %s\n", __func__);
|
| 359 |
ggml_sycl_op_rope(ctx, dst);
|
| 360 |
-
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
| 361 |
}
|
| 362 |
|
|
|
|
| 355 |
}
|
| 356 |
|
| 357 |
void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 358 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/3);
|
| 359 |
ggml_sycl_op_rope(ctx, dst);
|
|
|
|
| 360 |
}
|
| 361 |
|
ggml/src/ggml-sycl/softmax.cpp
CHANGED
|
@@ -225,7 +225,7 @@ static void soft_max_f32_sycl(const float * x, const T * mask,
|
|
| 225 |
}
|
| 226 |
|
| 227 |
void ggml_sycl_op_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 228 |
-
|
| 229 |
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
| 230 |
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
| 231 |
|
|
@@ -249,16 +249,13 @@ void ggml_sycl_op_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
| 249 |
|
| 250 |
if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F16) {
|
| 251 |
const sycl::half * src1_dd = static_cast<sycl::half *>(dst->src[1]->data);
|
| 252 |
-
GGML_SYCL_DEBUG("%s: F16 mask\n", __func__);
|
| 253 |
soft_max_f32_sycl<sycl::half>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias,
|
| 254 |
main_stream, ctx.device);
|
| 255 |
} else if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F32) {
|
| 256 |
const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
|
| 257 |
-
GGML_SYCL_DEBUG("%s: F32 mask\n", __func__);
|
| 258 |
soft_max_f32_sycl<float>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
|
| 259 |
} else {
|
| 260 |
/* mask unavailable */
|
| 261 |
-
GGML_SYCL_DEBUG("%s: No mask\n", __func__);
|
| 262 |
soft_max_f32_sycl<float>(src0_dd, nullptr, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
|
| 263 |
}
|
| 264 |
}
|
|
|
|
| 225 |
}
|
| 226 |
|
| 227 |
void ggml_sycl_op_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 228 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
| 229 |
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
| 230 |
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
| 231 |
|
|
|
|
| 249 |
|
| 250 |
if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F16) {
|
| 251 |
const sycl::half * src1_dd = static_cast<sycl::half *>(dst->src[1]->data);
|
|
|
|
| 252 |
soft_max_f32_sycl<sycl::half>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias,
|
| 253 |
main_stream, ctx.device);
|
| 254 |
} else if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F32) {
|
| 255 |
const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
|
|
|
|
| 256 |
soft_max_f32_sycl<float>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
|
| 257 |
} else {
|
| 258 |
/* mask unavailable */
|
|
|
|
| 259 |
soft_max_f32_sycl<float>(src0_dd, nullptr, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
|
| 260 |
}
|
| 261 |
}
|
ggml/src/ggml-sycl/tsembd.cpp
CHANGED
|
@@ -56,8 +56,8 @@ static void timestep_embedding_f32_sycl(
|
|
| 56 |
}
|
| 57 |
|
| 58 |
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 59 |
-
const ggml_tensor *src0 = dst->src[0];
|
| 60 |
-
const ggml_tensor *src1 = dst->src[1];
|
| 61 |
const float * src0_d = (const float *)src0->data;
|
| 62 |
float * dst_d = (float *)dst->data;
|
| 63 |
dpct::queue_ptr stream = ctx.stream();
|
|
@@ -69,5 +69,4 @@ void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tenso
|
|
| 69 |
const int max_period = dst->op_params[1];
|
| 70 |
|
| 71 |
timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
|
| 72 |
-
GGML_UNUSED(src1);
|
| 73 |
}
|
|
|
|
| 56 |
}
|
| 57 |
|
| 58 |
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
| 59 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
| 60 |
+
const ggml_tensor * src0 = dst->src[0];
|
| 61 |
const float * src0_d = (const float *)src0->data;
|
| 62 |
float * dst_d = (float *)dst->data;
|
| 63 |
dpct::queue_ptr stream = ctx.stream();
|
|
|
|
| 69 |
const int max_period = dst->op_params[1];
|
| 70 |
|
| 71 |
timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
|
|
|
|
| 72 |
}
|
ggml/src/ggml-sycl/wkv.cpp
CHANGED
|
@@ -180,10 +180,7 @@ static void rwkv_wkv7_f32_kernel(
|
|
| 180 |
}
|
| 181 |
|
| 182 |
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
| 183 |
-
|
| 184 |
-
const ggml_tensor *src0 = dst->src[0];
|
| 185 |
-
const ggml_tensor *src1 = dst->src[1];
|
| 186 |
-
|
| 187 |
const float* k_d = (const float*)dst->src[0]->data;
|
| 188 |
const float* v_d = (const float*)dst->src[1]->data;
|
| 189 |
const float* r_d = (const float*)dst->src[2]->data;
|
|
@@ -236,16 +233,10 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
| 236 |
});
|
| 237 |
});
|
| 238 |
}
|
| 239 |
-
|
| 240 |
-
GGML_UNUSED(src0);
|
| 241 |
-
GGML_UNUSED(src1);
|
| 242 |
}
|
| 243 |
|
| 244 |
void ggml_sycl_op_rwkv_wkv7(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
| 245 |
-
|
| 246 |
-
const ggml_tensor *src0 = dst->src[0];
|
| 247 |
-
const ggml_tensor *src1 = dst->src[1];
|
| 248 |
-
|
| 249 |
const float* r_d = (const float*)dst->src[0]->data;
|
| 250 |
const float* w_d = (const float*)dst->src[1]->data;
|
| 251 |
const float* k_d = (const float*)dst->src[2]->data;
|
|
@@ -299,7 +290,4 @@ void ggml_sycl_op_rwkv_wkv7(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
| 299 |
});
|
| 300 |
});
|
| 301 |
}
|
| 302 |
-
|
| 303 |
-
GGML_UNUSED(src0);
|
| 304 |
-
GGML_UNUSED(src1);
|
| 305 |
}
|
|
|
|
| 180 |
}
|
| 181 |
|
| 182 |
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
| 183 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/6);
|
|
|
|
|
|
|
|
|
|
| 184 |
const float* k_d = (const float*)dst->src[0]->data;
|
| 185 |
const float* v_d = (const float*)dst->src[1]->data;
|
| 186 |
const float* r_d = (const float*)dst->src[2]->data;
|
|
|
|
| 233 |
});
|
| 234 |
});
|
| 235 |
}
|
|
|
|
|
|
|
|
|
|
| 236 |
}
|
| 237 |
|
| 238 |
void ggml_sycl_op_rwkv_wkv7(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
| 239 |
+
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/7);
|
|
|
|
|
|
|
|
|
|
| 240 |
const float* r_d = (const float*)dst->src[0]->data;
|
| 241 |
const float* w_d = (const float*)dst->src[1]->data;
|
| 242 |
const float* k_d = (const float*)dst->src[2]->data;
|
|
|
|
| 290 |
});
|
| 291 |
});
|
| 292 |
}
|
|
|
|
|
|
|
|
|
|
| 293 |
}
|
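Note: across the call sites in this commit the num_src argument states how many entries of dst->src[] the operator actually reads (2 for the binary ops, 3 for rope and mul_mat_id, 6 and 7 for the WKV6/WKV7 kernels above), so the printer can dump exactly those tensors. The sketch below shows that idea with simplified stand-in types; the real code works on ggml_tensor and prints far more detail.

    #include <cstdio>

    // Simplified stand-in for ggml_tensor, for illustration only.
    struct fake_tensor {
        const char *  name;
        fake_tensor * src[8];
    };

    // Dump the destination tensor and its first num_src sources.
    static void print_op_sources(const char * op, const fake_tensor * dst, int num_src) {
        std::printf("[SYCL] %s: dst=%s", op, dst->name);
        for (int i = 0; i < num_src; ++i) {
            if (dst->src[i] != nullptr) {
                std::printf(" src%d=%s", i, dst->src[i]->name);
            }
        }
        std::printf("\n");
    }

    int main() {
        fake_tensor a{"a", {}};
        fake_tensor b{"b", {}};
        fake_tensor out{"out", {&a, &b}};
        print_op_sources("ggml_sycl_add", &out, /*num_src=*/2);   // mirrors the binary-op call sites
        return 0;
    }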