Spaces:
Sleeping
Sleeping
Commit
·
c24f7b1
1
Parent(s):
b480790
CUDA: fix 1D im2col, add tests (ggml/993)
Browse files
- ggml/src/ggml-cuda.cu +0 -1
- ggml/src/ggml-cuda/im2col.cu +3 -3
ggml/src/ggml-cuda.cu
CHANGED
|
@@ -3141,7 +3141,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|
| 3141 |
case GGML_OP_ROPE:
|
| 3142 |
return ggml_is_contiguous(op->src[0]);
|
| 3143 |
case GGML_OP_IM2COL:
|
| 3144 |
-
return op->src[0]->type == GGML_TYPE_F16;
|
| 3145 |
case GGML_OP_POOL_2D:
|
| 3146 |
case GGML_OP_SUM:
|
| 3147 |
case GGML_OP_SUM_ROWS:
|
|
|
|
| 3141 |
case GGML_OP_ROPE:
|
| 3142 |
return ggml_is_contiguous(op->src[0]);
|
| 3143 |
case GGML_OP_IM2COL:
|
|
|
|
| 3144 |
case GGML_OP_POOL_2D:
|
| 3145 |
case GGML_OP_SUM:
|
| 3146 |
case GGML_OP_SUM_ROWS:
|
ggml/src/ggml-cuda/im2col.cu
CHANGED
|
@@ -91,9 +91,9 @@ void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
| 91 |
const int64_t OH = is_2D ? dst->ne[2] : 1;
|
| 92 |
const int64_t OW = dst->ne[1];
|
| 93 |
|
| 94 |
-
const size_t
|
| 95 |
-
const int64_t batch
|
| 96 |
-
const size_t
|
| 97 |
|
| 98 |
if(dst->type == GGML_TYPE_F16) {
|
| 99 |
im2col_cuda_f16(src1_d, (half *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream);
|
|
|
|
| 91 |
const int64_t OH = is_2D ? dst->ne[2] : 1;
|
| 92 |
const int64_t OW = dst->ne[1];
|
| 93 |
|
| 94 |
+
const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
|
| 95 |
+
const int64_t batch = src1->ne[is_2D ? 3 : 2];
|
| 96 |
+
const size_t batch_offset = src1->nb[is_2D ? 3 : 2] / 4; // nb is byte offset, src is type float32
|
| 97 |
|
| 98 |
if(dst->type == GGML_TYPE_F16) {
|
| 99 |
im2col_cuda_f16(src1_d, (half *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream);
|