Spaces:
Sleeping
Sleeping
Add `ggml_roll` (ggml/1274)
* ggml : add ggml_roll
* use set/get_op_params & std::min
- ggml/include/ggml.h +12 -0
- ggml/src/ggml-cpu/ggml-cpu.c +5 -0
- ggml/src/ggml-cpu/ops.cpp +67 -0
- ggml/src/ggml-cpu/ops.h +1 -0
- ggml/src/ggml.c +32 -2
ggml/include/ggml.h
CHANGED
|
@@ -489,6 +489,7 @@ extern "C" {
|
|
| 489 |
GGML_OP_UPSCALE, // nearest interpolate
|
| 490 |
GGML_OP_PAD,
|
| 491 |
GGML_OP_PAD_REFLECT_1D,
|
|
|
|
| 492 |
GGML_OP_ARANGE,
|
| 493 |
GGML_OP_TIMESTEP_EMBEDDING,
|
| 494 |
GGML_OP_ARGSORT,
|
|
@@ -1801,6 +1802,17 @@ extern "C" {
|
|
| 1801 |
int p0,
|
| 1802 |
int p1);
|
| 1803 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1804 |
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
| 1805 |
// timesteps: [N,]
|
| 1806 |
// return: [N, dim]
|
|
|
|
| 489 |
GGML_OP_UPSCALE, // nearest interpolate
|
| 490 |
GGML_OP_PAD,
|
| 491 |
GGML_OP_PAD_REFLECT_1D,
|
| 492 |
+
GGML_OP_ROLL,
|
| 493 |
GGML_OP_ARANGE,
|
| 494 |
GGML_OP_TIMESTEP_EMBEDDING,
|
| 495 |
GGML_OP_ARGSORT,
|
|
|
|
| 1802 |
int p0,
|
| 1803 |
int p1);
|
| 1804 |
|
| 1805 |
+
// Move tensor elements by an offset given for each dimension. Elements that
|
| 1806 |
+
// are shifted beyond the last position are wrapped around to the beginning.
|
| 1807 |
+
GGML_API struct ggml_tensor * ggml_roll(
|
| 1808 |
+
struct ggml_context * ctx,
|
| 1809 |
+
struct ggml_tensor * a,
|
| 1810 |
+
int shift0,
|
| 1811 |
+
int shift1,
|
| 1812 |
+
int shift2,
|
| 1813 |
+
int shift3);
|
| 1814 |
+
|
| 1815 |
+
|
| 1816 |
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
| 1817 |
// timesteps: [N,]
|
| 1818 |
// return: [N, dim]
|
ggml/src/ggml-cpu/ggml-cpu.c
CHANGED
|
@@ -1967,6 +1967,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
| 1967 |
{
|
| 1968 |
ggml_compute_forward_pad_reflect_1d(params, tensor);
|
| 1969 |
} break;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1970 |
case GGML_OP_ARANGE:
|
| 1971 |
{
|
| 1972 |
ggml_compute_forward_arange(params, tensor);
|
|
@@ -2291,6 +2295,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
| 2291 |
case GGML_OP_UPSCALE:
|
| 2292 |
case GGML_OP_PAD:
|
| 2293 |
case GGML_OP_PAD_REFLECT_1D:
|
|
|
|
| 2294 |
case GGML_OP_ARANGE:
|
| 2295 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
| 2296 |
case GGML_OP_ARGSORT:
|
|
|
|
| 1967 |
{
|
| 1968 |
ggml_compute_forward_pad_reflect_1d(params, tensor);
|
| 1969 |
} break;
|
| 1970 |
+
case GGML_OP_ROLL:
|
| 1971 |
+
{
|
| 1972 |
+
ggml_compute_forward_roll(params, tensor);
|
| 1973 |
+
} break;
|
| 1974 |
case GGML_OP_ARANGE:
|
| 1975 |
{
|
| 1976 |
ggml_compute_forward_arange(params, tensor);
|
|
|
|
| 2295 |
case GGML_OP_UPSCALE:
|
| 2296 |
case GGML_OP_PAD:
|
| 2297 |
case GGML_OP_PAD_REFLECT_1D:
|
| 2298 |
+
case GGML_OP_ROLL:
|
| 2299 |
case GGML_OP_ARANGE:
|
| 2300 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
| 2301 |
case GGML_OP_ARGSORT:
|
ggml/src/ggml-cpu/ops.cpp
CHANGED
|
@@ -6793,6 +6793,73 @@ void ggml_compute_forward_pad_reflect_1d(
|
|
| 6793 |
}
|
| 6794 |
}
|
| 6795 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6796 |
// ggml_compute_forward_arange
|
| 6797 |
|
| 6798 |
static void ggml_compute_forward_arange_f32(
|
|
|
|
| 6793 |
}
|
| 6794 |
}
|
| 6795 |
|
| 6796 |
+
// ggml_compute_forward_roll
|
| 6797 |
+
|
| 6798 |
+
// Wrap an index into the range [0, ne) when it is at most one period out of
// range. Callers guarantee |shift| < ne, so a single +/- ne adjustment is
// sufficient; values further out of range are intentionally not normalized.
static int64_t ggml_wrap_index(int64_t i, int64_t ne) {
    if (i >= ne) {
        return i - ne;
    }
    if (i < 0) {
        return i + ne;
    }
    return i;
}
|
| 6806 |
+
|
| 6807 |
+
// F32 implementation of GGML_OP_ROLL: shift elements along each dimension by
// the offsets stored in op_params, wrapping shifted-out elements to the start.
// Rows (dim 0) are handled as two contiguous copies; dims 1-3 are remapped
// per row via ggml_wrap_index. Work is split over threads by flat row index.
static void ggml_compute_forward_roll_f32(
        const ggml_compute_params * params,
        ggml_tensor * dst) {

    const ggml_tensor * src0 = dst->src[0];
    // raw element pointers; dim-0 contiguity (nb0 == sizeof(float)) is
    // asserted by ggml_roll at graph-build time
    const float * src_data = (const float *) src0->data;
    float * dst_data = (float *) dst->data;

    // declares ne0..ne3/nb0..nb3 (dst) and ne00..ne03/nb00..nb03 (src0)
    GGML_TENSOR_UNARY_OP_LOCALS

    // per-dimension shifts set by ggml_roll
    const int s0 = ggml_get_op_params_i32(dst, 0);
    const int s1 = ggml_get_op_params_i32(dst, 1);
    const int s2 = ggml_get_op_params_i32(dst, 2);
    const int s3 = ggml_get_op_params_i32(dst, 3);

    // partition the rows (all indices over dims 1-3) across nth threads;
    // per_thread rounds up, std::min clamps the final thread's range
    const int64_t total = ne1 * ne2 * ne3;
    const int64_t per_thread = (total + params->nth) / params->nth;
    const int64_t start = params->ith * per_thread;
    const int64_t end = std::min(start + per_thread, total);

    for (int64_t i = start; i < end; ++i) {
        // decode flat row index back into (i1, i2, i3)
        const int64_t i1 = i % ne1;
        const int64_t i2 = (i / ne1) % ne2;
        const int64_t i3 = i / (ne2 * ne1);
        // nb* are byte strides, hence the division by sizeof(float)
        float * dst_row = dst_data + (i3*nb3 + i2*nb2 + i1*nb1) / sizeof(float);

        // source row: roll by s means dst[i] = src[i - s], wrapped
        const int64_t i01 = ggml_wrap_index(i1 - s1, ne01);
        const int64_t i02 = ggml_wrap_index(i2 - s2, ne02);
        const int64_t i03 = ggml_wrap_index(i3 - s3, ne03);
        const float * src_row = src_data + (i03*nb03 + i02*nb02 + i01*nb01) / sizeof(float);

        // roll within the row: copy the tail [s, ne00) to the front, then
        // the head [0, s) to the back
        const int64_t s = ggml_wrap_index(-s0, ne00);
        const int64_t n = ne00 - s;
        ggml_vec_cpy_f32(n, dst_row, src_row + s);
        ggml_vec_cpy_f32(s, dst_row + n, src_row);
    }
}
|
| 6844 |
+
|
| 6845 |
+
void ggml_compute_forward_roll(
|
| 6846 |
+
const ggml_compute_params * params,
|
| 6847 |
+
ggml_tensor * dst) {
|
| 6848 |
+
|
| 6849 |
+
const ggml_tensor * src0 = dst->src[0];
|
| 6850 |
+
|
| 6851 |
+
switch (src0->type) {
|
| 6852 |
+
case GGML_TYPE_F32:
|
| 6853 |
+
{
|
| 6854 |
+
ggml_compute_forward_roll_f32(params, dst);
|
| 6855 |
+
} break;
|
| 6856 |
+
default:
|
| 6857 |
+
{
|
| 6858 |
+
GGML_ABORT("fatal error");
|
| 6859 |
+
}
|
| 6860 |
+
}
|
| 6861 |
+
}
|
| 6862 |
+
|
| 6863 |
// ggml_compute_forward_arange
|
| 6864 |
|
| 6865 |
static void ggml_compute_forward_arange_f32(
|
ggml/src/ggml-cpu/ops.h
CHANGED
|
@@ -72,6 +72,7 @@ void ggml_compute_forward_pool_2d_back(const struct ggml_compute_params * params
|
|
| 72 |
void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 73 |
void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 74 |
void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
|
|
| 75 |
void ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 76 |
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 77 |
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
|
|
| 72 |
void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 73 |
void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 74 |
void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 75 |
+
void ggml_compute_forward_roll(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 76 |
void ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 77 |
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
| 78 |
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
ggml/src/ggml.c
CHANGED
|
@@ -955,6 +955,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
| 955 |
"UPSCALE",
|
| 956 |
"PAD",
|
| 957 |
"PAD_REFLECT_1D",
|
|
|
|
| 958 |
"ARANGE",
|
| 959 |
"TIMESTEP_EMBEDDING",
|
| 960 |
"ARGSORT",
|
|
@@ -985,7 +986,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
| 985 |
"OPT_STEP_ADAMW",
|
| 986 |
};
|
| 987 |
|
| 988 |
-
static_assert(GGML_OP_COUNT ==
|
| 989 |
|
| 990 |
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
| 991 |
"none",
|
|
@@ -1050,6 +1051,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
| 1050 |
"upscale(x)",
|
| 1051 |
"pad(x)",
|
| 1052 |
"pad_reflect_1d(x)",
|
|
|
|
| 1053 |
"arange(start, stop, step)",
|
| 1054 |
"timestep_embedding(timesteps, dim, max_period)",
|
| 1055 |
"argsort(x)",
|
|
@@ -1080,7 +1082,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
| 1080 |
"adamw(x)",
|
| 1081 |
};
|
| 1082 |
|
| 1083 |
-
static_assert(GGML_OP_COUNT ==
|
| 1084 |
|
| 1085 |
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
| 1086 |
|
|
@@ -4341,6 +4343,34 @@ struct ggml_tensor * ggml_pad_reflect_1d(
|
|
| 4341 |
return result;
|
| 4342 |
}
|
| 4343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4344 |
// ggml_arange
|
| 4345 |
|
| 4346 |
struct ggml_tensor * ggml_arange(
|
|
|
|
| 955 |
"UPSCALE",
|
| 956 |
"PAD",
|
| 957 |
"PAD_REFLECT_1D",
|
| 958 |
+
"ROLL",
|
| 959 |
"ARANGE",
|
| 960 |
"TIMESTEP_EMBEDDING",
|
| 961 |
"ARGSORT",
|
|
|
|
| 986 |
"OPT_STEP_ADAMW",
|
| 987 |
};
|
| 988 |
|
| 989 |
+
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
|
| 990 |
|
| 991 |
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
| 992 |
"none",
|
|
|
|
| 1051 |
"upscale(x)",
|
| 1052 |
"pad(x)",
|
| 1053 |
"pad_reflect_1d(x)",
|
| 1054 |
+
"roll(x)",
|
| 1055 |
"arange(start, stop, step)",
|
| 1056 |
"timestep_embedding(timesteps, dim, max_period)",
|
| 1057 |
"argsort(x)",
|
|
|
|
| 1082 |
"adamw(x)",
|
| 1083 |
};
|
| 1084 |
|
| 1085 |
+
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
|
| 1086 |
|
| 1087 |
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
| 1088 |
|
|
|
|
| 4343 |
return result;
|
| 4344 |
}
|
| 4345 |
|
| 4346 |
+
// ggml_roll
|
| 4347 |
+
|
| 4348 |
+
struct ggml_tensor * ggml_roll(
|
| 4349 |
+
struct ggml_context * ctx,
|
| 4350 |
+
struct ggml_tensor * a,
|
| 4351 |
+
int shift0,
|
| 4352 |
+
int shift1,
|
| 4353 |
+
int shift2,
|
| 4354 |
+
int shift3) {
|
| 4355 |
+
GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
|
| 4356 |
+
GGML_ASSERT(abs(shift0) < a->ne[0]);
|
| 4357 |
+
GGML_ASSERT(abs(shift1) < a->ne[1]);
|
| 4358 |
+
GGML_ASSERT(abs(shift2) < a->ne[2]);
|
| 4359 |
+
GGML_ASSERT(abs(shift3) < a->ne[3]);
|
| 4360 |
+
|
| 4361 |
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
| 4362 |
+
|
| 4363 |
+
ggml_set_op_params_i32(result, 0, shift0);
|
| 4364 |
+
ggml_set_op_params_i32(result, 1, shift1);
|
| 4365 |
+
ggml_set_op_params_i32(result, 2, shift2);
|
| 4366 |
+
ggml_set_op_params_i32(result, 3, shift3);
|
| 4367 |
+
|
| 4368 |
+
result->op = GGML_OP_ROLL;
|
| 4369 |
+
result->src[0] = a;
|
| 4370 |
+
|
| 4371 |
+
return result;
|
| 4372 |
+
}
|
| 4373 |
+
|
| 4374 |
// ggml_arange
|
| 4375 |
|
| 4376 |
struct ggml_tensor * ggml_arange(
|