JohannesGaessler committed
Commit ad3f29d · 1 Parent(s): 1d52105

ggml: fix gradient allocation logic (ggml/966)


* ggml: fix gradient allocation logic

* gradient allocation in ggml_build_backward_expand

* fixup

* fix test-backend-ops grad

* suggestions by slaren

* fix test1.c

* fix legacy opt API

* fix test-grad0

* remove keep arg
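
For callers, the practical upshot of the bullets above is the new ggml_build_backward_expand signature shown in the header diff below: the `keep` argument is gone, only `accumulate` remains, and gradient tensors are now allocated inside that call. What follows is a minimal sketch of a training-style graph built against the new signature; the tensor shapes, memory size, and the use of ggml_new_graph_custom/ggml_graph_dup are illustrative assumptions, not part of this commit.

#include "ggml.h"

// Sketch: build a forward graph ending in a loss, then expand the backward graph
// with the new 4-argument ggml_build_backward_expand (no `keep` flag).
static void example_backward_graph(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 128*1024*1024, // generous placeholder
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * weights = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 8);
    struct ggml_tensor * inputs  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 4);
    struct ggml_tensor * labels  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32,  8, 4);

    ggml_set_param(ctx, weights); // trainable -> GGML_TENSOR_FLAG_PARAM

    struct ggml_tensor * logits = ggml_mul_mat(ctx, weights, inputs);           // [8, 4]
    struct ggml_tensor * loss   = ggml_cross_entropy_loss(ctx, logits, labels); // scalar
    ggml_set_loss(loss);          // loss tensor -> GGML_TENSOR_FLAG_LOSS

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gf, loss);

    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    // New signature: gradients are allocated here, not in the individual op constructors.
    ggml_build_backward_expand(ctx, gf, gb, /*accumulate =*/ false);

    ggml_free(ctx);
}

Only the four-argument ggml_build_backward_expand call reflects the header change in this commit; everything else in the sketch is generic ggml graph setup.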

Files changed (2)
  1. ggml/include/ggml.h +20 -20
  2. ggml/src/ggml.c +442 -1024
ggml/include/ggml.h CHANGED
@@ -577,10 +577,10 @@ extern "C" {
577
 
578
  // this tensor...
579
  enum ggml_tensor_flag {
580
- GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
581
- GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
582
- GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
583
- GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
584
  };
585
 
586
  // n-dimensional tensor
@@ -1410,14 +1410,14 @@ extern "C" {
1410
  // supports 3D: a->ne[2] == b->ne[1]
1411
  GGML_API struct ggml_tensor * ggml_get_rows(
1412
  struct ggml_context * ctx,
1413
- struct ggml_tensor * a,
1414
- struct ggml_tensor * b);
1415
 
1416
  GGML_API struct ggml_tensor * ggml_get_rows_back(
1417
  struct ggml_context * ctx,
1418
- struct ggml_tensor * a,
1419
- struct ggml_tensor * b,
1420
- struct ggml_tensor * c);
1421
 
1422
  GGML_API struct ggml_tensor * ggml_diag(
1423
  struct ggml_context * ctx,
@@ -1568,9 +1568,9 @@ extern "C" {
1568
  // a - dy
1569
  GGML_API struct ggml_tensor * ggml_rope_back(
1570
  struct ggml_context * ctx,
1571
- struct ggml_tensor * a,
1572
- struct ggml_tensor * b,
1573
- struct ggml_tensor * c,
1574
  int n_dims,
1575
  int mode,
1576
  int n_ctx_orig,
@@ -2036,15 +2036,15 @@ extern "C" {
2036
  // loss function
2037
 
2038
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
2039
- struct ggml_context * ctx,
2040
- struct ggml_tensor * a,
2041
- struct ggml_tensor * b);
2042
 
2043
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
2044
- struct ggml_context * ctx,
2045
- struct ggml_tensor * a,
2046
- struct ggml_tensor * b,
2047
- struct ggml_tensor * c);
2048
 
2049
  // AdamW optimizer step
2050
  // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
@@ -2066,7 +2066,7 @@ extern "C" {
2066
  GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
2067
 
2068
  GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
2069
- GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate, bool keep);
2070
 
2071
  GGML_API void ggml_build_opt_adamw(
2072
  struct ggml_context * ctx,
 
577
 
578
  // this tensor...
579
  enum ggml_tensor_flag {
580
+ GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
581
+ GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
582
+ GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
583
+ GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
584
  };
585
 
586
  // n-dimensional tensor
 
1410
  // supports 3D: a->ne[2] == b->ne[1]
1411
  GGML_API struct ggml_tensor * ggml_get_rows(
1412
  struct ggml_context * ctx,
1413
+ struct ggml_tensor * a, // data
1414
+ struct ggml_tensor * b); // row indices
1415
 
1416
  GGML_API struct ggml_tensor * ggml_get_rows_back(
1417
  struct ggml_context * ctx,
1418
+ struct ggml_tensor * a, // gradients of ggml_get_rows result
1419
+ struct ggml_tensor * b, // row indices
1420
+ struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
1421
 
1422
  GGML_API struct ggml_tensor * ggml_diag(
1423
  struct ggml_context * ctx,
 
1568
  // a - dy
1569
  GGML_API struct ggml_tensor * ggml_rope_back(
1570
  struct ggml_context * ctx,
1571
+ struct ggml_tensor * a, // gradients of ggml_rope result
1572
+ struct ggml_tensor * b, // positions
1573
+ struct ggml_tensor * c, // freq factors
1574
  int n_dims,
1575
  int mode,
1576
  int n_ctx_orig,
 
2036
  // loss function
2037
 
2038
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
2039
+ struct ggml_context * ctx,
2040
+ struct ggml_tensor * a, // logits
2041
+ struct ggml_tensor * b); // labels
2042
 
2043
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
2044
+ struct ggml_context * ctx,
2045
+ struct ggml_tensor * a, // logits
2046
+ struct ggml_tensor * b, // labels
2047
+ struct ggml_tensor * c); // gradients of cross_entropy_loss result
2048
 
2049
  // AdamW optimizer step
2050
  // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
 
2066
  GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
2067
 
2068
  GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
2069
+ GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate);
2070
 
2071
  GGML_API void ggml_build_opt_adamw(
2072
  struct ggml_context * ctx,
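
The ggml.c hunks that follow are dominated by one repeated deletion: the per-operator `is_node` check and the eager `result->grad = ggml_dup_tensor(...)` allocation. Below is a condensed before/after sketch of that pattern, using a square op as a stand-in; it paraphrases the diff for illustration and is not copied verbatim from the new file.

#include "ggml.h"

// BEFORE (pattern removed throughout ggml.c below): every constructor decided whether
// its result needed a gradient and allocated that gradient tensor immediately.
static struct ggml_tensor * ggml_sqr_impl_before(
        struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) {
    bool is_node = !inplace && a->grad != NULL;

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SQR;
    result->grad   = is_node ? ggml_dup_tensor(ctx, result) : NULL; // eager allocation
    result->src[0] = a;

    return result;
}

// AFTER (sketch of the pattern the diff converges on): the constructor only records the
// op and its sources; gradient tensors are allocated later, in one place, when
// ggml_build_backward_expand() walks the graph.
static struct ggml_tensor * ggml_sqr_impl_after(
        struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SQR;
    result->src[0] = a;

    return result;
}

The net effect, per the commit title, is that the decision of which tensors get gradients is made once, during backward-graph expansion, rather than being scattered across every operator constructor.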
ggml/src/ggml.c CHANGED
@@ -4725,18 +4725,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
4725
 
4726
  static struct ggml_tensor * ggml_dup_impl(
4727
  struct ggml_context * ctx,
4728
- struct ggml_tensor * a,
4729
- bool inplace) {
4730
- bool is_node = false;
4731
-
4732
- if (!inplace && (a->grad)) {
4733
- is_node = true;
4734
- }
4735
-
4736
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4737
 
4738
- result->op = GGML_OP_DUP;
4739
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
4740
  result->src[0] = a;
4741
 
4742
  return result;
@@ -4744,13 +4737,13 @@ static struct ggml_tensor * ggml_dup_impl(
4744
 
4745
  struct ggml_tensor * ggml_dup(
4746
  struct ggml_context * ctx,
4747
- struct ggml_tensor * a) {
4748
  return ggml_dup_impl(ctx, a, false);
4749
  }
4750
 
4751
  struct ggml_tensor * ggml_dup_inplace(
4752
  struct ggml_context * ctx,
4753
- struct ggml_tensor * a) {
4754
  return ggml_dup_impl(ctx, a, true);
4755
  }
4756
 
@@ -4758,21 +4751,14 @@ struct ggml_tensor * ggml_dup_inplace(
4758
 
4759
  static struct ggml_tensor * ggml_add_impl(
4760
  struct ggml_context * ctx,
4761
- struct ggml_tensor * a,
4762
- struct ggml_tensor * b,
4763
- bool inplace) {
4764
  GGML_ASSERT(ggml_can_repeat(b, a));
4765
 
4766
- bool is_node = false;
4767
-
4768
- if (!inplace && (a->grad || b->grad)) {
4769
- is_node = true;
4770
- }
4771
-
4772
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4773
 
4774
- result->op = GGML_OP_ADD;
4775
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
4776
  result->src[0] = a;
4777
  result->src[1] = b;
4778
 
@@ -4781,15 +4767,15 @@ static struct ggml_tensor * ggml_add_impl(
4781
 
4782
  struct ggml_tensor * ggml_add(
4783
  struct ggml_context * ctx,
4784
- struct ggml_tensor * a,
4785
- struct ggml_tensor * b) {
4786
  return ggml_add_impl(ctx, a, b, false);
4787
  }
4788
 
4789
  struct ggml_tensor * ggml_add_inplace(
4790
  struct ggml_context * ctx,
4791
- struct ggml_tensor * a,
4792
- struct ggml_tensor * b) {
4793
  return ggml_add_impl(ctx, a, b, true);
4794
  }
4795
 
@@ -4797,9 +4783,9 @@ struct ggml_tensor * ggml_add_inplace(
4797
 
4798
  static struct ggml_tensor * ggml_add_cast_impl(
4799
  struct ggml_context * ctx,
4800
- struct ggml_tensor * a,
4801
- struct ggml_tensor * b,
4802
- enum ggml_type type) {
4803
  // TODO: support less-strict constraint
4804
  // GGML_ASSERT(ggml_can_repeat(b, a));
4805
  GGML_ASSERT(ggml_can_repeat_rows(b, a));
@@ -4809,18 +4795,9 @@ static struct ggml_tensor * ggml_add_cast_impl(
4809
  a->type == GGML_TYPE_F16 ||
4810
  a->type == GGML_TYPE_BF16);
4811
 
4812
- bool is_node = false;
4813
-
4814
- if (a->grad || b->grad) {
4815
- // TODO: support backward pass for broadcasting
4816
- GGML_ASSERT(ggml_are_same_shape(a, b));
4817
- is_node = true;
4818
- }
4819
-
4820
  struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
4821
 
4822
- result->op = GGML_OP_ADD;
4823
- result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne) : NULL;
4824
  result->src[0] = a;
4825
  result->src[1] = b;
4826
 
@@ -4829,9 +4806,9 @@ static struct ggml_tensor * ggml_add_cast_impl(
4829
 
4830
  struct ggml_tensor * ggml_add_cast(
4831
  struct ggml_context * ctx,
4832
- struct ggml_tensor * a,
4833
- struct ggml_tensor * b,
4834
- enum ggml_type type) {
4835
  return ggml_add_cast_impl(ctx, a, b, type);
4836
  }
4837
 
@@ -4839,22 +4816,15 @@ struct ggml_tensor * ggml_add_cast(
4839
 
4840
  static struct ggml_tensor * ggml_add1_impl(
4841
  struct ggml_context * ctx,
4842
- struct ggml_tensor * a,
4843
- struct ggml_tensor * b,
4844
- bool inplace) {
4845
  GGML_ASSERT(ggml_is_scalar(b));
4846
  GGML_ASSERT(ggml_is_padded_1d(a));
4847
 
4848
- bool is_node = false;
4849
-
4850
- if (a->grad || b->grad) {
4851
- is_node = true;
4852
- }
4853
-
4854
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4855
 
4856
- result->op = GGML_OP_ADD1;
4857
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
4858
  result->src[0] = a;
4859
  result->src[1] = b;
4860
 
@@ -4863,15 +4833,15 @@ static struct ggml_tensor * ggml_add1_impl(
4863
 
4864
  struct ggml_tensor * ggml_add1(
4865
  struct ggml_context * ctx,
4866
- struct ggml_tensor * a,
4867
- struct ggml_tensor * b) {
4868
  return ggml_add1_impl(ctx, a, b, false);
4869
  }
4870
 
4871
  struct ggml_tensor * ggml_add1_inplace(
4872
  struct ggml_context * ctx,
4873
- struct ggml_tensor * a,
4874
- struct ggml_tensor * b) {
4875
  return ggml_add1_impl(ctx, a, b, true);
4876
  }
4877
 
@@ -4879,31 +4849,24 @@ struct ggml_tensor * ggml_add1_inplace(
4879
 
4880
  static struct ggml_tensor * ggml_acc_impl(
4881
  struct ggml_context * ctx,
4882
- struct ggml_tensor * a,
4883
- struct ggml_tensor * b,
4884
- size_t nb1,
4885
- size_t nb2,
4886
- size_t nb3,
4887
- size_t offset,
4888
- bool inplace) {
4889
  GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
4890
  GGML_ASSERT(ggml_is_contiguous(a));
4891
  GGML_ASSERT(a->type == GGML_TYPE_F32);
4892
  GGML_ASSERT(b->type == GGML_TYPE_F32);
4893
 
4894
- bool is_node = false;
4895
-
4896
- if (!inplace && (a->grad || b->grad)) {
4897
- is_node = true;
4898
- }
4899
-
4900
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4901
 
4902
  int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
4903
  ggml_set_op_params(result, params, sizeof(params));
4904
 
4905
- result->op = GGML_OP_ACC;
4906
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
4907
  result->src[0] = a;
4908
  result->src[1] = b;
4909
 
@@ -4912,23 +4875,23 @@ static struct ggml_tensor * ggml_acc_impl(
4912
 
4913
  struct ggml_tensor * ggml_acc(
4914
  struct ggml_context * ctx,
4915
- struct ggml_tensor * a,
4916
- struct ggml_tensor * b,
4917
- size_t nb1,
4918
- size_t nb2,
4919
- size_t nb3,
4920
- size_t offset) {
4921
  return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
4922
  }
4923
 
4924
  struct ggml_tensor * ggml_acc_inplace(
4925
  struct ggml_context * ctx,
4926
- struct ggml_tensor * a,
4927
- struct ggml_tensor * b,
4928
- size_t nb1,
4929
- size_t nb2,
4930
- size_t nb3,
4931
- size_t offset) {
4932
  return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
4933
  }
4934
 
@@ -4936,23 +4899,14 @@ struct ggml_tensor * ggml_acc_inplace(
4936
 
4937
  static struct ggml_tensor * ggml_sub_impl(
4938
  struct ggml_context * ctx,
4939
- struct ggml_tensor * a,
4940
- struct ggml_tensor * b,
4941
- bool inplace) {
4942
  GGML_ASSERT(ggml_can_repeat(b, a));
4943
 
4944
- bool is_node = false;
4945
-
4946
- if (!inplace && (a->grad || b->grad)) {
4947
- // TODO: support backward pass for broadcasting
4948
- GGML_ASSERT(ggml_are_same_shape(a, b));
4949
- is_node = true;
4950
- }
4951
-
4952
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4953
 
4954
- result->op = GGML_OP_SUB;
4955
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
4956
  result->src[0] = a;
4957
  result->src[1] = b;
4958
 
@@ -4961,15 +4915,15 @@ static struct ggml_tensor * ggml_sub_impl(
4961
 
4962
  struct ggml_tensor * ggml_sub(
4963
  struct ggml_context * ctx,
4964
- struct ggml_tensor * a,
4965
- struct ggml_tensor * b) {
4966
  return ggml_sub_impl(ctx, a, b, false);
4967
  }
4968
 
4969
  struct ggml_tensor * ggml_sub_inplace(
4970
  struct ggml_context * ctx,
4971
- struct ggml_tensor * a,
4972
- struct ggml_tensor * b) {
4973
  return ggml_sub_impl(ctx, a, b, true);
4974
  }
4975
 
@@ -4977,27 +4931,14 @@ struct ggml_tensor * ggml_sub_inplace(
4977
 
4978
  static struct ggml_tensor * ggml_mul_impl(
4979
  struct ggml_context * ctx,
4980
- struct ggml_tensor * a,
4981
- struct ggml_tensor * b,
4982
- bool inplace) {
4983
  GGML_ASSERT(ggml_can_repeat(b, a));
4984
 
4985
- bool is_node = false;
4986
-
4987
- if (!inplace && (a->grad || b->grad)) {
4988
- // TODO: support backward pass for broadcasting
4989
- GGML_ASSERT(ggml_are_same_shape(a, b));
4990
- is_node = true;
4991
- }
4992
-
4993
- if (inplace) {
4994
- GGML_ASSERT(!is_node);
4995
- }
4996
-
4997
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4998
 
4999
- result->op = GGML_OP_MUL;
5000
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5001
  result->src[0] = a;
5002
  result->src[1] = b;
5003
 
@@ -5022,25 +4963,14 @@ struct ggml_tensor * ggml_mul_inplace(
5022
 
5023
  static struct ggml_tensor * ggml_div_impl(
5024
  struct ggml_context * ctx,
5025
- struct ggml_tensor * a,
5026
- struct ggml_tensor * b,
5027
- bool inplace) {
5028
  GGML_ASSERT(ggml_can_repeat(b, a));
5029
 
5030
- bool is_node = false;
5031
-
5032
- if (!inplace && (a->grad || b->grad)) {
5033
- is_node = true;
5034
- }
5035
-
5036
- if (inplace) {
5037
- GGML_ASSERT(!is_node);
5038
- }
5039
-
5040
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5041
 
5042
- result->op = GGML_OP_DIV;
5043
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5044
  result->src[0] = a;
5045
  result->src[1] = b;
5046
 
@@ -5065,18 +4995,11 @@ struct ggml_tensor * ggml_div_inplace(
5065
 
5066
  static struct ggml_tensor * ggml_sqr_impl(
5067
  struct ggml_context * ctx,
5068
- struct ggml_tensor * a,
5069
- bool inplace) {
5070
- bool is_node = false;
5071
-
5072
- if (!inplace && (a->grad)) {
5073
- is_node = true;
5074
- }
5075
-
5076
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5077
 
5078
- result->op = GGML_OP_SQR;
5079
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5080
  result->src[0] = a;
5081
 
5082
  return result;
@@ -5098,18 +5021,11 @@ struct ggml_tensor * ggml_sqr_inplace(
5098
 
5099
  static struct ggml_tensor * ggml_sqrt_impl(
5100
  struct ggml_context * ctx,
5101
- struct ggml_tensor * a,
5102
- bool inplace) {
5103
- bool is_node = false;
5104
-
5105
- if (!inplace && (a->grad)) {
5106
- is_node = true;
5107
- }
5108
-
5109
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5110
 
5111
- result->op = GGML_OP_SQRT;
5112
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5113
  result->src[0] = a;
5114
 
5115
  return result;
@@ -5132,17 +5048,10 @@ struct ggml_tensor * ggml_sqrt_inplace(
5132
  static struct ggml_tensor * ggml_log_impl(
5133
  struct ggml_context * ctx,
5134
  struct ggml_tensor * a,
5135
- bool inplace) {
5136
- bool is_node = false;
5137
-
5138
- if (!inplace && (a->grad)) {
5139
- is_node = true;
5140
- }
5141
-
5142
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5143
 
5144
- result->op = GGML_OP_LOG;
5145
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5146
  result->src[0] = a;
5147
 
5148
  return result;
@@ -5165,17 +5074,10 @@ struct ggml_tensor * ggml_log_inplace(
5165
  static struct ggml_tensor * ggml_sin_impl(
5166
  struct ggml_context * ctx,
5167
  struct ggml_tensor * a,
5168
- bool inplace) {
5169
- bool is_node = false;
5170
-
5171
- if (!inplace && (a->grad)) {
5172
- is_node = true;
5173
- }
5174
-
5175
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5176
 
5177
- result->op = GGML_OP_SIN;
5178
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5179
  result->src[0] = a;
5180
 
5181
  return result;
@@ -5198,17 +5100,10 @@ struct ggml_tensor * ggml_sin_inplace(
5198
  static struct ggml_tensor * ggml_cos_impl(
5199
  struct ggml_context * ctx,
5200
  struct ggml_tensor * a,
5201
- bool inplace) {
5202
- bool is_node = false;
5203
-
5204
- if (!inplace && (a->grad)) {
5205
- is_node = true;
5206
- }
5207
-
5208
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5209
 
5210
- result->op = GGML_OP_COS;
5211
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5212
  result->src[0] = a;
5213
 
5214
  return result;
@@ -5230,17 +5125,10 @@ struct ggml_tensor * ggml_cos_inplace(
5230
 
5231
  struct ggml_tensor * ggml_sum(
5232
  struct ggml_context * ctx,
5233
- struct ggml_tensor * a) {
5234
- bool is_node = false;
5235
-
5236
- if (a->grad) {
5237
- is_node = true;
5238
- }
5239
-
5240
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
5241
 
5242
- result->op = GGML_OP_SUM;
5243
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5244
  result->src[0] = a;
5245
 
5246
  return result;
@@ -5250,13 +5138,7 @@ struct ggml_tensor * ggml_sum(
5250
 
5251
  struct ggml_tensor * ggml_sum_rows(
5252
  struct ggml_context * ctx,
5253
- struct ggml_tensor * a) {
5254
- bool is_node = false;
5255
-
5256
- if (a->grad) {
5257
- is_node = true;
5258
- }
5259
-
5260
  int64_t ne[GGML_MAX_DIMS] = { 1 };
5261
  for (int i = 1; i < GGML_MAX_DIMS; ++i) {
5262
  ne[i] = a->ne[i];
@@ -5264,8 +5146,7 @@ struct ggml_tensor * ggml_sum_rows(
5264
 
5265
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
5266
 
5267
- result->op = GGML_OP_SUM_ROWS;
5268
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5269
  result->src[0] = a;
5270
 
5271
  return result;
@@ -5275,19 +5156,11 @@ struct ggml_tensor * ggml_sum_rows(
5275
 
5276
  struct ggml_tensor * ggml_mean(
5277
  struct ggml_context * ctx,
5278
- struct ggml_tensor * a) {
5279
- bool is_node = false;
5280
-
5281
- if (a->grad) {
5282
- GGML_ABORT("fatal error"); // TODO: implement
5283
- is_node = true;
5284
- }
5285
-
5286
  int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
5287
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5288
 
5289
- result->op = GGML_OP_MEAN;
5290
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5291
  result->src[0] = a;
5292
 
5293
  return result;
@@ -5297,19 +5170,12 @@ struct ggml_tensor * ggml_mean(
5297
 
5298
  struct ggml_tensor * ggml_argmax(
5299
  struct ggml_context * ctx,
5300
- struct ggml_tensor * a) {
5301
  GGML_ASSERT(ggml_is_matrix(a));
5302
- bool is_node = false;
5303
-
5304
- if (a->grad) {
5305
- GGML_ABORT("fatal error");
5306
- is_node = true;
5307
- }
5308
 
5309
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
5310
 
5311
- result->op = GGML_OP_ARGMAX;
5312
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5313
  result->src[0] = a;
5314
 
5315
  return result;
@@ -5319,20 +5185,13 @@ struct ggml_tensor * ggml_argmax(
5319
 
5320
  struct ggml_tensor * ggml_repeat(
5321
  struct ggml_context * ctx,
5322
- struct ggml_tensor * a,
5323
- struct ggml_tensor * b) {
5324
  GGML_ASSERT(ggml_can_repeat(a, b));
5325
 
5326
- bool is_node = false;
5327
-
5328
- if (a->grad) {
5329
- is_node = true;
5330
- }
5331
-
5332
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
5333
 
5334
- result->op = GGML_OP_REPEAT;
5335
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5336
  result->src[0] = a;
5337
 
5338
  return result;
@@ -5342,24 +5201,13 @@ struct ggml_tensor * ggml_repeat(
5342
 
5343
  struct ggml_tensor * ggml_repeat_back(
5344
  struct ggml_context * ctx,
5345
- struct ggml_tensor * a,
5346
- struct ggml_tensor * b) {
5347
  GGML_ASSERT(ggml_can_repeat(b, a));
5348
 
5349
- bool is_node = false;
5350
-
5351
- if (a->grad) {
5352
- is_node = true;
5353
- }
5354
-
5355
- if (ggml_are_same_shape(a, b) && !is_node) {
5356
- return a;
5357
- }
5358
-
5359
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
5360
 
5361
- result->op = GGML_OP_REPEAT_BACK;
5362
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5363
  result->src[0] = a;
5364
 
5365
  return result;
@@ -5369,9 +5217,9 @@ struct ggml_tensor * ggml_repeat_back(
5369
 
5370
  struct ggml_tensor * ggml_concat(
5371
  struct ggml_context * ctx,
5372
- struct ggml_tensor * a,
5373
- struct ggml_tensor * b,
5374
- int dim) {
5375
  GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
5376
 
5377
  int64_t ne[GGML_MAX_DIMS];
@@ -5384,19 +5232,11 @@ struct ggml_tensor * ggml_concat(
5384
  ne[d] = a->ne[d];
5385
  }
5386
 
5387
- bool is_node = false;
5388
-
5389
- if (a->grad || b->grad) {
5390
- GGML_ABORT("fatal error"); // TODO: implement
5391
- is_node = true;
5392
- }
5393
-
5394
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
5395
 
5396
  ggml_set_op_params_i32(result, 0, dim);
5397
 
5398
- result->op = GGML_OP_CONCAT;
5399
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5400
  result->src[0] = a;
5401
  result->src[1] = b;
5402
 
@@ -5505,20 +5345,14 @@ struct ggml_tensor * ggml_relu_inplace(
5505
 
5506
  struct ggml_tensor * ggml_leaky_relu(
5507
  struct ggml_context * ctx,
5508
- struct ggml_tensor * a, float negative_slope, bool inplace) {
5509
- bool is_node = false;
5510
-
5511
- if (!inplace && (a->grad)) {
5512
- GGML_ABORT("fatal error"); // TODO: not implemented
5513
- is_node = true;
5514
- }
5515
-
5516
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5517
 
5518
  ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
5519
 
5520
- result->op = GGML_OP_LEAKY_RELU;
5521
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5522
  result->src[0] = a;
5523
 
5524
  return result;
@@ -5586,17 +5420,9 @@ struct ggml_tensor * ggml_silu_back(
5586
  struct ggml_context * ctx,
5587
  struct ggml_tensor * a,
5588
  struct ggml_tensor * b) {
5589
- bool is_node = false;
5590
-
5591
- if (a->grad || b->grad) {
5592
- // TODO: implement backward
5593
- is_node = true;
5594
- }
5595
-
5596
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5597
 
5598
- result->op = GGML_OP_SILU_BACK;
5599
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5600
  result->src[0] = a;
5601
  result->src[1] = b;
5602
 
@@ -5604,6 +5430,7 @@ struct ggml_tensor * ggml_silu_back(
5604
  }
5605
 
5606
  // ggml hardswish
 
5607
  struct ggml_tensor * ggml_hardswish(
5608
  struct ggml_context * ctx,
5609
  struct ggml_tensor * a) {
@@ -5611,6 +5438,7 @@ struct ggml_tensor * ggml_hardswish(
5611
  }
5612
 
5613
  // ggml hardsigmoid
 
5614
  struct ggml_tensor * ggml_hardsigmoid(
5615
  struct ggml_context * ctx,
5616
  struct ggml_tensor * a) {
@@ -5618,6 +5446,7 @@ struct ggml_tensor * ggml_hardsigmoid(
5618
  }
5619
 
5620
  // ggml exp
 
5621
  struct ggml_tensor * ggml_exp(
5622
  struct ggml_context * ctx,
5623
  struct ggml_tensor * a) {
@@ -5635,21 +5464,13 @@ struct ggml_tensor * ggml_exp_inplace(
5635
  static struct ggml_tensor * ggml_norm_impl(
5636
  struct ggml_context * ctx,
5637
  struct ggml_tensor * a,
5638
- float eps,
5639
- bool inplace) {
5640
- bool is_node = false;
5641
-
5642
- if (!inplace && (a->grad)) {
5643
- GGML_ABORT("fatal error"); // TODO: implement backward
5644
- is_node = true;
5645
- }
5646
-
5647
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5648
 
5649
  ggml_set_op_params(result, &eps, sizeof(eps));
5650
 
5651
- result->op = GGML_OP_NORM;
5652
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5653
  result->src[0] = a;
5654
 
5655
  return result;
@@ -5658,14 +5479,14 @@ static struct ggml_tensor * ggml_norm_impl(
5658
  struct ggml_tensor * ggml_norm(
5659
  struct ggml_context * ctx,
5660
  struct ggml_tensor * a,
5661
- float eps) {
5662
  return ggml_norm_impl(ctx, a, eps, false);
5663
  }
5664
 
5665
  struct ggml_tensor * ggml_norm_inplace(
5666
  struct ggml_context * ctx,
5667
  struct ggml_tensor * a,
5668
- float eps) {
5669
  return ggml_norm_impl(ctx, a, eps, true);
5670
  }
5671
 
@@ -5674,20 +5495,13 @@ struct ggml_tensor * ggml_norm_inplace(
5674
  static struct ggml_tensor * ggml_rms_norm_impl(
5675
  struct ggml_context * ctx,
5676
  struct ggml_tensor * a,
5677
- float eps,
5678
- bool inplace) {
5679
- bool is_node = false;
5680
-
5681
- if (!inplace && (a->grad)) {
5682
- is_node = true;
5683
- }
5684
-
5685
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5686
 
5687
  ggml_set_op_params(result, &eps, sizeof(eps));
5688
 
5689
- result->op = GGML_OP_RMS_NORM;
5690
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5691
  result->src[0] = a;
5692
 
5693
  return result;
@@ -5696,14 +5510,14 @@ static struct ggml_tensor * ggml_rms_norm_impl(
5696
  struct ggml_tensor * ggml_rms_norm(
5697
  struct ggml_context * ctx,
5698
  struct ggml_tensor * a,
5699
- float eps) {
5700
  return ggml_rms_norm_impl(ctx, a, eps, false);
5701
  }
5702
 
5703
  struct ggml_tensor * ggml_rms_norm_inplace(
5704
  struct ggml_context * ctx,
5705
  struct ggml_tensor * a,
5706
- float eps) {
5707
  return ggml_rms_norm_impl(ctx, a, eps, true);
5708
  }
5709
 
@@ -5713,20 +5527,12 @@ struct ggml_tensor * ggml_rms_norm_back(
5713
  struct ggml_context * ctx,
5714
  struct ggml_tensor * a,
5715
  struct ggml_tensor * b,
5716
- float eps) {
5717
- bool is_node = false;
5718
-
5719
- if (a->grad) {
5720
- // TODO: implement backward
5721
- is_node = true;
5722
- }
5723
-
5724
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5725
 
5726
  ggml_set_op_params(result, &eps, sizeof(eps));
5727
 
5728
- result->op = GGML_OP_RMS_NORM_BACK;
5729
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5730
  result->src[0] = a;
5731
  result->src[1] = b;
5732
 
@@ -5736,43 +5542,35 @@ struct ggml_tensor * ggml_rms_norm_back(
5736
  // ggml_group_norm
5737
 
5738
  static struct ggml_tensor * ggml_group_norm_impl(
5739
- struct ggml_context * ctx,
5740
- struct ggml_tensor * a,
5741
- int n_groups,
5742
- float eps,
5743
- bool inplace) {
5744
-
5745
- bool is_node = false;
5746
- if (!inplace && (a->grad)) {
5747
- GGML_ABORT("fatal error"); // TODO: implement backward
5748
- is_node = true;
5749
- }
5750
-
5751
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5752
 
5753
  ggml_set_op_params_i32(result, 0, n_groups);
5754
  ggml_set_op_params_f32(result, 1, eps);
5755
 
5756
- result->op = GGML_OP_GROUP_NORM;
5757
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5758
  result->src[0] = a;
5759
 
5760
  return result;
5761
  }
5762
 
5763
  struct ggml_tensor * ggml_group_norm(
5764
- struct ggml_context * ctx,
5765
- struct ggml_tensor * a,
5766
- int n_groups,
5767
- float eps) {
5768
  return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
5769
  }
5770
 
5771
  struct ggml_tensor * ggml_group_norm_inplace(
5772
- struct ggml_context * ctx,
5773
- struct ggml_tensor * a,
5774
- int n_groups,
5775
- float eps) {
5776
  return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
5777
  }
5778
 
@@ -5785,17 +5583,10 @@ struct ggml_tensor * ggml_mul_mat(
5785
  GGML_ASSERT(ggml_can_mul_mat(a, b));
5786
  GGML_ASSERT(!ggml_is_transposed(a));
5787
 
5788
- bool is_node = false;
5789
-
5790
- if (a->grad || b->grad) {
5791
- is_node = true;
5792
- }
5793
-
5794
  const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
5795
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5796
 
5797
- result->op = GGML_OP_MUL_MAT;
5798
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5799
  result->src[0] = a;
5800
  result->src[1] = b;
5801
 
@@ -5841,17 +5632,10 @@ struct ggml_tensor * ggml_mul_mat_id(
5841
  GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
5842
  GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
5843
 
5844
- bool is_node = false;
5845
-
5846
- if (as->grad || b->grad) {
5847
- is_node = true;
5848
- }
5849
-
5850
  const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
5851
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5852
 
5853
- result->op = GGML_OP_MUL_MAT_ID;
5854
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5855
  result->src[0] = as;
5856
  result->src[1] = b;
5857
  result->src[2] = ids;
@@ -5868,18 +5652,11 @@ struct ggml_tensor * ggml_out_prod(
5868
  GGML_ASSERT(ggml_can_out_prod(a, b));
5869
  GGML_ASSERT(!ggml_is_transposed(a));
5870
 
5871
- bool is_node = false;
5872
-
5873
- if (a->grad || b->grad) {
5874
- is_node = true;
5875
- }
5876
-
5877
  // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
5878
  const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
5879
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5880
 
5881
- result->op = GGML_OP_OUT_PROD;
5882
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5883
  result->src[0] = a;
5884
  result->src[1] = b;
5885
 
@@ -5892,21 +5669,14 @@ static struct ggml_tensor * ggml_scale_impl(
5892
  struct ggml_context * ctx,
5893
  struct ggml_tensor * a,
5894
  float s,
5895
- bool inplace) {
5896
  GGML_ASSERT(ggml_is_padded_1d(a));
5897
 
5898
- bool is_node = false;
5899
-
5900
- if (a->grad) {
5901
- is_node = true;
5902
- }
5903
-
5904
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5905
 
5906
  ggml_set_op_params(result, &s, sizeof(s));
5907
 
5908
- result->op = GGML_OP_SCALE;
5909
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5910
  result->src[0] = a;
5911
 
5912
  return result;
@@ -5914,15 +5684,15 @@ static struct ggml_tensor * ggml_scale_impl(
5914
 
5915
  struct ggml_tensor * ggml_scale(
5916
  struct ggml_context * ctx,
5917
- struct ggml_tensor * a,
5918
- float s) {
5919
  return ggml_scale_impl(ctx, a, s, false);
5920
  }
5921
 
5922
  struct ggml_tensor * ggml_scale_inplace(
5923
  struct ggml_context * ctx,
5924
- struct ggml_tensor * a,
5925
- float s) {
5926
  return ggml_scale_impl(ctx, a, s, true);
5927
  }
5928
 
@@ -5936,15 +5706,9 @@ static struct ggml_tensor * ggml_set_impl(
5936
  size_t nb2,
5937
  size_t nb3,
5938
  size_t offset,
5939
- bool inplace) {
5940
  GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
5941
 
5942
- bool is_node = false;
5943
-
5944
- if (a->grad || b->grad) {
5945
- is_node = true;
5946
- }
5947
-
5948
  // make a view of the destination
5949
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5950
 
@@ -5952,8 +5716,7 @@ static struct ggml_tensor * ggml_set_impl(
5952
  int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
5953
  ggml_set_op_params(result, params, sizeof(params));
5954
 
5955
- result->op = GGML_OP_SET;
5956
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
5957
  result->src[0] = a;
5958
  result->src[1] = b;
5959
 
@@ -5962,8 +5725,8 @@ static struct ggml_tensor * ggml_set_impl(
5962
 
5963
  struct ggml_tensor * ggml_set(
5964
  struct ggml_context * ctx,
5965
- struct ggml_tensor * a,
5966
- struct ggml_tensor * b,
5967
  size_t nb1,
5968
  size_t nb2,
5969
  size_t nb3,
@@ -5973,8 +5736,8 @@ struct ggml_tensor * ggml_set(
5973
 
5974
  struct ggml_tensor * ggml_set_inplace(
5975
  struct ggml_context * ctx,
5976
- struct ggml_tensor * a,
5977
- struct ggml_tensor * b,
5978
  size_t nb1,
5979
  size_t nb2,
5980
  size_t nb3,
@@ -5984,24 +5747,24 @@ struct ggml_tensor * ggml_set_inplace(
5984
 
5985
  struct ggml_tensor * ggml_set_1d(
5986
  struct ggml_context * ctx,
5987
- struct ggml_tensor * a,
5988
- struct ggml_tensor * b,
5989
  size_t offset) {
5990
  return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
5991
  }
5992
 
5993
  struct ggml_tensor * ggml_set_1d_inplace(
5994
  struct ggml_context * ctx,
5995
- struct ggml_tensor * a,
5996
- struct ggml_tensor * b,
5997
  size_t offset) {
5998
  return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
5999
  }
6000
 
6001
  struct ggml_tensor * ggml_set_2d(
6002
  struct ggml_context * ctx,
6003
- struct ggml_tensor * a,
6004
- struct ggml_tensor * b,
6005
  size_t nb1,
6006
  size_t offset) {
6007
  return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
@@ -6009,8 +5772,8 @@ struct ggml_tensor * ggml_set_2d(
6009
 
6010
  struct ggml_tensor * ggml_set_2d_inplace(
6011
  struct ggml_context * ctx,
6012
- struct ggml_tensor * a,
6013
- struct ggml_tensor * b,
6014
  size_t nb1,
6015
  size_t offset) {
6016
  return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
@@ -6024,13 +5787,6 @@ static struct ggml_tensor * ggml_cpy_impl(
6024
  struct ggml_tensor * b) {
6025
  GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
6026
 
6027
- bool is_node = false;
6028
-
6029
- if (a->grad || b->grad) {
6030
- // inplace is false and either one have a grad
6031
- is_node = true;
6032
- }
6033
-
6034
  // make a view of the destination
6035
  struct ggml_tensor * result = ggml_view_tensor(ctx, b);
6036
  if (strlen(b->name) > 0) {
@@ -6039,8 +5795,7 @@ static struct ggml_tensor * ggml_cpy_impl(
6039
  ggml_format_name(result, "%s (copy)", a->name);
6040
  }
6041
 
6042
- result->op = GGML_OP_CPY;
6043
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6044
  result->src[0] = a;
6045
  result->src[1] = b;
6046
 
@@ -6058,15 +5813,11 @@ struct ggml_tensor * ggml_cast(
6058
  struct ggml_context * ctx,
6059
  struct ggml_tensor * a,
6060
  enum ggml_type type) {
6061
- bool is_node = false;
6062
-
6063
  struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
6064
  ggml_format_name(result, "%s (copy)", a->name);
6065
 
6066
- result->op = GGML_OP_CPY;
6067
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6068
  result->src[0] = a;
6069
- result->src[1] = result;
6070
 
6071
  return result;
6072
  }
@@ -6076,17 +5827,10 @@ struct ggml_tensor * ggml_cast(
6076
  static struct ggml_tensor * ggml_cont_impl(
6077
  struct ggml_context * ctx,
6078
  struct ggml_tensor * a) {
6079
- bool is_node = false;
6080
-
6081
- if (a->grad) {
6082
- is_node = true;
6083
- }
6084
-
6085
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
6086
  ggml_format_name(result, "%s (cont)", a->name);
6087
 
6088
- result->op = GGML_OP_CONT;
6089
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6090
  result->src[0] = a;
6091
 
6092
  return result;
@@ -6132,13 +5876,10 @@ struct ggml_tensor * ggml_cont_4d(
6132
  int64_t ne3) {
6133
  GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
6134
 
6135
- bool is_node = false;
6136
-
6137
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
6138
  ggml_format_name(result, "%s (cont)", a->name);
6139
 
6140
- result->op = GGML_OP_CONT;
6141
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6142
  result->src[0] = a;
6143
 
6144
  return result;
@@ -6154,22 +5895,10 @@ struct ggml_tensor * ggml_reshape(
6154
  // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
6155
  GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
6156
 
6157
- bool is_node = false;
6158
-
6159
- if (a->grad) {
6160
- is_node = true;
6161
- }
6162
-
6163
- if (b->grad) {
6164
- // gradient propagation is not supported
6165
- //GGML_ABORT("fatal error");
6166
- }
6167
-
6168
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
6169
  ggml_format_name(result, "%s (reshaped)", a->name);
6170
 
6171
- result->op = GGML_OP_RESHAPE;
6172
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6173
  result->src[0] = a;
6174
 
6175
  return result;
@@ -6182,18 +5911,11 @@ struct ggml_tensor * ggml_reshape_1d(
6182
  GGML_ASSERT(ggml_is_contiguous(a));
6183
  GGML_ASSERT(ggml_nelements(a) == ne0);
6184
 
6185
- bool is_node = false;
6186
-
6187
- if (a->grad) {
6188
- is_node = true;
6189
- }
6190
-
6191
  const int64_t ne[1] = { ne0 };
6192
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
6193
  ggml_format_name(result, "%s (reshaped)", a->name);
6194
 
6195
- result->op = GGML_OP_RESHAPE;
6196
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6197
  result->src[0] = a;
6198
 
6199
  return result;
@@ -6207,18 +5929,11 @@ struct ggml_tensor * ggml_reshape_2d(
6207
  GGML_ASSERT(ggml_is_contiguous(a));
6208
  GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
6209
 
6210
- bool is_node = false;
6211
-
6212
- if (a->grad) {
6213
- is_node = true;
6214
- }
6215
-
6216
  const int64_t ne[2] = { ne0, ne1 };
6217
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
6218
  ggml_format_name(result, "%s (reshaped)", a->name);
6219
 
6220
- result->op = GGML_OP_RESHAPE;
6221
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6222
  result->src[0] = a;
6223
 
6224
  return result;
@@ -6233,18 +5948,11 @@ struct ggml_tensor * ggml_reshape_3d(
6233
  GGML_ASSERT(ggml_is_contiguous(a));
6234
  GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
6235
 
6236
- bool is_node = false;
6237
-
6238
- if (a->grad) {
6239
- is_node = true;
6240
- }
6241
-
6242
  const int64_t ne[3] = { ne0, ne1, ne2 };
6243
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
6244
  ggml_format_name(result, "%s (reshaped)", a->name);
6245
 
6246
- result->op = GGML_OP_RESHAPE;
6247
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6248
  result->src[0] = a;
6249
 
6250
  return result;
@@ -6260,18 +5968,11 @@ struct ggml_tensor * ggml_reshape_4d(
6260
  GGML_ASSERT(ggml_is_contiguous(a));
6261
  GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
6262
 
6263
- bool is_node = false;
6264
-
6265
- if (a->grad) {
6266
- is_node = true;
6267
- }
6268
-
6269
  const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
6270
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
6271
  ggml_format_name(result, "%s (reshaped)", a->name);
6272
 
6273
- result->op = GGML_OP_RESHAPE;
6274
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6275
  result->src[0] = a;
6276
 
6277
  return result;
@@ -6283,20 +5984,12 @@ static struct ggml_tensor * ggml_view_impl(
6283
  int n_dims,
6284
  const int64_t * ne,
6285
  size_t offset) {
6286
-
6287
- bool is_node = false;
6288
-
6289
- if (a->grad) {
6290
- is_node = true;
6291
- }
6292
-
6293
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
6294
  ggml_format_name(result, "%s (view)", a->name);
6295
 
6296
  ggml_set_op_params(result, &offset, sizeof(offset));
6297
 
6298
- result->op = GGML_OP_VIEW;
6299
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6300
  result->src[0] = a;
6301
 
6302
  return result;
@@ -6309,7 +6002,6 @@ struct ggml_tensor * ggml_view_1d(
6309
  struct ggml_tensor * a,
6310
  int64_t ne0,
6311
  size_t offset) {
6312
-
6313
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
6314
 
6315
  return result;
@@ -6324,7 +6016,6 @@ struct ggml_tensor * ggml_view_2d(
6324
  int64_t ne1,
6325
  size_t nb1,
6326
  size_t offset) {
6327
-
6328
  const int64_t ne[2] = { ne0, ne1 };
6329
 
6330
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
@@ -6347,7 +6038,6 @@ struct ggml_tensor * ggml_view_3d(
6347
  size_t nb1,
6348
  size_t nb2,
6349
  size_t offset) {
6350
-
6351
  const int64_t ne[3] = { ne0, ne1, ne2 };
6352
 
6353
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
@@ -6372,7 +6062,6 @@ struct ggml_tensor * ggml_view_4d(
6372
  size_t nb2,
6373
  size_t nb3,
6374
  size_t offset) {
6375
-
6376
  const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
6377
 
6378
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
@@ -6405,12 +6094,6 @@ struct ggml_tensor * ggml_permute(
6405
  GGML_ASSERT(axis1 != axis3);
6406
  GGML_ASSERT(axis2 != axis3);
6407
 
6408
- bool is_node = false;
6409
-
6410
- if (a->grad) {
6411
- is_node = true;
6412
- }
6413
-
6414
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6415
  ggml_format_name(result, "%s (permuted)", a->name);
6416
 
@@ -6437,8 +6120,7 @@ struct ggml_tensor * ggml_permute(
6437
  result->nb[2] = nb[2];
6438
  result->nb[3] = nb[3];
6439
 
6440
- result->op = GGML_OP_PERMUTE;
6441
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6442
  result->src[0] = a;
6443
 
6444
  int32_t params[] = { axis0, axis1, axis2, axis3 };
@@ -6452,12 +6134,6 @@ struct ggml_tensor * ggml_permute(
6452
  struct ggml_tensor * ggml_transpose(
6453
  struct ggml_context * ctx,
6454
  struct ggml_tensor * a) {
6455
- bool is_node = false;
6456
-
6457
- if (a->grad) {
6458
- is_node = true;
6459
- }
6460
-
6461
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6462
  ggml_format_name(result, "%s (transposed)", a->name);
6463
 
@@ -6467,8 +6143,7 @@ struct ggml_tensor * ggml_transpose(
6467
  result->nb[0] = a->nb[1];
6468
  result->nb[1] = a->nb[0];
6469
 
6470
- result->op = GGML_OP_TRANSPOSE;
6471
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6472
  result->src[0] = a;
6473
 
6474
  return result;
@@ -6484,12 +6159,6 @@ struct ggml_tensor * ggml_get_rows(
6484
  GGML_ASSERT(b->ne[3] == 1);
6485
  GGML_ASSERT(b->type == GGML_TYPE_I32);
6486
 
6487
- bool is_node = false;
6488
-
6489
- if (a->grad || b->grad) {
6490
- is_node = true;
6491
- }
6492
-
6493
  // TODO: implement non F32 return
6494
  enum ggml_type type = GGML_TYPE_F32;
6495
  if (a->type == GGML_TYPE_I32) {
@@ -6497,8 +6166,7 @@ struct ggml_tensor * ggml_get_rows(
6497
  }
6498
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
6499
 
6500
- result->op = GGML_OP_GET_ROWS;
6501
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6502
  result->src[0] = a;
6503
  result->src[1] = b;
6504
 
@@ -6515,18 +6183,11 @@ struct ggml_tensor * ggml_get_rows_back(
6515
  GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
6516
  GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
6517
 
6518
- bool is_node = false;
6519
-
6520
- if (a->grad || b->grad) {
6521
- is_node = true;
6522
- }
6523
-
6524
  // TODO: implement non F32 return
6525
  //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
6526
  struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
6527
 
6528
- result->op = GGML_OP_GET_ROWS_BACK;
6529
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6530
  result->src[0] = a;
6531
  result->src[1] = b;
6532
 
@@ -6539,17 +6200,11 @@ struct ggml_tensor * ggml_diag(
6539
  struct ggml_context * ctx,
6540
  struct ggml_tensor * a) {
6541
  GGML_ASSERT(a->ne[1] == 1);
6542
- bool is_node = false;
6543
-
6544
- if (a->grad) {
6545
- is_node = true;
6546
- }
6547
 
6548
  const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
6549
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
6550
 
6551
- result->op = GGML_OP_DIAG;
6552
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6553
  result->src[0] = a;
6554
 
6555
  return result;
@@ -6562,19 +6217,12 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
6562
  struct ggml_tensor * a,
6563
  int n_past,
6564
  bool inplace) {
6565
- bool is_node = false;
6566
-
6567
- if (a->grad) {
6568
- is_node = true;
6569
- }
6570
-
6571
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6572
 
6573
  int32_t params[] = { n_past };
6574
  ggml_set_op_params(result, params, sizeof(params));
6575
 
6576
- result->op = GGML_OP_DIAG_MASK_INF;
6577
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6578
  result->src[0] = a;
6579
 
6580
  return result;
@@ -6601,19 +6249,12 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
6601
  struct ggml_tensor * a,
6602
  int n_past,
6603
  bool inplace) {
6604
- bool is_node = false;
6605
-
6606
- if (a->grad) {
6607
- is_node = true;
6608
- }
6609
-
6610
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6611
 
6612
  int32_t params[] = { n_past };
6613
  ggml_set_op_params(result, params, sizeof(params));
6614
 
6615
- result->op = GGML_OP_DIAG_MASK_ZERO;
6616
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6617
  result->src[0] = a;
6618
 
6619
  return result;
@@ -6656,19 +6297,12 @@ static struct ggml_tensor * ggml_soft_max_impl(
6656
  GGML_ASSERT(mask);
6657
  }
6658
 
6659
- bool is_node = false;
6660
-
6661
- if (a->grad) {
6662
- is_node = true;
6663
- }
6664
-
6665
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6666
 
6667
  float params[] = { scale, max_bias };
6668
  ggml_set_op_params(result, params, sizeof(params));
6669
 
6670
- result->op = GGML_OP_SOFT_MAX;
6671
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6672
  result->src[0] = a;
6673
  result->src[1] = mask;
6674
 
@@ -6703,16 +6337,9 @@ static struct ggml_tensor * ggml_soft_max_back_impl(
6703
  struct ggml_tensor * a,
6704
  struct ggml_tensor * b,
6705
  bool inplace) {
6706
- bool is_node = false;
6707
-
6708
- if (a->grad || b->grad) {
6709
- is_node = true; // TODO : implement backward pass
6710
- }
6711
-
6712
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6713
 
6714
- result->op = GGML_OP_SOFT_MAX_BACK;
6715
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6716
  result->src[0] = a;
6717
  result->src[1] = b;
6718
 
@@ -6761,12 +6388,6 @@ static struct ggml_tensor * ggml_rope_impl(
6761
  GGML_ASSERT(c->ne[0] >= n_dims / 2);
6762
  }
6763
 
6764
- bool is_node = false;
6765
-
6766
- if (a->grad) {
6767
- is_node = true;
6768
- }
6769
-
6770
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6771
 
6772
  int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
@@ -6778,8 +6399,7 @@ static struct ggml_tensor * ggml_rope_impl(
6778
  memcpy(params + 10, &beta_slow, sizeof(float));
6779
  ggml_set_op_params(result, params, sizeof(params));
6780
 
6781
- result->op = GGML_OP_ROPE;
6782
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6783
  result->src[0] = a;
6784
  result->src[1] = b;
6785
  result->src[2] = c;
@@ -6907,13 +6527,6 @@ struct ggml_tensor * ggml_rope_back(
6907
  GGML_ASSERT(b->type == GGML_TYPE_I32);
6908
  GGML_ASSERT(a->ne[2] == b->ne[0]);
6909
 
6910
- bool is_node = false;
6911
-
6912
- if (a->grad) {
6913
- GGML_ASSERT(false && "backwards pass not implemented");
6914
- is_node = false;
6915
- }
6916
-
6917
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
6918
 
6919
  int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
@@ -6925,8 +6538,7 @@ struct ggml_tensor * ggml_rope_back(
6925
  memcpy(params + 10, &beta_slow, sizeof(float));
6926
  ggml_set_op_params(result, params, sizeof(params));
6927
 
6928
- result->op = GGML_OP_ROPE_BACK;
6929
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6930
  result->src[0] = a;
6931
  result->src[1] = b;
6932
  result->src[2] = c;
@@ -6941,21 +6553,13 @@ struct ggml_tensor * ggml_clamp(
6941
  struct ggml_tensor * a,
6942
  float min,
6943
  float max) {
6944
- bool is_node = false;
6945
-
6946
- if (a->grad) {
6947
- GGML_ABORT("fatal error"); // TODO: implement backward
6948
- is_node = true;
6949
- }
6950
-
6951
  // TODO: when implement backward, fix this:
6952
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6953
 
6954
  float params[] = { min, max };
6955
  ggml_set_op_params(result, params, sizeof(params));
6956
 
6957
- result->op = GGML_OP_CLAMP;
6958
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
6959
  result->src[0] = a;
6960
 
6961
  return result;
@@ -7017,13 +6621,6 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
7017
  GGML_ASSERT(p0 == 0);
7018
  GGML_ASSERT(d0 == 1);
7019
 
7020
- bool is_node = false;
7021
-
7022
- if (a->grad || b->grad) {
7023
- GGML_ABORT("fatal error"); // TODO: implement backward
7024
- is_node = true;
7025
- }
7026
-
7027
  const int64_t ne[4] = {
7028
  ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
7029
  a->ne[1], b->ne[2], 1,
@@ -7033,8 +6630,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
7033
  int32_t params[] = { s0, p0, d0 };
7034
  ggml_set_op_params(result, params, sizeof(params));
7035
 
7036
- result->op = GGML_OP_CONV_TRANSPOSE_1D;
7037
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7038
  result->src[0] = a;
7039
  result->src[1] = b;
7040
 
@@ -7042,17 +6638,17 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
7042
  }
7043
 
7044
  // ggml_conv_depthwise
7045
- struct ggml_tensor * ggml_conv_depthwise_2d(
7046
- struct ggml_context * ctx,
7047
- struct ggml_tensor * a,
7048
- struct ggml_tensor * b,
7049
- int s0,
7050
- int s1,
7051
- int p0,
7052
- int p1,
7053
- int d0,
7054
- int d1) {
7055
 
7056
  struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
7057
  struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
7058
  ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
@@ -7072,29 +6668,23 @@ struct ggml_tensor * ggml_conv_depthwise_2d(
7072
  // b: [N, IC, IH, IW]
7073
  // result: [N, OH, OW, IC*KH*KW]
7074
  struct ggml_tensor * ggml_im2col(
7075
- struct ggml_context * ctx,
7076
- struct ggml_tensor * a,
7077
- struct ggml_tensor * b,
7078
- int s0,
7079
- int s1,
7080
- int p0,
7081
- int p1,
7082
- int d0,
7083
- int d1,
7084
- bool is_2D,
7085
- enum ggml_type dst_type) {
7086
-
7087
  if(is_2D) {
7088
  GGML_ASSERT(a->ne[2] == b->ne[2]);
7089
  } else {
7090
  GGML_ASSERT(a->ne[1] == b->ne[1]);
7091
  GGML_ASSERT(b->ne[3] == 1);
7092
  }
7093
- bool is_node = false;
7094
-
7095
- if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
7096
- is_node = true;
7097
- }
7098
 
7099
  const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
7100
  const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
@@ -7113,8 +6703,7 @@ struct ggml_tensor * ggml_im2col(
7113
  int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
7114
  ggml_set_op_params(result, params, sizeof(params));
7115
 
7116
- result->op = GGML_OP_IM2COL;
7117
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7118
  result->src[0] = a;
7119
  result->src[1] = b;
7120
 
@@ -7122,30 +6711,22 @@ struct ggml_tensor * ggml_im2col(
7122
  }
7123
 
7124
  struct ggml_tensor * ggml_im2col_back(
7125
- struct ggml_context * ctx,
7126
- struct ggml_tensor * a,
7127
- struct ggml_tensor * b,
7128
- int64_t * ne,
7129
- int s0,
7130
- int s1,
7131
- int p0,
7132
- int p1,
7133
- int d0,
7134
- int d1,
7135
- bool is_2D) {
7136
-
7137
- bool is_node = false;
7138
-
7139
- if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
7140
- is_node = true;
7141
- }
7142
-
7143
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
7144
  int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
7145
  ggml_set_op_params(result, params, sizeof(params));
7146
 
7147
- result->op = GGML_OP_IM2COL_BACK;
7148
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7149
  result->src[0] = a;
7150
  result->src[1] = b;
7151
 
@@ -7159,12 +6740,12 @@ struct ggml_tensor * ggml_conv_2d(
7159
  struct ggml_context * ctx,
7160
  struct ggml_tensor * a,
7161
  struct ggml_tensor * b,
7162
- int s0,
7163
- int s1,
7164
- int p0,
7165
- int p1,
7166
- int d0,
7167
- int d1) {
7168
  struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
7169
 
7170
  struct ggml_tensor * result =
@@ -7180,6 +6761,7 @@ struct ggml_tensor * ggml_conv_2d(
7180
  }
7181
 
7182
  // ggml_conv_2d_sk_p0
 
7183
  struct ggml_tensor * ggml_conv_2d_sk_p0(
7184
  struct ggml_context * ctx,
7185
  struct ggml_tensor * a,
@@ -7209,13 +6791,6 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
7209
  int stride) {
7210
  GGML_ASSERT(a->ne[3] == b->ne[2]);
7211
 
7212
- bool is_node = false;
7213
-
7214
- if (a->grad || b->grad) {
7215
- GGML_ABORT("fatal error"); // TODO: implement backward
7216
- is_node = true;
7217
- }
7218
-
7219
  const int64_t ne[4] = {
7220
  ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
7221
  ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
@@ -7226,8 +6801,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
7226
 
7227
  ggml_set_op_params_i32(result, 0, stride);
7228
 
7229
- result->op = GGML_OP_CONV_TRANSPOSE_2D;
7230
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7231
  result->src[0] = a;
7232
  result->src[1] = b;
7233
 
@@ -7247,16 +6821,8 @@ struct ggml_tensor * ggml_pool_1d(
7247
  struct ggml_tensor * a,
7248
  enum ggml_op_pool op,
7249
  int k0,
7250
- int s0,
7251
- int p0) {
7252
-
7253
- bool is_node = false;
7254
-
7255
- if (a->grad) {
7256
- GGML_ABORT("fatal error"); // TODO: implement backward
7257
- is_node = true;
7258
- }
7259
-
7260
  const int64_t ne[4] = {
7261
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
7262
  a->ne[1],
@@ -7268,8 +6834,7 @@ struct ggml_tensor * ggml_pool_1d(
7268
  int32_t params[] = { op, k0, s0, p0 };
7269
  ggml_set_op_params(result, params, sizeof(params));
7270
 
7271
- result->op = GGML_OP_POOL_1D;
7272
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7273
  result->src[0] = a;
7274
 
7275
  return result;
@@ -7287,13 +6852,6 @@ struct ggml_tensor * ggml_pool_2d(
7287
  int s1,
7288
  float p0,
7289
  float p1) {
7290
-
7291
- bool is_node = false;
7292
-
7293
- if (a->grad) {
7294
- is_node = true;
7295
- }
7296
-
7297
  struct ggml_tensor * result;
7298
  const int64_t ne[4] = {
7299
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
@@ -7306,9 +6864,9 @@ struct ggml_tensor * ggml_pool_2d(
7306
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
7307
  ggml_set_op_params(result, params, sizeof(params));
7308
 
7309
- result->op = GGML_OP_POOL_2D;
7310
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7311
  result->src[0] = a;
 
7312
  return result;
7313
  }
7314
 
@@ -7323,100 +6881,74 @@ struct ggml_tensor * ggml_pool_2d_back(
7323
  int s1,
7324
  float p0,
7325
  float p1) {
7326
-
7327
- bool is_node = false;
7328
-
7329
- if (a->grad) {
7330
- is_node = true;
7331
- }
7332
-
7333
  struct ggml_tensor * result;
7334
  result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
7335
 
7336
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
7337
  ggml_set_op_params(result, params, sizeof(params));
7338
 
7339
- result->op = GGML_OP_POOL_2D_BACK;
7340
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7341
  result->src[0] = a;
7342
  result->src[1] = af;
 
7343
  return result;
7344
  }
7345
 
7346
  // ggml_upscale
7347
 
7348
  static struct ggml_tensor * ggml_upscale_impl(
7349
- struct ggml_context * ctx,
7350
- struct ggml_tensor * a,
7351
- int ne0,
7352
- int ne1,
7353
- int ne2,
7354
- int ne3) {
7355
- bool is_node = false;
7356
-
7357
- if (a->grad) {
7358
- GGML_ABORT("fatal error"); // TODO: implement backward
7359
- is_node = true;
7360
- }
7361
-
7362
  GGML_ASSERT(a->ne[0] <= ne0);
7363
  GGML_ASSERT(a->ne[1] <= ne1);
7364
  GGML_ASSERT(a->ne[2] <= ne2);
7365
  GGML_ASSERT(a->ne[3] <= ne3);
7366
 
7367
- struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
7368
- ne0,
7369
- ne1,
7370
- ne2,
7371
- ne3
7372
- );
7373
-
7374
- result->op = GGML_OP_UPSCALE;
7375
 
7376
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7377
  result->src[0] = a;
7378
 
7379
  return result;
7380
  }
7381
 
7382
  struct ggml_tensor * ggml_upscale(
7383
- struct ggml_context * ctx,
7384
- struct ggml_tensor * a,
7385
- int scale_factor) {
7386
  return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
7387
  }
7388
 
7389
  struct ggml_tensor * ggml_upscale_ext(
7390
- struct ggml_context * ctx,
7391
- struct ggml_tensor * a,
7392
- int ne0,
7393
- int ne1,
7394
- int ne2,
7395
- int ne3) {
7396
  return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
7397
  }
7398
 
7399
  // ggml_pad
7400
 
7401
  struct ggml_tensor * ggml_pad(
7402
- struct ggml_context * ctx,
7403
- struct ggml_tensor * a,
7404
- int p0, int p1, int p2, int p3) {
7405
- bool is_node = false;
7406
-
7407
- if (a->grad) {
7408
- GGML_ABORT("fatal error"); // TODO: implement backward
7409
- is_node = true;
7410
- }
7411
-
7412
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
7413
  a->ne[0] + p0,
7414
  a->ne[1] + p1,
7415
  a->ne[2] + p2,
7416
  a->ne[3] + p3);
7417
 
7418
- result->op = GGML_OP_PAD;
7419
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7420
  result->src[0] = a;
7421
 
7422
  return result;
@@ -7425,39 +6957,32 @@ struct ggml_tensor * ggml_pad(
7425
  // ggml_arange
7426
 
7427
  struct ggml_tensor * ggml_arange(
7428
- struct ggml_context * ctx,
7429
- float start,
7430
- float stop,
7431
- float step) {
7432
-
7433
  GGML_ASSERT(stop > start);
7434
 
7435
  const int64_t steps = (int64_t) ceilf((stop - start) / step);
7436
 
7437
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
7438
 
7439
- result->op = GGML_OP_ARANGE;
7440
  ggml_set_op_params_f32(result, 0, start);
7441
  ggml_set_op_params_f32(result, 1, stop);
7442
  ggml_set_op_params_f32(result, 2, step);
7443
 
 
 
7444
  return result;
7445
  }
7446
 
7447
  // ggml_timestep_embedding
7448
 
7449
  struct ggml_tensor * ggml_timestep_embedding(
7450
- struct ggml_context * ctx,
7451
- struct ggml_tensor * timesteps,
7452
- int dim,
7453
- int max_period) {
7454
- bool is_node = false;
7455
-
7456
- if (timesteps->grad) {
7457
- GGML_ABORT("fatal error"); // TODO: implement backward
7458
- is_node = true;
7459
- }
7460
-
7461
  int actual_dim = dim;
7462
  if (dim % 2 != 0) {
7463
  actual_dim = dim + 1;
@@ -7465,11 +6990,10 @@ struct ggml_tensor * ggml_timestep_embedding(
7465
 
7466
  struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
7467
 
7468
- result->op = GGML_OP_TIMESTEP_EMBEDDING;
7469
  ggml_set_op_params_i32(result, 0, dim);
7470
  ggml_set_op_params_i32(result, 1, max_period);
7471
 
7472
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7473
  result->src[0] = timesteps;
7474
 
7475
  return result;
@@ -7478,22 +7002,14 @@ struct ggml_tensor * ggml_timestep_embedding(
7478
  // ggml_argsort
7479
 
7480
  struct ggml_tensor * ggml_argsort(
7481
- struct ggml_context * ctx,
7482
- struct ggml_tensor * a,
7483
- enum ggml_sort_order order) {
7484
- bool is_node = false;
7485
-
7486
- if (a->grad) {
7487
- GGML_ABORT("fatal error"); // TODO: not implemented
7488
- is_node = true;
7489
- }
7490
-
7491
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
7492
 
7493
  ggml_set_op_params_i32(result, 0, (int32_t) order);
7494
 
7495
- result->op = GGML_OP_ARGSORT;
7496
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7497
  result->src[0] = a;
7498
 
7499
  return result;
@@ -7546,10 +7062,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
7546
 
7547
  bool is_node = false;
7548
 
7549
- if (q->grad || k->grad || v->grad) {
7550
- is_node = true;
7551
- }
7552
-
7553
  // permute(0, 2, 1, 3)
7554
  int64_t ne[4] = { q->ne[0], q->ne[2], q->ne[1], q->ne[3] };
7555
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
@@ -7676,17 +7188,9 @@ struct ggml_tensor * ggml_ssm_conv(
7676
  GGML_ASSERT(sx->ne[1] == d_inner);
7677
  GGML_ASSERT(n_t >= 0);
7678
 
7679
- bool is_node = false;
7680
-
7681
- if (sx->grad || c->grad) {
7682
- GGML_ABORT("fatal error"); // TODO: implement
7683
- is_node = true;
7684
- }
7685
-
7686
  struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
7687
 
7688
- result->op = GGML_OP_SSM_CONV;
7689
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7690
  result->src[0] = sx;
7691
  result->src[1] = c;
7692
 
@@ -7730,18 +7234,10 @@ struct ggml_tensor * ggml_ssm_scan(
7730
  GGML_ASSERT(B->ne[2] == n_seqs);
7731
  }
7732
 
7733
- bool is_node = false;
7734
-
7735
- if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad) {
7736
- GGML_ABORT("fatal error"); // TODO: implement
7737
- is_node = true;
7738
- }
7739
-
7740
  // concatenated y + ssm_states
7741
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));
7742
 
7743
  result->op = GGML_OP_SSM_SCAN;
7744
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7745
  result->src[0] = s;
7746
  result->src[1] = x;
7747
  result->src[2] = dt;
@@ -7761,13 +7257,6 @@ struct ggml_tensor * ggml_win_part(
7761
  GGML_ASSERT(a->ne[3] == 1);
7762
  GGML_ASSERT(a->type == GGML_TYPE_F32);
7763
 
7764
- bool is_node = false;
7765
-
7766
- if (a->grad) {
7767
- GGML_ABORT("fatal error"); // TODO: implement backward
7768
- is_node = true;
7769
- }
7770
-
7771
  // padding
7772
  const int px = (w - a->ne[1]%w)%w;
7773
  const int py = (w - a->ne[2]%w)%w;
@@ -7782,8 +7271,7 @@ struct ggml_tensor * ggml_win_part(
7782
  int32_t params[] = { npx, npy, w };
7783
  ggml_set_op_params(result, params, sizeof(params));
7784
 
7785
- result->op = GGML_OP_WIN_PART;
7786
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7787
  result->src[0] = a;
7788
 
7789
  return result;
@@ -7799,21 +7287,13 @@ struct ggml_tensor * ggml_win_unpart(
7799
  int w) {
7800
  GGML_ASSERT(a->type == GGML_TYPE_F32);
7801
 
7802
- bool is_node = false;
7803
-
7804
- if (a->grad) {
7805
- GGML_ABORT("fatal error"); // TODO: implement backward
7806
- is_node = true;
7807
- }
7808
-
7809
  const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
7810
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7811
 
7812
  int32_t params[] = { w };
7813
  ggml_set_op_params(result, params, sizeof(params));
7814
 
7815
- result->op = GGML_OP_WIN_UNPART;
7816
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7817
  result->src[0] = a;
7818
 
7819
  return result;
@@ -7829,18 +7309,10 @@ struct ggml_tensor * ggml_get_rel_pos(
7829
  GGML_ASSERT(qh == kh);
7830
  GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
7831
 
7832
- bool is_node = false;
7833
-
7834
- if (a->grad) {
7835
- GGML_ABORT("fatal error"); // TODO: implement backward
7836
- is_node = true;
7837
- }
7838
-
7839
  const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
7840
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
7841
 
7842
- result->op = GGML_OP_GET_REL_POS;
7843
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7844
  result->src[0] = a;
7845
 
7846
  return result;
@@ -7864,17 +7336,10 @@ static struct ggml_tensor * ggml_add_rel_pos_impl(
7864
  GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
7865
  GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
7866
 
7867
- bool is_node = false;
7868
-
7869
- if (!inplace && (a->grad || pw->grad || ph->grad)) {
7870
- is_node = true;
7871
- }
7872
-
7873
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7874
  ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
7875
 
7876
- result->op = GGML_OP_ADD_REL_POS;
7877
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7878
  result->src[0] = a;
7879
  result->src[1] = pw;
7880
  result->src[2] = ph;
@@ -7902,12 +7367,12 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
7902
 
7903
  struct ggml_tensor * ggml_rwkv_wkv(
7904
  struct ggml_context * ctx,
7905
- struct ggml_tensor * k,
7906
- struct ggml_tensor * v,
7907
- struct ggml_tensor * r,
7908
- struct ggml_tensor * tf,
7909
- struct ggml_tensor * td,
7910
- struct ggml_tensor * state) {
7911
  GGML_ASSERT(ggml_is_contiguous(k));
7912
  GGML_ASSERT(ggml_is_contiguous(v));
7913
  GGML_ASSERT(ggml_is_contiguous(r));
@@ -7928,19 +7393,11 @@ struct ggml_tensor * ggml_rwkv_wkv(
7928
  GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
7929
  }
7930
 
7931
- bool is_node = false;
7932
-
7933
- if (k->grad || v->grad || r->grad || tf->grad || td->grad || state->grad) {
7934
- GGML_ABORT("fatal error"); // TODO: implement backward
7935
- is_node = true;
7936
- }
7937
-
7938
  // concat output and new_state
7939
  const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
7940
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
7941
 
7942
- result->op = GGML_OP_RWKV_WKV;
7943
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7944
  result->src[0] = k;
7945
  result->src[1] = v;
7946
  result->src[2] = r;
@@ -7955,23 +7412,16 @@ struct ggml_tensor * ggml_rwkv_wkv(
7955
 
7956
  static struct ggml_tensor * ggml_unary_impl(
7957
  struct ggml_context * ctx,
7958
- struct ggml_tensor * a,
7959
- enum ggml_unary_op op,
7960
- bool inplace) {
7961
  GGML_ASSERT(ggml_is_contiguous_1(a));
7962
 
7963
- bool is_node = false;
7964
-
7965
- if (!inplace && (a->grad)) {
7966
- is_node = true;
7967
- }
7968
-
7969
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7970
 
7971
  ggml_set_op_params_i32(result, 0, (int32_t) op);
7972
 
7973
- result->op = GGML_OP_UNARY;
7974
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7975
  result->src[0] = a;
7976
 
7977
  return result;
@@ -7980,14 +7430,14 @@ static struct ggml_tensor * ggml_unary_impl(
7980
  struct ggml_tensor * ggml_unary(
7981
  struct ggml_context * ctx,
7982
  struct ggml_tensor * a,
7983
- enum ggml_unary_op op) {
7984
  return ggml_unary_impl(ctx, a, op, false);
7985
  }
7986
 
7987
  struct ggml_tensor * ggml_unary_inplace(
7988
  struct ggml_context * ctx,
7989
  struct ggml_tensor * a,
7990
- enum ggml_unary_op op) {
7991
  return ggml_unary_impl(ctx, a, op, true);
7992
  }
7993
 
@@ -7996,20 +7446,13 @@ struct ggml_tensor * ggml_unary_inplace(
7996
  static struct ggml_tensor * ggml_map_unary_impl_f32(
7997
  struct ggml_context * ctx,
7998
  struct ggml_tensor * a,
7999
- const ggml_unary_op_f32_t fun,
8000
- bool inplace) {
8001
- bool is_node = false;
8002
-
8003
- if (!inplace && a->grad) {
8004
- is_node = true;
8005
- }
8006
-
8007
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8008
 
8009
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
8010
 
8011
- result->op = GGML_OP_MAP_UNARY;
8012
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8013
  result->src[0] = a;
8014
 
8015
  return result;
@@ -8018,14 +7461,14 @@ static struct ggml_tensor * ggml_map_unary_impl_f32(
8018
  struct ggml_tensor * ggml_map_unary_f32(
8019
  struct ggml_context * ctx,
8020
  struct ggml_tensor * a,
8021
- const ggml_unary_op_f32_t fun) {
8022
  return ggml_map_unary_impl_f32(ctx, a, fun, false);
8023
  }
8024
 
8025
  struct ggml_tensor * ggml_map_unary_inplace_f32(
8026
  struct ggml_context * ctx,
8027
  struct ggml_tensor * a,
8028
- const ggml_unary_op_f32_t fun) {
8029
  return ggml_map_unary_impl_f32(ctx, a, fun, true);
8030
  }
8031
 
@@ -8035,22 +7478,15 @@ static struct ggml_tensor * ggml_map_binary_impl_f32(
8035
  struct ggml_context * ctx,
8036
  struct ggml_tensor * a,
8037
  struct ggml_tensor * b,
8038
- const ggml_binary_op_f32_t fun,
8039
- bool inplace) {
8040
  GGML_ASSERT(ggml_are_same_shape(a, b));
8041
 
8042
- bool is_node = false;
8043
-
8044
- if (!inplace && (a->grad || b->grad)) {
8045
- is_node = true;
8046
- }
8047
-
8048
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8049
 
8050
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
8051
 
8052
- result->op = GGML_OP_MAP_BINARY;
8053
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8054
  result->src[0] = a;
8055
  result->src[1] = b;
8056
 
@@ -8061,7 +7497,7 @@ struct ggml_tensor * ggml_map_binary_f32(
8061
  struct ggml_context * ctx,
8062
  struct ggml_tensor * a,
8063
  struct ggml_tensor * b,
8064
- const ggml_binary_op_f32_t fun) {
8065
  return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
8066
  }
8067
 
@@ -8069,7 +7505,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
8069
  struct ggml_context * ctx,
8070
  struct ggml_tensor * a,
8071
  struct ggml_tensor * b,
8072
- const ggml_binary_op_f32_t fun) {
8073
  return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
8074
  }
8075
 
@@ -8079,19 +7515,12 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
8079
  struct ggml_context * ctx,
8080
  struct ggml_tensor * a,
8081
  const ggml_custom1_op_f32_t fun,
8082
- bool inplace) {
8083
- bool is_node = false;
8084
-
8085
- if (!inplace && a->grad) {
8086
- is_node = true;
8087
- }
8088
-
8089
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8090
 
8091
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
8092
 
8093
- result->op = GGML_OP_MAP_CUSTOM1_F32;
8094
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8095
  result->src[0] = a;
8096
 
8097
  return result;
@@ -8118,19 +7547,12 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
8118
  struct ggml_tensor * a,
8119
  struct ggml_tensor * b,
8120
  const ggml_custom2_op_f32_t fun,
8121
- bool inplace) {
8122
- bool is_node = false;
8123
-
8124
- if (!inplace && (a->grad || b->grad)) {
8125
- is_node = true;
8126
- }
8127
-
8128
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8129
 
8130
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
8131
 
8132
- result->op = GGML_OP_MAP_CUSTOM2_F32;
8133
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8134
  result->src[0] = a;
8135
  result->src[1] = b;
8136
 
@@ -8161,19 +7583,12 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
8161
  struct ggml_tensor * b,
8162
  struct ggml_tensor * c,
8163
  const ggml_custom3_op_f32_t fun,
8164
- bool inplace) {
8165
- bool is_node = false;
8166
-
8167
- if (!inplace && (a->grad || b->grad || c->grad)) {
8168
- is_node = true;
8169
- }
8170
-
8171
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8172
 
8173
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
8174
 
8175
- result->op = GGML_OP_MAP_CUSTOM3_F32;
8176
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8177
  result->src[0] = a;
8178
  result->src[1] = b;
8179
  result->src[2] = c;
@@ -8201,26 +7616,20 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
8201
 
8202
  // ggml_map_custom1
8203
  struct ggml_map_custom1_op_params {
8204
- ggml_custom1_op_t fun;
8205
- int n_tasks;
8206
- void * userdata;
8207
  };
8208
 
8209
  static struct ggml_tensor * ggml_map_custom1_impl(
8210
- struct ggml_context * ctx,
8211
- struct ggml_tensor * a,
8212
- const ggml_custom1_op_t fun,
8213
- int n_tasks,
8214
- void * userdata,
8215
- bool inplace) {
8216
  GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
8217
 
8218
- bool is_node = false;
8219
-
8220
- if (!inplace && a->grad) {
8221
- is_node = true;
8222
- }
8223
-
8224
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8225
 
8226
  struct ggml_map_custom1_op_params params = {
@@ -8230,55 +7639,48 @@ static struct ggml_tensor * ggml_map_custom1_impl(
8230
  };
8231
  ggml_set_op_params(result, (const void *) &params, sizeof(params));
8232
 
8233
- result->op = GGML_OP_MAP_CUSTOM1;
8234
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8235
  result->src[0] = a;
8236
 
8237
  return result;
8238
  }
8239
 
8240
  struct ggml_tensor * ggml_map_custom1(
8241
- struct ggml_context * ctx,
8242
- struct ggml_tensor * a,
8243
- const ggml_custom1_op_t fun,
8244
- int n_tasks,
8245
- void * userdata) {
8246
  return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
8247
  }
8248
 
8249
  struct ggml_tensor * ggml_map_custom1_inplace(
8250
- struct ggml_context * ctx,
8251
- struct ggml_tensor * a,
8252
- const ggml_custom1_op_t fun,
8253
- int n_tasks,
8254
- void * userdata) {
8255
  return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
8256
  }
8257
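Note: the map_custom* ops never had a backward implementation, so dropping the is_node check here only removes dead bookkeeping; the n_tasks/userdata plumbing is unchanged. For context, a sketch of how such a custom op is used (the callback shape follows ggml_custom1_op_t from ggml.h; the flat float loop assumes a contiguous F32 tensor):

    // element-wise negation as a custom op; work is split across nth threads
    static void neg_f32(struct ggml_tensor * dst, const struct ggml_tensor * a,
                        int ith, int nth, void * userdata) {
        (void) userdata;
        const int64_t n  = ggml_nelements(dst);
        const int64_t dr = (n + nth - 1) / nth;        // elements of work per thread
        const int64_t i0 = dr * ith;
        const int64_t i1 = i0 + dr < n ? i0 + dr : n;
        const float * x = (const float *) a->data;
        float       * y = (float *) dst->data;
        for (int64_t i = i0; i < i1; ++i) {
            y[i] = -x[i];
        }
    }

    // usage: struct ggml_tensor * y = ggml_map_custom1(ctx, x, neg_f32, GGML_N_TASKS_MAX, NULL);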
 
8258
  // ggml_map_custom2
8259
 
8260
  struct ggml_map_custom2_op_params {
8261
- ggml_custom2_op_t fun;
8262
- int n_tasks;
8263
- void * userdata;
8264
  };
8265
 
8266
  static struct ggml_tensor * ggml_map_custom2_impl(
8267
- struct ggml_context * ctx,
8268
- struct ggml_tensor * a,
8269
- struct ggml_tensor * b,
8270
- const ggml_custom2_op_t fun,
8271
- int n_tasks,
8272
- void * userdata,
8273
- bool inplace) {
8274
  GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
8275
 
8276
- bool is_node = false;
8277
-
8278
- if (!inplace && (a->grad || b->grad)) {
8279
- is_node = true;
8280
- }
8281
-
8282
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8283
 
8284
  struct ggml_map_custom2_op_params params = {
@@ -8288,8 +7690,7 @@ static struct ggml_tensor * ggml_map_custom2_impl(
8288
  };
8289
  ggml_set_op_params(result, (const void *) &params, sizeof(params));
8290
 
8291
- result->op = GGML_OP_MAP_CUSTOM2;
8292
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8293
  result->src[0] = a;
8294
  result->src[1] = b;
8295
 
@@ -8297,22 +7698,22 @@ static struct ggml_tensor * ggml_map_custom2_impl(
8297
  }
8298
 
8299
  struct ggml_tensor * ggml_map_custom2(
8300
- struct ggml_context * ctx,
8301
- struct ggml_tensor * a,
8302
- struct ggml_tensor * b,
8303
- const ggml_custom2_op_t fun,
8304
- int n_tasks,
8305
- void * userdata) {
8306
  return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
8307
  }
8308
 
8309
  struct ggml_tensor * ggml_map_custom2_inplace(
8310
- struct ggml_context * ctx,
8311
- struct ggml_tensor * a,
8312
- struct ggml_tensor * b,
8313
- const ggml_custom2_op_t fun,
8314
- int n_tasks,
8315
- void * userdata) {
8316
  return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
8317
  }
8318
 
@@ -8325,22 +7726,16 @@ struct ggml_map_custom3_op_params {
8325
  };
8326
 
8327
  static struct ggml_tensor * ggml_map_custom3_impl(
8328
- struct ggml_context * ctx,
8329
- struct ggml_tensor * a,
8330
- struct ggml_tensor * b,
8331
- struct ggml_tensor * c,
8332
- const ggml_custom3_op_t fun,
8333
- int n_tasks,
8334
- void * userdata,
8335
- bool inplace) {
8336
  GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
8337
 
8338
- bool is_node = false;
8339
-
8340
- if (!inplace && (a->grad || b->grad || c->grad)) {
8341
- is_node = true;
8342
- }
8343
-
8344
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
8345
 
8346
  struct ggml_map_custom3_op_params params = {
@@ -8350,8 +7745,7 @@ static struct ggml_tensor * ggml_map_custom3_impl(
8350
  };
8351
  ggml_set_op_params(result, (const void *) &params, sizeof(params));
8352
 
8353
- result->op = GGML_OP_MAP_CUSTOM3;
8354
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8355
  result->src[0] = a;
8356
  result->src[1] = b;
8357
  result->src[2] = c;
@@ -8360,44 +7754,38 @@ static struct ggml_tensor * ggml_map_custom3_impl(
8360
  }
8361
 
8362
  struct ggml_tensor * ggml_map_custom3(
8363
- struct ggml_context * ctx,
8364
- struct ggml_tensor * a,
8365
- struct ggml_tensor * b,
8366
- struct ggml_tensor * c,
8367
- const ggml_custom3_op_t fun,
8368
- int n_tasks,
8369
- void * userdata) {
8370
  return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
8371
  }
8372
 
8373
  struct ggml_tensor * ggml_map_custom3_inplace(
8374
- struct ggml_context * ctx,
8375
- struct ggml_tensor * a,
8376
- struct ggml_tensor * b,
8377
- struct ggml_tensor * c,
8378
- const ggml_custom3_op_t fun,
8379
- int n_tasks,
8380
- void * userdata) {
8381
  return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
8382
  }
8383
 
8384
  // ggml_cross_entropy_loss
8385
 
8386
  struct ggml_tensor * ggml_cross_entropy_loss(
8387
- struct ggml_context * ctx,
8388
- struct ggml_tensor * a,
8389
- struct ggml_tensor * b) {
8390
  GGML_ASSERT(ggml_are_same_shape(a, b));
8391
- bool is_node = false;
8392
-
8393
- if (a->grad || b->grad) {
8394
- is_node = true;
8395
- }
8396
 
8397
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
8398
 
8399
- result->op = GGML_OP_CROSS_ENTROPY_LOSS;
8400
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
8401
  result->src[0] = a;
8402
  result->src[1] = b;
8403
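Note: ggml_cross_entropy_loss no longer allocates a gradient for the scalar result even when its inputs are trainable. Instead the caller marks the objective with ggml_set_loss (GGML_TENSOR_FLAG_LOSS), and the backward expansion seeds the gradient there. A minimal sketch of the intended construction (the helper name build_loss and the surrounding graph are assumptions for illustration):

    static struct ggml_tensor * build_loss(struct ggml_context * ctx, struct ggml_cgraph * gf,
                                           struct ggml_tensor * logits, struct ggml_tensor * labels) {
        struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);
        ggml_set_loss(loss);                  // mark the scalar that defines the objective
        ggml_build_forward_expand(gf, loss);
        return loss;
    }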
 
@@ -8407,17 +7795,16 @@ struct ggml_tensor * ggml_cross_entropy_loss(
8407
  // ggml_cross_entropy_loss_back
8408
 
8409
  struct ggml_tensor * ggml_cross_entropy_loss_back(
8410
- struct ggml_context * ctx,
8411
- struct ggml_tensor * a,
8412
- struct ggml_tensor * b,
8413
- struct ggml_tensor * c) {
8414
  GGML_ASSERT(ggml_are_same_shape(a, b));
8415
  GGML_ASSERT(ggml_is_scalar(c));
8416
 
8417
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
8418
 
8419
- result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
8420
- result->grad = NULL;
8421
  result->src[0] = a;
8422
  result->src[1] = b;
8423
  result->src[2] = c;
@@ -8435,7 +7822,7 @@ struct ggml_tensor * ggml_opt_step_adamw(
8435
  float beta2,
8436
  float eps,
8437
  float wd) {
8438
- GGML_ASSERT(a->grad);
8439
  GGML_ASSERT(alpha > 0.0f);
8440
  GGML_ASSERT(beta1 >= 0.0f && beta1 <= 1.0f);
8441
  GGML_ASSERT(beta2 >= 0.0f && beta2 <= 1.0f);
@@ -8444,13 +7831,6 @@ struct ggml_tensor * ggml_opt_step_adamw(
8444
 
8445
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
8446
 
8447
- result->op = GGML_OP_OPT_STEP_ADAMW;
8448
- result->grad = NULL;
8449
- result->src[0] = a;
8450
- result->src[1] = a->grad;
8451
- result->src[2] = ggml_dup_tensor(ctx, a->grad);
8452
- result->src[3] = ggml_dup_tensor(ctx, a->grad);
8453
-
8454
  const int64_t iter = 1;
8455
  memcpy(&result->op_params[0], &iter, sizeof(int64_t));
8456
  ggml_set_op_params_f32(result, 2, alpha);
@@ -8459,26 +7839,17 @@ struct ggml_tensor * ggml_opt_step_adamw(
8459
  ggml_set_op_params_f32(result, 5, eps);
8460
  ggml_set_op_params_f32(result, 6, wd);
8461
 
 
 
 
 
 
 
8462
  return result;
8463
  }
8464
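Note: ggml_opt_step_adamw no longer touches a->grad at construction time; the gradient and the two moment tensors are wired in when the optimizer step is added to the backward graph, which is why the src[1..3] assignments and the a->grad assertion disappear here. For reference, the update the op ultimately applies per parameter is textbook AdamW (scalar sketch only; the actual compute kernel lives elsewhere in this file):

    #include <math.h>
    #include <stdint.h>

    // x = parameter, g = gradient, m/v = first/second moments, t = iteration (1-based),
    // alpha/beta1/beta2/eps/wd as stored in op_params above
    static float adamw_step(float x, float g, float * m, float * v, int64_t t,
                            float alpha, float beta1, float beta2, float eps, float wd) {
        *m = beta1 * (*m) + (1.0f - beta1) * g;
        *v = beta2 * (*v) + (1.0f - beta2) * g * g;
        const float mh = *m / (1.0f - powf(beta1, (float) t));   // bias-corrected moments
        const float vh = *v / (1.0f - powf(beta2, (float) t));
        return x - alpha * (mh / (sqrtf(vh) + eps) + wd * x);    // decoupled weight decay
    }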
 
8465
  ////////////////////////////////////////////////////////////////////////////////
8466
 
8467
- void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
8468
- tensor->flags |= GGML_TENSOR_FLAG_PARAM;
8469
-
8470
- GGML_ASSERT(tensor->grad == NULL);
8471
- tensor->grad = ggml_dup_tensor(ctx, tensor);
8472
- ggml_format_name(tensor->grad, "%s (grad)", tensor->name);
8473
- }
8474
-
8475
- void ggml_set_loss(struct ggml_tensor * tensor) {
8476
- GGML_ASSERT(ggml_is_scalar(tensor));
8477
- GGML_ASSERT(tensor->type == GGML_TYPE_F32);
8478
- GGML_ASSERT(tensor->grad);
8479
- tensor->flags |= GGML_TENSOR_FLAG_LOSS;
8480
- }
8481
-
8482
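Note: with gradients no longer attached to tensors up front, these two helpers shrink to pure flag setters; their relocated definitions are not visible in this rendering. A sketch of the flag-only form they presumably take (an assumption based on the tensor flags declared in ggml.h):

    void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
        GGML_UNUSED(ctx);
        tensor->flags |= GGML_TENSOR_FLAG_PARAM;
    }

    void ggml_set_loss(struct ggml_tensor * tensor) {
        GGML_ASSERT(ggml_is_scalar(tensor));
        GGML_ASSERT(tensor->type == GGML_TYPE_F32);
        tensor->flags |= GGML_TENSOR_FLAG_LOSS;   // no tensor->grad requirement anymore
    }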
  // ggml_compute_forward_dup
8483
 
8484
  static void ggml_compute_forward_dup_same_cont(
@@ -18198,7 +17569,7 @@ void ggml_build_backward_gradient_checkpointing(
18198
  struct ggml_tensor * * checkpoints,
18199
  int n_checkpoints) {
18200
  ggml_graph_cpy(gf, gb_tmp);
18201
- ggml_build_backward_expand(ctx, gf, gb_tmp, false, true);
18202
 
18203
  if (n_checkpoints <= 0) {
18204
  ggml_graph_cpy(gb_tmp, gb);
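Note: with the keep argument removed from ggml_build_backward_expand, the checkpointing path presumably now uses the four-argument form (sketch of the replacement call, which is collapsed in this view):

    ggml_build_backward_expand(ctx, gf, gb_tmp, /*accumulate =*/ false);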
@@ -18850,7 +18221,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
18850
  ggml_soft_max_back(ctx, tensor->grad, tensor),
18851
  zero_table, acc_table);
18852
  }
18853
-
18854
  } break;
18855
  case GGML_OP_SOFT_MAX_BACK:
18856
  {
@@ -18891,6 +18262,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
18891
  beta_slow),
18892
  zero_table, acc_table);
18893
  }
 
18894
  } break;
18895
  case GGML_OP_ROPE_BACK:
18896
  {
@@ -19012,6 +18384,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
19012
  }
19013
  case GGML_OP_FLASH_ATTN_EXT:
19014
  {
 
19015
  struct ggml_tensor * flash_grad = NULL;
19016
  if (src0->grad || src1->grad || tensor->src[2]->grad) {
19017
  int32_t t = ggml_get_op_params_i32(tensor, 0);
@@ -19186,6 +18559,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
19186
  tensor->grad),
19187
  zero_table, acc_table);
19188
  }
 
19189
  } break;
19190
  case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
19191
  {
@@ -19236,7 +18610,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
19236
  }
19237
  }
19238
 
19239
- if (node->op == GGML_OP_NONE && node->grad == NULL) {
19240
  // reached a leaf node, not part of the gradient graph (e.g. a constant)
19241
  GGML_ASSERT(cgraph->n_leafs < cgraph->size);
19242
 
@@ -19254,9 +18628,6 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
19254
  }
19255
 
19256
  cgraph->nodes[cgraph->n_nodes] = node;
19257
- if (cgraph->grads) {
19258
- cgraph->grads[cgraph->n_nodes] = node->grad;
19259
- }
19260
  cgraph->n_nodes++;
19261
  }
19262
  }
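Note: tensors no longer carry a grad pointer while the graph is being built, so leaf classification can no longer look at node->grad, and cgraph->grads is no longer filled in during visiting; it is populated when the backward graph is expanded. The replacement condition is not visible in this rendering; presumably it keys off the op and the PARAM flag alone, e.g.:

    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
    }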
@@ -19284,20 +18655,58 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
19284
  ggml_build_forward_impl(cgraph, tensor, true);
19285
  }
19286
 
19287
- void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate, bool keep) {
19288
  GGML_ASSERT(gf->n_nodes > 0);
19289
  GGML_ASSERT(gf->grads);
19290
 
19291
- // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
19292
- if (keep) {
19293
- for (int i = 0; i < gf->n_nodes; i++) {
19294
- struct ggml_tensor * node = gf->nodes[i];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19295
 
19296
- if (node->grad) {
19297
- node->grad = ggml_dup_tensor(ctx, node);
19298
- gf->grads[i] = node->grad;
 
 
 
19299
  }
 
 
 
 
 
 
19300
  }
 
 
 
 
 
 
 
19301
  }
19302
 
19303
  // keep tables of original gradients for replacement/accumulation logic
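Note: with the keep flag gone and gradient tensors now created inside ggml_build_backward_expand, a typical training setup becomes: flag the parameters and the loss, expand the forward graph, duplicate it, then expand the backward graph. A minimal sketch (weights, logits and labels are assumed to exist; graph sizing and the actual training loop are omitted):

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);

    ggml_set_param(ctx, weights);                          // mark trainable tensors
    struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);
    ggml_set_loss(loss);                                   // mark the objective

    ggml_build_forward_expand(gf, loss);
    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, /*accumulate =*/ false);   // grads allocated here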
@@ -22162,8 +21571,6 @@ enum ggml_opt_result ggml_opt(
22162
  struct ggml_context * ctx,
22163
  struct ggml_opt_params params,
22164
  struct ggml_tensor * f) {
22165
- GGML_ASSERT(f->grad && "ggml_set_param called for at least one parent tensor.");
22166
-
22167
  bool free_ctx = false;
22168
  if (ctx == NULL) {
22169
  struct ggml_init_params params_ctx = {
@@ -22204,7 +21611,7 @@ enum ggml_opt_result ggml_opt_resume(
22204
  ggml_build_forward_expand(gf, f);
22205
 
22206
  struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
22207
- ggml_build_backward_expand(ctx, gf, gb, false, true);
22208
 
22209
  return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
22210
  }
@@ -22257,6 +21664,17 @@ void ggml_set_output(struct ggml_tensor * tensor) {
22257
  tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
22258
  }
22259
 
 
 
 
 
 
 
 
 
 
 
 
22260
  ////////////////////////////////////////////////////////////////////////////////
22261
 
22262
  void ggml_quantize_init(enum ggml_type type) {
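ggml/src/ggml.c (continued): the hunks below cover the top of the file, roughly updated lines 4725-6800. They re-indent the constructor prototypes and re-add the result->op assignment on its own line, now that the old result->grad companion line is gone. Reassembled from the context and added lines shown here, ggml_add_impl for example ends up as:

    static struct ggml_tensor * ggml_add_impl(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            bool                  inplace) {
        GGML_ASSERT(ggml_can_repeat(b, a));

        struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

        result->op     = GGML_OP_ADD;
        result->src[0] = a;
        result->src[1] = b;

        return result;
    }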
 
4725
 
4726
  static struct ggml_tensor * ggml_dup_impl(
4727
  struct ggml_context * ctx,
4728
+ struct ggml_tensor * a,
4729
+ bool inplace) {
 
 
 
 
 
 
4730
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4731
 
4732
+ result->op = GGML_OP_DUP;
 
4733
  result->src[0] = a;
4734
 
4735
  return result;
 
4737
 
4738
  struct ggml_tensor * ggml_dup(
4739
  struct ggml_context * ctx,
4740
+ struct ggml_tensor * a) {
4741
  return ggml_dup_impl(ctx, a, false);
4742
  }
4743
 
4744
  struct ggml_tensor * ggml_dup_inplace(
4745
  struct ggml_context * ctx,
4746
+ struct ggml_tensor * a) {
4747
  return ggml_dup_impl(ctx, a, true);
4748
  }
4749
 
 
4751
 
4752
  static struct ggml_tensor * ggml_add_impl(
4753
  struct ggml_context * ctx,
4754
+ struct ggml_tensor * a,
4755
+ struct ggml_tensor * b,
4756
+ bool inplace) {
4757
  GGML_ASSERT(ggml_can_repeat(b, a));
4758
 
 
 
 
 
 
 
4759
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4760
 
4761
+ result->op = GGML_OP_ADD;
 
4762
  result->src[0] = a;
4763
  result->src[1] = b;
4764
 
 
4767
 
4768
  struct ggml_tensor * ggml_add(
4769
  struct ggml_context * ctx,
4770
+ struct ggml_tensor * a,
4771
+ struct ggml_tensor * b) {
4772
  return ggml_add_impl(ctx, a, b, false);
4773
  }
4774
 
4775
  struct ggml_tensor * ggml_add_inplace(
4776
  struct ggml_context * ctx,
4777
+ struct ggml_tensor * a,
4778
+ struct ggml_tensor * b) {
4779
  return ggml_add_impl(ctx, a, b, true);
4780
  }
4781
 
 
4783
 
4784
  static struct ggml_tensor * ggml_add_cast_impl(
4785
  struct ggml_context * ctx,
4786
+ struct ggml_tensor * a,
4787
+ struct ggml_tensor * b,
4788
+ enum ggml_type type) {
4789
  // TODO: support less-strict constraint
4790
  // GGML_ASSERT(ggml_can_repeat(b, a));
4791
  GGML_ASSERT(ggml_can_repeat_rows(b, a));
 
4795
  a->type == GGML_TYPE_F16 ||
4796
  a->type == GGML_TYPE_BF16);
4797
 
 
 
 
 
 
 
 
 
4798
  struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
4799
 
4800
+ result->op = GGML_OP_ADD;
 
4801
  result->src[0] = a;
4802
  result->src[1] = b;
4803
 
 
4806
 
4807
  struct ggml_tensor * ggml_add_cast(
4808
  struct ggml_context * ctx,
4809
+ struct ggml_tensor * a,
4810
+ struct ggml_tensor * b,
4811
+ enum ggml_type type) {
4812
  return ggml_add_cast_impl(ctx, a, b, type);
4813
  }
4814
 
 
4816
 
4817
  static struct ggml_tensor * ggml_add1_impl(
4818
  struct ggml_context * ctx,
4819
+ struct ggml_tensor * a,
4820
+ struct ggml_tensor * b,
4821
+ bool inplace) {
4822
  GGML_ASSERT(ggml_is_scalar(b));
4823
  GGML_ASSERT(ggml_is_padded_1d(a));
4824
 
 
 
 
 
 
 
4825
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4826
 
4827
+ result->op = GGML_OP_ADD1;
 
4828
  result->src[0] = a;
4829
  result->src[1] = b;
4830
 
 
4833
 
4834
  struct ggml_tensor * ggml_add1(
4835
  struct ggml_context * ctx,
4836
+ struct ggml_tensor * a,
4837
+ struct ggml_tensor * b) {
4838
  return ggml_add1_impl(ctx, a, b, false);
4839
  }
4840
 
4841
  struct ggml_tensor * ggml_add1_inplace(
4842
  struct ggml_context * ctx,
4843
+ struct ggml_tensor * a,
4844
+ struct ggml_tensor * b) {
4845
  return ggml_add1_impl(ctx, a, b, true);
4846
  }
4847
 
 
4849
 
4850
  static struct ggml_tensor * ggml_acc_impl(
4851
  struct ggml_context * ctx,
4852
+ struct ggml_tensor * a,
4853
+ struct ggml_tensor * b,
4854
+ size_t nb1,
4855
+ size_t nb2,
4856
+ size_t nb3,
4857
+ size_t offset,
4858
+ bool inplace) {
4859
  GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
4860
  GGML_ASSERT(ggml_is_contiguous(a));
4861
  GGML_ASSERT(a->type == GGML_TYPE_F32);
4862
  GGML_ASSERT(b->type == GGML_TYPE_F32);
4863
 
 
 
 
 
 
 
4864
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4865
 
4866
  int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
4867
  ggml_set_op_params(result, params, sizeof(params));
4868
 
4869
+ result->op = GGML_OP_ACC;
 
4870
  result->src[0] = a;
4871
  result->src[1] = b;
4872
 
 
4875
 
4876
  struct ggml_tensor * ggml_acc(
4877
  struct ggml_context * ctx,
4878
+ struct ggml_tensor * a,
4879
+ struct ggml_tensor * b,
4880
+ size_t nb1,
4881
+ size_t nb2,
4882
+ size_t nb3,
4883
+ size_t offset) {
4884
  return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
4885
  }
4886
 
4887
  struct ggml_tensor * ggml_acc_inplace(
4888
  struct ggml_context * ctx,
4889
+ struct ggml_tensor * a,
4890
+ struct ggml_tensor * b,
4891
+ size_t nb1,
4892
+ size_t nb2,
4893
+ size_t nb3,
4894
+ size_t offset) {
4895
  return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
4896
  }
4897
 
 
4899
 
4900
  static struct ggml_tensor * ggml_sub_impl(
4901
  struct ggml_context * ctx,
4902
+ struct ggml_tensor * a,
4903
+ struct ggml_tensor * b,
4904
+ bool inplace) {
4905
  GGML_ASSERT(ggml_can_repeat(b, a));
4906
 
 
 
 
 
 
 
 
 
4907
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4908
 
4909
+ result->op = GGML_OP_SUB;
 
4910
  result->src[0] = a;
4911
  result->src[1] = b;
4912
 
 
4915
 
4916
  struct ggml_tensor * ggml_sub(
4917
  struct ggml_context * ctx,
4918
+ struct ggml_tensor * a,
4919
+ struct ggml_tensor * b) {
4920
  return ggml_sub_impl(ctx, a, b, false);
4921
  }
4922
 
4923
  struct ggml_tensor * ggml_sub_inplace(
4924
  struct ggml_context * ctx,
4925
+ struct ggml_tensor * a,
4926
+ struct ggml_tensor * b) {
4927
  return ggml_sub_impl(ctx, a, b, true);
4928
  }
4929
 
 
4931
 
4932
  static struct ggml_tensor * ggml_mul_impl(
4933
  struct ggml_context * ctx,
4934
+ struct ggml_tensor * a,
4935
+ struct ggml_tensor * b,
4936
+ bool inplace) {
4937
  GGML_ASSERT(ggml_can_repeat(b, a));
4938
 
 
 
 
 
 
 
 
 
 
 
 
 
4939
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4940
 
4941
+ result->op = GGML_OP_MUL;
 
4942
  result->src[0] = a;
4943
  result->src[1] = b;
4944
 
 
4963
 
4964
  static struct ggml_tensor * ggml_div_impl(
4965
  struct ggml_context * ctx,
4966
+ struct ggml_tensor * a,
4967
+ struct ggml_tensor * b,
4968
+ bool inplace) {
4969
  GGML_ASSERT(ggml_can_repeat(b, a));
4970
 
 
 
 
 
 
 
 
 
 
 
4971
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4972
 
4973
+ result->op = GGML_OP_DIV;
 
4974
  result->src[0] = a;
4975
  result->src[1] = b;
4976
 
 
4995
 
4996
  static struct ggml_tensor * ggml_sqr_impl(
4997
  struct ggml_context * ctx,
4998
+ struct ggml_tensor * a,
4999
+ bool inplace) {
 
 
 
 
 
 
5000
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5001
 
5002
+ result->op = GGML_OP_SQR;
 
5003
  result->src[0] = a;
5004
 
5005
  return result;
 
5021
 
5022
  static struct ggml_tensor * ggml_sqrt_impl(
5023
  struct ggml_context * ctx,
5024
+ struct ggml_tensor * a,
5025
+ bool inplace) {
 
 
 
 
 
 
5026
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5027
 
5028
+ result->op = GGML_OP_SQRT;
 
5029
  result->src[0] = a;
5030
 
5031
  return result;
 
5048
  static struct ggml_tensor * ggml_log_impl(
5049
  struct ggml_context * ctx,
5050
  struct ggml_tensor * a,
5051
+ bool inplace) {
 
 
 
 
 
 
5052
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5053
 
5054
+ result->op = GGML_OP_LOG;
 
5055
  result->src[0] = a;
5056
 
5057
  return result;
 
5074
  static struct ggml_tensor * ggml_sin_impl(
5075
  struct ggml_context * ctx,
5076
  struct ggml_tensor * a,
5077
+ bool inplace) {
 
 
 
 
 
 
5078
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5079
 
5080
+ result->op = GGML_OP_SIN;
 
5081
  result->src[0] = a;
5082
 
5083
  return result;
 
5100
  static struct ggml_tensor * ggml_cos_impl(
5101
  struct ggml_context * ctx,
5102
  struct ggml_tensor * a,
5103
+ bool inplace) {
 
 
 
 
 
 
5104
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5105
 
5106
+ result->op = GGML_OP_COS;
 
5107
  result->src[0] = a;
5108
 
5109
  return result;
 
5125
 
5126
  struct ggml_tensor * ggml_sum(
5127
  struct ggml_context * ctx,
5128
+ struct ggml_tensor * a) {
 
 
 
 
 
 
5129
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
5130
 
5131
+ result->op = GGML_OP_SUM;
 
5132
  result->src[0] = a;
5133
 
5134
  return result;
 
5138
 
5139
  struct ggml_tensor * ggml_sum_rows(
5140
  struct ggml_context * ctx,
5141
+ struct ggml_tensor * a) {
 
 
 
 
 
 
5142
  int64_t ne[GGML_MAX_DIMS] = { 1 };
5143
  for (int i = 1; i < GGML_MAX_DIMS; ++i) {
5144
  ne[i] = a->ne[i];
 
5146
 
5147
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
5148
 
5149
+ result->op = GGML_OP_SUM_ROWS;
 
5150
  result->src[0] = a;
5151
 
5152
  return result;
 
5156
 
5157
  struct ggml_tensor * ggml_mean(
5158
  struct ggml_context * ctx,
5159
+ struct ggml_tensor * a) {
 
 
 
 
 
 
 
5160
  int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
5161
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5162
 
5163
+ result->op = GGML_OP_MEAN;
 
5164
  result->src[0] = a;
5165
 
5166
  return result;
 
5170
 
5171
  struct ggml_tensor * ggml_argmax(
5172
  struct ggml_context * ctx,
5173
+ struct ggml_tensor * a) {
5174
  GGML_ASSERT(ggml_is_matrix(a));
 
 
 
 
 
 
5175
 
5176
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
5177
 
5178
+ result->op = GGML_OP_ARGMAX;
 
5179
  result->src[0] = a;
5180
 
5181
  return result;
 
5185
 
5186
  struct ggml_tensor * ggml_repeat(
5187
  struct ggml_context * ctx,
5188
+ struct ggml_tensor * a,
5189
+ struct ggml_tensor * b) {
5190
  GGML_ASSERT(ggml_can_repeat(a, b));
5191
 
 
 
 
 
 
 
5192
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
5193
 
5194
+ result->op = GGML_OP_REPEAT;
 
5195
  result->src[0] = a;
5196
 
5197
  return result;
 
5201
 
5202
  struct ggml_tensor * ggml_repeat_back(
5203
  struct ggml_context * ctx,
5204
+ struct ggml_tensor * a,
5205
+ struct ggml_tensor * b) {
5206
  GGML_ASSERT(ggml_can_repeat(b, a));
5207
 
 
 
 
 
 
 
 
 
 
 
5208
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
5209
 
5210
+ result->op = GGML_OP_REPEAT_BACK;
 
5211
  result->src[0] = a;
5212
 
5213
  return result;
 
5217
 
5218
  struct ggml_tensor * ggml_concat(
5219
  struct ggml_context * ctx,
5220
+ struct ggml_tensor * a,
5221
+ struct ggml_tensor * b,
5222
+ int dim) {
5223
  GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
5224
 
5225
  int64_t ne[GGML_MAX_DIMS];
 
5232
  ne[d] = a->ne[d];
5233
  }
5234
 
 
 
 
 
 
 
 
5235
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
5236
 
5237
  ggml_set_op_params_i32(result, 0, dim);
5238
 
5239
+ result->op = GGML_OP_CONCAT;
 
5240
  result->src[0] = a;
5241
  result->src[1] = b;
5242
 
 
5345
 
5346
  struct ggml_tensor * ggml_leaky_relu(
5347
  struct ggml_context * ctx,
5348
+ struct ggml_tensor * a,
5349
+ float negative_slope,
5350
+ bool inplace) {
 
 
 
 
 
5351
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5352
 
5353
  ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
5354
 
5355
+ result->op = GGML_OP_LEAKY_RELU;
 
5356
  result->src[0] = a;
5357
 
5358
  return result;
 
5420
  struct ggml_context * ctx,
5421
  struct ggml_tensor * a,
5422
  struct ggml_tensor * b) {
 
 
 
 
 
 
 
5423
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5424
 
5425
+ result->op = GGML_OP_SILU_BACK;
 
5426
  result->src[0] = a;
5427
  result->src[1] = b;
5428
 
 
5430
  }
5431
 
5432
  // ggml hardswish
5433
+
5434
  struct ggml_tensor * ggml_hardswish(
5435
  struct ggml_context * ctx,
5436
  struct ggml_tensor * a) {
 
5438
  }
5439
 
5440
  // ggml hardsigmoid
5441
+
5442
  struct ggml_tensor * ggml_hardsigmoid(
5443
  struct ggml_context * ctx,
5444
  struct ggml_tensor * a) {
 
5446
  }
5447
 
5448
  // ggml exp
5449
+
5450
  struct ggml_tensor * ggml_exp(
5451
  struct ggml_context * ctx,
5452
  struct ggml_tensor * a) {
 
5464
  static struct ggml_tensor * ggml_norm_impl(
5465
  struct ggml_context * ctx,
5466
  struct ggml_tensor * a,
5467
+ float eps,
5468
+ bool inplace) {
 
 
 
 
 
 
 
5469
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5470
 
5471
  ggml_set_op_params(result, &eps, sizeof(eps));
5472
 
5473
+ result->op = GGML_OP_NORM;
 
5474
  result->src[0] = a;
5475
 
5476
  return result;
 
5479
  struct ggml_tensor * ggml_norm(
5480
  struct ggml_context * ctx,
5481
  struct ggml_tensor * a,
5482
+ float eps) {
5483
  return ggml_norm_impl(ctx, a, eps, false);
5484
  }
5485
 
5486
  struct ggml_tensor * ggml_norm_inplace(
5487
  struct ggml_context * ctx,
5488
  struct ggml_tensor * a,
5489
+ float eps) {
5490
  return ggml_norm_impl(ctx, a, eps, true);
5491
  }
5492
 
 
5495
  static struct ggml_tensor * ggml_rms_norm_impl(
5496
  struct ggml_context * ctx,
5497
  struct ggml_tensor * a,
5498
+ float eps,
5499
+ bool inplace) {
 
 
 
 
 
 
5500
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5501
 
5502
  ggml_set_op_params(result, &eps, sizeof(eps));
5503
 
5504
+ result->op = GGML_OP_RMS_NORM;
 
5505
  result->src[0] = a;
5506
 
5507
  return result;
 
5510
  struct ggml_tensor * ggml_rms_norm(
5511
  struct ggml_context * ctx,
5512
  struct ggml_tensor * a,
5513
+ float eps) {
5514
  return ggml_rms_norm_impl(ctx, a, eps, false);
5515
  }
5516
 
5517
  struct ggml_tensor * ggml_rms_norm_inplace(
5518
  struct ggml_context * ctx,
5519
  struct ggml_tensor * a,
5520
+ float eps) {
5521
  return ggml_rms_norm_impl(ctx, a, eps, true);
5522
  }
5523
 
 
5527
  struct ggml_context * ctx,
5528
  struct ggml_tensor * a,
5529
  struct ggml_tensor * b,
5530
+ float eps) {
 
 
 
 
 
 
 
5531
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5532
 
5533
  ggml_set_op_params(result, &eps, sizeof(eps));
5534
 
5535
+ result->op = GGML_OP_RMS_NORM_BACK;
 
5536
  result->src[0] = a;
5537
  result->src[1] = b;
5538
 
 
5542
  // ggml_group_norm
5543
 
5544
  static struct ggml_tensor * ggml_group_norm_impl(
5545
+ struct ggml_context * ctx,
5546
+ struct ggml_tensor * a,
5547
+ int n_groups,
5548
+ float eps,
5549
+ bool inplace) {
 
 
 
 
 
 
 
5550
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5551
 
5552
  ggml_set_op_params_i32(result, 0, n_groups);
5553
  ggml_set_op_params_f32(result, 1, eps);
5554
 
5555
+ result->op = GGML_OP_GROUP_NORM;
 
5556
  result->src[0] = a;
5557
 
5558
  return result;
5559
  }
5560
 
5561
  struct ggml_tensor * ggml_group_norm(
5562
+ struct ggml_context * ctx,
5563
+ struct ggml_tensor * a,
5564
+ int n_groups,
5565
+ float eps) {
5566
  return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
5567
  }
5568
 
5569
  struct ggml_tensor * ggml_group_norm_inplace(
5570
+ struct ggml_context * ctx,
5571
+ struct ggml_tensor * a,
5572
+ int n_groups,
5573
+ float eps) {
5574
  return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
5575
  }
5576
 
 
5583
  GGML_ASSERT(ggml_can_mul_mat(a, b));
5584
  GGML_ASSERT(!ggml_is_transposed(a));
5585
 
 
 
 
 
 
 
5586
  const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
5587
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5588
 
5589
+ result->op = GGML_OP_MUL_MAT;
 
5590
  result->src[0] = a;
5591
  result->src[1] = b;
5592
 
 
5632
  GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
5633
  GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
5634
 
 
 
 
 
 
 
5635
  const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
5636
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5637
 
5638
+ result->op = GGML_OP_MUL_MAT_ID;
 
5639
  result->src[0] = as;
5640
  result->src[1] = b;
5641
  result->src[2] = ids;
 
5652
  GGML_ASSERT(ggml_can_out_prod(a, b));
5653
  GGML_ASSERT(!ggml_is_transposed(a));
5654
 
 
 
 
 
 
 
5655
  // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
5656
  const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
5657
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5658
 
5659
+ result->op = GGML_OP_OUT_PROD;
 
5660
  result->src[0] = a;
5661
  result->src[1] = b;
5662
 
 
5669
  struct ggml_context * ctx,
5670
  struct ggml_tensor * a,
5671
  float s,
5672
+ bool inplace) {
5673
  GGML_ASSERT(ggml_is_padded_1d(a));
5674
 
 
 
 
 
 
 
5675
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5676
 
5677
  ggml_set_op_params(result, &s, sizeof(s));
5678
 
5679
+ result->op = GGML_OP_SCALE;
 
5680
  result->src[0] = a;
5681
 
5682
  return result;
 
5684
 
5685
  struct ggml_tensor * ggml_scale(
5686
  struct ggml_context * ctx,
5687
+ struct ggml_tensor * a,
5688
+ float s) {
5689
  return ggml_scale_impl(ctx, a, s, false);
5690
  }
5691
 
5692
  struct ggml_tensor * ggml_scale_inplace(
5693
  struct ggml_context * ctx,
5694
+ struct ggml_tensor * a,
5695
+ float s) {
5696
  return ggml_scale_impl(ctx, a, s, true);
5697
  }
5698
 
 
5706
  size_t nb2,
5707
  size_t nb3,
5708
  size_t offset,
5709
+ bool inplace) {
5710
  GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
5711
 
 
 
 
 
 
 
5712
  // make a view of the destination
5713
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5714
 
 
5716
  int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
5717
  ggml_set_op_params(result, params, sizeof(params));
5718
 
5719
+ result->op = GGML_OP_SET;
 
5720
  result->src[0] = a;
5721
  result->src[1] = b;
5722
 
 
5725
 
5726
  struct ggml_tensor * ggml_set(
5727
  struct ggml_context * ctx,
5728
+ struct ggml_tensor * a,
5729
+ struct ggml_tensor * b,
5730
  size_t nb1,
5731
  size_t nb2,
5732
  size_t nb3,
 
5736
 
5737
  struct ggml_tensor * ggml_set_inplace(
5738
  struct ggml_context * ctx,
5739
+ struct ggml_tensor * a,
5740
+ struct ggml_tensor * b,
5741
  size_t nb1,
5742
  size_t nb2,
5743
  size_t nb3,
 
5747
 
5748
  struct ggml_tensor * ggml_set_1d(
5749
  struct ggml_context * ctx,
5750
+ struct ggml_tensor * a,
5751
+ struct ggml_tensor * b,
5752
  size_t offset) {
5753
  return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
5754
  }
5755
 
5756
  struct ggml_tensor * ggml_set_1d_inplace(
5757
  struct ggml_context * ctx,
5758
+ struct ggml_tensor * a,
5759
+ struct ggml_tensor * b,
5760
  size_t offset) {
5761
  return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
5762
  }
5763
 
5764
  struct ggml_tensor * ggml_set_2d(
5765
  struct ggml_context * ctx,
5766
+ struct ggml_tensor * a,
5767
+ struct ggml_tensor * b,
5768
  size_t nb1,
5769
  size_t offset) {
5770
  return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
 
5772
 
5773
  struct ggml_tensor * ggml_set_2d_inplace(
5774
  struct ggml_context * ctx,
5775
+ struct ggml_tensor * a,
5776
+ struct ggml_tensor * b,
5777
  size_t nb1,
5778
  size_t offset) {
5779
  return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
 
5787
  struct ggml_tensor * b) {
5788
  GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
5789
 
 
 
 
 
 
 
 
5790
  // make a view of the destination
5791
  struct ggml_tensor * result = ggml_view_tensor(ctx, b);
5792
  if (strlen(b->name) > 0) {
 
5795
  ggml_format_name(result, "%s (copy)", a->name);
5796
  }
5797
 
5798
+ result->op = GGML_OP_CPY;
 
5799
  result->src[0] = a;
5800
  result->src[1] = b;
5801
 
 
5813
  struct ggml_context * ctx,
5814
  struct ggml_tensor * a,
5815
  enum ggml_type type) {
 
 
5816
  struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
5817
  ggml_format_name(result, "%s (copy)", a->name);
5818
 
5819
+ result->op = GGML_OP_CPY;
 
5820
  result->src[0] = a;
 
5821
 
5822
  return result;
5823
  }
 
5827
  static struct ggml_tensor * ggml_cont_impl(
5828
  struct ggml_context * ctx,
5829
  struct ggml_tensor * a) {
 
 
 
 
 
 
5830
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5831
  ggml_format_name(result, "%s (cont)", a->name);
5832
 
5833
+ result->op = GGML_OP_CONT;
 
5834
  result->src[0] = a;
5835
 
5836
  return result;
 
5876
  int64_t ne3) {
5877
  GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
5878
 
 
 
5879
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
5880
  ggml_format_name(result, "%s (cont)", a->name);
5881
 
5882
+ result->op = GGML_OP_CONT;
 
5883
  result->src[0] = a;
5884
 
5885
  return result;
 
5895
  // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
5896
  GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
5897
 
 
 
 
 
 
 
 
 
 
 
 
5898
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
5899
  ggml_format_name(result, "%s (reshaped)", a->name);
5900
 
5901
+ result->op = GGML_OP_RESHAPE;
 
5902
  result->src[0] = a;
5903
 
5904
  return result;
 
5911
  GGML_ASSERT(ggml_is_contiguous(a));
5912
  GGML_ASSERT(ggml_nelements(a) == ne0);
5913
 
 
 
 
 
 
 
5914
  const int64_t ne[1] = { ne0 };
5915
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
5916
  ggml_format_name(result, "%s (reshaped)", a->name);
5917
 
5918
+ result->op = GGML_OP_RESHAPE;
 
5919
  result->src[0] = a;
5920
 
5921
  return result;
 
5929
  GGML_ASSERT(ggml_is_contiguous(a));
5930
  GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
5931
 
 
 
 
 
 
 
5932
  const int64_t ne[2] = { ne0, ne1 };
5933
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
5934
  ggml_format_name(result, "%s (reshaped)", a->name);
5935
 
5936
+ result->op = GGML_OP_RESHAPE;
 
5937
  result->src[0] = a;
5938
 
5939
  return result;
 
5948
  GGML_ASSERT(ggml_is_contiguous(a));
5949
  GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
5950
 
 
 
 
 
 
 
5951
  const int64_t ne[3] = { ne0, ne1, ne2 };
5952
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
5953
  ggml_format_name(result, "%s (reshaped)", a->name);
5954
 
5955
+ result->op = GGML_OP_RESHAPE;
 
5956
  result->src[0] = a;
5957
 
5958
  return result;
 
5968
  GGML_ASSERT(ggml_is_contiguous(a));
5969
  GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
5970
 
 
 
 
 
 
 
5971
  const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
5972
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
5973
  ggml_format_name(result, "%s (reshaped)", a->name);
5974
 
5975
+ result->op = GGML_OP_RESHAPE;
 
5976
  result->src[0] = a;
5977
 
5978
  return result;
 
5984
  int n_dims,
5985
  const int64_t * ne,
5986
  size_t offset) {
 
 
 
 
 
 
 
5987
  struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
5988
  ggml_format_name(result, "%s (view)", a->name);
5989
 
5990
  ggml_set_op_params(result, &offset, sizeof(offset));
5991
 
5992
+ result->op = GGML_OP_VIEW;
 
5993
  result->src[0] = a;
5994
 
5995
  return result;
 
6002
  struct ggml_tensor * a,
6003
  int64_t ne0,
6004
  size_t offset) {
 
6005
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
6006
 
6007
  return result;
 
6016
  int64_t ne1,
6017
  size_t nb1,
6018
  size_t offset) {
 
6019
  const int64_t ne[2] = { ne0, ne1 };
6020
 
6021
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
 
6038
  size_t nb1,
6039
  size_t nb2,
6040
  size_t offset) {
 
6041
  const int64_t ne[3] = { ne0, ne1, ne2 };
6042
 
6043
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
 
6062
  size_t nb2,
6063
  size_t nb3,
6064
  size_t offset) {
 
6065
  const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
6066
 
6067
  struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
 
6094
  GGML_ASSERT(axis1 != axis3);
6095
  GGML_ASSERT(axis2 != axis3);
6096
 
 
 
 
 
 
 
6097
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6098
  ggml_format_name(result, "%s (permuted)", a->name);
6099
 
 
6120
  result->nb[2] = nb[2];
6121
  result->nb[3] = nb[3];
6122
 
6123
+ result->op = GGML_OP_PERMUTE;
 
6124
  result->src[0] = a;
6125
 
6126
  int32_t params[] = { axis0, axis1, axis2, axis3 };
 
6134
  struct ggml_tensor * ggml_transpose(
6135
  struct ggml_context * ctx,
6136
  struct ggml_tensor * a) {
 
 
 
 
 
 
6137
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6138
  ggml_format_name(result, "%s (transposed)", a->name);
6139
 
 
6143
  result->nb[0] = a->nb[1];
6144
  result->nb[1] = a->nb[0];
6145
 
6146
+ result->op = GGML_OP_TRANSPOSE;
 
6147
  result->src[0] = a;
6148
 
6149
  return result;
 
6159
  GGML_ASSERT(b->ne[3] == 1);
6160
  GGML_ASSERT(b->type == GGML_TYPE_I32);
6161
 
 
 
 
 
 
 
6162
  // TODO: implement non F32 return
6163
  enum ggml_type type = GGML_TYPE_F32;
6164
  if (a->type == GGML_TYPE_I32) {
 
6166
  }
6167
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
6168
 
6169
+ result->op = GGML_OP_GET_ROWS;
 
6170
  result->src[0] = a;
6171
  result->src[1] = b;
6172
 
 
6183
  GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
6184
  GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
6185
 
 
 
 
 
 
 
6186
  // TODO: implement non F32 return
6187
  //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
6188
  struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
6189
 
6190
+ result->op = GGML_OP_GET_ROWS_BACK;
 
6191
  result->src[0] = a;
6192
  result->src[1] = b;
6193
 
 
6200
  struct ggml_context * ctx,
6201
  struct ggml_tensor * a) {
6202
  GGML_ASSERT(a->ne[1] == 1);
 
 
 
 
 
6203
 
6204
  const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
6205
  struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
6206
 
6207
+ result->op = GGML_OP_DIAG;
 
6208
  result->src[0] = a;
6209
 
6210
  return result;
 
6217
  struct ggml_tensor * a,
6218
  int n_past,
6219
  bool inplace) {
 
 
 
 
 
 
6220
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6221
 
6222
  int32_t params[] = { n_past };
6223
  ggml_set_op_params(result, params, sizeof(params));
6224
 
6225
+ result->op = GGML_OP_DIAG_MASK_INF;
 
6226
  result->src[0] = a;
6227
 
6228
  return result;
 
6249
  struct ggml_tensor * a,
6250
  int n_past,
6251
  bool inplace) {
 
 
 
 
 
 
6252
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6253
 
6254
  int32_t params[] = { n_past };
6255
  ggml_set_op_params(result, params, sizeof(params));
6256
 
6257
+ result->op = GGML_OP_DIAG_MASK_ZERO;
 
6258
  result->src[0] = a;
6259
 
6260
  return result;
 
6297
  GGML_ASSERT(mask);
6298
  }
6299
 
 
 
 
 
 
 
6300
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6301
 
6302
  float params[] = { scale, max_bias };
6303
  ggml_set_op_params(result, params, sizeof(params));
6304
 
6305
+ result->op = GGML_OP_SOFT_MAX;
 
6306
  result->src[0] = a;
6307
  result->src[1] = mask;
6308
 
 
6337
  struct ggml_tensor * a,
6338
  struct ggml_tensor * b,
6339
  bool inplace) {
 
 
 
 
 
 
6340
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6341
 
6342
+ result->op = GGML_OP_SOFT_MAX_BACK;
 
6343
  result->src[0] = a;
6344
  result->src[1] = b;
6345
 
 
6388
  GGML_ASSERT(c->ne[0] >= n_dims / 2);
6389
  }
6390
 
 
 
 
 
 
 
6391
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6392
 
6393
  int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
 
6399
  memcpy(params + 10, &beta_slow, sizeof(float));
6400
  ggml_set_op_params(result, params, sizeof(params));
6401
 
6402
+ result->op = GGML_OP_ROPE;
 
6403
  result->src[0] = a;
6404
  result->src[1] = b;
6405
  result->src[2] = c;
 
6527
  GGML_ASSERT(b->type == GGML_TYPE_I32);
6528
  GGML_ASSERT(a->ne[2] == b->ne[0]);
6529
 
 
 
 
 
 
 
 
6530
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
6531
 
6532
  int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
 
6538
  memcpy(params + 10, &beta_slow, sizeof(float));
6539
  ggml_set_op_params(result, params, sizeof(params));
6540
 
6541
+ result->op = GGML_OP_ROPE_BACK;
 
6542
  result->src[0] = a;
6543
  result->src[1] = b;
6544
  result->src[2] = c;
 
6553
  struct ggml_tensor * a,
6554
  float min,
6555
  float max) {
 
 
 
 
 
 
 
6556
  // TODO: when implement backward, fix this:
6557
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6558
 
6559
  float params[] = { min, max };
6560
  ggml_set_op_params(result, params, sizeof(params));
6561
 
6562
+ result->op = GGML_OP_CLAMP;
 
6563
  result->src[0] = a;
6564
 
6565
  return result;
 
6621
  GGML_ASSERT(p0 == 0);
6622
  GGML_ASSERT(d0 == 1);
6623
 
 
 
 
 
 
 
 
6624
  const int64_t ne[4] = {
6625
  ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
6626
  a->ne[1], b->ne[2], 1,
 
6630
  int32_t params[] = { s0, p0, d0 };
6631
  ggml_set_op_params(result, params, sizeof(params));
6632
 
6633
+ result->op = GGML_OP_CONV_TRANSPOSE_1D;
 
6634
  result->src[0] = a;
6635
  result->src[1] = b;
6636
 
 
6638
  }
6639
 
6640
  // ggml_conv_depthwise
 
 
 
 
 
 
 
 
 
 
6641
 
6642
+ struct ggml_tensor * ggml_conv_depthwise_2d(
6643
+ struct ggml_context * ctx,
6644
+ struct ggml_tensor * a,
6645
+ struct ggml_tensor * b,
6646
+ int s0,
6647
+ int s1,
6648
+ int p0,
6649
+ int p1,
6650
+ int d0,
6651
+ int d1) {
6652
  struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
6653
  struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
6654
  ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
 
6668
  // b: [N, IC, IH, IW]
6669
  // result: [N, OH, OW, IC*KH*KW]
6670
  struct ggml_tensor * ggml_im2col(
6671
+ struct ggml_context * ctx,
6672
+ struct ggml_tensor * a,
6673
+ struct ggml_tensor * b,
6674
+ int s0,
6675
+ int s1,
6676
+ int p0,
6677
+ int p1,
6678
+ int d0,
6679
+ int d1,
6680
+ bool is_2D,
6681
+ enum ggml_type dst_type) {
 
6682
  if(is_2D) {
6683
  GGML_ASSERT(a->ne[2] == b->ne[2]);
6684
  } else {
6685
  GGML_ASSERT(a->ne[1] == b->ne[1]);
6686
  GGML_ASSERT(b->ne[3] == 1);
6687
  }
 
 
 
 
 
6688
 
6689
  const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
6690
  const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
 
6703
  int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
6704
  ggml_set_op_params(result, params, sizeof(params));
6705
 
6706
+ result->op = GGML_OP_IM2COL;
 
6707
  result->src[0] = a;
6708
  result->src[1] = b;
6709
 
 
6711
  }
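For reference, the OH/OW computation above relies on the usual convolution output-size formula; a minimal sketch of what ggml_calc_conv_output_size is expected to compute (the helper itself is not shown in this hunk, so treat the body as an assumption):

    // input size ins, kernel size ks, stride s, padding p, dilation d
    static int64_t calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
        return (ins + 2*p - d*(ks - 1) - 1)/s + 1;
    }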
6712
 
6713
  struct ggml_tensor * ggml_im2col_back(
6714
+ struct ggml_context * ctx,
6715
+ struct ggml_tensor * a,
6716
+ struct ggml_tensor * b,
6717
+ int64_t * ne,
6718
+ int s0,
6719
+ int s1,
6720
+ int p0,
6721
+ int p1,
6722
+ int d0,
6723
+ int d1,
6724
+ bool is_2D) {
 
 
 
 
 
 
 
6725
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6726
  int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
6727
  ggml_set_op_params(result, params, sizeof(params));
6728
 
6729
+ result->op = GGML_OP_IM2COL_BACK;
 
6730
  result->src[0] = a;
6731
  result->src[1] = b;
6732
 
 
6740
  struct ggml_context * ctx,
6741
  struct ggml_tensor * a,
6742
  struct ggml_tensor * b,
6743
+ int s0,
6744
+ int s1,
6745
+ int p0,
6746
+ int p1,
6747
+ int d0,
6748
+ int d1) {
6749
  struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
6750
 
6751
  struct ggml_tensor * result =
 
6761
  }
6762
 
6763
  // ggml_conv_2d_sk_p0
6764
+
6765
  struct ggml_tensor * ggml_conv_2d_sk_p0(
6766
  struct ggml_context * ctx,
6767
  struct ggml_tensor * a,
 
6791
  int stride) {
6792
  GGML_ASSERT(a->ne[3] == b->ne[2]);
6793
 
 
 
 
 
 
 
 
6794
  const int64_t ne[4] = {
6795
  ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
6796
  ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
 
6801
 
6802
  ggml_set_op_params_i32(result, 0, stride);
6803
 
6804
+ result->op = GGML_OP_CONV_TRANSPOSE_2D;
 
6805
  result->src[0] = a;
6806
  result->src[1] = b;
6807
 
 
6821
  struct ggml_tensor * a,
6822
  enum ggml_op_pool op,
6823
  int k0,
6824
+ int s0,
6825
+ int p0) {
 
 
 
 
 
 
 
 
6826
  const int64_t ne[4] = {
6827
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
6828
  a->ne[1],
 
6834
  int32_t params[] = { op, k0, s0, p0 };
6835
  ggml_set_op_params(result, params, sizeof(params));
6836
 
6837
+ result->op = GGML_OP_POOL_1D;
 
6838
  result->src[0] = a;
6839
 
6840
  return result;
 
6852
  int s1,
6853
  float p0,
6854
  float p1) {
 
 
 
 
 
 
 
6855
  struct ggml_tensor * result;
6856
  const int64_t ne[4] = {
6857
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
 
6864
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
6865
  ggml_set_op_params(result, params, sizeof(params));
6866
 
6867
+ result->op = GGML_OP_POOL_2D;
 
6868
  result->src[0] = a;
6869
+
6870
  return result;
6871
  }
6872
 
 
6881
  int s1,
6882
  float p0,
6883
  float p1) {
 
 
 
 
 
 
 
6884
  struct ggml_tensor * result;
6885
  result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
6886
 
6887
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
6888
  ggml_set_op_params(result, params, sizeof(params));
6889
 
6890
+ result->op = GGML_OP_POOL_2D_BACK;
 
6891
  result->src[0] = a;
6892
  result->src[1] = af;
6893
+
6894
  return result;
6895
  }
6896
 
6897
  // ggml_upscale
6898
 
6899
  static struct ggml_tensor * ggml_upscale_impl(
6900
+ struct ggml_context * ctx,
6901
+ struct ggml_tensor * a,
6902
+ int ne0,
6903
+ int ne1,
6904
+ int ne2,
6905
+ int ne3) {
 
 
 
 
 
 
 
6906
  GGML_ASSERT(a->ne[0] <= ne0);
6907
  GGML_ASSERT(a->ne[1] <= ne1);
6908
  GGML_ASSERT(a->ne[2] <= ne2);
6909
  GGML_ASSERT(a->ne[3] <= ne3);
6910
 
6911
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
 
 
 
 
 
 
6912
 
6913
+ result->op = GGML_OP_UPSCALE;
6914
  result->src[0] = a;
6915
 
6916
  return result;
6917
  }
6918
 
6919
  struct ggml_tensor * ggml_upscale(
6920
+ struct ggml_context * ctx,
6921
+ struct ggml_tensor * a,
6922
+ int scale_factor) {
6923
  return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
6924
  }
6925
 
6926
  struct ggml_tensor * ggml_upscale_ext(
6927
+ struct ggml_context * ctx,
6928
+ struct ggml_tensor * a,
6929
+ int ne0,
6930
+ int ne1,
6931
+ int ne2,
6932
+ int ne3) {
6933
  return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
6934
  }
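As a usage sketch (the tensor name img is illustrative), ggml_upscale scales the first two dimensions by an integer factor while ggml_upscale_ext sets all four target sizes explicitly:

    // up->ne becomes { img->ne[0]*2, img->ne[1]*2, img->ne[2], img->ne[3] }
    struct ggml_tensor * up = ggml_upscale(ctx, img, 2);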
6935
 
6936
  // ggml_pad
6937
 
6938
  struct ggml_tensor * ggml_pad(
6939
+ struct ggml_context * ctx,
6940
+ struct ggml_tensor * a,
6941
+ int p0,
6942
+ int p1,
6943
+ int p2,
6944
+ int p3) {
 
 
 
 
6945
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
6946
  a->ne[0] + p0,
6947
  a->ne[1] + p1,
6948
  a->ne[2] + p2,
6949
  a->ne[3] + p3);
6950
 
6951
+ result->op = GGML_OP_PAD;
 
6952
  result->src[0] = a;
6953
 
6954
  return result;
 
6957
  // ggml_arange
6958
 
6959
  struct ggml_tensor * ggml_arange(
6960
+ struct ggml_context * ctx,
6961
+ float start,
6962
+ float stop,
6963
+ float step) {
 
6964
  GGML_ASSERT(stop > start);
6965
 
6966
  const int64_t steps = (int64_t) ceilf((stop - start) / step);
6967
 
6968
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
6969
 
 
6970
  ggml_set_op_params_f32(result, 0, start);
6971
  ggml_set_op_params_f32(result, 1, stop);
6972
  ggml_set_op_params_f32(result, 2, step);
6973
 
6974
+ result->op = GGML_OP_ARANGE;
6975
+
6976
  return result;
6977
  }
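A small worked example of the steps computation above (values are illustrative):

    // steps = ceilf((5.0f - 0.0f)/1.0f) = 5
    // -> a 1D F32 tensor of 5 elements, holding { 0, 1, 2, 3, 4 } once the op is computed
    struct ggml_tensor * t = ggml_arange(ctx, 0.0f, 5.0f, 1.0f);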
6978
 
6979
  // ggml_timestep_embedding
6980
 
6981
  struct ggml_tensor * ggml_timestep_embedding(
6982
+ struct ggml_context * ctx,
6983
+ struct ggml_tensor * timesteps,
6984
+ int dim,
6985
+ int max_period) {
 
 
 
 
 
 
 
6986
  int actual_dim = dim;
6987
  if (dim % 2 != 0) {
6988
  actual_dim = dim + 1;
 
6990
 
6991
  struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
6992
 
 
6993
  ggml_set_op_params_i32(result, 0, dim);
6994
  ggml_set_op_params_i32(result, 1, max_period);
6995
 
6996
+ result->op = GGML_OP_TIMESTEP_EMBEDDING;
6997
  result->src[0] = timesteps;
6998
 
6999
  return result;
 
7002
  // ggml_argsort
7003
 
7004
  struct ggml_tensor * ggml_argsort(
7005
+ struct ggml_context * ctx,
7006
+ struct ggml_tensor * a,
7007
+ enum ggml_sort_order order) {
 
 
 
 
 
 
 
7008
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
7009
 
7010
  ggml_set_op_params_i32(result, 0, (int32_t) order);
7011
 
7012
+ result->op = GGML_OP_ARGSORT;
 
7013
  result->src[0] = a;
7014
 
7015
  return result;
 
7062
 
7063
  bool is_node = false;
7064
 
 
 
 
 
7065
  // permute(0, 2, 1, 3)
7066
  int64_t ne[4] = { q->ne[0], q->ne[2], q->ne[1], q->ne[3] };
7067
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
7188
  GGML_ASSERT(sx->ne[1] == d_inner);
7189
  GGML_ASSERT(n_t >= 0);
7190
 
 
 
 
 
 
 
 
7191
  struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
7192
 
7193
+ result->op = GGML_OP_SSM_CONV;
 
7194
  result->src[0] = sx;
7195
  result->src[1] = c;
7196
 
 
7234
  GGML_ASSERT(B->ne[2] == n_seqs);
7235
  }
7236
 
 
 
 
 
 
 
 
7237
  // concatenated y + ssm_states
7238
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));
7239
 
7240
  result->op = GGML_OP_SSM_SCAN;
 
7241
  result->src[0] = s;
7242
  result->src[1] = x;
7243
  result->src[2] = dt;
 
7257
  GGML_ASSERT(a->ne[3] == 1);
7258
  GGML_ASSERT(a->type == GGML_TYPE_F32);
7259
 
 
 
 
 
 
 
 
7260
  // padding
7261
  const int px = (w - a->ne[1]%w)%w;
7262
  const int py = (w - a->ne[2]%w)%w;
 
7271
  int32_t params[] = { npx, npy, w };
7272
  ggml_set_op_params(result, params, sizeof(params));
7273
 
7274
+ result->op = GGML_OP_WIN_PART;
 
7275
  result->src[0] = a;
7276
 
7277
  return result;
 
7287
  int w) {
7288
  GGML_ASSERT(a->type == GGML_TYPE_F32);
7289
 
 
 
 
 
 
 
 
7290
  const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
7291
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7292
 
7293
  int32_t params[] = { w };
7294
  ggml_set_op_params(result, params, sizeof(params));
7295
 
7296
+ result->op = GGML_OP_WIN_UNPART;
 
7297
  result->src[0] = a;
7298
 
7299
  return result;
 
7309
  GGML_ASSERT(qh == kh);
7310
  GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
7311
 
 
 
 
 
 
 
 
7312
  const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
7313
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
7314
 
7315
+ result->op = GGML_OP_GET_REL_POS;
 
7316
  result->src[0] = a;
7317
 
7318
  return result;
 
7336
  GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
7337
  GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
7338
 
 
 
 
 
 
 
7339
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7340
  ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
7341
 
7342
+ result->op = GGML_OP_ADD_REL_POS;
 
7343
  result->src[0] = a;
7344
  result->src[1] = pw;
7345
  result->src[2] = ph;
 
7367
 
7368
  struct ggml_tensor * ggml_rwkv_wkv(
7369
  struct ggml_context * ctx,
7370
+ struct ggml_tensor * k,
7371
+ struct ggml_tensor * v,
7372
+ struct ggml_tensor * r,
7373
+ struct ggml_tensor * tf,
7374
+ struct ggml_tensor * td,
7375
+ struct ggml_tensor * state) {
7376
  GGML_ASSERT(ggml_is_contiguous(k));
7377
  GGML_ASSERT(ggml_is_contiguous(v));
7378
  GGML_ASSERT(ggml_is_contiguous(r));
 
7393
  GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
7394
  }
7395
 
 
 
 
 
 
 
 
7396
  // concat output and new_state
7397
  const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
7398
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
7399
 
7400
+ result->op = GGML_OP_RWKV_WKV;
 
7401
  result->src[0] = k;
7402
  result->src[1] = v;
7403
  result->src[2] = r;
 
7412
 
7413
  static struct ggml_tensor * ggml_unary_impl(
7414
  struct ggml_context * ctx,
7415
+ struct ggml_tensor * a,
7416
+ enum ggml_unary_op op,
7417
+ bool inplace) {
7418
  GGML_ASSERT(ggml_is_contiguous_1(a));
7419
 
 
 
 
 
 
 
7420
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7421
 
7422
  ggml_set_op_params_i32(result, 0, (int32_t) op);
7423
 
7424
+ result->op = GGML_OP_UNARY;
 
7425
  result->src[0] = a;
7426
 
7427
  return result;
 
7430
  struct ggml_tensor * ggml_unary(
7431
  struct ggml_context * ctx,
7432
  struct ggml_tensor * a,
7433
+ enum ggml_unary_op op) {
7434
  return ggml_unary_impl(ctx, a, op, false);
7435
  }
7436
 
7437
  struct ggml_tensor * ggml_unary_inplace(
7438
  struct ggml_context * ctx,
7439
  struct ggml_tensor * a,
7440
+ enum ggml_unary_op op) {
7441
  return ggml_unary_impl(ctx, a, op, true);
7442
  }
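As a usage sketch, unary activations are routed through a single GGML_OP_UNARY node with the sub-op stored in op_params; note the ggml_is_contiguous_1 requirement asserted above:

    // x must be contiguous in its first dimension; applies GELU element-wise
    struct ggml_tensor * y = ggml_unary(ctx, x, GGML_UNARY_OP_GELU);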
7443
 
 
7446
  static struct ggml_tensor * ggml_map_unary_impl_f32(
7447
  struct ggml_context * ctx,
7448
  struct ggml_tensor * a,
7449
+ const ggml_unary_op_f32_t fun,
7450
+ bool inplace) {
 
 
 
 
 
 
7451
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7452
 
7453
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7454
 
7455
+ result->op = GGML_OP_MAP_UNARY;
 
7456
  result->src[0] = a;
7457
 
7458
  return result;
 
7461
  struct ggml_tensor * ggml_map_unary_f32(
7462
  struct ggml_context * ctx,
7463
  struct ggml_tensor * a,
7464
+ const ggml_unary_op_f32_t fun) {
7465
  return ggml_map_unary_impl_f32(ctx, a, fun, false);
7466
  }
7467
 
7468
  struct ggml_tensor * ggml_map_unary_inplace_f32(
7469
  struct ggml_context * ctx,
7470
  struct ggml_tensor * a,
7471
+ const ggml_unary_op_f32_t fun) {
7472
  return ggml_map_unary_impl_f32(ctx, a, fun, true);
7473
  }
7474
 
 
7478
  struct ggml_context * ctx,
7479
  struct ggml_tensor * a,
7480
  struct ggml_tensor * b,
7481
+ const ggml_binary_op_f32_t fun,
7482
+ bool inplace) {
7483
  GGML_ASSERT(ggml_are_same_shape(a, b));
7484
 
 
 
 
 
 
 
7485
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7486
 
7487
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7488
 
7489
+ result->op = GGML_OP_MAP_BINARY;
 
7490
  result->src[0] = a;
7491
  result->src[1] = b;
7492
 
 
7497
  struct ggml_context * ctx,
7498
  struct ggml_tensor * a,
7499
  struct ggml_tensor * b,
7500
+ const ggml_binary_op_f32_t fun) {
7501
  return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
7502
  }
7503
 
 
7505
  struct ggml_context * ctx,
7506
  struct ggml_tensor * a,
7507
  struct ggml_tensor * b,
7508
+ const ggml_binary_op_f32_t fun) {
7509
  return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
7510
  }
7511
 
 
7515
  struct ggml_context * ctx,
7516
  struct ggml_tensor * a,
7517
  const ggml_custom1_op_f32_t fun,
7518
+ bool inplace) {
 
 
 
 
 
 
7519
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7520
 
7521
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7522
 
7523
+ result->op = GGML_OP_MAP_CUSTOM1_F32;
 
7524
  result->src[0] = a;
7525
 
7526
  return result;
 
7547
  struct ggml_tensor * a,
7548
  struct ggml_tensor * b,
7549
  const ggml_custom2_op_f32_t fun,
7550
+ bool inplace) {
 
 
 
 
 
 
7551
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7552
 
7553
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7554
 
7555
+ result->op = GGML_OP_MAP_CUSTOM2_F32;
 
7556
  result->src[0] = a;
7557
  result->src[1] = b;
7558
 
 
7583
  struct ggml_tensor * b,
7584
  struct ggml_tensor * c,
7585
  const ggml_custom3_op_f32_t fun,
7586
+ bool inplace) {
 
 
 
 
 
 
7587
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7588
 
7589
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7590
 
7591
+ result->op = GGML_OP_MAP_CUSTOM3_F32;
 
7592
  result->src[0] = a;
7593
  result->src[1] = b;
7594
  result->src[2] = c;
 
7616
 
7617
  // ggml_map_custom1
7618
  struct ggml_map_custom1_op_params {
7619
+ ggml_custom1_op_t fun;
7620
+ int n_tasks;
7621
+ void * userdata;
7622
  };
7623
 
7624
  static struct ggml_tensor * ggml_map_custom1_impl(
7625
+ struct ggml_context * ctx,
7626
+ struct ggml_tensor * a,
7627
+ const ggml_custom1_op_t fun,
7628
+ int n_tasks,
7629
+ void * userdata,
7630
+ bool inplace) {
7631
  GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7632
 
 
 
 
 
 
 
7633
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7634
 
7635
  struct ggml_map_custom1_op_params params = {
 
7639
  };
7640
  ggml_set_op_params(result, (const void *) &params, sizeof(params));
7641
 
7642
+ result->op = GGML_OP_MAP_CUSTOM1;
 
7643
  result->src[0] = a;
7644
 
7645
  return result;
7646
  }
7647
 
7648
  struct ggml_tensor * ggml_map_custom1(
7649
+ struct ggml_context * ctx,
7650
+ struct ggml_tensor * a,
7651
+ const ggml_custom1_op_t fun,
7652
+ int n_tasks,
7653
+ void * userdata) {
7654
  return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
7655
  }
7656
 
7657
  struct ggml_tensor * ggml_map_custom1_inplace(
7658
+ struct ggml_context * ctx,
7659
+ struct ggml_tensor * a,
7660
+ const ggml_custom1_op_t fun,
7661
+ int n_tasks,
7662
+ void * userdata) {
7663
  return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
7664
  }
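A minimal sketch of a user-defined operator, assuming the usual ggml_custom1_op_t callback signature (dst, a, ith, nth, userdata); the callback name and its body are illustrative:

    // squares each element; ith/nth partition the work across threads
    static void my_sqr(struct ggml_tensor * dst, const struct ggml_tensor * a,
                       int ith, int nth, void * userdata) {
        GGML_ASSERT(ggml_are_same_shape(dst, a));
        const float * src = (const float *) a->data;
        float       * out = (float       *) dst->data;
        const int64_t n   = ggml_nelements(dst);
        for (int64_t i = ith; i < n; i += nth) {
            out[i] = src[i]*src[i];
        }
        GGML_UNUSED(userdata);
    }

    // let ggml choose the number of tasks
    struct ggml_tensor * y = ggml_map_custom1(ctx, x, my_sqr, GGML_N_TASKS_MAX, /*userdata =*/ NULL);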
7665
 
7666
  // ggml_map_custom2
7667
 
7668
  struct ggml_map_custom2_op_params {
7669
+ ggml_custom2_op_t fun;
7670
+ int n_tasks;
7671
+ void * userdata;
7672
  };
7673
 
7674
  static struct ggml_tensor * ggml_map_custom2_impl(
7675
+ struct ggml_context * ctx,
7676
+ struct ggml_tensor * a,
7677
+ struct ggml_tensor * b,
7678
+ const ggml_custom2_op_t fun,
7679
+ int n_tasks,
7680
+ void * userdata,
7681
+ bool inplace) {
7682
  GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7683
 
 
 
 
 
 
 
7684
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7685
 
7686
  struct ggml_map_custom2_op_params params = {
 
7690
  };
7691
  ggml_set_op_params(result, (const void *) &params, sizeof(params));
7692
 
7693
+ result->op = GGML_OP_MAP_CUSTOM2;
 
7694
  result->src[0] = a;
7695
  result->src[1] = b;
7696
 
 
7698
  }
7699
 
7700
  struct ggml_tensor * ggml_map_custom2(
7701
+ struct ggml_context * ctx,
7702
+ struct ggml_tensor * a,
7703
+ struct ggml_tensor * b,
7704
+ const ggml_custom2_op_t fun,
7705
+ int n_tasks,
7706
+ void * userdata) {
7707
  return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
7708
  }
7709
 
7710
  struct ggml_tensor * ggml_map_custom2_inplace(
7711
+ struct ggml_context * ctx,
7712
+ struct ggml_tensor * a,
7713
+ struct ggml_tensor * b,
7714
+ const ggml_custom2_op_t fun,
7715
+ int n_tasks,
7716
+ void * userdata) {
7717
  return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
7718
  }
7719
 
 
7726
  };
7727
 
7728
  static struct ggml_tensor * ggml_map_custom3_impl(
7729
+ struct ggml_context * ctx,
7730
+ struct ggml_tensor * a,
7731
+ struct ggml_tensor * b,
7732
+ struct ggml_tensor * c,
7733
+ const ggml_custom3_op_t fun,
7734
+ int n_tasks,
7735
+ void * userdata,
7736
+ bool inplace) {
7737
  GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7738
 
 
 
 
 
 
 
7739
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7740
 
7741
  struct ggml_map_custom3_op_params params = {
 
7745
  };
7746
  ggml_set_op_params(result, (const void *) &params, sizeof(params));
7747
 
7748
+ result->op = GGML_OP_MAP_CUSTOM3;
 
7749
  result->src[0] = a;
7750
  result->src[1] = b;
7751
  result->src[2] = c;
 
7754
  }
7755
 
7756
  struct ggml_tensor * ggml_map_custom3(
7757
+ struct ggml_context * ctx,
7758
+ struct ggml_tensor * a,
7759
+ struct ggml_tensor * b,
7760
+ struct ggml_tensor * c,
7761
+ const ggml_custom3_op_t fun,
7762
+ int n_tasks,
7763
+ void * userdata) {
7764
  return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
7765
  }
7766
 
7767
  struct ggml_tensor * ggml_map_custom3_inplace(
7768
+ struct ggml_context * ctx,
7769
+ struct ggml_tensor * a,
7770
+ struct ggml_tensor * b,
7771
+ struct ggml_tensor * c,
7772
+ const ggml_custom3_op_t fun,
7773
+ int n_tasks,
7774
+ void * userdata) {
7775
  return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
7776
  }
7777
 
7778
  // ggml_cross_entropy_loss
7779
 
7780
  struct ggml_tensor * ggml_cross_entropy_loss(
7781
+ struct ggml_context * ctx,
7782
+ struct ggml_tensor * a,
7783
+ struct ggml_tensor * b) {
7784
  GGML_ASSERT(ggml_are_same_shape(a, b));
 
 
 
 
 
7785
 
7786
  struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
7787
 
7788
+ result->op = GGML_OP_CROSS_ENTROPY_LOSS;
 
7789
  result->src[0] = a;
7790
  result->src[1] = b;
7791
 
 
7795
  // ggml_cross_entropy_loss_back
7796
 
7797
  struct ggml_tensor * ggml_cross_entropy_loss_back(
7798
+ struct ggml_context * ctx,
7799
+ struct ggml_tensor * a,
7800
+ struct ggml_tensor * b,
7801
+ struct ggml_tensor * c) {
7802
  GGML_ASSERT(ggml_are_same_shape(a, b));
7803
  GGML_ASSERT(ggml_is_scalar(c));
7804
 
7805
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
7806
 
7807
+ result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
 
7808
  result->src[0] = a;
7809
  result->src[1] = b;
7810
  result->src[2] = c;
 
7822
  float beta2,
7823
  float eps,
7824
  float wd) {
7825
+ GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
7826
  GGML_ASSERT(alpha > 0.0f);
7827
  GGML_ASSERT(beta1 >= 0.0f && beta1 <= 1.0f);
7828
  GGML_ASSERT(beta2 >= 0.0f && beta2 <= 1.0f);
 
7831
 
7832
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
7833
 
 
 
 
 
 
 
 
7834
  const int64_t iter = 1;
7835
  memcpy(&result->op_params[0], &iter, sizeof(int64_t));
7836
  ggml_set_op_params_f32(result, 2, alpha);
 
7839
  ggml_set_op_params_f32(result, 5, eps);
7840
  ggml_set_op_params_f32(result, 6, wd);
7841
 
7842
+ result->op = GGML_OP_OPT_STEP_ADAMW;
7843
+ result->src[0] = a;
7844
+ result->src[1] = a->grad;
7845
+ result->src[2] = ggml_dup_tensor(ctx, a);
7846
+ result->src[3] = ggml_dup_tensor(ctx, a);
7847
+
7848
  return result;
7849
  }
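src[2] and src[3] are presumably the first and second AdamW moments. Per element, the op applies roughly the update from the referenced paper (a sketch, not the exact kernel):

    // g = a->grad, m = src[2], v = src[3], t = iter
    // m  = beta1*m + (1.0f - beta1)*g;
    // v  = beta2*v + (1.0f - beta2)*g*g;
    // mh = m/(1.0f - powf(beta1, t));
    // vh = sqrtf(v/(1.0f - powf(beta2, t))) + eps;
    // a  = a*(1.0f - alpha*wd) - alpha*mh/vh;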
7850
 
7851
  ////////////////////////////////////////////////////////////////////////////////
7852
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7853
  // ggml_compute_forward_dup
7854
 
7855
  static void ggml_compute_forward_dup_same_cont(
 
17569
  struct ggml_tensor * * checkpoints,
17570
  int n_checkpoints) {
17571
  ggml_graph_cpy(gf, gb_tmp);
17572
+ ggml_build_backward_expand(ctx, gf, gb_tmp, false);
17573
 
17574
  if (n_checkpoints <= 0) {
17575
  ggml_graph_cpy(gb_tmp, gb);
 
18221
  ggml_soft_max_back(ctx, tensor->grad, tensor),
18222
  zero_table, acc_table);
18223
  }
18224
+ GGML_ASSERT((!src1 || !src1->grad) && "backward pass for softmax mask not implemented");
18225
  } break;
18226
  case GGML_OP_SOFT_MAX_BACK:
18227
  {
 
18262
  beta_slow),
18263
  zero_table, acc_table);
18264
  }
18265
+ GGML_ASSERT((!src2 || !src2->grad) && "gradients for freq factors not implemented");
18266
  } break;
18267
  case GGML_OP_ROPE_BACK:
18268
  {
 
18384
  }
18385
  case GGML_OP_FLASH_ATTN_EXT:
18386
  {
18387
+ GGML_ABORT("FA backward pass not adapted after rework");
18388
  struct ggml_tensor * flash_grad = NULL;
18389
  if (src0->grad || src1->grad || tensor->src[2]->grad) {
18390
  int32_t t = ggml_get_op_params_i32(tensor, 0);
 
18559
  tensor->grad),
18560
  zero_table, acc_table);
18561
  }
18562
+ GGML_ASSERT(!src1->grad && "backward pass for labels not implemented");
18563
  } break;
18564
  case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
18565
  {
 
18610
  }
18611
  }
18612
 
18613
+ if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
18614
  // reached a leaf node, not part of the gradient graph (e.g. a constant)
18615
  GGML_ASSERT(cgraph->n_leafs < cgraph->size);
18616
 
 
18628
  }
18629
 
18630
  cgraph->nodes[cgraph->n_nodes] = node;
 
 
 
18631
  cgraph->n_nodes++;
18632
  }
18633
  }
 
18655
  ggml_build_forward_impl(cgraph, tensor, true);
18656
  }
18657
 
18658
+ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate) {
18659
  GGML_ASSERT(gf->n_nodes > 0);
18660
  GGML_ASSERT(gf->grads);
18661
 
18662
+ for (int i = 0; i < gf->n_nodes; ++i) {
18663
+ struct ggml_tensor * node = gf->nodes[i];
18664
+
18665
+ bool needs_grad = node->flags & GGML_TENSOR_FLAG_PARAM;
18666
+ bool ignore_src[GGML_MAX_SRC] = {false};
18667
+ switch (node->op) {
18668
+ // gradients in node->src[0] for one reason or another have no effect on output gradients
18669
+ case GGML_OP_IM2COL: // only used for its shape
18670
+ case GGML_OP_IM2COL_BACK: // same as IM2COL
18671
+ ignore_src[0] = true;
18672
+ break;
18673
+ case GGML_OP_UNARY: {
18674
+ const enum ggml_unary_op uop = ggml_get_unary_op(node);
18675
+ // SGN and STEP unary ops are piecewise constant, so their input gradients are zero
18676
+ if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
18677
+ ignore_src[0] = true;
18678
+ }
18679
+ } break;
18680
+
18681
+ // gradients in node->src[1] for one reason or another have no effect on output gradients
18682
+ case GGML_OP_CPY: // gradients in CPY target are irrelevant
18683
+ case GGML_OP_GET_ROWS: // row indices not differentiable
18684
+ case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
18685
+ case GGML_OP_ROPE: // positions not differentiable
18686
+ ignore_src[1] = true;
18687
+ break;
18688
 
18689
+ default:
18690
+ break;
18691
+ }
18692
+ for (int j = 0; j < GGML_MAX_SRC; ++j) {
18693
+ if (!node->src[j] || !node->src[j]->grad || ignore_src[j]) {
18694
+ continue;
18695
  }
18696
+ GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
18697
+ needs_grad = true;
18698
+ break;
18699
+ }
18700
+ if (!needs_grad) {
18701
+ continue;
18702
  }
18703
+
18704
+ // inplace operations are currently not supported
18705
+ GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
18706
+ node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
18707
+
18708
+ // create a new tensor with the same type and shape as the node and set it as grad
18709
+ node->grad = ggml_dup_tensor(ctx, node);
18710
  }
18711
 
18712
  // keep tables of original gradients for replacement/accumulation logic
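To illustrate the effect of the allocation loop above on a typical lookup (tensor names are hypothetical):

    ggml_set_param(ctx, weight);                                  // F32 weight matrix
    struct ggml_tensor * rows = ggml_get_rows(ctx, weight, ids);  // ids is an I32 tensor
    // after ggml_build_backward_expand():
    //   weight->grad != NULL  (GGML_TENSOR_FLAG_PARAM is set)
    //   rows->grad   != NULL  (src[0] has a gradient, so the node needs one too)
    //   ids->grad    == NULL  (src[1] of GGML_OP_GET_ROWS is ignored: row indices are not differentiable)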
 
21571
  struct ggml_context * ctx,
21572
  struct ggml_opt_params params,
21573
  struct ggml_tensor * f) {
 
 
21574
  bool free_ctx = false;
21575
  if (ctx == NULL) {
21576
  struct ggml_init_params params_ctx = {
 
21611
  ggml_build_forward_expand(gf, f);
21612
 
21613
  struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
21614
+ ggml_build_backward_expand(ctx, gf, gb, false);
21615
 
21616
  return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
21617
  }
 
21664
  tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
21665
  }
21666
 
21667
+ void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
21668
+ GGML_UNUSED(ctx); // TODO: remove this parameter
21669
+ tensor->flags |= GGML_TENSOR_FLAG_PARAM;
21670
+ }
21671
+
21672
+ void ggml_set_loss(struct ggml_tensor * tensor) {
21673
+ GGML_ASSERT(ggml_is_scalar(tensor));
21674
+ GGML_ASSERT(tensor->type == GGML_TYPE_F32);
21675
+ tensor->flags |= GGML_TENSOR_FLAG_LOSS;
21676
+ }
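Putting the pieces together, a minimal training-graph sketch with the updated API (tensor names such as weights, logits and labels are illustrative):

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);

    ggml_set_param(ctx, weights);                    // trainable tensor(s)
    struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);
    ggml_set_loss(loss);                             // scalar F32, flagged as loss
    ggml_build_forward_expand(gf, loss);

    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, /*accumulate =*/ false);
    // gradient tensors are now allocated inside ggml_build_backward_expand itself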
21677
+
21678
  ////////////////////////////////////////////////////////////////////////////////
21679
 
21680
  void ggml_quantize_init(enum ggml_type type) {