JohannesGaessler committed on
Commit
1bed92f
·
1 Parent(s): d4c0faf

Fix FlashAttention debug test, FP32 assert (llama/7684)

Browse files
Files changed (1) hide show
  1. ggml-cuda/fattn-vec-f32.cuh +0 -4
ggml-cuda/fattn-vec-f32.cuh CHANGED
@@ -278,14 +278,10 @@ void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx,
278
 
279
  template <int D, ggml_type type_K, ggml_type type_V>
280
  void ggml_cuda_flash_attn_ext_vec_f32_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
281
- ggml_tensor * KQV = dst;
282
  ggml_tensor * Q = dst->src[0];
283
  ggml_tensor * K = dst->src[1];
284
  ggml_tensor * V = dst->src[2];
285
 
286
- const int32_t precision = KQV->op_params[2];
287
- GGML_ASSERT(precision == GGML_PREC_DEFAULT);
288
-
289
  GGML_ASSERT(K->type == type_K);
290
  GGML_ASSERT(V->type == type_V);
291
 
 
278
 
279
  template <int D, ggml_type type_K, ggml_type type_V>
280
  void ggml_cuda_flash_attn_ext_vec_f32_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
 
281
  ggml_tensor * Q = dst->src[0];
282
  ggml_tensor * K = dst->src[1];
283
  ggml_tensor * V = dst->src[2];
284
 
 
 
 
285
  GGML_ASSERT(K->type == type_K);
286
  GGML_ASSERT(V->type == type_V);
287