ggml : move ggml_flash_attn_ext_get_prec to ggml-impl.h

ggerganov · Nov 8, 2024 · 1888c1f
1 parent bc143ec
commit 1888c1f
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 3 deletions.
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
@@ -1746,9 +1746,6 @@ extern "C" {
             struct ggml_tensor * a,
             enum ggml_prec       prec);
 
-    GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
-            const struct ggml_tensor * a);
-
     // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
            struct ggml_context * ctx,

diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu
@@ -7,6 +7,8 @@
 #include "fattn-wmma-f16.cuh"
 #include "fattn.cuh"
 
+#include "ggml-impl.h"
+
 #include <cstdint>
 
 static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
@@ -114,6 +114,8 @@ static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, floa
     ((float *)(tensor->op_params))[i] = value;
 }
 
+static enum ggml_prec ggml_flash_attn_ext_get_prec(const struct ggml_tensor * a);
+
 struct ggml_map_custom1_op_params {
     ggml_custom1_op_t  fun;
     int                n_tasks;