Skip to content

Commit

Permalink
Add support for FP8 (E4M3)
Browse files Browse the repository at this point in the history
See #549
  • Loading branch information
jart committed Aug 22, 2024
1 parent c44664b commit 42fa422
Show file tree
Hide file tree
Showing 23 changed files with 1,404 additions and 85 deletions.
2 changes: 1 addition & 1 deletion build/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ INSTALL = install

ARFLAGS = rcsD
CXXFLAGS = -frtti -std=gnu++23
CCFLAGS = -O2 -fexceptions -fsignaling-nans -ffunction-sections -fdata-sections
CCFLAGS = -O2 -g -fexceptions -fsignaling-nans -ffunction-sections -fdata-sections
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes -DLLAMAFILE_DEBUG
TARGET_ARCH = -Xx86_64-mtune=znver4

Expand Down
97 changes: 97 additions & 0 deletions llama.cpp/ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,87 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)

// FP8 (E4M3)
//
// Exponent bits : 4
// Mantissa bits : 3
// Exponent bias : 7
// Infinities : N/A
// NaN : S.1111.111
// Zeros : S.0000.000
// Max normal    : S.1111.110 = 1.75 * 2**8 = 448
// Min normal    : S.0001.000 = 2**(-6)
// Max subnorm   : S.0000.111 = 0.875 * 2**(-6)
// Min subnorm   : S.0000.001 = 2**(-9)
//
// See "FP8 Formats For Deep Learning"
// §3 FP8 Binary Interchange Format
// NVIDIA / ARM / Intel

static uint8_t to_fp8(float f) {
uint8_t sign = signbit(f) ? 0x80 : 0;
if (isnan(f)) return sign | 127;
if (!f) return sign;
f = fabsf(f);
int exp = floorf(log2f(f));
float mantissa = f / exp2f(exp) - 1.0f;
if (exp < -6) {
mantissa = f / exp2f(-6); // subnormal
exp = -7;
}
if (exp > 8) {
return sign | 0x7E; // overflow
}
uint8_t exp_bits = (exp + 7) & 0x0F;
uint8_t mantissa_bits = (uint8_t)(mantissa * 8) & 0x07;
// [jpp] avoid generate NAN ?
if (exp_bits == 0x0F && mantissa_bits == 0x07) mantissa_bits = 0x06;
return sign | (exp_bits << 3) | mantissa_bits;
}

// Decode an FP8 E4M3 byte into an IEEE binary32 float. S.1111.111 decodes
// as NaN; magnitudes below the minimum normal (exponent field 0) come from
// a precomputed subnormal table; everything else is widened field by field.
static float from_fp8(uint8_t fp8) {
    // binary32 bit patterns for the eight E4M3 subnormal magnitudes
    // (k * 2**-9 for k = 0..7)
    static const unsigned kSubnormal[8] = {
        0x00000000u, 0x3b000000u, 0x3b800000u, 0x3bc00000u,
        0x3c000000u, 0x3c200000u, 0x3c400000u, 0x3c600000u,
    };
    uint8_t magnitude = fp8 & 127;
    if (magnitude == 127)
        return NAN;
    uint32_t sign = (uint32_t)(fp8 & 128) << 24; // bit 7 -> bit 31
    uint32_t bits;
    if (magnitude < 8) {
        bits = sign | kSubnormal[magnitude];
    } else {
        uint32_t biased = (uint32_t)(magnitude >> 3) + 120;   // rebias 7 -> 127
        bits = sign | (biased << 23) | ((uint32_t)(fp8 & 7) << 20);
    }
    union {
        uint32_t i;
        float f;
    } pun = {bits}; // union pun avoids strict-aliasing violations
    return pun.f;
}

// Scalar reference conversion: float -> ggml_fp8_t. The union pun
// reinterprets the raw E4M3 byte produced by to_fp8() as the opaque
// ggml_fp8_t wrapper type without violating strict aliasing.
static ggml_fp8_t ggml_compute_fp32_to_fp8(float f) {
    union {
        uint8_t i;
        ggml_fp8_t f;
    } u;
    u.i = to_fp8(f);
    return u.f;
}

// Scalar reference conversion: ggml_fp8_t -> float. Extracts the raw
// E4M3 byte through a union pun and decodes it with from_fp8().
static float ggml_compute_fp8_to_fp32(ggml_fp8_t f) {
    union {
        ggml_fp8_t f;
        uint8_t i;
    } u;
    u.f = f;
    return from_fp8(u.i);
}

#define GGML_COMPUTE_FP8_TO_FP32(x) ggml_compute_fp8_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP8(x) ggml_compute_fp32_to_fp8(x)

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -146,6 +227,22 @@ extern "C" {
#include <sys/prctl.h>
#endif

// precomputed f32 table for fp8 (1 KB)
// defined in ggml.c, initialized in ggml_init()
extern float ggml_table_f32_fp8[1 << 8];

// Fast fp8 -> fp32 conversion: the raw E4M3 byte indexes directly into the
// 256-entry ggml_table_f32_fp8 table (initialized in ggml_init()).
inline static float ggml_lookup_fp8_to_fp32(ggml_fp8_t f) {
    union {
        ggml_fp8_t f;
        uint8_t i;
    } u;
    u.f = f;
    return ggml_table_f32_fp8[u.i];
}

#define GGML_FP32_TO_FP8(x) GGML_COMPUTE_FP32_TO_FP8(x)
#define GGML_FP8_TO_FP32(x) ggml_lookup_fp8_to_fp32(x)
// #define GGML_FP8_TO_FP32(x) GGML_COMPUTE_FP8_TO_FP32(x)

// 16-bit float
// on Arm, we use __fp16
// on x86, we use uint16_t
Expand Down
3 changes: 2 additions & 1 deletion llama.cpp/ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,8 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {

static bool ggml_metal_supports_op(const struct ggml_backend_metal_context * ctx, const struct ggml_tensor * op) {
for (size_t i = 0, n = 3; i < n; ++i) {
if (op->src[i] != NULL && op->src[i]->type == GGML_TYPE_BF16) {
if (op->src[i] != NULL && (op->src[i]->type == GGML_TYPE_BF16 ||
op->src[i]->type == GGML_TYPE_FP8)) {
return false;
}
}
Expand Down
12 changes: 12 additions & 0 deletions llama.cpp/ggml-quants.inc
Original file line number Diff line number Diff line change
Expand Up @@ -14699,6 +14699,18 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
return false;
}
} break;
case GGML_TYPE_FP8:
{
int nans = 0;
const unsigned char * f = (const unsigned char *) data;
for (size_t i = 0; i < nb; ++i) {
nans += f[i] > 0xff;

This comment has been minimized.

Copy link
@Djip007

Djip007 Aug 22, 2024

Contributor

may be more something like:

                    nans += (f[i] & 0x7f) == 0x7f;

This comment has been minimized.

Copy link
@jart

jart Aug 22, 2024

Author Collaborator

Fixed. b81b590

}
if (nans) {
fprintf(stderr, "%s: found %d NaNs in row of %zu FP8 values\n", __func__, nans, nb);
return false;
}
} break;
case GGML_TYPE_F16:
{
const ggml_fp16_t * f = (const ggml_fp16_t *) data;
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_avx
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_avx
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_avx
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_avx
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_avx
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_avx
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_avx
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_avx
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_avx
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_avx
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_avx
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_avx
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_avx
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_avx
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_avx
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_avx
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_avx
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_avx
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_avx
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_avx
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_avx
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_avx
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_avx
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_avx
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_avx
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_avx
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_avx
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_avx2
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_avx2
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_avx2
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_avx2
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_avx2
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_avx2
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_avx2
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_avx2
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_avx2
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_avx2
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_avx2
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_avx2
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_avx2
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_avx2
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_avx2
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_avx2
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_avx2
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_avx2
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_avx2
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_avx2
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_avx2
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_avx2
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_avx2
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_avx2
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_avx2
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_avx2
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_avx2
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_avx512
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_avx512
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_avx512
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_avx512
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_avx512
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_avx512
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_avx512
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_avx512
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_avx512
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_avx512
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_avx512
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_avx512
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_avx512
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_avx512
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_avx512
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_avx512
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_avx512
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_avx512
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_avx512
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_avx512
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_avx512
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_avx512
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_avx512
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_avx512
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_avx512
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_avx512
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_avx512
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-avx512bf16.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_avx512bf16
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_avx512bf16
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_avx512bf16
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_avx512bf16
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_avx512bf16
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_avx512bf16
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_avx512bf16
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_avx512bf16
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_avx512bf16
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_avx512bf16
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_avx512bf16
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_avx512bf16
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_avx512bf16
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_avx512bf16
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_avx512bf16
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_avx512bf16
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_avx512bf16
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_avx512bf16
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_avx512bf16
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_avx512bf16
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_avx512bf16
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_avx512bf16
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_avx512bf16
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_avx512bf16
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_avx512bf16
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_avx512bf16
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_avx512bf16
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-avx512vl.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_avx512vl
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_avx512vl
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_avx512vl
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_avx512vl
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_avx512vl
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_avx512vl
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_avx512vl
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_avx512vl
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_avx512vl
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_avx512vl
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_avx512vl
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_avx512vl
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_avx512vl
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_avx512vl
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_avx512vl
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_avx512vl
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_avx512vl
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_avx512vl
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_avx512vl
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_avx512vl
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_avx512vl
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_avx512vl
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_avx512vl
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_avx512vl
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_avx512vl
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_avx512vl
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_avx512vl
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-f16c.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_f16c
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_f16c
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_f16c
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_f16c
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_f16c
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_f16c
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_f16c
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_f16c
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_f16c
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_f16c
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_f16c
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_f16c
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_f16c
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_f16c
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_f16c
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_f16c
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_f16c
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_f16c
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_f16c
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_f16c
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_f16c
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_f16c
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_f16c
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_f16c
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_f16c
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_f16c
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_f16c
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-fma.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_fma
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_fma
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_fma
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_fma
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_fma
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_fma
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_fma
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_fma
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_fma
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_fma
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_fma
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_fma
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_fma
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_fma
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_fma
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_fma
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_fma
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_fma
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_fma
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_fma
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_fma
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_fma
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_fma
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_fma
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_fma
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_fma
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_fma
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/ggml-vector-amd-k8.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
#define ggml_bf16_to_fp32_row ggml_bf16_to_fp32_row_amd_k8
#define ggml_fp32_to_bf16_row ggml_fp32_to_bf16_row_amd_k8
#define ggml_fp32_to_bf16_row_ref ggml_fp32_to_bf16_row_ref_amd_k8
#define ggml_fp8_to_fp32_row ggml_fp8_to_fp32_row_amd_k8
#define ggml_fp32_to_fp8_row ggml_fp32_to_fp8_row_amd_k8
#define ggml_vec_dot_f32 ggml_vec_dot_f32_amd_k8
#define ggml_vec_dot_f16 ggml_vec_dot_f16_amd_k8
#define ggml_vec_dot_bf16 ggml_vec_dot_bf16_amd_k8
#define ggml_vec_dot_fp8 ggml_vec_dot_fp8_amd_k8
#define ggml_vec_dot_f16_unroll ggml_vec_dot_f16_unroll_amd_k8
#define ggml_vec_mad_f32_unroll ggml_vec_mad_f32_unroll_amd_k8
#define ggml_vec_set_i8 ggml_vec_set_i8_amd_k8
#define ggml_vec_set_i16 ggml_vec_set_i16_amd_k8
#define ggml_vec_set_i32 ggml_vec_set_i32_amd_k8
#define ggml_vec_set_f16 ggml_vec_set_f16_amd_k8
#define ggml_vec_set_bf16 ggml_vec_set_bf16_amd_k8
#define ggml_vec_set_fp8 ggml_vec_set_fp8_amd_k8
#define ggml_vec_add_f32 ggml_vec_add_f32_amd_k8
#define ggml_vec_add1_f32 ggml_vec_add1_f32_amd_k8
#define ggml_vec_acc_f32 ggml_vec_acc_f32_amd_k8
Expand Down Expand Up @@ -50,6 +54,7 @@
#define ggml_vec_sum_f32_ggf ggml_vec_sum_f32_ggf_amd_k8
#define ggml_vec_sum_f16_ggf ggml_vec_sum_f16_ggf_amd_k8
#define ggml_vec_sum_bf16_ggf ggml_vec_sum_bf16_ggf_amd_k8
#define ggml_vec_sum_fp8_ggf ggml_vec_sum_fp8_ggf_amd_k8
#define ggml_vec_max_f32 ggml_vec_max_f32_amd_k8
#define ggml_vec_argmax_f32 ggml_vec_argmax_f32_amd_k8
#define ggml_vec_soft_max_f32 ggml_vec_soft_max_f32_amd_k8
Expand Down
Loading

0 comments on commit 42fa422

Please sign in to comment.