diff --git a/Makefile b/Makefile index f3120d8840e1f9..e494c53fa36a40 100644 --- a/Makefile +++ b/Makefile @@ -958,14 +958,14 @@ OBJ_GGML = \ $(DIR_GGML)/src/ggml-alloc.o \ $(DIR_GGML)/src/ggml-backend.o \ $(DIR_GGML)/src/ggml-backend-reg.o \ - $(DIR_GGML)/src/ggml-fp8_cpp11.o \ + $(DIR_GGML)/src/ggml-fp8.o \ $(DIR_GGML)/src/ggml-opt.o \ $(DIR_GGML)/src/ggml-quants.o \ $(DIR_GGML)/src/ggml-threading.o \ $(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp11.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \ $(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu-fp8_cpp11.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu-fp8.o \ $(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \ $(OBJ_GGML_EXT) @@ -1106,13 +1106,10 @@ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d) # Default target all: $(BUILD_TARGETS) -# for c++17 build -$(DIR_GGML)/%_cpp17.o: $(DIR_GGML)/%.cpp - $(CXX) $(CXXFLAGS) -MMD -std=c++17 -c $< -o $@ - -# for c++11 build -$(DIR_GGML)/%_cpp11.o: $(DIR_GGML)/%.cpp - $(CXX) $(CXXFLAGS) -MMD -std=c++11 -c $< -o $@ +# force c++ build for source file that have same name as c file +# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files +$(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ # Rules for building object files $(DIR_GGML)/%.o: $(DIR_GGML)/%.c diff --git a/Package.swift b/Package.swift index 3b06197e070076..d3a2e67ab60459 100644 --- a/Package.swift +++ b/Package.swift @@ -21,6 +21,7 @@ var sources = [ "ggml/src/ggml-threading.cpp", "ggml/src/ggml-quants.c", "ggml/src/ggml-fp8.cpp", + "ggml/src/ggml-cpu/ggml-cpu-fp8.c", ] var resources: [Resource] = [] @@ -89,5 +90,5 @@ let package = Package( linkerSettings: linkerSettings ) ], - cxxLanguageStandard: .cxx11 + cxxLanguageStandard: .cxx17 ) diff --git a/ggml/src/ggml-cpu/ggml-cpu-fp8.cpp b/ggml/src/ggml-cpu/ggml-cpu-fp8.cpp index ac6fe4231238d3..780b2850b1f548 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-fp8.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu-fp8.cpp @@ -14,20 +14,20 @@ namespace fp8 { template inline uint8_t from_float(float value) { FP8 out; - union { + union fp32_int32 { float f; uint32_t bits; } in = {value}; out.bits = (in.bits >> 24) & 0x80; in.bits &= 0x7fffffff; - if (in.f >= FP8::MAX()) { + if (in.f >= FP8::MAX) { out.bits |= 0x7E; - } else if (in.f < FP8::MIN()) { // => 0. + } else if (in.f < FP8::MIN) { // => 0. } else { - in.f *= exp_m2::E_BIAS()-127>(); - uint32_t eps = (0x3fffff>>FP8::M()) + ((in.bits >> (23-FP8::M())) & 0x1); + in.f *= exp_f2::E_BIAS-127>(); + uint32_t eps = (0x3fffff>>FP8::M) + ((in.bits >> (23-FP8::M)) & 0x1); in.bits += eps; - out.bits |= (in.bits >> (23-FP8::M())) & 0x7F; + out.bits |= (in.bits >> (23-FP8::M)) & 0x7F; } return out.bits; } @@ -37,16 +37,16 @@ inline uint8_t from_float(float value) { #endif template inline float to_float(const FP8& in) { - union { + union fp32_int32 { float f; uint32_t bits; } out = {0}; out.bits = in.bits & 0x80; out.bits <<= 24; uint32_t _bits = in.bits & 0x7F; - _bits <<= (23-FP8::M()); + _bits <<= (23-FP8::M); out.bits |= _bits; - out.f *= exp_p2<127-FP8::E_BIAS()>(); + out.f *= exp_f2<127-FP8::E_BIAS>(); return out.f; } } // namespace fp8 @@ -91,8 +91,8 @@ static inline void conv(const float* x, bloc_fp8* y, int64_t size) { for (int64_t i=0; i::MAX()/m; - y[q].d = m/FP8::MAX(); + const float D = FP8::MAX/m; + y[q].d = m/FP8::MAX; #ifdef GGML_USE_OPENMP_SIMD #pragma omp simd #endif @@ -154,14 +154,14 @@ float dot_reg(const bloc_fp8* x, const _Y* y, int64_t size) { for(int64_t v=0; v(); } + for(int64_t v=0; v(); } for(int64_t v=0; v* x, const _Y* y, int64_t size) { // apply scale for(int64_t r=0; r(); + Z[r][v] += Z0[r][v]*(x[q]).d * exp_f2<127-fp8_t::E_BIAS>(); } } } diff --git a/ggml/src/ggml-fp8.cpp b/ggml/src/ggml-fp8.cpp index a0cc95fd1f4321..dd5d12063702c8 100644 --- a/ggml/src/ggml-fp8.cpp +++ b/ggml/src/ggml-fp8.cpp @@ -10,7 +10,7 @@ template inline FP8 float_to_fp8(float value) { FP8 out; - union { + union fp32_int32 { float f; uint32_t bits; } in = {value}; @@ -19,39 +19,39 @@ inline FP8 float_to_fp8(float value) { // value without sign in.bits &= 0x7fffffff; //GGML_ASSERT(in.bits < 0x7f800000); // +/- infinity or NAN - if (in.f >= FP8::MAX()) { + if (in.f >= FP8::MAX) { out.bits |= 0x7E; - } else if (in.f < FP8::MIN()) { // => 0. + } else if (in.f < FP8::MIN) { // => 0. // OK: S.0000000 } else { - in.f *= exp_m2::E_BIAS()-127>(); + in.f *= exp_f2::E_BIAS-127>(); // - trunc //uint32_t eps = 0; // - rounding half away from zero - //uint32_t eps = 0x400000>>FP8::M(); + //uint32_t eps = 0x400000>>FP8::M; // - rounding half toward zero - //uint32_t eps = 0x3fffff>>FP8::M(); + //uint32_t eps = 0x3fffff>>FP8::M; // - rounding to nearest even - uint32_t eps = (0x3fffff>>FP8::M()) + ((in.bits >> (23-FP8::M())) & 0x1); + uint32_t eps = (0x3fffff>>FP8::M) + ((in.bits >> (23-FP8::M)) & 0x1); // shift mantissa. in.bits += eps; - out.bits |= (in.bits >> (23-FP8::M())) & 0x7F; + out.bits |= (in.bits >> (23-FP8::M)) & 0x7F; } return out; } template inline float fp8_to_float(const FP8& in) { - union { + union fp32_int32 { float f; uint32_t bits; } out = {0}; out.bits = in.bits & 0x80; out.bits <<= 24; uint32_t _bits = in.bits & 0x7F; - _bits <<= (23-FP8::M()); + _bits <<= (23-FP8::M); out.bits |= _bits; - out.f *= exp_p2<127-FP8::E_BIAS()>(); + out.f *= exp_f2<127-FP8::E_BIAS>(); return out.f; } @@ -93,8 +93,8 @@ static inline void conv(const float* x, bloc_fp8* y, int64_t size) { for (int64_t i=0; i::MAX()/m; - y[q].d = m/FP8::MAX(); + const float D = FP8::MAX/m; + y[q].d = m/FP8::MAX; for (int64_t i=0; i(x[q*QK+i]*D); } diff --git a/ggml/src/ggml-fp8.h b/ggml/src/ggml-fp8.h index b793028c6429fd..da7784d46010e7 100644 --- a/ggml/src/ggml-fp8.h +++ b/ggml/src/ggml-fp8.h @@ -1,29 +1,27 @@ // this is more a .inc. #ifdef __cplusplus template -constexpr float exp_p2() { - return exp_p2()*2; -} -template -constexpr float exp_m2() { - return exp_m2()/2; -} -template constexpr int exp_i2() { return 1 << N; } -template<> constexpr float exp_p2<0>() { return 1;} -template<> constexpr float exp_m2<0>() { return 1;} + +template +constexpr float exp_f2() { + if constexpr (N>0) return exp_f2()*2; + if constexpr (N<0) return exp_f2()/2; + if constexpr (N==0) return 1.; +} + template //, int M=7-E> 1.7 bits! struct FP8 { uint8_t bits; using type = FP8<_E>; - static constexpr int E() { return _E; } - static constexpr int M() { return 7-_E; } - static constexpr int E_BIAS() { return exp_i2<_E-1>()-1; } - static constexpr float MAX() { return (2-exp_m2<-M()+1>())*exp_p2()>(); } - static constexpr float MIN() { return exp_m2<-M()>()*exp_m2<2-exp_i2<_E-1>()>(); } + static constexpr int E = _E; + static constexpr int M = (7-_E); + static constexpr int E_BIAS = exp_i2()-1; + static constexpr float MAX = (2-exp_f2<-M+1>())*exp_f2()>(); + static constexpr float MIN = exp_f2<-M>()*exp_f2<2-exp_i2()>(); }; extern "C" {