move to c++17

ggerganov · Nov 30, 2024 · e6c4791 · e6c4791
1 parent 038b5fa
commit e6c4791
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 53 deletions.
diff --git a/Makefile b/Makefile
@@ -958,14 +958,14 @@ OBJ_GGML = \
 	$(DIR_GGML)/src/ggml-alloc.o \
 	$(DIR_GGML)/src/ggml-backend.o \
 	$(DIR_GGML)/src/ggml-backend-reg.o \
-	$(DIR_GGML)/src/ggml-fp8_cpp11.o \
+	$(DIR_GGML)/src/ggml-fp8.o \
 	$(DIR_GGML)/src/ggml-opt.o \
 	$(DIR_GGML)/src/ggml-quants.o \
 	$(DIR_GGML)/src/ggml-threading.o \
 	$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
-	$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp11.o \
+	$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
 	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
-	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-fp8_cpp11.o \
+	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-fp8.o \
 	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
 	$(OBJ_GGML_EXT)
 
@@ -1106,13 +1106,10 @@ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
 # Default target
 all: $(BUILD_TARGETS)
 
-# for c++17 build
-$(DIR_GGML)/%_cpp17.o: $(DIR_GGML)/%.cpp
-	$(CXX) $(CXXFLAGS) -MMD -std=c++17 -c $< -o $@
-
-# for c++11 build
-$(DIR_GGML)/%_cpp11.o: $(DIR_GGML)/%.cpp
-	$(CXX) $(CXXFLAGS) -MMD -std=c++11 -c $< -o $@
+# force c++ build for source file that have same name as c file
+# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
+$(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp
+	$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
 
 # Rules for building object files
 $(DIR_GGML)/%.o: $(DIR_GGML)/%.c

diff --git a/Package.swift b/Package.swift
@@ -21,6 +21,7 @@ var sources = [
     "ggml/src/ggml-threading.cpp",
     "ggml/src/ggml-quants.c",
     "ggml/src/ggml-fp8.cpp",
+    "ggml/src/ggml-cpu/ggml-cpu-fp8.c",
 ]
 
 var resources: [Resource] = []
@@ -89,5 +90,5 @@ let package = Package(
             linkerSettings: linkerSettings
         )
     ],
-    cxxLanguageStandard: .cxx11
+    cxxLanguageStandard: .cxx17
 )
diff --git a/ggml/src/ggml-cpu/ggml-cpu-fp8.cpp b/ggml/src/ggml-cpu/ggml-cpu-fp8.cpp
@@ -14,20 +14,20 @@ namespace fp8 {
 template<int E>
 inline uint8_t from_float(float value) {
     FP8<E> out;
-    union {
+    union fp32_int32 {
         float f;
         uint32_t bits;
     } in = {value};
     out.bits = (in.bits >> 24) & 0x80;
     in.bits &= 0x7fffffff;
-    if (in.f >= FP8<E>::MAX()) {
+    if (in.f >= FP8<E>::MAX) {
         out.bits |= 0x7E;
-    } else if (in.f < FP8<E>::MIN()) { // => 0.
+    } else if (in.f < FP8<E>::MIN) { // => 0.
     } else {
-        in.f *= exp_m2<FP8<E>::E_BIAS()-127>();
-        uint32_t eps = (0x3fffff>>FP8<E>::M()) + ((in.bits >> (23-FP8<E>::M())) & 0x1);
+        in.f *= exp_f2<FP8<E>::E_BIAS-127>();
+        uint32_t eps = (0x3fffff>>FP8<E>::M) + ((in.bits >> (23-FP8<E>::M)) & 0x1);
         in.bits += eps;
-        out.bits |= (in.bits >> (23-FP8<E>::M())) & 0x7F;
+        out.bits |= (in.bits >> (23-FP8<E>::M)) & 0x7F;
     }
     return out.bits;
 }
@@ -37,16 +37,16 @@ inline uint8_t from_float(float value) {
 #endif
 template<int E>
 inline float to_float(const FP8<E>& in) {
-    union {
+    union fp32_int32 {
         float f;
         uint32_t bits;
     } out = {0};
     out.bits = in.bits & 0x80;
     out.bits <<= 24;
     uint32_t _bits = in.bits & 0x7F;
-    _bits <<= (23-FP8<E>::M());
+    _bits <<= (23-FP8<E>::M);
     out.bits |= _bits;
-    out.f *= exp_p2<127-FP8<E>::E_BIAS()>();
+    out.f *= exp_f2<127-FP8<E>::E_BIAS>();
     return out.f;
 }
 } // namespace fp8
@@ -91,8 +91,8 @@ static inline void conv(const float* x, bloc_fp8<E, QK>* y, int64_t size) {
         for (int64_t i=0; i<QK; i++) {
             m = std::max(std::abs(x[q*QK+i]),m);
         }
-        const float D = FP8<E>::MAX()/m;
-        y[q].d = m/FP8<E>::MAX();
+        const float D = FP8<E>::MAX/m;
+        y[q].d = m/FP8<E>::MAX;
 #ifdef GGML_USE_OPENMP_SIMD
         #pragma omp simd
 #endif
@@ -154,22 +154,22 @@ float dot_reg(const bloc_fp8<E, QK>* x, const _Y* y, int64_t size) {
                 for(int64_t v=0; v<VECT_SIZE; ++v) { mantice_16bits[v] = mantice_8bits[v]; }
 
                 for(int64_t v=0; v<VECT_SIZE; ++v) { sign_16bits[v] <<= 8; }
-                for(int64_t v=0; v<VECT_SIZE; ++v) { mantice_16bits[v] <<= (7-fp8_t::M()); }
+                for(int64_t v=0; v<VECT_SIZE; ++v) { mantice_16bits[v] <<= (7-fp8_t::M); }
 
                 for(int64_t v=0; v<VECT_SIZE; ++v) { x_bf16[v] = sign_16bits[v] | mantice_16bits[v]; }
 
                 for(int64_t v=0; v<VECT_SIZE; ++v) { ux[v].bits = x_bf16[v]; }
                 for(int64_t v=0; v<VECT_SIZE; ++v) { ux[v].bits <<= 16; }
 
-                for(int64_t v=0; v<VECT_SIZE; ++v) { X[v] = ux[v].f; } // * exp_p2<127-fp8_t::E_BIAS()>(); }
+                for(int64_t v=0; v<VECT_SIZE; ++v) { X[v] = ux[v].f; } // * exp_f2<127-fp8_t::E_BIAS>(); }
                 for(int64_t v=0; v<VECT_SIZE; ++v) { Y[v] = (float)y[q*QK+i+r*VECT_SIZE+v]; }
                 for(int64_t v=0; v<VECT_SIZE; ++v) { Z0[r][v] += X[v]*Y[v]; }
             }
         }
         // apply scale
         for(int64_t r=0; r<NB_REG; ++r) {
             for(int64_t v=0; v<VECT_SIZE; ++v) {
-                Z[r][v] += Z0[r][v]*(x[q]).d * exp_p2<127-fp8_t::E_BIAS()>();
+                Z[r][v] += Z0[r][v]*(x[q]).d * exp_f2<127-fp8_t::E_BIAS>();
             }
         }
     }

diff --git a/ggml/src/ggml-fp8.cpp b/ggml/src/ggml-fp8.cpp
@@ -10,7 +10,7 @@
 template<int E>
 inline FP8<E> float_to_fp8(float value) {
     FP8<E> out;
-    union {
+    union fp32_int32 {
         float f;
         uint32_t bits;
     } in = {value};
@@ -19,39 +19,39 @@ inline FP8<E> float_to_fp8(float value) {
     // value without sign
     in.bits &= 0x7fffffff;
     //GGML_ASSERT(in.bits < 0x7f800000); // +/- infinity or NAN
-    if (in.f >= FP8<E>::MAX()) {
+    if (in.f >= FP8<E>::MAX) {
         out.bits |= 0x7E;
-    } else if (in.f < FP8<E>::MIN()) { // => 0.
+    } else if (in.f < FP8<E>::MIN) { // => 0.
         // OK: S.0000000
     } else {
-        in.f *= exp_m2<FP8<E>::E_BIAS()-127>();
+        in.f *= exp_f2<FP8<E>::E_BIAS-127>();
         // - trunc
         //uint32_t eps = 0;
         // - rounding half away from zero
-        //uint32_t eps = 0x400000>>FP8<E>::M();
+        //uint32_t eps = 0x400000>>FP8<E>::M;
         // - rounding half toward zero
-        //uint32_t eps = 0x3fffff>>FP8<E>::M();
+        //uint32_t eps = 0x3fffff>>FP8<E>::M;
         // - rounding to nearest even
-        uint32_t eps = (0x3fffff>>FP8<E>::M()) + ((in.bits >> (23-FP8<E>::M())) & 0x1);
+        uint32_t eps = (0x3fffff>>FP8<E>::M) + ((in.bits >> (23-FP8<E>::M)) & 0x1);
         // shift mantissa.
         in.bits += eps;
-        out.bits |= (in.bits >> (23-FP8<E>::M())) & 0x7F;
+        out.bits |= (in.bits >> (23-FP8<E>::M)) & 0x7F;
     }
     return out;
 }
 
 template<int E>
 inline float fp8_to_float(const FP8<E>& in) {
-    union {
+    union fp32_int32 {
         float f;
         uint32_t bits;
     } out = {0};
     out.bits = in.bits & 0x80;
     out.bits <<= 24;
     uint32_t _bits = in.bits & 0x7F;
-    _bits <<= (23-FP8<E>::M());
+    _bits <<= (23-FP8<E>::M);
     out.bits |= _bits;
-    out.f *= exp_p2<127-FP8<E>::E_BIAS()>();
+    out.f *= exp_f2<127-FP8<E>::E_BIAS>();
     return out.f;
 }
 
@@ -93,8 +93,8 @@ static inline void conv(const float* x, bloc_fp8<E, QK>* y, int64_t size) {
         for (int64_t i=0; i<QK; i++) {
             m = std::max(std::abs(x[q*QK+i]),m);
         }
-        const float D = FP8<E>::MAX()/m;
-        y[q].d = m/FP8<E>::MAX();
+        const float D = FP8<E>::MAX/m;
+        y[q].d = m/FP8<E>::MAX;
         for (int64_t i=0; i<QK; i++) {
             y[q].qs[i] = float_to_fp8<E>(x[q*QK+i]*D);
         }

diff --git a/ggml/src/ggml-fp8.h b/ggml/src/ggml-fp8.h
@@ -1,29 +1,27 @@
 // this is more a .inc.
 #ifdef  __cplusplus
 template<int N>
-constexpr float exp_p2() {
-    return exp_p2<N-1>()*2;
-}
-template<int N>
-constexpr float exp_m2() {
-    return exp_m2<N+1>()/2;
-}
-template<int N>
 constexpr int exp_i2() {
     return 1 << N;
 }
-template<> constexpr float exp_p2<0>() { return 1;}
-template<> constexpr float exp_m2<0>() { return 1;}
+
+template<int N>
+constexpr float exp_f2() {
+    if constexpr (N>0) return exp_f2<N-1>()*2;
+    if constexpr (N<0) return exp_f2<N+1>()/2;
+    if constexpr (N==0) return 1.;
+}
+
 
 template<int _E> //, int M=7-E>  1.7 bits!
 struct FP8 {
     uint8_t bits;
     using type = FP8<_E>;
-    static constexpr int E()      { return _E; }
-    static constexpr int M()      { return 7-_E; }
-    static constexpr int E_BIAS() { return exp_i2<_E-1>()-1; }
-    static constexpr float MAX()  { return (2-exp_m2<-M()+1>())*exp_p2<exp_i2<_E-1>()>(); }
-    static constexpr float MIN()  { return exp_m2<-M()>()*exp_m2<2-exp_i2<_E-1>()>(); }
+    static constexpr int E      = _E;
+    static constexpr int M      = (7-_E);
+    static constexpr int E_BIAS = exp_i2<E-1>()-1;
+    static constexpr float MAX  = (2-exp_f2<-M+1>())*exp_f2<exp_i2<E-1>()>();
+    static constexpr float MIN  = exp_f2<-M>()*exp_f2<2-exp_i2<E-1>()>();
 };
 
 extern "C" {