Finished implementation to export a net to File; at the moment it is working only for fully connected layers. Added a tutorial, added the Mish activation function, and changed some method names that were not consistent with the naming convention.

giovastabile committed Dec 2, 2019
1 parent a1c0ee2 commit a405bd0
Showing 22 changed files with 965 additions and 239 deletions.
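As a quick orientation before the file-by-file diff, here is a hypothetical end-to-end usage sketch of the new export/read feature. The method names export_net and read_net and their signatures are assumptions based on the commit message, not confirmed by this diff; the rest of the calls are the existing MiniDNN API, and only fully connected layers are supported for export at this point.

#include <Eigen/Core>
#include <MiniDNN.h>

using namespace MiniDNN;
typedef Eigen::MatrixXd Matrix;

int main()
{
    // Each column is one observation: 100 inputs, 2 targets, 50 observations
    Matrix x = Matrix::Random(100, 50);
    Matrix y = Matrix::Random(2, 50);

    Network net;
    net.add_layer(new FullyConnected<Mish>(100, 32));    // new Mish activation
    net.add_layer(new FullyConnected<Identity>(32, 2));  // export works only for
                                                         // fully connected layers
    net.set_output(new RegressionMSE());

    RMSProp opt;
    net.init(0, 0.01, 123);
    net.fit(opt, x, y, 10, 100, 123);

    net.export_net("./NetFolder/", "NetFile");   // assumed name of the new export method

    Network net2;
    net2.read_net("./NetFolder/", "NetFile");    // assumed name of the new read method
    Matrix pred = net2.predict(x);

    return 0;
}
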
1 change: 1 addition & 0 deletions .gitignore
@@ -4,4 +4,5 @@ Release/*
.cproject
.project
.settings/*
*.orig

7 changes: 7 additions & 0 deletions AUTHORS.md
@@ -0,0 +1,7 @@
Authors
=======

The authors and developers of MiniDNN are:
- [Yixuan Qiu](https://statr.me/about/) (<yixuanq@gmail.com>)
- [Giovanni Stabile](https://www.giovannistabile.com/) (<gstabile@sissa.it>)

5 changes: 5 additions & 0 deletions include/Activation/Identity.h
@@ -40,6 +40,11 @@ class Identity
{
G.noalias() = F;
}

static std::string return_type()
{
return "Identity";
}
};


59 changes: 59 additions & 0 deletions include/Activation/Mish.h
@@ -0,0 +1,59 @@
#ifndef ACTIVATION_MISH_H_
#define ACTIVATION_MISH_H_

#include <Eigen/Core>
#include "../Config.h"

namespace MiniDNN
{


///
/// \ingroup Activations
///
/// The Mish activation function
///
/// from : https://arxiv.org/abs/1908.08681
///
class Mish
{
private:
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;

public:
// a = activation(z) = mish(z) = z * tanh(softplus(z)) = z * tanh(log(1 + exp(z)))
// Z = [z1, ..., zn], A = [a1, ..., an], n observations
static inline void activate(const Matrix& Z, Matrix& A)
{
A.array() = Z.array() * ((((Z.array()).exp()).log1p()).tanh());
}

// Apply the Jacobian matrix J to a vector f
// Let s = softplus(z) = log(1 + exp(z)), so a = z * tanh(s)
// J = d_a / d_z = diag( tanh(s) + z * sech^2(s) * exp(z) / (1 + exp(z)) )
// g = J * f
// Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn]
// Note: When entering this function, Z and G may point to the same matrix
static inline void apply_jacobian(const Matrix& Z, const Matrix& A,
const Matrix& F, Matrix& G)
{
Matrix tempSoftplus;
Matrix tempSech;
Matrix ex;
ex.array() = Z.array().exp();
tempSoftplus.array() = ex.array().log1p();                     // softplus(z) = log(1 + exp(z))
tempSech.array() = Scalar(1) / (tempSoftplus.array().cosh());  // sech(softplus(z))
// g = (tanh(s) + z * sech^2(s) * exp(z) / (1 + exp(z))) .* f, with s = softplus(z)
G.array() = (tempSoftplus.array().tanh() + Z.array() * ex.array() *
             tempSech.array() * (tempSech.array() / (Scalar(1) + ex.array()))) * F.array();
}

static std::string return_type()
{
return "Mish";
}
};


} // namespace MiniDNN


#endif /* ACTIVATION_MISH_H_ */
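A minimal standalone check (not part of the commit) of the Mish formulas used above, evaluated on a single scalar with the C++ standard library:

#include <cmath>
#include <cstdio>

int main()
{
    const double z  = 0.5;
    const double sp = std::log1p(std::exp(z));   // softplus(z) = log(1 + exp(z))
    const double a  = z * std::tanh(sp);         // mish(z) = z * tanh(softplus(z))

    // Derivative used by apply_jacobian():
    // d a / d z = tanh(sp) + z * sech^2(sp) * exp(z) / (1 + exp(z))
    const double sech = 1.0 / std::cosh(sp);
    const double dadz = std::tanh(sp)
                      + z * sech * sech * std::exp(z) / (1.0 + std::exp(z));

    std::printf("mish(%.2f) = %.6f, mish'(%.2f) = %.6f\n", z, a, z, dadz);
    return 0;
}
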
5 changes: 5 additions & 0 deletions include/Activation/ReLU.h
@@ -36,6 +36,11 @@ class ReLU
{
G.array() = (A.array() > Scalar(0)).select(F, Scalar(0));
}

static std::string return_type()
{
return "ReLU";
}
};


5 changes: 5 additions & 0 deletions include/Activation/Sigmoid.h
@@ -36,6 +36,11 @@ class Sigmoid
{
G.array() = A.array() * (Scalar(1) - A.array()) * F.array();
}

static std::string return_type()
{
return "Sigmoid";
}
};


5 changes: 5 additions & 0 deletions include/Activation/Softmax.h
@@ -37,6 +37,11 @@ class Softmax
RowArray a_dot_f = A.cwiseProduct(F).colwise().sum();
G.array() = A.array() * (F.array().rowwise() - a_dot_f);
}

static std::string return_type()
{
return "Softmax";
}
};


23 changes: 23 additions & 0 deletions include/Layer.h
@@ -72,6 +72,13 @@ class Layer
/// \param rng The random number generator of type RNG.
virtual void init(const Scalar& mu, const Scalar& sigma, RNG& rng) = 0;

///
/// Initialize layer parameters without using a random distribution; used only when the layer is read from file
///
virtual void init() = 0;



///
/// Compute the output of this layer
///
@@ -152,6 +159,22 @@ class Layer
/// Get serialized values of the gradient of parameters
///
virtual std::vector<Scalar> get_derivatives() const = 0;

///
/// @brief Return the layer type, useful to export the NN model
///
/// @return Type of the layer
///
virtual std::string layer_type() const = 0;

///
/// @brief Return the activation type, useful to export the NN model
///
/// @return Type of the activation function
///
virtual std::string activation_type() const = 0;


};


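The two new virtual methods make each layer self-describing, which is what a text-based exporter needs in order to write a model header before the raw parameters. Below is a sketch of how such an exporter could consume them; the std::vector<Layer*> container, the in_size()/out_size() accessors, and the write_net_header() function itself are illustrative assumptions, not part of this diff.

#include <fstream>
#include <string>
#include <vector>
#include <MiniDNN.h>

// Write one descriptive line per layer: type, activation, input and output size.
// A matching reader could use these strings to reconstruct the layers before
// loading their parameters.
void write_net_header(const std::vector<MiniDNN::Layer*>& layers,
                      const std::string& filename)
{
    std::ofstream out(filename);
    for (const MiniDNN::Layer* layer : layers)
    {
        out << layer->layer_type() << " "
            << layer->activation_type() << " "
            << layer->in_size() << " "
            << layer->out_size() << "\n";
    }
}
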
17 changes: 17 additions & 0 deletions include/Layer/Convolutional.h
@@ -82,6 +82,12 @@ class Convolutional: public Layer
internal::set_normal_random(m_bias.data(), m_dim.out_channels, rng, mu, sigma);
}

void init()
{
M_assert(false,
"At the moment the readNet method is implemented only for fully connected layers!!");
}

// http://cs231n.github.io/convolutional-networks/
void forward(const Matrix& prev_layer_data)
{
@@ -211,6 +217,17 @@
res.begin() + m_df_data.size());
return res;
}

std::string layer_type() const
{
return "Convolutional";
}

std::string activation_type() const
{
return Activation::return_type();
}

};


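M_assert is used above but is not defined anywhere in this excerpt (it presumably lives in the new Utils/MiniDNNStream.h header). A minimal sketch of what such an assertion macro could look like is given below; the actual definition in the commit may differ.

#include <cstdlib>
#include <iostream>

// Illustrative assertion macro: print the message, the failing expression and
// its location, then abort. Sketch only; not the commit's definition.
#define M_assert(Expr, Msg)                                        \
    do {                                                           \
        if (!(Expr)) {                                             \
            std::cerr << "Assertion failed: " << (Msg) << "\n"     \
                      << "Expression: " << #Expr << "\n"           \
                      << "File: " << __FILE__                      \
                      << ", line " << __LINE__ << std::endl;       \
            std::abort();                                          \
        }                                                          \
    } while (false)
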
19 changes: 19 additions & 0 deletions include/Layer/FullyConnected.h
@@ -3,6 +3,7 @@

#include <Eigen/Core>
#include <vector>
#include <iostream>
#include <stdexcept>
#include "../Config.h"
#include "../Layer.h"
@@ -57,6 +58,14 @@ class FullyConnected: public Layer
internal::set_normal_random(m_bias.data(), m_bias.size(), rng, mu, sigma);
}

void init()
{
m_weight.resize(this->m_in_size, this->m_out_size);
m_bias.resize(this->m_out_size);
m_dw.resize(this->m_in_size, this->m_out_size);
m_db.resize(this->m_out_size);
}

// prev_layer_data: in_size x nobs
void forward(const Matrix& prev_layer_data)
{
@@ -140,6 +149,16 @@
std::copy(m_db.data(), m_db.data() + m_db.size(), res.begin() + m_dw.size());
return res;
}

std::string layer_type() const
{
return "FullyConnected";
}

std::string activation_type() const
{
return Activation::return_type();
}
};


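The new no-argument init() only allocates the parameter matrices; it is meant to be followed by filling them with values read from file. A sketch of that read path follows; the set_parameters() setter and the flat weight-then-bias layout (mirroring get_derivatives() above) are assumptions and are not shown in this diff.

#include <vector>
#include <MiniDNN.h>

using namespace MiniDNN;

// Rebuild a fully connected layer from stored sizes and a flat parameter
// vector. init() sizes the matrices without touching their contents, and a
// (hypothetical) set_parameters() then copies the stored values in.
Layer* restore_fully_connected(int in_size, int out_size,
                               const std::vector<Scalar>& params)
{
    FullyConnected<ReLU>* layer = new FullyConnected<ReLU>(in_size, out_size);
    layer->init();                 // allocate m_weight, m_bias, m_dw, m_db
    layer->set_parameters(params); // assumed setter, not part of this diff
    return layer;
}

In a full reader, the activation template argument would be chosen from the stored activation_type() string rather than hard-coded to ReLU.
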
14 changes: 14 additions & 0 deletions include/Layer/MaxPooling.h
@@ -65,6 +65,10 @@ class MaxPooling: public Layer

void init(const Scalar& mu, const Scalar& sigma, RNG& rng) {}

void init() {}



void forward(const Matrix& prev_layer_data)
{
// Each column is an observation
@@ -166,6 +170,16 @@
{
return std::vector<Scalar>();
}

std::string layer_type() const
{
return "MaxPooling";
}

std::string activation_type() const
{
return Activation::return_type();
}
};


3 changes: 3 additions & 0 deletions include/MiniDNN.h
@@ -13,6 +13,7 @@
#include "Layer/MaxPooling.h"

#include "Activation/ReLU.h"
#include "Activation/Mish.h"
#include "Activation/Identity.h"
#include "Activation/Sigmoid.h"
#include "Activation/Softmax.h"
@@ -31,6 +32,8 @@
#include "Callback.h"
#include "Callback/VerboseCallback.h"

#include "Utils/MiniDNNStream.h"

#include "Network.h"


