review updates

* Simplify binary magic setup * Improve documentation * Sort output of binary read Co-authored-by: Yuhsiang Tsai <yhmtsai@gmail.com> Co-authored-by: Marcel Koch <marcel.koch@kit.edu> Co-authored-by: Pratik Nayak <pratik.nayak@kit.edu>
ginkgo-project · Mar 10, 2022 · 092c57a · 092c57a
1 parent 27f45e6
commit 092c57a
Show file tree

Hide file tree

Showing 5 changed files with 78 additions and 52 deletions.
diff --git a/benchmark/tools/CMakeLists.txt b/benchmark/tools/CMakeLists.txt
@@ -4,4 +4,4 @@ target_compile_definitions(matrix_complex PRIVATE GKO_TOOL_COMPLEX)
 target_link_libraries(matrix Ginkgo::ginkgo)
 target_link_libraries(matrix_complex Ginkgo::ginkgo)
 add_executable(mtx_to_binary mtx_to_binary.cpp)
-target_link_libraries(mtx_to_binary Ginkgo::ginkgo)
+target_link_libraries(mtx_to_binary Ginkgo::ginkgo)
diff --git a/benchmark/tools/mtx_to_binary.cpp b/benchmark/tools/mtx_to_binary.cpp
@@ -78,7 +78,16 @@ void process(const char* input, const char* output, bool validate)
 int main(int argc, char** argv)
 {
     if (argc < 3 || (std::string{argv[1]} == "-v" && argc < 4)) {
-        std::cerr << "Usage: " << argv[0] << " [-v] [input] [output]\n";
+        std::cerr
+            << "Usage: " << argv[0]
+            << " [-v] [input] [output]\n"
+               "Reads the input file in MatrixMarket format and converts it"
+               "to Ginkgo's binary format.\nWith the optional -v flag, reads "
+               "the written binary output again and compares it with the "
+               "original input to validate the conversion.\n"
+               "The conversion uses a complex value type if necessary, "
+               "the highest possible value precision and the smallest "
+               "possible index type.\n";
         return 1;
     }
     bool validate = std::string{argv[1]} == "-v";

diff --git a/core/base/mtx_io.cpp b/core/base/mtx_io.cpp
@@ -763,8 +763,15 @@ matrix_data<ValueType, IndexType> read_raw(std::istream& is)
 }
 
 
+/**
+ * Returns the magic number at the beginning of the binary format header for the
+ * given type parameters.
+ *
+ * @tparam ValueType  the value type to be used for the binary storage
+ * @tparam IndexType  the index type to be used for the binary storage
+ */
 template <typename ValueType, typename IndexType>
-static constexpr uint64_t binary_format_magic()
+static constexpr uint64 binary_format_magic()
 {
     constexpr auto is_int = std::is_same<IndexType, int32>::value;
     constexpr auto is_long = std::is_same<IndexType, int64>::value;
@@ -781,16 +788,16 @@ static constexpr uint64_t binary_format_magic()
     constexpr auto index_bit = is_int ? 'I' : 'L';
     constexpr auto value_bit =
         is_double ? 'D' : (is_float ? 'S' : (is_complex_double ? 'Z' : 'C'));
-    constexpr uint64 type_bits = index_bit * 256ull + value_bit;
+    constexpr uint64 shift = 256;
+    constexpr uint64 type_bits = index_bit * shift + value_bit;
     return 'G' +
-           256ull *
+           shift *
                ('I' +
-                256ull *
+                shift *
                     ('N' +
-                     256ull *
+                     shift *
                          ('K' +
-                          256ull *
-                              ('G' + 256ull * ('O' + 256ull * type_bits)))));
+                          shift * ('G' + shift * ('O' + shift * type_bits)))));
 }
 
 
@@ -856,6 +863,8 @@ matrix_data<ValueType, IndexType> read_binary_convert(std::istream& is,
         result.nonzeros[i].row = row;
         result.nonzeros[i].column = column;
     }
+    // sort the entries
+    result.ensure_row_major_order();
     return result;
 }
 
@@ -866,6 +875,7 @@ matrix_data<ValueType, IndexType> read_binary_convert(std::istream& is,
 template <typename ValueType, typename IndexType>
 matrix_data<ValueType, IndexType> read_binary_raw(std::istream& is)
 {
+    static_assert(sizeof(uint64) == 8, "c++ is broken");  // just to be sure
     std::array<char, 32> header{};
     GKO_CHECK_STREAM(is.read(header.data(), 32), "failed reading header");
     uint64 magic{};

diff --git a/core/test/base/mtx_io.cpp b/core/test/base/mtx_io.cpp
@@ -404,17 +404,18 @@ std::array<gko::uint64, 20> build_binary_complex_data()
     double neg_dbl_val = -2.5;
     std::memcpy(&int_val, &dbl_val, sizeof(double));
     std::memcpy(&neg_int_val, &neg_dbl_val, sizeof(double));
+    constexpr gko::uint64 shift = 256;
+    // note: the following data is not sorted!
     std::array<gko::uint64, 20> data{
-        'G' + 256ull *
+        'G' + shift *
                   ('I' +
-                   256ull *
+                   shift *
                        ('N' +
-                        256ull *
+                        shift *
                             ('K' +
-                             256ull *
-                                 ('G' +
-                                  256ull *
-                                      ('O' + 256ull * ('Z' + 256ull * 'L')))))),
+                             shift * ('G' +
+                                      shift * ('O' +
+                                               shift * ('Z' + shift * 'L')))))),
         64,       // num_rows
         32,       // num_cols
         4,        // num_entries
@@ -426,14 +427,14 @@ std::array<gko::uint64, 20> build_binary_complex_data()
         1,        // col
         int_val,
         neg_int_val,
-        4,  // row
-        2,  // col
-        0,
-        neg_int_val,
         16,  // row
         25,  // col
         0,
-        0};
+        0,
+        4,  // row
+        2,  // col
+        0,
+        neg_int_val};
     return data;
 }
 
@@ -446,26 +447,27 @@ std::array<gko::uint64, 16> build_binary_real_data()
     double neg_dbl_val = -2.5;
     std::memcpy(&int_val, &dbl_val, sizeof(double));
     std::memcpy(&neg_int_val, &neg_dbl_val, sizeof(double));
+    constexpr gko::uint64 shift = 256;
+    // note: the following data is not sorted!
     std::array<gko::uint64, 16> data{
-        'G' + 256ull *
+        'G' + shift *
                   ('I' +
-                   256ull *
+                   shift *
                        ('N' +
-                        256ull *
+                        shift *
                             ('K' +
-                             256ull *
-                                 ('G' +
-                                  256ull *
-                                      ('O' + 256ull * ('D' + 256ull * 'L')))))),
+                             shift * ('G' +
+                                      shift * ('O' +
+                                               shift * ('D' + shift * 'L')))))),
         64,  // num_rows
         32,  // num_cols
         4,   // num_entries
-        0,   // row
-        1,   // col
-        0,   // val
         1,   // row
         1,   // col
         int_val,
+        0,  // row
+        1,  // col
+        0,  // val
         4,  // row
         2,  // col
         neg_int_val,
@@ -831,8 +833,8 @@ TEST(MtxReader, WritesBinary)
     gko::matrix_data<double, gko::int64> data;
     data.size = gko::dim<2>{64, 32};
     data.nonzeros.resize(4);
-    data.nonzeros[0] = {0, 1, 0.0};
-    data.nonzeros[1] = {1, 1, 2.5};
+    data.nonzeros[0] = {1, 1, 2.5};
+    data.nonzeros[1] = {0, 1, 0.0};
     data.nonzeros[2] = {4, 2, -2.5};
     data.nonzeros[3] = {16, 25, 0.0};
 
@@ -852,8 +854,8 @@ TEST(MtxReader, WritesComplexBinary)
     data.nonzeros.resize(4);
     data.nonzeros[0] = {0, 1, {0.0, 2.5}};
     data.nonzeros[1] = {1, 1, {2.5, -2.5}};
-    data.nonzeros[2] = {4, 2, {0.0, -2.5}};
-    data.nonzeros[3] = {16, 25, {0.0, 0.0}};
+    data.nonzeros[2] = {16, 25, {0.0, 0.0}};
+    data.nonzeros[3] = {4, 2, {0.0, -2.5}};
 
     gko::write_binary_raw(ss, data);
 
@@ -973,6 +975,7 @@ TYPED_TEST(RealDummyLinOpTest, ReadsGenericLinOpFromBinaryStream)
 {
     using value_type = typename TestFixture::value_type;
     using index_type = typename TestFixture::index_type;
+    using tpl = typename gko::matrix_data<value_type, index_type>::nonzero_type;
     auto raw_data = build_binary_real_data();
     std::istringstream iss(std::string{reinterpret_cast<char*>(raw_data.data()),
                                        raw_data.size() * sizeof(gko::uint64)});
@@ -983,18 +986,10 @@ TYPED_TEST(RealDummyLinOpTest, ReadsGenericLinOpFromBinaryStream)
     const auto& data = lin_op->data_;
     ASSERT_EQ(data.size, gko::dim<2>(64, 32));
     ASSERT_EQ(data.nonzeros.size(), 4);
-    ASSERT_EQ(data.nonzeros[0].row, 0);
-    ASSERT_EQ(data.nonzeros[1].row, 1);
-    ASSERT_EQ(data.nonzeros[2].row, 4);
-    ASSERT_EQ(data.nonzeros[3].row, 16);
-    ASSERT_EQ(data.nonzeros[0].column, 1);
-    ASSERT_EQ(data.nonzeros[1].column, 1);
-    ASSERT_EQ(data.nonzeros[2].column, 2);
-    ASSERT_EQ(data.nonzeros[3].column, 25);
-    ASSERT_EQ(data.nonzeros[0].value, value_type{0.0});
-    ASSERT_EQ(data.nonzeros[1].value, value_type{2.5});
-    ASSERT_EQ(data.nonzeros[2].value, value_type{-2.5});
-    ASSERT_EQ(data.nonzeros[3].value, value_type{0.0});
+    ASSERT_EQ(data.nonzeros[0], tpl(0, 1, value_type{0.0}));
+    ASSERT_EQ(data.nonzeros[1], tpl(1, 1, value_type{2.5}));
+    ASSERT_EQ(data.nonzeros[2], tpl(4, 2, value_type{-2.5}));
+    ASSERT_EQ(data.nonzeros[3], tpl(16, 25, value_type{0.0}));
 }
 
 

diff --git a/include/ginkgo/core/base/mtx_io.hpp b/include/ginkgo/core/base/mtx_io.hpp
@@ -52,7 +52,7 @@ namespace gko {
  * @param is  input stream from which to read the data
  *
  * @return A matrix_data structure containing the matrix. The nonzero elements
- *         are sorted in lexicographic order of their (row, colum) indexes.
+ *         are sorted in lexicographic order of their (row, column) indexes.
  *
  * @note This is an advanced routine that will return the raw matrix data
  *       structure. Consider using gko::read instead.
@@ -67,13 +67,27 @@ matrix_data<ValueType, IndexType> read_raw(std::istream& is);
  * so files from a big endian processor can't be read from a little endian
  * processor and vice-versa.
  *
+ * The binary format has the following structure (in system endianness):
+ * 1. A 32 byte header consisting of 4 uint64_t values:
+ *    magic = GINKGO__: The highest two bytes stand for value and index type.
+ *                      value type: S (float), D (double),
+ *                                  C (complex<float>), Z(complex<double>)
+ *                      index type: I (int32), L (int64)
+ *    num_rows: Number of rows
+ *    num_cols: Number of columns
+ *    num_entries: Number of (row, column, value) tuples to follow
+ * 2. Following are num_entries blocks of size
+ *    sizeof(IndexType) * 2 + sizeof(ValueType).
+ *    Each consists of a row index stored as IndexType, followed by
+ *    a column index stored as IndexType and a value stored as ValueType.
+ *
  * @tparam ValueType  type of matrix values
  * @tparam IndexType  type of matrix indexes
  *
  * @param is  input stream from which to read the data
  *
  * @return A matrix_data structure containing the matrix. The nonzero elements
- *         are sorted in lexicographic order of their (row, colum) indexes.
+ *         are sorted in lexicographic order of their (row, column) indexes.
  *
  * @note This is an advanced routine that will return the raw matrix data
  *       structure. Consider using gko::read_binary instead.
@@ -92,7 +106,7 @@ matrix_data<ValueType, IndexType> read_binary_raw(std::istream& is);
  * @param is  input stream from which to read the data
  *
  * @return A matrix_data structure containing the matrix. The nonzero elements
- *         are sorted in lexicographic order of their (row, colum) indexes.
+ *         are sorted in lexicographic order of their (row, column) indexes.
  *
  * @note This is an advanced routine that will return the raw matrix data
  *       structure. Consider using gko::read_generic instead.
@@ -146,7 +160,6 @@ void write_raw(std::ostream& os, const matrix_data<ValueType, IndexType>& data,
  *
  * @param os  output stream where the data is to be written
  * @param data  the matrix data to write
- * @param layout  the layout used in the output
  *
  * @note This is an advanced routine that writes the raw matrix data structure.
  *       If you are trying to write an existing matrix, consider using
@@ -333,7 +346,6 @@ inline void write(
  *
  * @param os  output stream where the data is to be written
  * @param matrix  the matrix to write
- * @param layout  the layout used in the output
  */
 template <typename MatrixType, typename StreamType>
 inline void write_binary(StreamType&& os, MatrixType* matrix)