From 8b34494f7b5c85b0a931fe2be0c9471f20b70342 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Mar 2022 11:45:54 +0100 Subject: [PATCH] review updates * Improve documentation * Sort output of binary read Co-authored-by: Marcel Koch Co-authored-by: Pratik Nayak --- benchmark/tools/CMakeLists.txt | 2 +- benchmark/tools/mtx_to_binary.cpp | 11 ++++++++++- core/base/mtx_io.cpp | 12 +++++++++++- core/test/base/mtx_io.cpp | 26 ++++++++++++++------------ include/ginkgo/core/base/mtx_io.hpp | 16 ++++++++++++++-- 5 files changed, 50 insertions(+), 17 deletions(-) diff --git a/benchmark/tools/CMakeLists.txt b/benchmark/tools/CMakeLists.txt index 3bab475d471..0f3671b4779 100644 --- a/benchmark/tools/CMakeLists.txt +++ b/benchmark/tools/CMakeLists.txt @@ -4,4 +4,4 @@ target_compile_definitions(matrix_complex PRIVATE GKO_TOOL_COMPLEX) target_link_libraries(matrix Ginkgo::ginkgo) target_link_libraries(matrix_complex Ginkgo::ginkgo) add_executable(mtx_to_binary mtx_to_binary.cpp) -target_link_libraries(mtx_to_binary Ginkgo::ginkgo) \ No newline at end of file +target_link_libraries(mtx_to_binary Ginkgo::ginkgo) diff --git a/benchmark/tools/mtx_to_binary.cpp b/benchmark/tools/mtx_to_binary.cpp index 8e26ee87b50..4d0421a5cf8 100644 --- a/benchmark/tools/mtx_to_binary.cpp +++ b/benchmark/tools/mtx_to_binary.cpp @@ -78,7 +78,16 @@ void process(const char* input, const char* output, bool validate) int main(int argc, char** argv) { if (argc < 3 || (std::string{argv[1]} == "-v" && argc < 4)) { - std::cerr << "Usage: " << argv[0] << " [-v] [input] [output]\n"; + std::cerr + << "Usage: " << argv[0] + << " [-v] [input] [output]\n" + "Reads the input file in MatrixMarket format and converts it" + "to Ginkgo's binary format.\nWith the optional -v flag, reads " + "the written binary output again and compares it with the " + "original input to validate the conversion.\n" + "The conversion uses a complex value type if necessary, " + "the highest possible value precision and the smallest " + "possible index type.\n"; return 1; } bool validate = std::string{argv[1]} == "-v"; diff --git a/core/base/mtx_io.cpp b/core/base/mtx_io.cpp index 7dada4e06a6..f0ad3ef9ae0 100644 --- a/core/base/mtx_io.cpp +++ b/core/base/mtx_io.cpp @@ -763,8 +763,15 @@ matrix_data read_raw(std::istream& is) } +/** + * Returns the magic number at the beginning of the binary format header for the + * given type parameters. + * + * @tparam ValueType the value type to be used for the binary storage + * @tparam IndexType the index type to be used for the binary storage + */ template -static constexpr uint64_t binary_format_magic() +static constexpr uint64 binary_format_magic() { constexpr auto is_int = std::is_same::value; constexpr auto is_long = std::is_same::value; @@ -856,6 +863,8 @@ matrix_data read_binary_convert(std::istream& is, result.nonzeros[i].row = row; result.nonzeros[i].column = column; } + // sort the entries + result.ensure_row_major_order(); return result; } @@ -866,6 +875,7 @@ matrix_data read_binary_convert(std::istream& is, template matrix_data read_binary_raw(std::istream& is) { + static_assert(sizeof(uint64) == 8, "c++ is broken"); // just to be sure std::array header{}; GKO_CHECK_STREAM(is.read(header.data(), 32), "failed reading header"); uint64 magic{}; diff --git a/core/test/base/mtx_io.cpp b/core/test/base/mtx_io.cpp index 57ab8d5d3ee..49dc25d8750 100644 --- a/core/test/base/mtx_io.cpp +++ b/core/test/base/mtx_io.cpp @@ -405,6 +405,7 @@ std::array build_binary_complex_data() std::memcpy(&int_val, &dbl_val, sizeof(double)); std::memcpy(&neg_int_val, &neg_dbl_val, sizeof(double)); constexpr gko::uint64 shift = 256; + // note: the following data is not sorted! std::array data{ 'G' + shift * ('I' + @@ -426,14 +427,14 @@ std::array build_binary_complex_data() 1, // col int_val, neg_int_val, - 4, // row - 2, // col - 0, - neg_int_val, 16, // row 25, // col 0, - 0}; + 0, + 4, // row + 2, // col + 0, + neg_int_val}; return data; } @@ -447,6 +448,7 @@ std::array build_binary_real_data() std::memcpy(&int_val, &dbl_val, sizeof(double)); std::memcpy(&neg_int_val, &neg_dbl_val, sizeof(double)); constexpr gko::uint64 shift = 256; + // note: the following data is not sorted! std::array data{ 'G' + shift * ('I' + @@ -460,12 +462,12 @@ std::array build_binary_real_data() 64, // num_rows 32, // num_cols 4, // num_entries - 0, // row - 1, // col - 0, // val 1, // row 1, // col int_val, + 0, // row + 1, // col + 0, // val 4, // row 2, // col neg_int_val, @@ -831,8 +833,8 @@ TEST(MtxReader, WritesBinary) gko::matrix_data data; data.size = gko::dim<2>{64, 32}; data.nonzeros.resize(4); - data.nonzeros[0] = {0, 1, 0.0}; - data.nonzeros[1] = {1, 1, 2.5}; + data.nonzeros[0] = {1, 1, 2.5}; + data.nonzeros[1] = {0, 1, 0.0}; data.nonzeros[2] = {4, 2, -2.5}; data.nonzeros[3] = {16, 25, 0.0}; @@ -852,8 +854,8 @@ TEST(MtxReader, WritesComplexBinary) data.nonzeros.resize(4); data.nonzeros[0] = {0, 1, {0.0, 2.5}}; data.nonzeros[1] = {1, 1, {2.5, -2.5}}; - data.nonzeros[2] = {4, 2, {0.0, -2.5}}; - data.nonzeros[3] = {16, 25, {0.0, 0.0}}; + data.nonzeros[2] = {16, 25, {0.0, 0.0}}; + data.nonzeros[3] = {4, 2, {0.0, -2.5}}; gko::write_binary_raw(ss, data); diff --git a/include/ginkgo/core/base/mtx_io.hpp b/include/ginkgo/core/base/mtx_io.hpp index 2bf4d42cfb1..e8a8b1e72cc 100644 --- a/include/ginkgo/core/base/mtx_io.hpp +++ b/include/ginkgo/core/base/mtx_io.hpp @@ -67,6 +67,20 @@ matrix_data read_raw(std::istream& is); * so files from a big endian processor can't be read from a little endian * processor and vice-versa. * + * The binary format has the following structure (in system endianness): + * 1. A 32 byte header consisting of 4 uint64_t values: + * magic = GINKGO__: The highest two bytes stand for value and index type. + * value type: S (float), D (double), + * C (complex), Z(complex) + * index type: I (int32), L (int64) + * num_rows: Number of rows + * num_cols: Number of columns + * num_entries: Number of (row, column, value) tuples to follow + * 2. Following are num_entries blocks of size + * sizeof(IndexType) * 2 + sizeof(ValueType). + * Each consists of a row index stored as IndexType, followed by + * a column index stored as IndexType and a value stored as ValueType. + * * @tparam ValueType type of matrix values * @tparam IndexType type of matrix indexes * @@ -146,7 +160,6 @@ void write_raw(std::ostream& os, const matrix_data& data, * * @param os output stream where the data is to be written * @param data the matrix data to write - * @param layout the layout used in the output * * @note This is an advanced routine that writes the raw matrix data structure. * If you are trying to write an existing matrix, consider using @@ -333,7 +346,6 @@ inline void write( * * @param os output stream where the data is to be written * @param matrix the matrix to write - * @param layout the layout used in the output */ template inline void write_binary(StreamType&& os, MatrixType* matrix)