Skip to content

Commit

Permalink
review updates
Browse files Browse the repository at this point in the history
* Simplify binary magic setup
* Improve documentation
* Sort output of binary read

Co-authored-by: Yuhsiang Tsai <yhmtsai@gmail.com>
Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
Co-authored-by: Pratik Nayak <pratik.nayak@kit.edu>
  • Loading branch information
4 people committed Mar 10, 2022
1 parent 27f45e6 commit 092c57a
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 52 deletions.
2 changes: 1 addition & 1 deletion benchmark/tools/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ target_compile_definitions(matrix_complex PRIVATE GKO_TOOL_COMPLEX)
target_link_libraries(matrix Ginkgo::ginkgo)
target_link_libraries(matrix_complex Ginkgo::ginkgo)
add_executable(mtx_to_binary mtx_to_binary.cpp)
target_link_libraries(mtx_to_binary Ginkgo::ginkgo)
target_link_libraries(mtx_to_binary Ginkgo::ginkgo)
11 changes: 10 additions & 1 deletion benchmark/tools/mtx_to_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,16 @@ void process(const char* input, const char* output, bool validate)
int main(int argc, char** argv)
{
if (argc < 3 || (std::string{argv[1]} == "-v" && argc < 4)) {
std::cerr << "Usage: " << argv[0] << " [-v] [input] [output]\n";
std::cerr
<< "Usage: " << argv[0]
<< " [-v] [input] [output]\n"
"Reads the input file in MatrixMarket format and converts it"
"to Ginkgo's binary format.\nWith the optional -v flag, reads "
"the written binary output again and compares it with the "
"original input to validate the conversion.\n"
"The conversion uses a complex value type if necessary, "
"the highest possible value precision and the smallest "
"possible index type.\n";
return 1;
}
bool validate = std::string{argv[1]} == "-v";
Expand Down
24 changes: 17 additions & 7 deletions core/base/mtx_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,15 @@ matrix_data<ValueType, IndexType> read_raw(std::istream& is)
}


/**
* Returns the magic number at the beginning of the binary format header for the
* given type parameters.
*
* @tparam ValueType the value type to be used for the binary storage
* @tparam IndexType the index type to be used for the binary storage
*/
template <typename ValueType, typename IndexType>
static constexpr uint64_t binary_format_magic()
static constexpr uint64 binary_format_magic()
{
constexpr auto is_int = std::is_same<IndexType, int32>::value;
constexpr auto is_long = std::is_same<IndexType, int64>::value;
Expand All @@ -781,16 +788,16 @@ static constexpr uint64_t binary_format_magic()
constexpr auto index_bit = is_int ? 'I' : 'L';
constexpr auto value_bit =
is_double ? 'D' : (is_float ? 'S' : (is_complex_double ? 'Z' : 'C'));
constexpr uint64 type_bits = index_bit * 256ull + value_bit;
constexpr uint64 shift = 256;
constexpr uint64 type_bits = index_bit * shift + value_bit;
return 'G' +
256ull *
shift *
('I' +
256ull *
shift *
('N' +
256ull *
shift *
('K' +
256ull *
('G' + 256ull * ('O' + 256ull * type_bits)))));
shift * ('G' + shift * ('O' + shift * type_bits)))));
}


Expand Down Expand Up @@ -856,6 +863,8 @@ matrix_data<ValueType, IndexType> read_binary_convert(std::istream& is,
result.nonzeros[i].row = row;
result.nonzeros[i].column = column;
}
// sort the entries
result.ensure_row_major_order();
return result;
}

Expand All @@ -866,6 +875,7 @@ matrix_data<ValueType, IndexType> read_binary_convert(std::istream& is,
template <typename ValueType, typename IndexType>
matrix_data<ValueType, IndexType> read_binary_raw(std::istream& is)
{
static_assert(sizeof(uint64) == 8, "c++ is broken"); // just to be sure
std::array<char, 32> header{};
GKO_CHECK_STREAM(is.read(header.data(), 32), "failed reading header");
uint64 magic{};
Expand Down
71 changes: 33 additions & 38 deletions core/test/base/mtx_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,17 +404,18 @@ std::array<gko::uint64, 20> build_binary_complex_data()
double neg_dbl_val = -2.5;
std::memcpy(&int_val, &dbl_val, sizeof(double));
std::memcpy(&neg_int_val, &neg_dbl_val, sizeof(double));
constexpr gko::uint64 shift = 256;
// note: the following data is not sorted!
std::array<gko::uint64, 20> data{
'G' + 256ull *
'G' + shift *
('I' +
256ull *
shift *
('N' +
256ull *
shift *
('K' +
256ull *
('G' +
256ull *
('O' + 256ull * ('Z' + 256ull * 'L')))))),
shift * ('G' +
shift * ('O' +
shift * ('Z' + shift * 'L')))))),
64, // num_rows
32, // num_cols
4, // num_entries
Expand All @@ -426,14 +427,14 @@ std::array<gko::uint64, 20> build_binary_complex_data()
1, // col
int_val,
neg_int_val,
4, // row
2, // col
0,
neg_int_val,
16, // row
25, // col
0,
0};
0,
4, // row
2, // col
0,
neg_int_val};
return data;
}

Expand All @@ -446,26 +447,27 @@ std::array<gko::uint64, 16> build_binary_real_data()
double neg_dbl_val = -2.5;
std::memcpy(&int_val, &dbl_val, sizeof(double));
std::memcpy(&neg_int_val, &neg_dbl_val, sizeof(double));
constexpr gko::uint64 shift = 256;
// note: the following data is not sorted!
std::array<gko::uint64, 16> data{
'G' + 256ull *
'G' + shift *
('I' +
256ull *
shift *
('N' +
256ull *
shift *
('K' +
256ull *
('G' +
256ull *
('O' + 256ull * ('D' + 256ull * 'L')))))),
shift * ('G' +
shift * ('O' +
shift * ('D' + shift * 'L')))))),
64, // num_rows
32, // num_cols
4, // num_entries
0, // row
1, // col
0, // val
1, // row
1, // col
int_val,
0, // row
1, // col
0, // val
4, // row
2, // col
neg_int_val,
Expand Down Expand Up @@ -831,8 +833,8 @@ TEST(MtxReader, WritesBinary)
gko::matrix_data<double, gko::int64> data;
data.size = gko::dim<2>{64, 32};
data.nonzeros.resize(4);
data.nonzeros[0] = {0, 1, 0.0};
data.nonzeros[1] = {1, 1, 2.5};
data.nonzeros[0] = {1, 1, 2.5};
data.nonzeros[1] = {0, 1, 0.0};
data.nonzeros[2] = {4, 2, -2.5};
data.nonzeros[3] = {16, 25, 0.0};

Expand All @@ -852,8 +854,8 @@ TEST(MtxReader, WritesComplexBinary)
data.nonzeros.resize(4);
data.nonzeros[0] = {0, 1, {0.0, 2.5}};
data.nonzeros[1] = {1, 1, {2.5, -2.5}};
data.nonzeros[2] = {4, 2, {0.0, -2.5}};
data.nonzeros[3] = {16, 25, {0.0, 0.0}};
data.nonzeros[2] = {16, 25, {0.0, 0.0}};
data.nonzeros[3] = {4, 2, {0.0, -2.5}};

gko::write_binary_raw(ss, data);

Expand Down Expand Up @@ -973,6 +975,7 @@ TYPED_TEST(RealDummyLinOpTest, ReadsGenericLinOpFromBinaryStream)
{
using value_type = typename TestFixture::value_type;
using index_type = typename TestFixture::index_type;
using tpl = typename gko::matrix_data<value_type, index_type>::nonzero_type;
auto raw_data = build_binary_real_data();
std::istringstream iss(std::string{reinterpret_cast<char*>(raw_data.data()),
raw_data.size() * sizeof(gko::uint64)});
Expand All @@ -983,18 +986,10 @@ TYPED_TEST(RealDummyLinOpTest, ReadsGenericLinOpFromBinaryStream)
const auto& data = lin_op->data_;
ASSERT_EQ(data.size, gko::dim<2>(64, 32));
ASSERT_EQ(data.nonzeros.size(), 4);
ASSERT_EQ(data.nonzeros[0].row, 0);
ASSERT_EQ(data.nonzeros[1].row, 1);
ASSERT_EQ(data.nonzeros[2].row, 4);
ASSERT_EQ(data.nonzeros[3].row, 16);
ASSERT_EQ(data.nonzeros[0].column, 1);
ASSERT_EQ(data.nonzeros[1].column, 1);
ASSERT_EQ(data.nonzeros[2].column, 2);
ASSERT_EQ(data.nonzeros[3].column, 25);
ASSERT_EQ(data.nonzeros[0].value, value_type{0.0});
ASSERT_EQ(data.nonzeros[1].value, value_type{2.5});
ASSERT_EQ(data.nonzeros[2].value, value_type{-2.5});
ASSERT_EQ(data.nonzeros[3].value, value_type{0.0});
ASSERT_EQ(data.nonzeros[0], tpl(0, 1, value_type{0.0}));
ASSERT_EQ(data.nonzeros[1], tpl(1, 1, value_type{2.5}));
ASSERT_EQ(data.nonzeros[2], tpl(4, 2, value_type{-2.5}));
ASSERT_EQ(data.nonzeros[3], tpl(16, 25, value_type{0.0}));
}


Expand Down
22 changes: 17 additions & 5 deletions include/ginkgo/core/base/mtx_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace gko {
* @param is input stream from which to read the data
*
* @return A matrix_data structure containing the matrix. The nonzero elements
* are sorted in lexicographic order of their (row, colum) indexes.
* are sorted in lexicographic order of their (row, column) indexes.
*
* @note This is an advanced routine that will return the raw matrix data
* structure. Consider using gko::read instead.
Expand All @@ -67,13 +67,27 @@ matrix_data<ValueType, IndexType> read_raw(std::istream& is);
* so files from a big endian processor can't be read from a little endian
* processor and vice-versa.
*
* The binary format has the following structure (in system endianness):
* 1. A 32 byte header consisting of 4 uint64_t values:
* magic = GINKGO__: The highest two bytes stand for value and index type.
* value type: S (float), D (double),
* C (complex<float>), Z(complex<double>)
* index type: I (int32), L (int64)
* num_rows: Number of rows
* num_cols: Number of columns
* num_entries: Number of (row, column, value) tuples to follow
* 2. Following are num_entries blocks of size
* sizeof(IndexType) * 2 + sizeof(ValueType).
* Each consists of a row index stored as IndexType, followed by
* a column index stored as IndexType and a value stored as ValueType.
*
* @tparam ValueType type of matrix values
* @tparam IndexType type of matrix indexes
*
* @param is input stream from which to read the data
*
* @return A matrix_data structure containing the matrix. The nonzero elements
* are sorted in lexicographic order of their (row, colum) indexes.
* are sorted in lexicographic order of their (row, column) indexes.
*
* @note This is an advanced routine that will return the raw matrix data
* structure. Consider using gko::read_binary instead.
Expand All @@ -92,7 +106,7 @@ matrix_data<ValueType, IndexType> read_binary_raw(std::istream& is);
* @param is input stream from which to read the data
*
* @return A matrix_data structure containing the matrix. The nonzero elements
* are sorted in lexicographic order of their (row, colum) indexes.
* are sorted in lexicographic order of their (row, column) indexes.
*
* @note This is an advanced routine that will return the raw matrix data
* structure. Consider using gko::read_generic instead.
Expand Down Expand Up @@ -146,7 +160,6 @@ void write_raw(std::ostream& os, const matrix_data<ValueType, IndexType>& data,
*
* @param os output stream where the data is to be written
* @param data the matrix data to write
* @param layout the layout used in the output
*
* @note This is an advanced routine that writes the raw matrix data structure.
* If you are trying to write an existing matrix, consider using
Expand Down Expand Up @@ -333,7 +346,6 @@ inline void write(
*
* @param os output stream where the data is to be written
* @param matrix the matrix to write
* @param layout the layout used in the output
*/
template <typename MatrixType, typename StreamType>
inline void write_binary(StreamType&& os, MatrixType* matrix)
Expand Down

0 comments on commit 092c57a

Please sign in to comment.