From 7438eb20b2ce527edb3c3fbc83e3007629a2993e Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 17:49:05 -0600 Subject: [PATCH 1/5] add hach checking for params --- src/outputs/output_utils.cpp | 40 +++++++++++++-- src/outputs/output_utils.hpp | 4 ++ src/outputs/parthenon_hdf5.cpp | 3 ++ src/parameter_input.hpp | 44 +++++++++++++++++ src/utils/hash.hpp | 11 +++-- tst/unit/CMakeLists.txt | 2 +- ...d_desired.cpp => test_parameter_input.cpp} | 49 +++++++++++++++++++ 7 files changed, 145 insertions(+), 8 deletions(-) rename tst/unit/{test_required_desired.cpp => test_parameter_input.cpp} (71%) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 8b9d8478cfde..4c3c46fee0b6 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -30,6 +30,7 @@ #include "mesh/mesh_refinement.hpp" #include "mesh/meshblock.hpp" #include "outputs/output_utils.hpp" +#include "parameter_input.hpp" namespace parthenon { namespace OutputUtils { @@ -306,21 +307,52 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { #endif // MPI_PARALLEL return out; } -std::size_t MPISum(std::size_t val) { +constexpr void CheckMPISizeT() { #ifdef MPI_PARALLEL - // Need to use sizeof here because unsigned long long and unsigned - // long are identical under the hood but registered as different - // types static_assert(std::is_integral::value && !std::is_signed::value, "size_t is unsigned and integral"); static_assert(sizeof(std::size_t) == sizeof(unsigned long long int), "MPI_UNSIGNED_LONG_LONG same as size_t"); + +#endif +} +std::size_t MPISum(std::size_t val) { +#ifdef MPI_PARALLEL + // Need to use sizeof here because unsigned long long and unsigned + // long are identical under the hood but registered as different + // types + CheckMPISizeT(); PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &val, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD)); #endif return val; } +void CheckParameterInputConsistent(ParameterInput *pin) { +#ifdef MPI_PARALLEL + CheckMPISizeT(); + + std::size_t pin_hash = std::hash()(*pin); + std::size_t pin_hash_root = pin_hash; + PARTHENON_MPI_CHECK( + MPI_Bcast(&pin_hash_root, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD)); + + int is_same_local = (pin_hash == pin_hash_root); + int pinput_same_accross_ranks; + PARTHENON_MPI_CHECK(MPI_Reduce(&is_same_local, &pinput_same_accross_ranks, 1, MPI_INT, + MPI_LAND, 0, MPI_COMM_WORLD)); + if (Globals::my_rank == 0) { + PARTHENON_REQUIRE_THROWS( + pinput_same_accross_ranks, + "Parameter input object must be the same on every rank, otherwise I/O may " + "be\n\t\t" + "unable to write it safely. If you reached this error message, look to make " + "sure\n\t\t" + "that your calls to functions that look like pin->GetOrAdd are all called\n\t\t" + "exactly the same way on every MPI rank."); + } +#endif // MPI_PARALLEL +} } // namespace OutputUtils } // namespace parthenon diff --git a/src/outputs/output_utils.hpp b/src/outputs/output_utils.hpp index cc65dc63f7a2..5f95fbbaedc2 100644 --- a/src/outputs/output_utils.hpp +++ b/src/outputs/output_utils.hpp @@ -43,6 +43,9 @@ #include "utils/error_checking.hpp" namespace parthenon { +// forward declaration +class ParameterInput; + namespace OutputUtils { // Helper struct containing some information about a variable struct VarInfo { @@ -348,6 +351,7 @@ std::vector ComputeDerefinementCount(Mesh *pm); std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count); std::size_t MPISum(std::size_t local); +void CheckParameterInputConsistent(ParameterInput *pin); } // namespace OutputUtils } // namespace parthenon diff --git a/src/outputs/parthenon_hdf5.cpp b/src/outputs/parthenon_hdf5.cpp index 7872e2ad4d03..6cb8c9a84955 100644 --- a/src/outputs/parthenon_hdf5.cpp +++ b/src/outputs/parthenon_hdf5.cpp @@ -72,6 +72,9 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm Kokkos::Profiling::pushRegion("PHDF5::WriteOutputFileRealPrec"); } + // Check that the parameter input is safe to write to HDF5 + OutputUtils::CheckParameterInputConsistent(pin); + // writes all graphics variables to hdf file // HDF5 structures // Also writes companion xdmf file diff --git a/src/parameter_input.hpp b/src/parameter_input.hpp index 31c45dee050d..5379f5587dc4 100644 --- a/src/parameter_input.hpp +++ b/src/parameter_input.hpp @@ -31,6 +31,7 @@ #include "config.hpp" #include "defs.hpp" #include "outputs/io_wrapper.hpp" +#include "utils/hash.hpp" #include "utils/string_utils.hpp" namespace parthenon { @@ -74,6 +75,8 @@ class InputBlock { // Functions are implemented in parameter_input.cpp class ParameterInput { + friend class std::hash; + public: // constructor/destructor ParameterInput(); @@ -213,4 +216,45 @@ class ParameterInput { } }; } // namespace parthenon + +// JMM: Believe it or not, this is the recommended way to overload hash functions +// See: https://en.cppreference.com/w/cpp/utility/hash +namespace std { +template <> +struct hash { + std::size_t operator()(const parthenon::InputLine &il) { + return parthenon::impl::hash_combine(0, il.param_name, il.param_value, + il.param_comment); + } +}; + +template <> +struct hash { + std::size_t operator()(const parthenon::InputBlock &ib) { + using parthenon::impl::hash_combine; + std::size_t out = + hash_combine(0, ib.block_name, ib.max_len_parname, ib.max_len_parvalue); + for (parthenon::InputLine *pline = ib.pline; pline != nullptr; pline = pline->pnext) { + out = hash_combine(out, *pline); + } + return out; + } +}; + +template <> +struct hash { + std::size_t operator()(const parthenon::ParameterInput &in) { + using parthenon::InputBlock; + using parthenon::impl::hash_combine; + std::size_t out = 0; + out = hash_combine(out, in.last_filename_); + for (InputBlock *pblock = in.pfirst_block; pblock != nullptr; + pblock = pblock->pnext) { + out = hash_combine(out, *pblock); + } + return out; + } +}; +} // namespace std + #endif // PARAMETER_INPUT_HPP_ diff --git a/src/utils/hash.hpp b/src/utils/hash.hpp index 2f6592e3baa4..be28f197b6d3 100644 --- a/src/utils/hash.hpp +++ b/src/utils/hash.hpp @@ -20,15 +20,20 @@ #include #include +#include namespace parthenon { namespace impl { -template -std::size_t hash_combine(std::size_t lhs, const T &v) { +template +std::size_t hash_combine(std::size_t lhs, const T &v, Rest &&...rest) { std::size_t rhs = std::hash()(v); // The boost hash combine function lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); - return lhs; + if constexpr (sizeof...(Rest) > 0) { + return hash_combine(lhs, std::forward(rest)...); + } else { + return lhs; + } } template ::value - 1> diff --git a/tst/unit/CMakeLists.txt b/tst/unit/CMakeLists.txt index b44e6a7986da..c892572bdde9 100644 --- a/tst/unit/CMakeLists.txt +++ b/tst/unit/CMakeLists.txt @@ -36,7 +36,7 @@ list(APPEND unit_tests_SOURCES test_pararrays.cpp test_sparse_pack.cpp test_swarm.cpp - test_required_desired.cpp + test_parameter_input.cpp test_error_checking.cpp test_partitioning.cpp test_state_descriptor.cpp diff --git a/tst/unit/test_required_desired.cpp b/tst/unit/test_parameter_input.cpp similarity index 71% rename from tst/unit/test_required_desired.cpp rename to tst/unit/test_parameter_input.cpp index 407ecb4b390a..38a2e1b9d901 100644 --- a/tst/unit/test_required_desired.cpp +++ b/tst/unit/test_parameter_input.cpp @@ -101,3 +101,52 @@ TEST_CASE("Test required/desired checking from inputs", "[ParameterInput]") { } } } + +TEST_CASE("Parameter inputs can be hashed and hashing provides useful sanity checks", + "[ParameterInput][Hash]") { + GIVEN("Two ParameterInput objects already populated") { + ParameterInput in1, in2; + std::hash hasher; + std::stringstream ss; + ss << "" << std::endl + << "var1 = 0 # comment" << std::endl + << "var2 = 1, & # another comment" << std::endl + << " 2" << std::endl + << "" << std::endl + << "var3 = 3" << std::endl + << "# comment" << std::endl + << "var4 = 4" << std::endl; + + // JMM: streams are stateful. Need to be very careful here. + std::string ideck = ss.str(); + std::istringstream s1(ideck); + std::istringstream s2(ideck); + in1.LoadFromStream(s1); + in2.LoadFromStream(s2); + + WHEN("We hash these parameter inputs") { + std::size_t hash1 = hasher(in1); + std::size_t hash2 = hasher(in2); + THEN("The hashes agree") { REQUIRE(hash1 == hash2); } + + AND_WHEN("We modify both parameter inputs in the same way") { + in1.GetOrAddReal("block3", "var5", 2.0); + in2.GetOrAddReal("block3", "var5", 2.0); + THEN("The hashes agree") { + std::size_t hash1 = hasher(in1); + std::size_t hash2 = hasher(in2); + REQUIRE(hash1 == hash2); + + AND_WHEN("When we modify one input but not the other") { + in2.GetOrAddInteger("block3", "var6", 7); + THEN("The hashes will not agree") { + std::size_t hash1 = hasher(in1); + std::size_t hash2 = hasher(in2); + REQUIRE(hash1 != hash2); + } + } + } + } + } + } +} From 0a89ba42e97b7d72d18dd719c9ea7e4131ffe242 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 17:54:44 -0600 Subject: [PATCH 2/5] changelog --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c37f63c6d2d3..ad620f69d70a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,16 +5,14 @@ ### Added (new features/APIs/variables/...) - [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option - ### Changed (changing behavior/API/variables/...) - [[PR1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls ### Fixed (not changing behavior/API/variables/...) - +- [[PR1173]](https://github.com/parthenon-hpc-lab/parthenon/pull/1173) Make debugging easier by making parthenon throw an error if ParameterInput is different on multiple MPI ranks. ### Infrastructure (changes irrelevant to downstream codes) - ### Removed (removing behavior/API/varaibles/...) From a80e91e40fe26dcb879e39dcc184a5f64684bc57 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 17:55:36 -0600 Subject: [PATCH 3/5] CC --- src/parameter_input.hpp | 2 +- src/utils/hash.hpp | 2 +- tst/unit/test_parameter_input.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parameter_input.hpp b/src/parameter_input.hpp index 5379f5587dc4..cb23c1c2cc9e 100644 --- a/src/parameter_input.hpp +++ b/src/parameter_input.hpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/src/utils/hash.hpp b/src/utils/hash.hpp index be28f197b6d3..77642a64dba3 100644 --- a/src/utils/hash.hpp +++ b/src/utils/hash.hpp @@ -3,7 +3,7 @@ // Copyright(C) 2022 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2022-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/tst/unit/test_parameter_input.cpp b/tst/unit/test_parameter_input.cpp index 38a2e1b9d901..b6f03008eb2e 100644 --- a/tst/unit/test_parameter_input.cpp +++ b/tst/unit/test_parameter_input.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC From 94188156d8ceb9c71be833c300d03df4649231bd Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 18:00:00 -0600 Subject: [PATCH 4/5] move comment --- src/outputs/output_utils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 4c3c46fee0b6..9714291293c2 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -309,6 +309,9 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { } constexpr void CheckMPISizeT() { #ifdef MPI_PARALLEL + // Need to use sizeof here because unsigned long long and unsigned + // long are identical under the hood but registered as different + // types static_assert(std::is_integral::value && !std::is_signed::value, "size_t is unsigned and integral"); @@ -319,9 +322,6 @@ constexpr void CheckMPISizeT() { } std::size_t MPISum(std::size_t val) { #ifdef MPI_PARALLEL - // Need to use sizeof here because unsigned long long and unsigned - // long are identical under the hood but registered as different - // types CheckMPISizeT(); PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &val, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD)); From 693b437b4ed5dd293b9019eda9d168522c96660f Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 12 Sep 2024 09:30:18 -0600 Subject: [PATCH 5/5] remove extraneous reduce --- src/outputs/output_utils.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 9714291293c2..9995869b8138 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -337,21 +337,14 @@ void CheckParameterInputConsistent(ParameterInput *pin) { std::size_t pin_hash_root = pin_hash; PARTHENON_MPI_CHECK( MPI_Bcast(&pin_hash_root, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD)); - - int is_same_local = (pin_hash == pin_hash_root); - int pinput_same_accross_ranks; - PARTHENON_MPI_CHECK(MPI_Reduce(&is_same_local, &pinput_same_accross_ranks, 1, MPI_INT, - MPI_LAND, 0, MPI_COMM_WORLD)); - if (Globals::my_rank == 0) { - PARTHENON_REQUIRE_THROWS( - pinput_same_accross_ranks, - "Parameter input object must be the same on every rank, otherwise I/O may " - "be\n\t\t" - "unable to write it safely. If you reached this error message, look to make " - "sure\n\t\t" - "that your calls to functions that look like pin->GetOrAdd are all called\n\t\t" - "exactly the same way on every MPI rank."); - } + PARTHENON_REQUIRE_THROWS( + pin_hash == pin_hash_root, + "Parameter input object must be the same on every rank, otherwise I/O may " + "be\n\t\t" + "unable to write it safely. If you reached this error message, look to make " + "sure\n\t\t" + "that your calls to functions that look like pin->GetOrAdd are all called\n\t\t" + "exactly the same way on every MPI rank."); #endif // MPI_PARALLEL } } // namespace OutputUtils