Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sanity check so ParameterInput is not allowed to be different on different MPI ranks #1173

Merged
merged 6 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
### Added (new features/APIs/variables/...)
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option


### Changed (changing behavior/API/variables/...)
- [[PR1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls

### Fixed (not changing behavior/API/variables/...)

- [[PR1173]](https://github.com/parthenon-hpc-lab/parthenon/pull/1173) Make debugging easier by making parthenon throw an error if ParameterInput is different on multiple MPI ranks.

### Infrastructure (changes irrelevant to downstream codes)


### Removed (removing behavior/API/varaibles/...)


Expand Down
34 changes: 33 additions & 1 deletion src/outputs/output_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "mesh/mesh_refinement.hpp"
#include "mesh/meshblock.hpp"
#include "outputs/output_utils.hpp"
#include "parameter_input.hpp"

namespace parthenon {
namespace OutputUtils {
Expand Down Expand Up @@ -306,7 +307,7 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) {
#endif // MPI_PARALLEL
return out;
}
std::size_t MPISum(std::size_t val) {
constexpr void CheckMPISizeT() {
#ifdef MPI_PARALLEL
// Need to use sizeof here because unsigned long long and unsigned
// long are identical under the hood but registered as different
Expand All @@ -316,11 +317,42 @@ std::size_t MPISum(std::size_t val) {
"size_t is unsigned and integral");
static_assert(sizeof(std::size_t) == sizeof(unsigned long long int),
"MPI_UNSIGNED_LONG_LONG same as size_t");

#endif
}
std::size_t MPISum(std::size_t val) {
#ifdef MPI_PARALLEL
CheckMPISizeT();
PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &val, 1, MPI_UNSIGNED_LONG_LONG,
MPI_SUM, MPI_COMM_WORLD));
#endif
return val;
}

void CheckParameterInputConsistent(ParameterInput *pin) {
#ifdef MPI_PARALLEL
CheckMPISizeT();

std::size_t pin_hash = std::hash<ParameterInput>()(*pin);
std::size_t pin_hash_root = pin_hash;
PARTHENON_MPI_CHECK(
MPI_Bcast(&pin_hash_root, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD));

int is_same_local = (pin_hash == pin_hash_root);
int pinput_same_accross_ranks;
PARTHENON_MPI_CHECK(MPI_Reduce(&is_same_local, &pinput_same_accross_ranks, 1, MPI_INT,
MPI_LAND, 0, MPI_COMM_WORLD));
if (Globals::my_rank == 0) {
PARTHENON_REQUIRE_THROWS(
pinput_same_accross_ranks,
"Parameter input object must be the same on every rank, otherwise I/O may "
"be\n\t\t"
"unable to write it safely. If you reached this error message, look to make "
"sure\n\t\t"
"that your calls to functions that look like pin->GetOrAdd are all called\n\t\t"
"exactly the same way on every MPI rank.");
}
Yurlungur marked this conversation as resolved.
Show resolved Hide resolved
#endif // MPI_PARALLEL
}
} // namespace OutputUtils
} // namespace parthenon
4 changes: 4 additions & 0 deletions src/outputs/output_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
#include "utils/error_checking.hpp"

namespace parthenon {
// forward declaration
class ParameterInput;

namespace OutputUtils {
// Helper struct containing some information about a variable
struct VarInfo {
Expand Down Expand Up @@ -348,6 +351,7 @@ std::vector<int> ComputeDerefinementCount(Mesh *pm);
std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count);
std::size_t MPISum(std::size_t local);

void CheckParameterInputConsistent(ParameterInput *pin);
} // namespace OutputUtils
} // namespace parthenon

Expand Down
3 changes: 3 additions & 0 deletions src/outputs/parthenon_hdf5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm
Kokkos::Profiling::pushRegion("PHDF5::WriteOutputFileRealPrec");
}

// Check that the parameter input is safe to write to HDF5
OutputUtils::CheckParameterInputConsistent(pin);

// writes all graphics variables to hdf file
// HDF5 structures
// Also writes companion xdmf file
Expand Down
46 changes: 45 additions & 1 deletion src/parameter_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Copyright(C) 2014 James M. Stone <jmstone@princeton.edu> and other code contributors
// Licensed under the 3-clause BSD License, see LICENSE file for details
//========================================================================================
// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved.
// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand Down Expand Up @@ -31,6 +31,7 @@
#include "config.hpp"
#include "defs.hpp"
#include "outputs/io_wrapper.hpp"
#include "utils/hash.hpp"
#include "utils/string_utils.hpp"

namespace parthenon {
Expand Down Expand Up @@ -74,6 +75,8 @@ class InputBlock {
// Functions are implemented in parameter_input.cpp

class ParameterInput {
friend class std::hash<ParameterInput>;

public:
// constructor/destructor
ParameterInput();
Expand Down Expand Up @@ -213,4 +216,45 @@ class ParameterInput {
}
};
} // namespace parthenon

// JMM: Believe it or not, this is the recommended way to overload hash functions
// See: https://en.cppreference.com/w/cpp/utility/hash
namespace std {
template <>
struct hash<parthenon::InputLine> {
std::size_t operator()(const parthenon::InputLine &il) {
return parthenon::impl::hash_combine(0, il.param_name, il.param_value,
il.param_comment);
}
};

template <>
struct hash<parthenon::InputBlock> {
std::size_t operator()(const parthenon::InputBlock &ib) {
using parthenon::impl::hash_combine;
std::size_t out =
hash_combine(0, ib.block_name, ib.max_len_parname, ib.max_len_parvalue);
for (parthenon::InputLine *pline = ib.pline; pline != nullptr; pline = pline->pnext) {
out = hash_combine(out, *pline);
}
return out;
}
};

template <>
struct hash<parthenon::ParameterInput> {
std::size_t operator()(const parthenon::ParameterInput &in) {
using parthenon::InputBlock;
using parthenon::impl::hash_combine;
std::size_t out = 0;
out = hash_combine(out, in.last_filename_);
for (InputBlock *pblock = in.pfirst_block; pblock != nullptr;
pblock = pblock->pnext) {
out = hash_combine(out, *pblock);
}
return out;
}
};
} // namespace std

#endif // PARAMETER_INPUT_HPP_
13 changes: 9 additions & 4 deletions src/utils/hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Copyright(C) 2022 The Parthenon collaboration
// Licensed under the 3-clause BSD License, see LICENSE file for details
//========================================================================================
// (C) (or copyright) 2022. Triad National Security, LLC. All rights reserved.
// (C) (or copyright) 2022-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand All @@ -20,15 +20,20 @@

#include <functional>
#include <tuple>
#include <utility>

namespace parthenon {
namespace impl {
template <class T>
std::size_t hash_combine(std::size_t lhs, const T &v) {
template <class T, typename... Rest>
std::size_t hash_combine(std::size_t lhs, const T &v, Rest &&...rest) {
std::size_t rhs = std::hash<T>()(v);
// The boost hash combine function
lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
return lhs;
if constexpr (sizeof...(Rest) > 0) {
return hash_combine(lhs, std::forward<Rest>(rest)...);
} else {
return lhs;
}
}

template <class Tup, std::size_t I = std::tuple_size<Tup>::value - 1>
Expand Down
2 changes: 1 addition & 1 deletion tst/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ list(APPEND unit_tests_SOURCES
test_pararrays.cpp
test_sparse_pack.cpp
test_swarm.cpp
test_required_desired.cpp
test_parameter_input.cpp
test_error_checking.cpp
test_partitioning.cpp
test_state_descriptor.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Copyright(C) 2014 James M. Stone <jmstone@princeton.edu> and other code contributors
// Licensed under the 3-clause BSD License, see LICENSE file for details
//========================================================================================
// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved.
// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand Down Expand Up @@ -101,3 +101,52 @@ TEST_CASE("Test required/desired checking from inputs", "[ParameterInput]") {
}
}
}

TEST_CASE("Parameter inputs can be hashed and hashing provides useful sanity checks",
"[ParameterInput][Hash]") {
GIVEN("Two ParameterInput objects already populated") {
ParameterInput in1, in2;
std::hash<ParameterInput> hasher;
std::stringstream ss;
ss << "<block1>" << std::endl
<< "var1 = 0 # comment" << std::endl
<< "var2 = 1, & # another comment" << std::endl
<< " 2" << std::endl
<< "<block2>" << std::endl
<< "var3 = 3" << std::endl
<< "# comment" << std::endl
<< "var4 = 4" << std::endl;

// JMM: streams are stateful. Need to be very careful here.
std::string ideck = ss.str();
std::istringstream s1(ideck);
std::istringstream s2(ideck);
in1.LoadFromStream(s1);
in2.LoadFromStream(s2);

WHEN("We hash these parameter inputs") {
std::size_t hash1 = hasher(in1);
std::size_t hash2 = hasher(in2);
THEN("The hashes agree") { REQUIRE(hash1 == hash2); }

AND_WHEN("We modify both parameter inputs in the same way") {
in1.GetOrAddReal("block3", "var5", 2.0);
in2.GetOrAddReal("block3", "var5", 2.0);
THEN("The hashes agree") {
std::size_t hash1 = hasher(in1);
std::size_t hash2 = hasher(in2);
REQUIRE(hash1 == hash2);

AND_WHEN("When we modify one input but not the other") {
in2.GetOrAddInteger("block3", "var6", 7);
THEN("The hashes will not agree") {
std::size_t hash1 = hasher(in1);
std::size_t hash2 = hasher(in2);
REQUIRE(hash1 != hash2);
}
}
}
}
}
}
}
Loading