From 23a57244f7f2aad2651d1d3b37f97ab057a00571 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 25 Feb 2022 18:12:42 -0800 Subject: [PATCH 01/46] Beginnings of signal handling machinery --- Source/Evolve/WarpXEvolve.cpp | 50 ++++++++++++++++++++++++++++++++++- Source/WarpX.H | 24 +++++++++++++++++ Source/WarpX.cpp | 7 +++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 006a587d170..6daa609f330 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -79,6 +79,8 @@ WarpX::Evolve (int numsteps) WARPX_PROFILE("WarpX::Evolve::step"); Real evolve_time_beg_step = amrex::second(); + CheckSignals(); + multi_diags->NewIteration(); // Start loop on time steps @@ -342,6 +344,8 @@ WarpX::Evolve (int numsteps) Real evolve_time_end_step = amrex::second(); evolve_time += evolve_time_end_step - evolve_time_beg_step; + HandleSignals(); + if (verbose) { amrex::Print()<< "STEP " << step+1 << " ends." << " TIME = " << cur_time << " DT = " << dt[0] << "\n"; @@ -350,7 +354,7 @@ WarpX::Evolve (int numsteps) << " s; Avg. per step = " << evolve_time/(step-step_begin+1) << " s\n"; } - if (cur_time >= stop_time - 1.e-3*dt[0]) { + if (cur_time >= stop_time - 1.e-3*dt[0] || signal_requested_break) { break; } @@ -929,3 +933,47 @@ WarpX::applyMirrors(Real time){ } } } + +void +WarpX::CheckSignals() +{ + // We assume that signals will definitely be delivered to rank 0, + // and may be delivered to other ranks as well. For coordination, + // we process them according to when they're received by rank 0. + if (amrex::ParallelDescriptor::MyProc() == 0) + { + for (int i = 0; i < 32; ++i) + { + // Read into a local temporary to ensure the same value is + // used throughout. Atomically exchange it with zero to + // unset the flag without risking loss of a signal - if a + // signal arrives after this, it will be handled the next + // time this function is called. + bool signal_i_received = signal_received_flags[i].exchange(false); + + if (signal_i_received) + { + signal_requested_break |= signal_conf_requests_break[i]; + signal_requested_checkpoint |= signal_conf_requests_checkpoint[i]; + } + } + } + + auto comm = amrex::ParallelDescriptor::Communicator(); + + MPI_Ibcast(&signal_requested_break , 1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_requests[0]); + MPI_Ibcast(&signal_requested_checkpoint, 1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_requests[1]); +} + +void +WarpX::HandleSignals() +{ + MPI_Waitall(std::size(signal_mpi_ibcast_requests), signal_mpi_ibcast_requests, MPI_STATUSES_IGNORE); + + // signal_requested_break is handled directly in WarpX::Evolve + + if (signal_requested_checkpoint) + { + // do checkpoint stuff + } +} diff --git a/Source/WarpX.H b/Source/WarpX.H index 6ac7692afba..e7f1ae56ed4 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -951,6 +951,30 @@ private: // Singleton is used when the code is run from python static WarpX* m_instance; + /** + * Signal handling + * + * Rank 0 will accept signals and asynchronously broadcast the + * configured resposne; other processes will ignore them and + * follow the lead of rank 0 to avoid potential for deadlocks or + * timestep-skewed response. + * + * Variables and functions are static rather than per-instance + * because signal handlers are configured at the process level. + */ + static void CheckSignals(); + static void HandleSignals(); + // Whether a given signal has been received since the last check + static std::atomic signal_received_flags[32]; + // Whether configuration requests the code to break out of the timestep loop on a particular signal + static bool signal_conf_requests_break[32]; + // Whether configuration requests the code to checkpoint at this timestep on a particular signal + static bool signal_conf_requests_checkpoint[32]; + // MPI requests for the asynchronous broadcasts of the signal-requested actions + static MPI_Request signal_mpi_ibcast_requests[2]; + static bool signal_requested_break; + static bool signal_requested_checkpoint; + /// /// Advance the simulation by numsteps steps, electromagnetic case. /// diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 4206f187e3a..c6a57fbe0a7 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -205,6 +205,13 @@ bool WarpX::do_device_synchronize = false; WarpX* WarpX::m_instance = nullptr; +std::atomic WarpX::signal_received_flags[32]; +bool WarpX::signal_conf_requests_break[32]; +bool WarpX::signal_conf_requests_checkpoint[32]; +MPI_Request WarpX::signal_mpi_ibcast_requests[2]; +bool WarpX::signal_requested_break; +bool WarpX::signal_requested_checkpoint; + WarpX& WarpX::GetInstance () { From 97da48f269a61e1413138d1b8b67232ccc0ab1ad Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 28 Feb 2022 10:41:05 -0800 Subject: [PATCH 02/46] Add tentative logic to make checkpoint call --- Source/Evolve/WarpXEvolve.cpp | 2 +- Source/WarpX.H | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 6daa609f330..3b096459252 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -974,6 +974,6 @@ WarpX::HandleSignals() if (signal_requested_checkpoint) { - // do checkpoint stuff + multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); } } diff --git a/Source/WarpX.H b/Source/WarpX.H index e7f1ae56ed4..be8a60f2d7d 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -963,7 +963,7 @@ private: * because signal handlers are configured at the process level. */ static void CheckSignals(); - static void HandleSignals(); + void HandleSignals(); // Whether a given signal has been received since the last check static std::atomic signal_received_flags[32]; // Whether configuration requests the code to break out of the timestep loop on a particular signal From aa96c6d0e4c84d87a213c1f2c2ba4dfded2ef9b5 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 28 Feb 2022 10:42:47 -0800 Subject: [PATCH 03/46] Adapt formatting slightly --- Source/Evolve/WarpXEvolve.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 3b096459252..57b1c3be41f 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -940,10 +940,8 @@ WarpX::CheckSignals() // We assume that signals will definitely be delivered to rank 0, // and may be delivered to other ranks as well. For coordination, // we process them according to when they're received by rank 0. - if (amrex::ParallelDescriptor::MyProc() == 0) - { - for (int i = 0; i < 32; ++i) - { + if (amrex::ParallelDescriptor::MyProc() == 0) { + for (int i = 0; i < 32; ++i) { // Read into a local temporary to ensure the same value is // used throughout. Atomically exchange it with zero to // unset the flag without risking loss of a signal - if a @@ -951,8 +949,7 @@ WarpX::CheckSignals() // time this function is called. bool signal_i_received = signal_received_flags[i].exchange(false); - if (signal_i_received) - { + if (signal_i_received) { signal_requested_break |= signal_conf_requests_break[i]; signal_requested_checkpoint |= signal_conf_requests_checkpoint[i]; } @@ -972,8 +969,7 @@ WarpX::HandleSignals() // signal_requested_break is handled directly in WarpX::Evolve - if (signal_requested_checkpoint) - { + if (signal_requested_checkpoint) { multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); } } From 0fd0bb93691997fe3148f9425214adb3a14d4016 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 2 Mar 2022 18:53:41 -0800 Subject: [PATCH 04/46] Add calls to read signals and set up signal handlers --- Source/WarpX.H | 1 + Source/WarpX.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Source/WarpX.H b/Source/WarpX.H index be8a60f2d7d..bea6af2e455 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -963,6 +963,7 @@ private: * because signal handlers are configured at the process level. */ static void CheckSignals(); + static void SignalSetFlag(int signal_number); void HandleSignals(); // Whether a given signal has been received since the last check static std::atomic signal_received_flags[32]; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index c6a57fbe0a7..9c7040ef8af 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -496,6 +496,12 @@ WarpX::PrintGlobalWarnings(const std::string& when) amrex::Print() << warn_string; } +void +WarpX::SignalSetFlag(int signal_number) +{ + signal_received_flags[signal_number] = true; +} + void WarpX::ReadParameters () { @@ -562,6 +568,33 @@ WarpX::ReadParameters () } } + std::vector signals_in; + pp_warpx.queryarr("break_signals", signals_in); + for (int i : signals_in) { + AMREX_ALWAYS_ASSERT(i < 32); + signal_conf_requests_break[i] = true; + } + signals_in.clear(); + pp_warpx.queryarr("checkpoint_signals", signals_in); + for (int i : signals_in) { + AMREX_ALWAYS_ASSERT(i < 32); + signal_conf_requests_checkpoint[i] = true; + } + { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + for (int i = 0; i < 32; ++i) { + if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { + if (ParallelDescriptor::MyProc() == 0) { + sa.sa_handler = &WarpX::SignalSetFlag; + } else { + sa.sa_handler = SIG_IGN; + } + sigaction(i, &sa, nullptr); + } + } + } + // set random seed std::string random_seed = "default"; pp_warpx.query("random_seed", random_seed); From 65bc0eccc2f3069f294ba58d614006722e338821 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 2 Mar 2022 19:02:31 -0800 Subject: [PATCH 05/46] Initialize signal flag array --- Source/WarpX.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 9c7040ef8af..939139886ae 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -584,6 +584,7 @@ WarpX::ReadParameters () struct sigaction sa; sigemptyset(&sa.sa_mask); for (int i = 0; i < 32; ++i) { + signal_received_flags[i] = false; if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { if (ParallelDescriptor::MyProc() == 0) { sa.sa_handler = &WarpX::SignalSetFlag; From 3eb85b8a6e630d0ff155b28f1aaaaf6a7125d979 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 7 Mar 2022 11:57:45 -0800 Subject: [PATCH 06/46] Add parsing of signal names, and fix some whitespace issues --- Source/Utils/WarpXUtil.H | 2 ++ Source/Utils/WarpXUtil.cpp | 34 ++++++++++++++++++++++ Source/WarpX.cpp | 58 ++++++++++++++++++++------------------ 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/Source/Utils/WarpXUtil.H b/Source/Utils/WarpXUtil.H index cc565be8551..c764bffb71b 100644 --- a/Source/Utils/WarpXUtil.H +++ b/Source/Utils/WarpXUtil.H @@ -249,6 +249,8 @@ double parseStringtoReal(std::string str); */ int parseStringtoInt(std::string str, std::string name); +int parseSignalNameToNumber(const std::string &str); + template amrex::ParserExecutor compileParser (amrex::Parser const* parser) { diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 14de3cf0387..1d44918a7a0 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -394,6 +395,39 @@ parseStringtoInt(std::string str, std::string name) return ival; } +int +parseSignalNameToNumber(const std::string &str) +{ + IParser signals_parser(str); + + for (int i = 0; i < 32; ++i) { +#if defined(__GNU_LIBRARY__) + // Returns upper-case, no "SIG" + std::string name_upper = sigabbrex_np(i); + std::string name_lower = name_upper; + for (char &c : name_lower) { + c = tolower(c); + } +#elif defined(__APPLE__) + // Contains lower-case, no "sig" + std::string name_lower = sys_signame[i]; + std::string name_upper = name_lower; + for (char &c : name_upper) { + c = toupper(c); + } +#endif + signals_parser.setConstant(name_upper, i); + signals_parser.setConstant(name_lower, i); + name_upper = "SIG" + name_upper; + name_lower = "sig" + name_lower; + signals_parser.setConstant(name_upper, i); + signals_parser.setConstant(name_lower, i); + } + auto spf = signals_parser.compileHost<0>(); + + return spf(); +} + // Overloads for float/double instead of amrex::Real to allow makeParser() to query for // my_constants as double even in single precision mode // Always parsing in double precision avoids potential overflows that may occur when parsing user's diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 939139886ae..81b2c095548 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -72,6 +72,7 @@ #include #include +#include #include #include #include @@ -568,33 +569,36 @@ WarpX::ReadParameters () } } - std::vector signals_in; - pp_warpx.queryarr("break_signals", signals_in); - for (int i : signals_in) { - AMREX_ALWAYS_ASSERT(i < 32); - signal_conf_requests_break[i] = true; - } - signals_in.clear(); - pp_warpx.queryarr("checkpoint_signals", signals_in); - for (int i : signals_in) { - AMREX_ALWAYS_ASSERT(i < 32); - signal_conf_requests_checkpoint[i] = true; - } - { - struct sigaction sa; - sigemptyset(&sa.sa_mask); - for (int i = 0; i < 32; ++i) { - signal_received_flags[i] = false; - if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { - if (ParallelDescriptor::MyProc() == 0) { - sa.sa_handler = &WarpX::SignalSetFlag; - } else { - sa.sa_handler = SIG_IGN; - } - sigaction(i, &sa, nullptr); - } - } - } + std::vector signals_in; + + pp_warpx.queryarr("break_signals", signals_in); + for (const std::string &str : signals_in) { + int sig = parseSignalNameToNumber(str); + AMREX_ALWAYS_ASSERT(sig < 32); + signal_conf_requests_break[sig] = true; + } + signals_in.clear(); + pp_warpx.queryarr("checkpoint_signals", signals_in); + for (const std::string &str : signals_in) { + int sig = parseSignalNameToNumber(str); + AMREX_ALWAYS_ASSERT(sig < 32); + signal_conf_requests_checkpoint[sig] = true; + } + { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + for (int i = 0; i < 32; ++i) { + signal_received_flags[i] = false; + if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { + if (ParallelDescriptor::MyProc() == 0) { + sa.sa_handler = &WarpX::SignalSetFlag; + } else { + sa.sa_handler = SIG_IGN; + } + sigaction(i, &sa, nullptr); + } + } + } // set random seed std::string random_seed = "default"; From ef160d763da354059e882647090e378181c9e56e Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 7 Mar 2022 11:59:23 -0800 Subject: [PATCH 07/46] Skip signal setup on Windows --- Source/WarpX.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 81b2c095548..d5e5e8e91d8 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -569,8 +569,8 @@ WarpX::ReadParameters () } } +#if defined(__GNU_LIBRARY__) || defined(__APPLE__) std::vector signals_in; - pp_warpx.queryarr("break_signals", signals_in); for (const std::string &str : signals_in) { int sig = parseSignalNameToNumber(str); @@ -599,6 +599,7 @@ WarpX::ReadParameters () } } } +#endif // set random seed std::string random_seed = "default"; From d9bf8e5c6ae362fc18c475168714a63e40a22d5c Mon Sep 17 00:00:00 2001 From: Roelof Date: Mon, 7 Mar 2022 12:38:42 -0800 Subject: [PATCH 08/46] added checkpoint and break signal inputs to picmi.py --- Python/pywarpx/picmi.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py index fac53dd3d6b..1ba6ed39a01 100644 --- a/Python/pywarpx/picmi.py +++ b/Python/pywarpx/picmi.py @@ -981,6 +981,9 @@ def init(self, kw): self.collisions = kw.pop('warpx_collisions', None) self.embedded_boundary = kw.pop('warpx_embedded_boundary', None) + self.break_signals = kw.pop('warpx_break_signals', None) + self.checkpoint_signals = kw.pop('warpx_checkpoint_signals', None) + self.inputs_initialized = False self.warpx_initialized = False @@ -1019,6 +1022,9 @@ def initialize_inputs(self): pywarpx.amr.check_input = self.amr_check_input + pywarpx.warpx.break_signals = self.break_signals + pywarpx.warpx.checkpoint_signals = self.checkpoint_signals + particle_shape = self.particle_shape for s in self.species: if s.particle_shape is not None: From 707a3a852fca98b7ebd7cbaaa4ac50e07dcdc866 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 11 Mar 2022 17:00:04 -0800 Subject: [PATCH 09/46] Address initial review requests --- Source/Evolve/WarpXEvolve.cpp | 15 +++++++-------- Source/WarpX.H | 21 ++++++++++++++------- Source/WarpX.cpp | 5 ++--- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 57b1c3be41f..026641b59b5 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -354,7 +354,7 @@ WarpX::Evolve (int numsteps) << " s; Avg. per step = " << evolve_time/(step-step_begin+1) << " s\n"; } - if (cur_time >= stop_time - 1.e-3*dt[0] || signal_requested_break) { + if (cur_time >= stop_time - 1.e-3*dt[0] || signal_actions_requested[SIGNAL_REQUESTS_BREAK]) { break; } @@ -950,26 +950,25 @@ WarpX::CheckSignals() bool signal_i_received = signal_received_flags[i].exchange(false); if (signal_i_received) { - signal_requested_break |= signal_conf_requests_break[i]; - signal_requested_checkpoint |= signal_conf_requests_checkpoint[i]; + signal_actions_requested[SIGNAL_REQUESTS_BREAK] |= signal_conf_requests_break[i]; + signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT] |= signal_conf_requests_checkpoint[i]; } } } auto comm = amrex::ParallelDescriptor::Communicator(); - MPI_Ibcast(&signal_requested_break , 1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_requests[0]); - MPI_Ibcast(&signal_requested_checkpoint, 1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_requests[1]); + MPI_Ibcast(signal_actions_requested, 2, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); } void WarpX::HandleSignals() { - MPI_Waitall(std::size(signal_mpi_ibcast_requests), signal_mpi_ibcast_requests, MPI_STATUSES_IGNORE); + MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE); - // signal_requested_break is handled directly in WarpX::Evolve + // SIGNAL_REQUESTs_BREAK is handled directly in WarpX::Evolve - if (signal_requested_checkpoint) { + if (signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT]) { multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); } } diff --git a/Source/WarpX.H b/Source/WarpX.H index bea6af2e455..45e1d954be5 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -952,6 +952,7 @@ private: static WarpX* m_instance; /** + * \brief * Signal handling * * Rank 0 will accept signals and asynchronously broadcast the @@ -962,19 +963,25 @@ private: * Variables and functions are static rather than per-instance * because signal handlers are configured at the process level. */ + //! Check and clear signal flags and asynchronously broadcast them from process 0 static void CheckSignals(); + //! Signal handler to set flags on process 0 (other processes ignore configured signals) static void SignalSetFlag(int signal_number); + //! Complete the asynchronous broadcast of signal flags, and initiate a checkpoint if requested void HandleSignals(); - // Whether a given signal has been received since the last check + //! On process 0, whether a given signal has been received since the last check static std::atomic signal_received_flags[32]; - // Whether configuration requests the code to break out of the timestep loop on a particular signal + //! Whether configuration requests the code to break out of the timestep loop on a particular signal static bool signal_conf_requests_break[32]; - // Whether configuration requests the code to checkpoint at this timestep on a particular signal + //! Whether configuration requests the code to checkpoint at this timestep on a particular signal static bool signal_conf_requests_checkpoint[32]; - // MPI requests for the asynchronous broadcasts of the signal-requested actions - static MPI_Request signal_mpi_ibcast_requests[2]; - static bool signal_requested_break; - static bool signal_requested_checkpoint; + //! Boolean flags transmitted between CheckSignals() and + //! HandleSignals() to indicate actions requested by signals + static bool signal_actions_requested[2]; + //! Labels for indexed positions in signal_actions_requests + enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1 }; + //! MPI requests for the asynchronous broadcasts of the signal-requested actions + static MPI_Request signal_mpi_ibcast_request; /// /// Advance the simulation by numsteps steps, electromagnetic case. diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index d5e5e8e91d8..d6e155cf482 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -209,9 +209,8 @@ WarpX* WarpX::m_instance = nullptr; std::atomic WarpX::signal_received_flags[32]; bool WarpX::signal_conf_requests_break[32]; bool WarpX::signal_conf_requests_checkpoint[32]; -MPI_Request WarpX::signal_mpi_ibcast_requests[2]; -bool WarpX::signal_requested_break; -bool WarpX::signal_requested_checkpoint; +MPI_Request WarpX::signal_mpi_ibcast_request; +bool WarpX::signal_actions_requested[2]; WarpX& WarpX::GetInstance () From bd27779c4699e5993049323f9561a09591f823fc Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 11 Mar 2022 17:25:00 -0800 Subject: [PATCH 10/46] Correct comment to match changed code --- Source/Evolve/WarpXEvolve.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 026641b59b5..88e4c000cfd 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -943,7 +943,7 @@ WarpX::CheckSignals() if (amrex::ParallelDescriptor::MyProc() == 0) { for (int i = 0; i < 32; ++i) { // Read into a local temporary to ensure the same value is - // used throughout. Atomically exchange it with zero to + // used throughout. Atomically exchange it with false to // unset the flag without risking loss of a signal - if a // signal arrives after this, it will be handled the next // time this function is called. From bf24ecafac89e8aaed1ca6929c33d6c1677bd48b Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 12 Mar 2022 10:56:13 -0800 Subject: [PATCH 11/46] Convert maximum signal number to a symbolic name --- Source/WarpX.H | 8 +++++--- Source/WarpX.cpp | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Source/WarpX.H b/Source/WarpX.H index 45e1d954be5..ed9643dae75 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -969,12 +969,14 @@ private: static void SignalSetFlag(int signal_number); //! Complete the asynchronous broadcast of signal flags, and initiate a checkpoint if requested void HandleSignals(); + //! The range of signal values to accept + static constexpr int NUM_SIGNALS = 32; //! On process 0, whether a given signal has been received since the last check - static std::atomic signal_received_flags[32]; + static std::atomic signal_received_flags[NUM_SIGNALS]; //! Whether configuration requests the code to break out of the timestep loop on a particular signal - static bool signal_conf_requests_break[32]; + static bool signal_conf_requests_break[NUM_SIGNALS]; //! Whether configuration requests the code to checkpoint at this timestep on a particular signal - static bool signal_conf_requests_checkpoint[32]; + static bool signal_conf_requests_checkpoint[NUM_SIGNALS]; //! Boolean flags transmitted between CheckSignals() and //! HandleSignals() to indicate actions requested by signals static bool signal_actions_requested[2]; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index d6e155cf482..02787bc59ac 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -573,20 +573,20 @@ WarpX::ReadParameters () pp_warpx.queryarr("break_signals", signals_in); for (const std::string &str : signals_in) { int sig = parseSignalNameToNumber(str); - AMREX_ALWAYS_ASSERT(sig < 32); + AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); signal_conf_requests_break[sig] = true; } signals_in.clear(); pp_warpx.queryarr("checkpoint_signals", signals_in); for (const std::string &str : signals_in) { int sig = parseSignalNameToNumber(str); - AMREX_ALWAYS_ASSERT(sig < 32); + AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); signal_conf_requests_checkpoint[sig] = true; } { struct sigaction sa; sigemptyset(&sa.sa_mask); - for (int i = 0; i < 32; ++i) { + for (int i = 0; i < NUM_SIGNALS; ++i) { signal_received_flags[i] = false; if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { if (ParallelDescriptor::MyProc() == 0) { From 6802c00e8c096359d9f904c8183555861d1f4456 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 12 Mar 2022 10:59:13 -0800 Subject: [PATCH 12/46] Always parse signal input, and error out on Windows or wherever it may be unsupported --- Source/WarpX.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 02787bc59ac..69326b6af27 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -568,7 +568,6 @@ WarpX::ReadParameters () } } -#if defined(__GNU_LIBRARY__) || defined(__APPLE__) std::vector signals_in; pp_warpx.queryarr("break_signals", signals_in); for (const std::string &str : signals_in) { @@ -583,6 +582,7 @@ WarpX::ReadParameters () AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); signal_conf_requests_checkpoint[sig] = true; } +#if defined(__GNU_LIBRARY__) || defined(__APPLE__) { struct sigaction sa; sigemptyset(&sa.sa_mask); @@ -598,6 +598,13 @@ WarpX::ReadParameters () } } } +#else + for (int i = 0; i < NUM_SIGNALS; ++i) { + if (signal_conf_requests_break[i] || + signal_conf_requests_checkpoint[i]) { + Abort("Signal handling requested in input, but is not supported on this platform"); + } + } #endif // set random seed From 98e22e2ad14104e0deeadc12c6f2cd0e28eb295a Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 12 Mar 2022 11:04:35 -0800 Subject: [PATCH 13/46] Typo fix --- Source/Evolve/WarpXEvolve.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 88e4c000cfd..d51e5d750d2 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -966,7 +966,7 @@ WarpX::HandleSignals() { MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE); - // SIGNAL_REQUESTs_BREAK is handled directly in WarpX::Evolve + // SIGNAL_REQUESTS_BREAK is handled directly in WarpX::Evolve if (signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT]) { multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); From 7dde1019bf72ad6f15ead1b24f312f27dc5a96e1 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 12 Mar 2022 11:07:09 -0800 Subject: [PATCH 14/46] Add missing reset of checkpoint signal flag --- Source/Evolve/WarpXEvolve.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index d51e5d750d2..bdd66d3ef70 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -970,5 +970,6 @@ WarpX::HandleSignals() if (signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT]) { multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); + signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT] = false; } } From ba52cbc86d34ca27c06f68e9db3b0a85da643157 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 12 Mar 2022 11:09:47 -0800 Subject: [PATCH 15/46] Add reset of break signal, in support of Python or library usage --- Source/Evolve/WarpXEvolve.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index bdd66d3ef70..5198e3109d2 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -355,6 +355,7 @@ WarpX::Evolve (int numsteps) } if (cur_time >= stop_time - 1.e-3*dt[0] || signal_actions_requested[SIGNAL_REQUESTS_BREAK]) { + signal_actions_requested[SIGNAL_REQUESTS_BREAK] = false; break; } From 827dc233f86615822443962badb859650515c676 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 14 Mar 2022 15:34:27 -0700 Subject: [PATCH 16/46] Test for a configured checkpoint diag when asked to checkpoint on a signal --- Source/WarpX.H | 2 ++ Source/WarpX.cpp | 72 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/Source/WarpX.H b/Source/WarpX.H index ed9643dae75..7dd065d3425 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -963,6 +963,8 @@ private: * Variables and functions are static rather than per-instance * because signal handlers are configured at the process level. */ + //! Set up signal handlers based on input configuration + void InitSignalHandling(); //! Check and clear signal flags and asynchronously broadcast them from process 0 static void CheckSignals(); //! Signal handler to set flags on process 0 (other processes ignore configured signals) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 69326b6af27..f392c011d6f 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -240,6 +240,8 @@ WarpX::WarpX () InitEB(); + InitSignalHandling(); + // Geometry on all levels has been defined already. // No valid BoxArray and DistributionMapping have been defined. // But the arrays for them have been resized. @@ -502,6 +504,52 @@ WarpX::SignalSetFlag(int signal_number) signal_received_flags[signal_number] = true; } +void +WarpX::InitSignalHandling() +{ + bool have_checkpoint_diagnostic = false; + + ParmParse pp("diagnostics"); + std::vector diags_names; + pp.queryarr("diags_names", diags_names); + + for (const auto &diag : diags_names) { + ParmParse dd(diag); + std::string format; + dd.query("format", format); + if (format == "checkpoint") { + have_checkpoint_diagnostic = true; + break; + } + } + +#if defined(__GNU_LIBRARY__) || defined(__APPLE__) + struct sigaction sa; + sigemptyset(&sa.sa_mask); + for (int i = 0; i < NUM_SIGNALS; ++i) { + signal_received_flags[i] = false; + if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { + if (ParallelDescriptor::MyProc() == 0) { + sa.sa_handler = &WarpX::SignalSetFlag; + } else { + sa.sa_handler = SIG_IGN; + } + sigaction(i, &sa, nullptr); + } + if (signal_conf_requests_checkpoint[i] && !have_checkpoint_diagnostic) { + Abort("Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); + } + } +#else + for (int i = 0; i < NUM_SIGNALS; ++i) { + if (signal_conf_requests_break[i] || + signal_conf_requests_checkpoint[i]) { + Abort("Signal handling requested in input, but is not supported on this platform"); + } + } +#endif +} + void WarpX::ReadParameters () { @@ -582,30 +630,6 @@ WarpX::ReadParameters () AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); signal_conf_requests_checkpoint[sig] = true; } -#if defined(__GNU_LIBRARY__) || defined(__APPLE__) - { - struct sigaction sa; - sigemptyset(&sa.sa_mask); - for (int i = 0; i < NUM_SIGNALS; ++i) { - signal_received_flags[i] = false; - if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { - if (ParallelDescriptor::MyProc() == 0) { - sa.sa_handler = &WarpX::SignalSetFlag; - } else { - sa.sa_handler = SIG_IGN; - } - sigaction(i, &sa, nullptr); - } - } - } -#else - for (int i = 0; i < NUM_SIGNALS; ++i) { - if (signal_conf_requests_break[i] || - signal_conf_requests_checkpoint[i]) { - Abort("Signal handling requested in input, but is not supported on this platform"); - } - } -#endif // set random seed std::string random_seed = "default"; From 7fa6ec43d08983ce9964da6f00572d7f232fbf07 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 14 Mar 2022 17:39:04 -0700 Subject: [PATCH 17/46] Fix typo in Linux code path --- Source/Utils/WarpXUtil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 1d44918a7a0..2eb7cb7a1cd 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -403,7 +403,7 @@ parseSignalNameToNumber(const std::string &str) for (int i = 0; i < 32; ++i) { #if defined(__GNU_LIBRARY__) // Returns upper-case, no "SIG" - std::string name_upper = sigabbrex_np(i); + std::string name_upper = sigabbrev_np(i); std::string name_lower = name_upper; for (char &c : name_lower) { c = tolower(c); From ca41283e64e3564da3b8b60acac460cc5da18147 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 14 Mar 2022 17:58:17 -0700 Subject: [PATCH 18/46] Clean up MPI support --- Source/Evolve/WarpXEvolve.cpp | 8 ++++++++ Source/WarpX.H | 2 ++ Source/WarpX.cpp | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 5198e3109d2..3df95f7423d 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -46,6 +46,10 @@ #include #include +#if defined(AMREX_USE_MPI) +# include +#endif + #include #include #include @@ -959,13 +963,17 @@ WarpX::CheckSignals() auto comm = amrex::ParallelDescriptor::Communicator(); +#if defined(AMREX_USE_MPI) MPI_Ibcast(signal_actions_requested, 2, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); +#endif } void WarpX::HandleSignals() { +#if defined(AMREX_USE_MPI) MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE); +#endif // SIGNAL_REQUESTS_BREAK is handled directly in WarpX::Evolve diff --git a/Source/WarpX.H b/Source/WarpX.H index 7dd065d3425..553b23d0699 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -984,8 +984,10 @@ private: static bool signal_actions_requested[2]; //! Labels for indexed positions in signal_actions_requests enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1 }; +#if defined(AMREX_USE_MPI) //! MPI requests for the asynchronous broadcasts of the signal-requested actions static MPI_Request signal_mpi_ibcast_request; +#endif /// /// Advance the simulation by numsteps steps, electromagnetic case. diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index f392c011d6f..2ab3b538588 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -209,8 +209,10 @@ WarpX* WarpX::m_instance = nullptr; std::atomic WarpX::signal_received_flags[32]; bool WarpX::signal_conf_requests_break[32]; bool WarpX::signal_conf_requests_checkpoint[32]; -MPI_Request WarpX::signal_mpi_ibcast_request; bool WarpX::signal_actions_requested[2]; +#if defined(AMREX_USE_MPI) +MPI_Request WarpX::signal_mpi_ibcast_request; +#endif WarpX& WarpX::GetInstance () From 9c761996219d955e3a55769cd43140bc381f03f6 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 14 Mar 2022 17:58:43 -0700 Subject: [PATCH 19/46] Use symbolic name for maximum signal number --- Source/WarpX.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 2ab3b538588..9c64a4d6a1a 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -206,9 +206,9 @@ bool WarpX::do_device_synchronize = false; WarpX* WarpX::m_instance = nullptr; -std::atomic WarpX::signal_received_flags[32]; -bool WarpX::signal_conf_requests_break[32]; -bool WarpX::signal_conf_requests_checkpoint[32]; +std::atomic WarpX::signal_received_flags[NUM_SIGNALS]; +bool WarpX::signal_conf_requests_break[NUM_SIGNALS]; +bool WarpX::signal_conf_requests_checkpoint[NUM_SIGNALS]; bool WarpX::signal_actions_requested[2]; #if defined(AMREX_USE_MPI) MPI_Request WarpX::signal_mpi_ibcast_request; From 64a8b521f9e26edf1bc44257714ed7895d038358 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 14 Mar 2022 18:09:05 -0700 Subject: [PATCH 20/46] Fix unused variable in the no-MPI case --- Source/Evolve/WarpXEvolve.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 3df95f7423d..0405b642d85 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -961,9 +961,8 @@ WarpX::CheckSignals() } } - auto comm = amrex::ParallelDescriptor::Communicator(); - #if defined(AMREX_USE_MPI) + auto comm = amrex::ParallelDescriptor::Communicator(); MPI_Ibcast(signal_actions_requested, 2, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); #endif } From 6c83790e3e19f31ec231e9ca90bbaa8d712beab3 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 14 Mar 2022 18:13:17 -0700 Subject: [PATCH 21/46] Add missing header inclusions --- Source/Utils/WarpXUtil.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 2eb7cb7a1cd..359e889259d 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -38,6 +38,11 @@ #include #include +// C library header for sigabbrev_np on GNU/Linux +#include +// System header for sys_signame on macOS +#include + using namespace amrex; void PreparseAMReXInputIntArray(amrex::ParmParse& a_pp, char const * const input_str, const bool replace) From 9c82613413de4ebf40c2a5fd14d6c6d3132fd2ef Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:11:04 -0700 Subject: [PATCH 22/46] Switch signal parsing to an enumerated table --- Source/Utils/WarpXUtil.cpp | 75 ++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 359e889259d..6cde7989b9c 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -405,29 +405,72 @@ parseSignalNameToNumber(const std::string &str) { IParser signals_parser(str); - for (int i = 0; i < 32; ++i) { -#if defined(__GNU_LIBRARY__) - // Returns upper-case, no "SIG" - std::string name_upper = sigabbrev_np(i); + struct { + const char* abbrev; + const int value; + } signals_to_parse[] = { + {"ABRT", SIGABRT}, + {"ALRM", SIGALRM}, + {"BUS", SIGBUS}, + {"CHLD", SIGCHLD}, + {"CLD", SIGCHLD}, // Synonymous to SIGCHLD on Linux + {"CONT", SIGCONT}, +#if defined(SIGEMT) + {"EMT", SIGEMT}, // macOS and some Linux architectures +#endif + // Omitted because AMReX typically handles SIGFPE specially + // {"FPE", SIGFPE}, + {"HUP", SIGHUP}, + {"ILL", SIGILL}, +#if defined(SIGINFO) + {"INFO", SIGINFO}, // macOS and some Linux architectures +#endif + {"INT", SIGINT}, + {"IO", SIGIO}, + {"IOT", SIGABRT}, // Synonymous to SIGABRT on Linux + // {"KILL", SIGKILL}, // Cannot be handled + {"PIPE", SIGPIPE}, + {"POLL", SIGIO}, // Synonymous to SIGIO on Linux + {"PROF", SIGPROF}, +#if defined(SIGPWR) + {"PWR", SIGPWR}, // Linux-only +#endif + {"QUIT", SIGQUIT}, + {"SEGV", SIGSEGV}, +#if defined(SIGSTKFLT) + {"STKFLT", SIGSTKFLT}, // Linux-only +#endif + // {"STOP", SIGSTOP}, // Cannot be handled + {"SYS", SIGSYS}, + {"TERM", SIGTERM}, + {"TRAP", SIGTRAP}, + {"TSTP", SIGTSTP}, + {"TTIN", SIGTTIN}, + {"TTOU", SIGTTOU}, + {"URG", SIGURG}, + {"USR1", SIGUSR1}, + {"USR2", SIGUSR2}, + {"VTALRM", SIGVTALRM}, + {"WINCH", SIGWINCH}, + {"XCPU", SIGXCPU}, + {"XFSZ", SIGXFSZ}, + }; + + for (const auto& sp : signals_to_parse) { + std::string name_upper = sp.abbrev; std::string name_lower = name_upper; for (char &c : name_lower) { c = tolower(c); } -#elif defined(__APPLE__) - // Contains lower-case, no "sig" - std::string name_lower = sys_signame[i]; - std::string name_upper = name_lower; - for (char &c : name_upper) { - c = toupper(c); - } -#endif - signals_parser.setConstant(name_upper, i); - signals_parser.setConstant(name_lower, i); + + signals_parser.setConstant(name_upper, sp.value); + signals_parser.setConstant(name_lower, sp.value); name_upper = "SIG" + name_upper; name_lower = "sig" + name_lower; - signals_parser.setConstant(name_upper, i); - signals_parser.setConstant(name_lower, i); + signals_parser.setConstant(name_upper, sp.value); + signals_parser.setConstant(name_lower, sp.value); } + auto spf = signals_parser.compileHost<0>(); return spf(); From 1fd000d87d840be1511df2b53f53f20e15263ecb Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:17:41 -0700 Subject: [PATCH 23/46] Test signal handling for Linux, not GNU C library --- Source/WarpX.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 9c64a4d6a1a..455a320b7df 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -525,7 +525,7 @@ WarpX::InitSignalHandling() } } -#if defined(__GNU_LIBRARY__) || defined(__APPLE__) +#if defined(__linux__) || defined(__APPLE__) struct sigaction sa; sigemptyset(&sa.sa_mask); for (int i = 0; i < NUM_SIGNALS; ++i) { From bf93ea3410a0a46bada6fb98b5d5611033615cd5 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:46:07 -0700 Subject: [PATCH 24/46] Avoid another magic number --- Source/WarpX.H | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/WarpX.H b/Source/WarpX.H index 553b23d0699..665dc9c1c47 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -979,11 +979,11 @@ private: static bool signal_conf_requests_break[NUM_SIGNALS]; //! Whether configuration requests the code to checkpoint at this timestep on a particular signal static bool signal_conf_requests_checkpoint[NUM_SIGNALS]; + //! Labels for indexed positions in signal_actions_requests + enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1, SIGNAL_REQUESTS_MAX = 1 }; //! Boolean flags transmitted between CheckSignals() and //! HandleSignals() to indicate actions requested by signals - static bool signal_actions_requested[2]; - //! Labels for indexed positions in signal_actions_requests - enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1 }; + static bool signal_actions_requested[SIGNAL_REQUESTS_MAX+1]; #if defined(AMREX_USE_MPI) //! MPI requests for the asynchronous broadcasts of the signal-requested actions static MPI_Request signal_mpi_ibcast_request; From d0ceffcbb75f89131eafb24cabb7b543b4237455 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:47:47 -0700 Subject: [PATCH 25/46] Update MPI_Ibcast call to match symbolic array length --- Source/Evolve/WarpXEvolve.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 0405b642d85..257303dbc0e 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -963,7 +963,7 @@ WarpX::CheckSignals() #if defined(AMREX_USE_MPI) auto comm = amrex::ParallelDescriptor::Communicator(); - MPI_Ibcast(signal_actions_requested, 2, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); + MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_MAX+1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); #endif } From 2efa4b600e9b63d1382b5c1d1fddb6f8853f9fe4 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:49:02 -0700 Subject: [PATCH 26/46] Update loop over signal flags to use symbolic limit --- Source/Evolve/WarpXEvolve.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 257303dbc0e..3b865213e51 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -946,7 +946,7 @@ WarpX::CheckSignals() // and may be delivered to other ranks as well. For coordination, // we process them according to when they're received by rank 0. if (amrex::ParallelDescriptor::MyProc() == 0) { - for (int i = 0; i < 32; ++i) { + for (int i = 0; i < NUM_SIGNALS; ++i) { // Read into a local temporary to ensure the same value is // used throughout. Atomically exchange it with false to // unset the flag without risking loss of a signal - if a From c464dd4eb228412c00cecfeee8b48228a00bf30b Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:52:26 -0700 Subject: [PATCH 27/46] Match #includes to usage --- Source/Utils/WarpXUtil.cpp | 4 +--- Source/WarpX.cpp | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 6cde7989b9c..e928182092b 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -38,9 +38,7 @@ #include #include -// C library header for sigabbrev_np on GNU/Linux -#include -// System header for sys_signame on macOS +// For definitions of signal values #include using namespace amrex; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 455a320b7df..97425832960 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -78,6 +78,9 @@ #include #include +// For sigaction() et al. +#include + using namespace amrex; Vector WarpX::E_external_grid(3, 0.0); From 1efee657cd06cec17e7011c2ea86a2a9569daa9d Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 17:53:50 -0700 Subject: [PATCH 28/46] Add omitted C++ std header include --- Source/WarpX.H | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/WarpX.H b/Source/WarpX.H index 665dc9c1c47..5a47cee622a 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -60,6 +60,7 @@ #include #include +#include #include #include #include From dc5379c30e14d26488c86eea263fbad20134cf67 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 21 Mar 2022 22:03:10 -0700 Subject: [PATCH 29/46] Guard entire set of signal definitions as *nix-only, not for Windows --- Source/Utils/WarpXUtil.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index e928182092b..3d34e1a84c7 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -407,6 +407,7 @@ parseSignalNameToNumber(const std::string &str) const char* abbrev; const int value; } signals_to_parse[] = { +#if defined(__linux__) || defined(__APPLE__) {"ABRT", SIGABRT}, {"ALRM", SIGALRM}, {"BUS", SIGBUS}, @@ -452,6 +453,7 @@ parseSignalNameToNumber(const std::string &str) {"WINCH", SIGWINCH}, {"XCPU", SIGXCPU}, {"XFSZ", SIGXFSZ}, +#endif // #if defined(__linux__) || defined(__APPLE__) }; for (const auto& sp : signals_to_parse) { From 95ce13dcc428aa9033ac3de4694d0849eedce5a3 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 22 Mar 2022 08:59:02 -0700 Subject: [PATCH 30/46] Broaden Windows exclusion to avoid zero-length array that displeases MSVC++ --- Source/Utils/WarpXUtil.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 3d34e1a84c7..091daf66e00 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -403,11 +403,11 @@ parseSignalNameToNumber(const std::string &str) { IParser signals_parser(str); +#if defined(__linux__) || defined(__APPLE__) struct { const char* abbrev; const int value; } signals_to_parse[] = { -#if defined(__linux__) || defined(__APPLE__) {"ABRT", SIGABRT}, {"ALRM", SIGALRM}, {"BUS", SIGBUS}, @@ -453,7 +453,6 @@ parseSignalNameToNumber(const std::string &str) {"WINCH", SIGWINCH}, {"XCPU", SIGXCPU}, {"XFSZ", SIGXFSZ}, -#endif // #if defined(__linux__) || defined(__APPLE__) }; for (const auto& sp : signals_to_parse) { @@ -470,6 +469,7 @@ parseSignalNameToNumber(const std::string &str) signals_parser.setConstant(name_upper, sp.value); signals_parser.setConstant(name_lower, sp.value); } +#endif // #if defined(__linux__) || defined(__APPLE__) auto spf = signals_parser.compileHost<0>(); From d9befb1ea8886fb809482d5f0b0c8a1db97ea862 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 22 Mar 2022 16:44:50 -0700 Subject: [PATCH 31/46] Check return value from sigaction() --- Source/WarpX.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 97425832960..2e08ad645ae 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -539,7 +539,10 @@ WarpX::InitSignalHandling() } else { sa.sa_handler = SIG_IGN; } - sigaction(i, &sa, nullptr); + int result = sigaction(i, &sa, nullptr); + if (result != 0) { + Abort("Failed to install signal handler for a configured signal"); + } } if (signal_conf_requests_checkpoint[i] && !have_checkpoint_diagnostic) { Abort("Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); From 77483d073f5fc8c8939dd6079290e11c31434726 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 14:41:22 -0700 Subject: [PATCH 32/46] Convert conditional calls to Abort() to assertions --- Source/WarpX.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 2e08ad645ae..65f5f597ce0 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -540,13 +540,11 @@ WarpX::InitSignalHandling() sa.sa_handler = SIG_IGN; } int result = sigaction(i, &sa, nullptr); - if (result != 0) { - Abort("Failed to install signal handler for a configured signal"); - } - } - if (signal_conf_requests_checkpoint[i] && !have_checkpoint_diagnostic) { - Abort("Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); + WARPX_ALWAYS_ASSERT_WITH_MESSAGE(result == 0, + "Failed to install signal handler for a configured signal"); } + WARPX_ALWAYS_ASSERT_WITH_MESSAGE(!(signal_conf_requests_checkpoint[i] && !have_checkpoint_diagnostic), + "Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); } #else for (int i = 0; i < NUM_SIGNALS; ++i) { From 99f2c6da9a2847cd3507994700d266ac3c315964 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 14:59:25 -0700 Subject: [PATCH 33/46] Move check for platform support to input parsing --- Source/WarpX.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 65f5f597ce0..914f4db6e27 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -512,6 +512,7 @@ WarpX::SignalSetFlag(int signal_number) void WarpX::InitSignalHandling() { +#if defined(__linux__) || defined(__APPLE__) bool have_checkpoint_diagnostic = false; ParmParse pp("diagnostics"); @@ -528,7 +529,6 @@ WarpX::InitSignalHandling() } } -#if defined(__linux__) || defined(__APPLE__) struct sigaction sa; sigemptyset(&sa.sa_mask); for (int i = 0; i < NUM_SIGNALS; ++i) { @@ -546,13 +546,6 @@ WarpX::InitSignalHandling() WARPX_ALWAYS_ASSERT_WITH_MESSAGE(!(signal_conf_requests_checkpoint[i] && !have_checkpoint_diagnostic), "Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); } -#else - for (int i = 0; i < NUM_SIGNALS; ++i) { - if (signal_conf_requests_break[i] || - signal_conf_requests_checkpoint[i]) { - Abort("Signal handling requested in input, but is not supported on this platform"); - } - } #endif } @@ -624,18 +617,30 @@ WarpX::ReadParameters () std::vector signals_in; pp_warpx.queryarr("break_signals", signals_in); + +#if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { int sig = parseSignalNameToNumber(str); AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); signal_conf_requests_break[sig] = true; } signals_in.clear(); +#else + WARPX_ALWAYS_ASSERT_WITH_MESSAGE(signals_in.empty(), + "Signal handling requested in input, but is not supported on this platform"); +#endif + pp_warpx.queryarr("checkpoint_signals", signals_in); +#if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { int sig = parseSignalNameToNumber(str); AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); signal_conf_requests_checkpoint[sig] = true; } +#else + WARPX_ALWAYS_ASSERT_WITH_MESSAGE(signals_in.empty(), + "Signal handling requested in input, but is not supported on this platform"); +#endif // set random seed std::string random_seed = "default"; From fe6556c3ac767aba0774c7d74727b590ea032a9a Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 16:07:09 -0700 Subject: [PATCH 34/46] Shift signal handling code over toward ABLASTR to share with ImpactX and Hipace++ --- Source/Evolve/WarpXEvolve.cpp | 36 +---- Source/Utils/WarpXUtil.H | 2 - Source/Utils/WarpXUtil.cpp | 81 ----------- Source/WarpX.H | 34 ----- Source/WarpX.cpp | 89 ++++-------- Source/ablastr/utils/CMakeLists.txt | 1 + Source/ablastr/utils/SignalHandling.H | 78 +++++++++++ Source/ablastr/utils/SignalHandling.cpp | 179 ++++++++++++++++++++++++ 8 files changed, 289 insertions(+), 211 deletions(-) create mode 100644 Source/ablastr/utils/SignalHandling.H create mode 100644 Source/ablastr/utils/SignalHandling.cpp diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 3b865213e51..bd5320a2b7d 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -33,6 +33,8 @@ #include "Utils/WarpXProfilerWrapper.H" #include "Utils/WarpXUtil.H" +#include + #include #include #include @@ -358,8 +360,7 @@ WarpX::Evolve (int numsteps) << " s; Avg. per step = " << evolve_time/(step-step_begin+1) << " s\n"; } - if (cur_time >= stop_time - 1.e-3*dt[0] || signal_actions_requested[SIGNAL_REQUESTS_BREAK]) { - signal_actions_requested[SIGNAL_REQUESTS_BREAK] = false; + if (cur_time >= stop_time - 1.e-3*dt[0] || SignalState::TestAndResetActionRequestFlag(SignalState::SIGNAL_REQUESTS_BREAK)) { break; } @@ -942,42 +943,17 @@ WarpX::applyMirrors(Real time){ void WarpX::CheckSignals() { - // We assume that signals will definitely be delivered to rank 0, - // and may be delivered to other ranks as well. For coordination, - // we process them according to when they're received by rank 0. - if (amrex::ParallelDescriptor::MyProc() == 0) { - for (int i = 0; i < NUM_SIGNALS; ++i) { - // Read into a local temporary to ensure the same value is - // used throughout. Atomically exchange it with false to - // unset the flag without risking loss of a signal - if a - // signal arrives after this, it will be handled the next - // time this function is called. - bool signal_i_received = signal_received_flags[i].exchange(false); - - if (signal_i_received) { - signal_actions_requested[SIGNAL_REQUESTS_BREAK] |= signal_conf_requests_break[i]; - signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT] |= signal_conf_requests_checkpoint[i]; - } - } - } - -#if defined(AMREX_USE_MPI) - auto comm = amrex::ParallelDescriptor::Communicator(); - MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_MAX+1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); -#endif + SignalState::CheckSignals(); } void WarpX::HandleSignals() { -#if defined(AMREX_USE_MPI) - MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE); -#endif + SignalState::WaitSignals(); // SIGNAL_REQUESTS_BREAK is handled directly in WarpX::Evolve - if (signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT]) { + if (SignalState::TestAndResetActionRequestFlag(SignalState::SIGNAL_REQUESTS_CHECKPOINT)) { multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); - signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT] = false; } } diff --git a/Source/Utils/WarpXUtil.H b/Source/Utils/WarpXUtil.H index c764bffb71b..cc565be8551 100644 --- a/Source/Utils/WarpXUtil.H +++ b/Source/Utils/WarpXUtil.H @@ -249,8 +249,6 @@ double parseStringtoReal(std::string str); */ int parseStringtoInt(std::string str, std::string name); -int parseSignalNameToNumber(const std::string &str); - template amrex::ParserExecutor compileParser (amrex::Parser const* parser) { diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 091daf66e00..d889e0e1069 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -38,9 +38,6 @@ #include #include -// For definitions of signal values -#include - using namespace amrex; void PreparseAMReXInputIntArray(amrex::ParmParse& a_pp, char const * const input_str, const bool replace) @@ -398,84 +395,6 @@ parseStringtoInt(std::string str, std::string name) return ival; } -int -parseSignalNameToNumber(const std::string &str) -{ - IParser signals_parser(str); - -#if defined(__linux__) || defined(__APPLE__) - struct { - const char* abbrev; - const int value; - } signals_to_parse[] = { - {"ABRT", SIGABRT}, - {"ALRM", SIGALRM}, - {"BUS", SIGBUS}, - {"CHLD", SIGCHLD}, - {"CLD", SIGCHLD}, // Synonymous to SIGCHLD on Linux - {"CONT", SIGCONT}, -#if defined(SIGEMT) - {"EMT", SIGEMT}, // macOS and some Linux architectures -#endif - // Omitted because AMReX typically handles SIGFPE specially - // {"FPE", SIGFPE}, - {"HUP", SIGHUP}, - {"ILL", SIGILL}, -#if defined(SIGINFO) - {"INFO", SIGINFO}, // macOS and some Linux architectures -#endif - {"INT", SIGINT}, - {"IO", SIGIO}, - {"IOT", SIGABRT}, // Synonymous to SIGABRT on Linux - // {"KILL", SIGKILL}, // Cannot be handled - {"PIPE", SIGPIPE}, - {"POLL", SIGIO}, // Synonymous to SIGIO on Linux - {"PROF", SIGPROF}, -#if defined(SIGPWR) - {"PWR", SIGPWR}, // Linux-only -#endif - {"QUIT", SIGQUIT}, - {"SEGV", SIGSEGV}, -#if defined(SIGSTKFLT) - {"STKFLT", SIGSTKFLT}, // Linux-only -#endif - // {"STOP", SIGSTOP}, // Cannot be handled - {"SYS", SIGSYS}, - {"TERM", SIGTERM}, - {"TRAP", SIGTRAP}, - {"TSTP", SIGTSTP}, - {"TTIN", SIGTTIN}, - {"TTOU", SIGTTOU}, - {"URG", SIGURG}, - {"USR1", SIGUSR1}, - {"USR2", SIGUSR2}, - {"VTALRM", SIGVTALRM}, - {"WINCH", SIGWINCH}, - {"XCPU", SIGXCPU}, - {"XFSZ", SIGXFSZ}, - }; - - for (const auto& sp : signals_to_parse) { - std::string name_upper = sp.abbrev; - std::string name_lower = name_upper; - for (char &c : name_lower) { - c = tolower(c); - } - - signals_parser.setConstant(name_upper, sp.value); - signals_parser.setConstant(name_lower, sp.value); - name_upper = "SIG" + name_upper; - name_lower = "sig" + name_lower; - signals_parser.setConstant(name_upper, sp.value); - signals_parser.setConstant(name_lower, sp.value); - } -#endif // #if defined(__linux__) || defined(__APPLE__) - - auto spf = signals_parser.compileHost<0>(); - - return spf(); -} - // Overloads for float/double instead of amrex::Real to allow makeParser() to query for // my_constants as double even in single precision mode // Always parsing in double precision avoids potential overflows that may occur when parsing user's diff --git a/Source/WarpX.H b/Source/WarpX.H index 5a47cee622a..86dec872654 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -60,7 +60,6 @@ #include #include -#include #include #include #include @@ -952,43 +951,10 @@ private: // Singleton is used when the code is run from python static WarpX* m_instance; - /** - * \brief - * Signal handling - * - * Rank 0 will accept signals and asynchronously broadcast the - * configured resposne; other processes will ignore them and - * follow the lead of rank 0 to avoid potential for deadlocks or - * timestep-skewed response. - * - * Variables and functions are static rather than per-instance - * because signal handlers are configured at the process level. - */ - //! Set up signal handlers based on input configuration - void InitSignalHandling(); //! Check and clear signal flags and asynchronously broadcast them from process 0 static void CheckSignals(); - //! Signal handler to set flags on process 0 (other processes ignore configured signals) - static void SignalSetFlag(int signal_number); //! Complete the asynchronous broadcast of signal flags, and initiate a checkpoint if requested void HandleSignals(); - //! The range of signal values to accept - static constexpr int NUM_SIGNALS = 32; - //! On process 0, whether a given signal has been received since the last check - static std::atomic signal_received_flags[NUM_SIGNALS]; - //! Whether configuration requests the code to break out of the timestep loop on a particular signal - static bool signal_conf_requests_break[NUM_SIGNALS]; - //! Whether configuration requests the code to checkpoint at this timestep on a particular signal - static bool signal_conf_requests_checkpoint[NUM_SIGNALS]; - //! Labels for indexed positions in signal_actions_requests - enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1, SIGNAL_REQUESTS_MAX = 1 }; - //! Boolean flags transmitted between CheckSignals() and - //! HandleSignals() to indicate actions requested by signals - static bool signal_actions_requested[SIGNAL_REQUESTS_MAX+1]; -#if defined(AMREX_USE_MPI) - //! MPI requests for the asynchronous broadcasts of the signal-requested actions - static MPI_Request signal_mpi_ibcast_request; -#endif /// /// Advance the simulation by numsteps steps, electromagnetic case. diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 914f4db6e27..676d42ed039 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -38,6 +38,8 @@ #include "Utils/WarpXProfilerWrapper.H" #include "Utils/WarpXUtil.H" +#include + #ifdef AMREX_USE_SENSEI_INSITU # include #endif @@ -78,9 +80,6 @@ #include #include -// For sigaction() et al. -#include - using namespace amrex; Vector WarpX::E_external_grid(3, 0.0); @@ -209,14 +208,6 @@ bool WarpX::do_device_synchronize = false; WarpX* WarpX::m_instance = nullptr; -std::atomic WarpX::signal_received_flags[NUM_SIGNALS]; -bool WarpX::signal_conf_requests_break[NUM_SIGNALS]; -bool WarpX::signal_conf_requests_checkpoint[NUM_SIGNALS]; -bool WarpX::signal_actions_requested[2]; -#if defined(AMREX_USE_MPI) -MPI_Request WarpX::signal_mpi_ibcast_request; -#endif - WarpX& WarpX::GetInstance () { @@ -245,7 +236,7 @@ WarpX::WarpX () InitEB(); - InitSignalHandling(); + SignalState::InitSignalHandling(); // Geometry on all levels has been defined already. // No valid BoxArray and DistributionMapping have been defined. @@ -503,52 +494,6 @@ WarpX::PrintGlobalWarnings(const std::string& when) amrex::Print() << warn_string; } -void -WarpX::SignalSetFlag(int signal_number) -{ - signal_received_flags[signal_number] = true; -} - -void -WarpX::InitSignalHandling() -{ -#if defined(__linux__) || defined(__APPLE__) - bool have_checkpoint_diagnostic = false; - - ParmParse pp("diagnostics"); - std::vector diags_names; - pp.queryarr("diags_names", diags_names); - - for (const auto &diag : diags_names) { - ParmParse dd(diag); - std::string format; - dd.query("format", format); - if (format == "checkpoint") { - have_checkpoint_diagnostic = true; - break; - } - } - - struct sigaction sa; - sigemptyset(&sa.sa_mask); - for (int i = 0; i < NUM_SIGNALS; ++i) { - signal_received_flags[i] = false; - if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { - if (ParallelDescriptor::MyProc() == 0) { - sa.sa_handler = &WarpX::SignalSetFlag; - } else { - sa.sa_handler = SIG_IGN; - } - int result = sigaction(i, &sa, nullptr); - WARPX_ALWAYS_ASSERT_WITH_MESSAGE(result == 0, - "Failed to install signal handler for a configured signal"); - } - WARPX_ALWAYS_ASSERT_WITH_MESSAGE(!(signal_conf_requests_checkpoint[i] && !have_checkpoint_diagnostic), - "Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); - } -#endif -} - void WarpX::ReadParameters () { @@ -620,9 +565,8 @@ WarpX::ReadParameters () #if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { - int sig = parseSignalNameToNumber(str); - AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); - signal_conf_requests_break[sig] = true; + int sig = SignalState::parseSignalNameToNumber(str); + SignalState::signal_conf_requests_break[sig] = true; } signals_in.clear(); #else @@ -630,12 +574,29 @@ WarpX::ReadParameters () "Signal handling requested in input, but is not supported on this platform"); #endif + bool have_checkpoint_diagnostic = false; + + ParmParse pp("diagnostics"); + std::vector diags_names; + pp.queryarr("diags_names", diags_names); + + for (const auto &diag : diags_names) { + ParmParse dd(diag); + std::string format; + dd.query("format", format); + if (format == "checkpoint") { + have_checkpoint_diagnostic = true; + break; + } + } + pp_warpx.queryarr("checkpoint_signals", signals_in); #if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { - int sig = parseSignalNameToNumber(str); - AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); - signal_conf_requests_checkpoint[sig] = true; + int sig = SignalState::parseSignalNameToNumber(str); + SignalState::signal_conf_requests_checkpoint[sig] = true; + WARPX_ALWAYS_ASSERT_WITH_MESSAGE(have_checkpoint_diagnostic, + "Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); } #else WARPX_ALWAYS_ASSERT_WITH_MESSAGE(signals_in.empty(), diff --git a/Source/ablastr/utils/CMakeLists.txt b/Source/ablastr/utils/CMakeLists.txt index b452dc45c63..7a174a17444 100644 --- a/Source/ablastr/utils/CMakeLists.txt +++ b/Source/ablastr/utils/CMakeLists.txt @@ -1,4 +1,5 @@ target_sources(ablastr PRIVATE TextMsg.cpp + SignalHandling.cpp ) diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H new file mode 100644 index 00000000000..9f211705b99 --- /dev/null +++ b/Source/ablastr/utils/SignalHandling.H @@ -0,0 +1,78 @@ +/* Copyright 2022 Philip Miller + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef ABLASTR_SIGNAL_HANDLING_H_ +#define ABLASTR_SIGNAL_HANDLING_H_ + +#include + +#if defined(AMREX_USE_MPI) +#include +#endif + +#include +#include + +/** + * \brief + * Signal handling + * + * Rank 0 will accept signals and asynchronously broadcast the + * configured response; other processes will ignore them and + * follow the lead of rank 0 to avoid potential for deadlocks or + * timestep-skewed response. + * + * Variables and functions are static rather than per-instance + * because signal handlers are configured at the process level. + */ +class SignalState +{ +public: + //! The range of signal values to accept + static constexpr int NUM_SIGNALS = 32; + + //! Whether configuration requests the code to break out of the timestep loop on a particular signal + static bool signal_conf_requests_break[NUM_SIGNALS]; + //! Whether configuration requests the code to checkpoint at this timestep on a particular signal + static bool signal_conf_requests_checkpoint[NUM_SIGNALS]; + + //! Labels for indexed positions in signal_actions_requests + enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1, SIGNAL_REQUESTS_MAX = 1 }; + + //! Take a string and convert it to a corresponding signal number if possible + static int parseSignalNameToNumber(const std::string &str); + + //! Set up signal handlers based on input configuration provided in `signal_conf_requests_*` + static void InitSignalHandling (); + + //! Check and clear signal flags and asynchronously broadcast them from process 0 + static void CheckSignals(); + //! Complete the asynchronous broadcast of signal flags + static void WaitSignals(); + + //! Check whether a given action has been requested, and reset the associated flag + static bool TestAndResetActionRequestFlag (int action_to_test); + +private: + //! On process 0, whether a given signal has been received since the last check + static std::atomic signal_received_flags[NUM_SIGNALS]; + +#if defined(AMREX_USE_MPI) + //! MPI requests for the asynchronous broadcasts of the signal-requested actions + static MPI_Request signal_mpi_ibcast_request; +#endif + + //! Signal handler to set flags on process 0 (other processes ignore configured signals) + static void SignalSetFlag (int signal_number); + + //! Boolean flags transmitted between CheckSignals() and + //! HandleSignals() to indicate actions requested by signals + static bool signal_actions_requested[SIGNAL_REQUESTS_MAX+1]; +}; + + +#endif // ABLASTR_SIGNAL_HANDLING_H_ diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp new file mode 100644 index 00000000000..62582013cbb --- /dev/null +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -0,0 +1,179 @@ +/* Copyright 2022 Philip Miller + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#include "SignalHandling.H" + +#include +#include +#include + +// For sigaction() et al. +#if defined(__linux__) || defined(__APPLE__) +#include +#endif + +std::atomic SignalState::signal_received_flags[NUM_SIGNALS]; +bool SignalState::signal_conf_requests_break[NUM_SIGNALS]; +bool SignalState::signal_conf_requests_checkpoint[NUM_SIGNALS]; +bool SignalState::signal_actions_requested[2]; +#if defined(AMREX_USE_MPI) +MPI_Request SignalState::signal_mpi_ibcast_request; +#endif + +int +SignalState::parseSignalNameToNumber(const std::string &str) +{ + amrex::IParser signals_parser(str); + +#if defined(__linux__) || defined(__APPLE__) + struct { + const char* abbrev; + const int value; + } signals_to_parse[] = { + {"ABRT", SIGABRT}, + {"ALRM", SIGALRM}, + {"BUS", SIGBUS}, + {"CHLD", SIGCHLD}, + {"CLD", SIGCHLD}, // Synonymous to SIGCHLD on Linux + {"CONT", SIGCONT}, +#if defined(SIGEMT) + {"EMT", SIGEMT}, // macOS and some Linux architectures +#endif + // Omitted because AMReX typically handles SIGFPE specially + // {"FPE", SIGFPE}, + {"HUP", SIGHUP}, + {"ILL", SIGILL}, +#if defined(SIGINFO) + {"INFO", SIGINFO}, // macOS and some Linux architectures +#endif + {"INT", SIGINT}, + {"IO", SIGIO}, + {"IOT", SIGABRT}, // Synonymous to SIGABRT on Linux + // {"KILL", SIGKILL}, // Cannot be handled + {"PIPE", SIGPIPE}, + {"POLL", SIGIO}, // Synonymous to SIGIO on Linux + {"PROF", SIGPROF}, +#if defined(SIGPWR) + {"PWR", SIGPWR}, // Linux-only +#endif + {"QUIT", SIGQUIT}, + {"SEGV", SIGSEGV}, +#if defined(SIGSTKFLT) + {"STKFLT", SIGSTKFLT}, // Linux-only +#endif + // {"STOP", SIGSTOP}, // Cannot be handled + {"SYS", SIGSYS}, + {"TERM", SIGTERM}, + {"TRAP", SIGTRAP}, + {"TSTP", SIGTSTP}, + {"TTIN", SIGTTIN}, + {"TTOU", SIGTTOU}, + {"URG", SIGURG}, + {"USR1", SIGUSR1}, + {"USR2", SIGUSR2}, + {"VTALRM", SIGVTALRM}, + {"WINCH", SIGWINCH}, + {"XCPU", SIGXCPU}, + {"XFSZ", SIGXFSZ}, + }; + + for (const auto& sp : signals_to_parse) { + std::string name_upper = sp.abbrev; + std::string name_lower = name_upper; + for (char &c : name_lower) { + c = tolower(c); + } + + signals_parser.setConstant(name_upper, sp.value); + signals_parser.setConstant(name_lower, sp.value); + name_upper = "SIG" + name_upper; + name_lower = "sig" + name_lower; + signals_parser.setConstant(name_upper, sp.value); + signals_parser.setConstant(name_lower, sp.value); + } +#endif // #if defined(__linux__) || defined(__APPLE__) + + auto spf = signals_parser.compileHost<0>(); + + int sig = spf(); + AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); + + return sig; +} + +void +SignalState::InitSignalHandling() +{ +#if defined(__linux__) || defined(__APPLE__) + struct sigaction sa; + sigemptyset(&sa.sa_mask); + for (int i = 0; i < NUM_SIGNALS; ++i) { + signal_received_flags[i] = false; + if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { + if (amrex::ParallelDescriptor::MyProc() == 0) { + sa.sa_handler = &SignalState::SignalSetFlag; + } else { + sa.sa_handler = SIG_IGN; + } + int result = sigaction(i, &sa, nullptr); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(result == 0, + "Failed to install signal handler for a configured signal"); + } + } +#endif +} + +void +SignalState::CheckSignals() +{ + // We assume that signals will definitely be delivered to rank 0, + // and may be delivered to other ranks as well. For coordination, + // we process them according to when they're received by rank 0. + if (amrex::ParallelDescriptor::MyProc() == 0) { + for (int i = 0; i < NUM_SIGNALS; ++i) { + // Read into a local temporary to ensure the same value is + // used throughout. Atomically exchange it with false to + // unset the flag without risking loss of a signal - if a + // signal arrives after this, it will be handled the next + // time this function is called. + bool signal_i_received = signal_received_flags[i].exchange(false); + + if (signal_i_received) { + signal_actions_requested[SIGNAL_REQUESTS_BREAK] |= signal_conf_requests_break[i]; + signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT] |= signal_conf_requests_checkpoint[i]; + } + } + } + +#if defined(AMREX_USE_MPI) + auto comm = amrex::ParallelDescriptor::Communicator(); + MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_MAX+1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); +#endif +} + +void +SignalState::WaitSignals() +{ +#if defined(AMREX_USE_MPI) + MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE); +#endif +} + +bool +SignalState::TestAndResetActionRequestFlag(int action_to_test) +{ + bool retval = signal_actions_requested[action_to_test]; + signal_actions_requested[action_to_test] = false; + return retval; +} + +void +SignalState::SignalSetFlag(int signal_number) +{ + signal_received_flags[signal_number] = true; +} + From 6c17d4310382ffc3f56a2fe175f555758e9193a7 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 16:09:03 -0700 Subject: [PATCH 35/46] Minor cleanup --- Source/Evolve/WarpXEvolve.cpp | 4 ---- Source/Utils/WarpXUtil.cpp | 1 - 2 files changed, 5 deletions(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index bd5320a2b7d..a184d4fc372 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -48,10 +48,6 @@ #include #include -#if defined(AMREX_USE_MPI) -# include -#endif - #include #include #include diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index d889e0e1069..14de3cf0387 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include From a4bb27095b025d9451993be173de623bd6e42039 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 16:14:31 -0700 Subject: [PATCH 36/46] A bit more cleanup --- Source/WarpX.cpp | 1 - Source/ablastr/utils/SignalHandling.cpp | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 676d42ed039..a1db83a2c08 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -74,7 +74,6 @@ #include #include -#include #include #include #include diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 62582013cbb..3738717f14a 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -11,6 +11,8 @@ #include #include +#include + // For sigaction() et al. #if defined(__linux__) || defined(__APPLE__) #include @@ -85,7 +87,7 @@ SignalState::parseSignalNameToNumber(const std::string &str) std::string name_upper = sp.abbrev; std::string name_lower = name_upper; for (char &c : name_lower) { - c = tolower(c); + c = std::tolower(c); } signals_parser.setConstant(name_upper, sp.value); From a739857f93ce286cc47752dc840766b984f0c83b Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 16:19:44 -0700 Subject: [PATCH 37/46] Fix formatting nits --- Source/WarpX.H | 4 ++-- Source/ablastr/utils/SignalHandling.H | 6 +++--- Source/ablastr/utils/SignalHandling.cpp | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Source/WarpX.H b/Source/WarpX.H index 86dec872654..a5b1f5672ae 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -952,9 +952,9 @@ private: static WarpX* m_instance; //! Check and clear signal flags and asynchronously broadcast them from process 0 - static void CheckSignals(); + static void CheckSignals (); //! Complete the asynchronous broadcast of signal flags, and initiate a checkpoint if requested - void HandleSignals(); + void HandleSignals (); /// /// Advance the simulation by numsteps steps, electromagnetic case. diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H index 9f211705b99..2b6be4a6215 100644 --- a/Source/ablastr/utils/SignalHandling.H +++ b/Source/ablastr/utils/SignalHandling.H @@ -44,15 +44,15 @@ public: enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1, SIGNAL_REQUESTS_MAX = 1 }; //! Take a string and convert it to a corresponding signal number if possible - static int parseSignalNameToNumber(const std::string &str); + static int parseSignalNameToNumber (const std::string &str); //! Set up signal handlers based on input configuration provided in `signal_conf_requests_*` static void InitSignalHandling (); //! Check and clear signal flags and asynchronously broadcast them from process 0 - static void CheckSignals(); + static void CheckSignals (); //! Complete the asynchronous broadcast of signal flags - static void WaitSignals(); + static void WaitSignals (); //! Check whether a given action has been requested, and reset the associated flag static bool TestAndResetActionRequestFlag (int action_to_test); diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 3738717f14a..961ccca7d2b 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -178,4 +178,3 @@ SignalState::SignalSetFlag(int signal_number) { signal_received_flags[signal_number] = true; } - From b98a1ef13bb0c3aa6fe6049490217b5f6b08b85a Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 16:30:09 -0700 Subject: [PATCH 38/46] Add AMReX error handling on MPI calls --- Source/ablastr/utils/SignalHandling.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 961ccca7d2b..1fc0a82a9a8 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -153,7 +153,7 @@ SignalState::CheckSignals() #if defined(AMREX_USE_MPI) auto comm = amrex::ParallelDescriptor::Communicator(); - MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_MAX+1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request); + BL_MPI_REQUIRE(MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_MAX+1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request)); #endif } @@ -161,7 +161,7 @@ void SignalState::WaitSignals() { #if defined(AMREX_USE_MPI) - MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE); + BL_MPI_REQUIRE(MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE)); #endif } From 6d0e28205b8464bcf863ae012ee5397a0a9e129d Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 17:08:24 -0700 Subject: [PATCH 39/46] Add ABLASTR signal handling code to GNU makefile too --- Source/ablastr/utils/Make.package | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/ablastr/utils/Make.package b/Source/ablastr/utils/Make.package index 8fbee209803..dd343d9c11f 100644 --- a/Source/ablastr/utils/Make.package +++ b/Source/ablastr/utils/Make.package @@ -1,3 +1,3 @@ -CEXE_sources += TextMsg.cpp +CEXE_sources += TextMsg.cpp SignalHandling.cpp VPATH_LOCATIONS += $(WARPX_HOME)/Source/ablastr/utils From 493aa8ce9e8df467f10e3f41047b62cad5c8c07c Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 24 Mar 2022 17:16:45 -0700 Subject: [PATCH 40/46] Document new input parameters --- Docs/source/usage/parameters.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst index cdefa2259a4..24e54d63364 100644 --- a/Docs/source/usage/parameters.rst +++ b/Docs/source/usage/parameters.rst @@ -64,6 +64,15 @@ Overall simulation parameters It is mainly intended for debug purposes, and is best used with ``warpx.always_warn_immediately=1``. +* ``warpx.break_signals`` (array of `string`, separated by spaces) optional + A list of signal names or numbers that the simulation should + handle by cleanly terminating at the next timestep + +* ``warpx.checkpoint_signals`` (array of `string`, separated by spaces) optional + A list of signal names or numbers that the simulation should + handle by outputting a checkpoint at the next timestep. A + diagnostic of type `checkpoint` must be configured. + * ``warpx.random_seed`` (`string` or `int` > 0) optional If provided ``warpx.random_seed = random``, the random seed will be determined using `std::random_device` and `std::clock()`, From 74edd725b53cae36802ea1f25dcacc2f0a4eb91c Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 28 Mar 2022 20:03:56 -0700 Subject: [PATCH 41/46] Use ABLASTR assertion macros in ABLASTR code --- Source/ablastr/utils/SignalHandling.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 1fc0a82a9a8..32f6e8ea57e 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -6,6 +6,7 @@ */ #include "SignalHandling.H" +#include "TextMsg.H" #include #include @@ -102,7 +103,8 @@ SignalState::parseSignalNameToNumber(const std::string &str) auto spf = signals_parser.compileHost<0>(); int sig = spf(); - AMREX_ALWAYS_ASSERT(sig < NUM_SIGNALS); + ABLASTR_ALWAYS_ASSERT_WITH_MESSAGE(sig < NUM_SIGNALS, + "Parsed signal value is outside the supported range of [1, 31]"); return sig; } @@ -122,8 +124,8 @@ SignalState::InitSignalHandling() sa.sa_handler = SIG_IGN; } int result = sigaction(i, &sa, nullptr); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(result == 0, - "Failed to install signal handler for a configured signal"); + ABLASTR_ALWAYS_ASSERT_WITH_MESSAGE(result == 0, + "Failed to install signal handler for a configured signal"); } } #endif From cfb49de3751204010858ecd5f371433b6fa49f67 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 28 Mar 2022 20:07:28 -0700 Subject: [PATCH 42/46] Convert requests limit value to a requests array size --- Source/ablastr/utils/SignalHandling.H | 12 ++++++++---- Source/ablastr/utils/SignalHandling.cpp | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H index 2b6be4a6215..8ef8a2c67b2 100644 --- a/Source/ablastr/utils/SignalHandling.H +++ b/Source/ablastr/utils/SignalHandling.H @@ -35,14 +35,18 @@ public: //! The range of signal values to accept static constexpr int NUM_SIGNALS = 32; + //! Labels for indexed positions in signal_actions_requests + enum signal_action_requested_labels { + SIGNAL_REQUESTS_BREAK = 0, + SIGNAL_REQUESTS_CHECKPOINT = 1, + SIGNAL_REQUESTS_SIZE = 2 // This should always be 1 greater than the last valid value + }; + //! Whether configuration requests the code to break out of the timestep loop on a particular signal static bool signal_conf_requests_break[NUM_SIGNALS]; //! Whether configuration requests the code to checkpoint at this timestep on a particular signal static bool signal_conf_requests_checkpoint[NUM_SIGNALS]; - //! Labels for indexed positions in signal_actions_requests - enum signal_action_requested_labels { SIGNAL_REQUESTS_BREAK = 0, SIGNAL_REQUESTS_CHECKPOINT = 1, SIGNAL_REQUESTS_MAX = 1 }; - //! Take a string and convert it to a corresponding signal number if possible static int parseSignalNameToNumber (const std::string &str); @@ -71,7 +75,7 @@ private: //! Boolean flags transmitted between CheckSignals() and //! HandleSignals() to indicate actions requested by signals - static bool signal_actions_requested[SIGNAL_REQUESTS_MAX+1]; + static bool signal_actions_requested[SIGNAL_REQUESTS_SIZE]; }; diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 32f6e8ea57e..4eaea56609f 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -155,7 +155,8 @@ SignalState::CheckSignals() #if defined(AMREX_USE_MPI) auto comm = amrex::ParallelDescriptor::Communicator(); - BL_MPI_REQUIRE(MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_MAX+1, MPI_CXX_BOOL, 0, comm, &signal_mpi_ibcast_request)); + BL_MPI_REQUIRE(MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_SIZE, + MPI_CXX_BOOL, 0, comm,&signal_mpi_ibcast_request)); #endif } From 37f076fc8330c39eaea1abdc94386d0cb3dad86c Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 28 Mar 2022 20:40:47 -0700 Subject: [PATCH 43/46] Generalize signal handling to an arbitrary set of potential actions --- Source/WarpX.cpp | 4 ++-- Source/ablastr/utils/SignalHandling.H | 8 +++---- Source/ablastr/utils/SignalHandling.cpp | 29 +++++++++++++++---------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index a1db83a2c08..184cb23ffeb 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -565,7 +565,7 @@ WarpX::ReadParameters () #if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { int sig = SignalState::parseSignalNameToNumber(str); - SignalState::signal_conf_requests_break[sig] = true; + SignalState::signal_conf_requests[SignalState::SIGNAL_REQUESTS_BREAK][sig] = true; } signals_in.clear(); #else @@ -593,7 +593,7 @@ WarpX::ReadParameters () #if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { int sig = SignalState::parseSignalNameToNumber(str); - SignalState::signal_conf_requests_checkpoint[sig] = true; + SignalState::signal_conf_requests[SignalState::SIGNAL_REQUESTS_CHECKPOINT][sig] = true; WARPX_ALWAYS_ASSERT_WITH_MESSAGE(have_checkpoint_diagnostic, "Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); } diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H index 8ef8a2c67b2..2897a4fc81f 100644 --- a/Source/ablastr/utils/SignalHandling.H +++ b/Source/ablastr/utils/SignalHandling.H @@ -37,15 +37,15 @@ public: //! Labels for indexed positions in signal_actions_requests enum signal_action_requested_labels { + //! Cleanly stop execution, as if the simulation reached its configured end SIGNAL_REQUESTS_BREAK = 0, + //! Produce a checkpoint SIGNAL_REQUESTS_CHECKPOINT = 1, SIGNAL_REQUESTS_SIZE = 2 // This should always be 1 greater than the last valid value }; - //! Whether configuration requests the code to break out of the timestep loop on a particular signal - static bool signal_conf_requests_break[NUM_SIGNALS]; - //! Whether configuration requests the code to checkpoint at this timestep on a particular signal - static bool signal_conf_requests_checkpoint[NUM_SIGNALS]; + //! Whether configuration requests the code take a particular action on a particular signal + static bool signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS]; //! Take a string and convert it to a corresponding signal number if possible static int parseSignalNameToNumber (const std::string &str); diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 4eaea56609f..44953135fb8 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -20,9 +20,8 @@ #endif std::atomic SignalState::signal_received_flags[NUM_SIGNALS]; -bool SignalState::signal_conf_requests_break[NUM_SIGNALS]; -bool SignalState::signal_conf_requests_checkpoint[NUM_SIGNALS]; -bool SignalState::signal_actions_requested[2]; +bool SignalState::signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS]; +bool SignalState::signal_actions_requested[SIGNAL_REQUESTS_SIZE]; #if defined(AMREX_USE_MPI) MPI_Request SignalState::signal_mpi_ibcast_request; #endif @@ -115,15 +114,20 @@ SignalState::InitSignalHandling() #if defined(__linux__) || defined(__APPLE__) struct sigaction sa; sigemptyset(&sa.sa_mask); - for (int i = 0; i < NUM_SIGNALS; ++i) { - signal_received_flags[i] = false; - if (signal_conf_requests_checkpoint[i] || signal_conf_requests_break[i]) { + for (int signal_number = 0; signal_number < NUM_SIGNALS; ++signal_number) { + signal_received_flags[signal_number] = false; + + bool signal_active = false; + for (int signal_request = 0; signal_request < SIGNAL_REQUESTS_SIZE; ++signal_request) { + signal_active |= signal_conf_requests[signal_request][signal_number]; + } + if (signal_active) { if (amrex::ParallelDescriptor::MyProc() == 0) { sa.sa_handler = &SignalState::SignalSetFlag; } else { sa.sa_handler = SIG_IGN; } - int result = sigaction(i, &sa, nullptr); + int result = sigaction(signal_number, &sa, nullptr); ABLASTR_ALWAYS_ASSERT_WITH_MESSAGE(result == 0, "Failed to install signal handler for a configured signal"); } @@ -138,17 +142,18 @@ SignalState::CheckSignals() // and may be delivered to other ranks as well. For coordination, // we process them according to when they're received by rank 0. if (amrex::ParallelDescriptor::MyProc() == 0) { - for (int i = 0; i < NUM_SIGNALS; ++i) { + for (int signal_number = 0; signal_number < NUM_SIGNALS; ++signal_number) { // Read into a local temporary to ensure the same value is // used throughout. Atomically exchange it with false to // unset the flag without risking loss of a signal - if a // signal arrives after this, it will be handled the next // time this function is called. - bool signal_i_received = signal_received_flags[i].exchange(false); + bool signal_received = signal_received_flags[signal_number].exchange(false); - if (signal_i_received) { - signal_actions_requested[SIGNAL_REQUESTS_BREAK] |= signal_conf_requests_break[i]; - signal_actions_requested[SIGNAL_REQUESTS_CHECKPOINT] |= signal_conf_requests_checkpoint[i]; + if (signal_received) { + for (int signal_request = 0; signal_request < SIGNAL_REQUESTS_SIZE; ++signal_request) { + signal_actions_requested[signal_request] |= signal_conf_requests[signal_request][signal_number]; + } } } } From 23a58f5a75314a2a3ff1be6cf3700bf2e6515449 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 28 Mar 2022 20:44:29 -0700 Subject: [PATCH 44/46] Rename class to match usage and file name --- Source/Evolve/WarpXEvolve.cpp | 8 ++++---- Source/WarpX.cpp | 10 +++++----- Source/ablastr/utils/SignalHandling.H | 5 ++++- Source/ablastr/utils/SignalHandling.cpp | 22 +++++++++++----------- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index a184d4fc372..c1a68d99dbe 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -356,7 +356,7 @@ WarpX::Evolve (int numsteps) << " s; Avg. per step = " << evolve_time/(step-step_begin+1) << " s\n"; } - if (cur_time >= stop_time - 1.e-3*dt[0] || SignalState::TestAndResetActionRequestFlag(SignalState::SIGNAL_REQUESTS_BREAK)) { + if (cur_time >= stop_time - 1.e-3*dt[0] || SignalHandling::TestAndResetActionRequestFlag(SignalHandling::SIGNAL_REQUESTS_BREAK)) { break; } @@ -939,17 +939,17 @@ WarpX::applyMirrors(Real time){ void WarpX::CheckSignals() { - SignalState::CheckSignals(); + SignalHandling::CheckSignals(); } void WarpX::HandleSignals() { - SignalState::WaitSignals(); + SignalHandling::WaitSignals(); // SIGNAL_REQUESTS_BREAK is handled directly in WarpX::Evolve - if (SignalState::TestAndResetActionRequestFlag(SignalState::SIGNAL_REQUESTS_CHECKPOINT)) { + if (SignalHandling::TestAndResetActionRequestFlag(SignalHandling::SIGNAL_REQUESTS_CHECKPOINT)) { multi_diags->FilterComputePackFlushLastTimestep( istep[0] ); } } diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 184cb23ffeb..a5ddbd24507 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -235,7 +235,7 @@ WarpX::WarpX () InitEB(); - SignalState::InitSignalHandling(); + SignalHandling::InitSignalHandling(); // Geometry on all levels has been defined already. // No valid BoxArray and DistributionMapping have been defined. @@ -564,8 +564,8 @@ WarpX::ReadParameters () #if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { - int sig = SignalState::parseSignalNameToNumber(str); - SignalState::signal_conf_requests[SignalState::SIGNAL_REQUESTS_BREAK][sig] = true; + int sig = SignalHandling::parseSignalNameToNumber(str); + SignalHandling::signal_conf_requests[SignalHandling::SIGNAL_REQUESTS_BREAK][sig] = true; } signals_in.clear(); #else @@ -592,8 +592,8 @@ WarpX::ReadParameters () pp_warpx.queryarr("checkpoint_signals", signals_in); #if defined(__linux__) || defined(__APPLE__) for (const std::string &str : signals_in) { - int sig = SignalState::parseSignalNameToNumber(str); - SignalState::signal_conf_requests[SignalState::SIGNAL_REQUESTS_CHECKPOINT][sig] = true; + int sig = SignalHandling::parseSignalNameToNumber(str); + SignalHandling::signal_conf_requests[SignalHandling::SIGNAL_REQUESTS_CHECKPOINT][sig] = true; WARPX_ALWAYS_ASSERT_WITH_MESSAGE(have_checkpoint_diagnostic, "Signal handling was requested to checkpoint, but no checkpoint diagnostic is configured"); } diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H index 2897a4fc81f..7e77aaa98cb 100644 --- a/Source/ablastr/utils/SignalHandling.H +++ b/Source/ablastr/utils/SignalHandling.H @@ -29,7 +29,7 @@ * Variables and functions are static rather than per-instance * because signal handlers are configured at the process level. */ -class SignalState +class SignalHandling { public: //! The range of signal values to accept @@ -76,6 +76,9 @@ private: //! Boolean flags transmitted between CheckSignals() and //! HandleSignals() to indicate actions requested by signals static bool signal_actions_requested[SIGNAL_REQUESTS_SIZE]; + + // Don't allow clients to incorrectly try to construct and use an instance of this type + SignalHandling() = delete; }; diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 44953135fb8..346fec5e447 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -19,15 +19,15 @@ #include #endif -std::atomic SignalState::signal_received_flags[NUM_SIGNALS]; -bool SignalState::signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS]; -bool SignalState::signal_actions_requested[SIGNAL_REQUESTS_SIZE]; +std::atomic SignalHandling::signal_received_flags[NUM_SIGNALS]; +bool SignalHandling::signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS]; +bool SignalHandling::signal_actions_requested[SIGNAL_REQUESTS_SIZE]; #if defined(AMREX_USE_MPI) -MPI_Request SignalState::signal_mpi_ibcast_request; +MPI_Request SignalHandling::signal_mpi_ibcast_request; #endif int -SignalState::parseSignalNameToNumber(const std::string &str) +SignalHandling::parseSignalNameToNumber(const std::string &str) { amrex::IParser signals_parser(str); @@ -109,7 +109,7 @@ SignalState::parseSignalNameToNumber(const std::string &str) } void -SignalState::InitSignalHandling() +SignalHandling::InitSignalHandling() { #if defined(__linux__) || defined(__APPLE__) struct sigaction sa; @@ -123,7 +123,7 @@ SignalState::InitSignalHandling() } if (signal_active) { if (amrex::ParallelDescriptor::MyProc() == 0) { - sa.sa_handler = &SignalState::SignalSetFlag; + sa.sa_handler = &SignalHandling::SignalSetFlag; } else { sa.sa_handler = SIG_IGN; } @@ -136,7 +136,7 @@ SignalState::InitSignalHandling() } void -SignalState::CheckSignals() +SignalHandling::CheckSignals() { // We assume that signals will definitely be delivered to rank 0, // and may be delivered to other ranks as well. For coordination, @@ -166,7 +166,7 @@ SignalState::CheckSignals() } void -SignalState::WaitSignals() +SignalHandling::WaitSignals() { #if defined(AMREX_USE_MPI) BL_MPI_REQUIRE(MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE)); @@ -174,7 +174,7 @@ SignalState::WaitSignals() } bool -SignalState::TestAndResetActionRequestFlag(int action_to_test) +SignalHandling::TestAndResetActionRequestFlag(int action_to_test) { bool retval = signal_actions_requested[action_to_test]; signal_actions_requested[action_to_test] = false; @@ -182,7 +182,7 @@ SignalState::TestAndResetActionRequestFlag(int action_to_test) } void -SignalState::SignalSetFlag(int signal_number) +SignalHandling::SignalSetFlag(int signal_number) { signal_received_flags[signal_number] = true; } From d408a5d0c2a4cbf8365e9e545c4b7b5f8145668d Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 29 Mar 2022 09:57:04 -0700 Subject: [PATCH 45/46] Stick stuff in ABLASTR namespace --- Source/Evolve/WarpXEvolve.cpp | 1 + Source/WarpX.cpp | 3 ++- Source/ablastr/utils/SignalHandling.H | 3 +++ Source/ablastr/utils/SignalHandling.cpp | 4 ++++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index c1a68d99dbe..8919feabf15 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -55,6 +55,7 @@ #include using namespace amrex; +using ablastr::utils::SignalHandling; void WarpX::Evolve (int numsteps) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index a5ddbd24507..8788ca88d61 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -235,7 +235,7 @@ WarpX::WarpX () InitEB(); - SignalHandling::InitSignalHandling(); + ablastr::utils::SignalHandling::InitSignalHandling(); // Geometry on all levels has been defined already. // No valid BoxArray and DistributionMapping have been defined. @@ -559,6 +559,7 @@ WarpX::ReadParameters () } } + using ablastr::utils::SignalHandling; std::vector signals_in; pp_warpx.queryarr("break_signals", signals_in); diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H index 7e77aaa98cb..93ef71516b7 100644 --- a/Source/ablastr/utils/SignalHandling.H +++ b/Source/ablastr/utils/SignalHandling.H @@ -17,6 +17,8 @@ #include #include +namespace ablastr::utils { + /** * \brief * Signal handling @@ -81,5 +83,6 @@ private: SignalHandling() = delete; }; +} // namespace ablastr::utils #endif // ABLASTR_SIGNAL_HANDLING_H_ diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 346fec5e447..585a43d04de 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -19,6 +19,8 @@ #include #endif +namespace ablastr::utils { + std::atomic SignalHandling::signal_received_flags[NUM_SIGNALS]; bool SignalHandling::signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS]; bool SignalHandling::signal_actions_requested[SIGNAL_REQUESTS_SIZE]; @@ -186,3 +188,5 @@ SignalHandling::SignalSetFlag(int signal_number) { signal_received_flags[signal_number] = true; } + +} // namespace ablastr::utils From 5a4c5683260ca95f14dc2361e8bdbdd23d80f240 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 29 Mar 2022 09:58:00 -0700 Subject: [PATCH 46/46] Indent conditional includes as requested --- Source/ablastr/utils/SignalHandling.H | 2 +- Source/ablastr/utils/SignalHandling.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H index 93ef71516b7..b633c386077 100644 --- a/Source/ablastr/utils/SignalHandling.H +++ b/Source/ablastr/utils/SignalHandling.H @@ -11,7 +11,7 @@ #include #if defined(AMREX_USE_MPI) -#include +# include #endif #include diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp index 585a43d04de..cdec9b653cc 100644 --- a/Source/ablastr/utils/SignalHandling.cpp +++ b/Source/ablastr/utils/SignalHandling.cpp @@ -16,7 +16,7 @@ // For sigaction() et al. #if defined(__linux__) || defined(__APPLE__) -#include +# include #endif namespace ablastr::utils {