From b4be20b16dfac0ced11e462433466a81fdb5d342 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Mon, 18 May 2020 14:12:53 -0400 Subject: [PATCH 1/3] No reruns, SST diagnostics added --- scripts/dashboard/adios_common.cmake | 2 +- source/adios2/toolkit/sst/cp/cp_reader.c | 1 + source/adios2/toolkit/sst/dp/evpath_dp.c | 16 ++++++++++++++++ .../engine/staging-common/run_test.py.gen.in | 3 +-- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/scripts/dashboard/adios_common.cmake b/scripts/dashboard/adios_common.cmake index 0b7a0a236b..51298730bd 100644 --- a/scripts/dashboard/adios_common.cmake +++ b/scripts/dashboard/adios_common.cmake @@ -85,7 +85,7 @@ if(NOT dashboard_model) endif() if(NOT DEFINED ADIOS_TEST_REPEAT) - set(ADIOS_TEST_REPEAT 5) + set(ADIOS_TEST_REPEAT 0) endif() if((CMAKE_VERSION VERSION_GREATER 3.16.20191201 ) AND diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index 53869154b8..e9acf3bce3 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -21,6 +21,7 @@ #include "cp_internal.h" #define gettid() pthread_self() +#define MUTEX_DEBUG #ifdef MUTEX_DEBUG #define STREAM_MUTEX_LOCK(Stream) \ { \ diff --git a/source/adios2/toolkit/sst/dp/evpath_dp.c b/source/adios2/toolkit/sst/dp/evpath_dp.c index b0b4f8ff01..80a5c7022a 100644 --- a/source/adios2/toolkit/sst/dp/evpath_dp.c +++ b/source/adios2/toolkit/sst/dp/evpath_dp.c @@ -605,6 +605,11 @@ static void EvpathPreloadHandler(CManager cm, CMConnection conn, void *msg_v, } Requests = Next; } + Svcs->verbose(RS_Stream->CP_Stream, + "Done processing preload message from writer rank %d for " + "timestep %ld\n", + PreloadMsg->WriterRank, PreloadMsg->Timestep); + pthread_mutex_unlock(&RS_Stream->DataLock); return; } @@ -912,6 +917,11 @@ static void *EvpathReadRemoteMemory(CP_Services Svcs, DP_RS_Stream Stream_v, int HadPreload; static long LastRequestedTimestep = -1; + Svcs->verbose(Stream->CP_Stream, 
+ "EVPATH read request Timestep %d from Rank %d, WSR_Stream = " + "%p, DP_TimestepInfo %p\n", + Timestep, Rank, Stream->WriterContactInfo[Rank].WS_Stream, + DP_TimestepInfo); pthread_mutex_lock(&Stream->DataLock); if ((LastRequestedTimestep != -1) && (LastRequestedTimestep != Timestep)) { @@ -936,6 +946,8 @@ static void *EvpathReadRemoteMemory(CP_Services Svcs, DP_RS_Stream Stream_v, ret->CMcondition = -1; Stream->ReadRequestsFromPreload++; pthread_mutex_unlock(&Stream->DataLock); + Svcs->verbose(Stream->CP_Stream, "EVPATH satisfied with preload\n"); + return ret; } @@ -1173,6 +1185,10 @@ static void SendPreloadMsgs(CP_Services Svcs, Evpath_WSR_Stream WSR_Stream, if (WSR_Stream->ReaderRequestArray[i]) { PreloadMsg.RS_Stream = WSR_Stream->ReaderContactInfo[i].RS_Stream; + Svcs->verbose( + WS_Stream->CP_Stream, + "EVPATH Preload message for timestep %ld, going to rank %d\n", + TS->Timestep, i); CMwrite(WSR_Stream->ReaderContactInfo[i].Conn, WS_Stream->PreloadFormat, &PreloadMsg); } diff --git a/testing/adios2/engine/staging-common/run_test.py.gen.in b/testing/adios2/engine/staging-common/run_test.py.gen.in index f3bafae29c..f9ca5f814e 100755 --- a/testing/adios2/engine/staging-common/run_test.py.gen.in +++ b/testing/adios2/engine/staging-common/run_test.py.gen.in @@ -64,8 +64,7 @@ def do_simple_test(writer_cmd, reader_cmd, reader_delay, file_test): return_code = 0 -# if "--lock_geometry" in writer_cmd: -# my_env["SstVerbose"] = "1" + my_env["SstVerbose"] = "1" print("TestDriver: Doing simple with file_test = " + str(file_test)) sys.stdout.flush() From 4907f4bb66dda9fc96678916302e192e0e8d56ca Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Mon, 18 May 2020 18:33:31 -0400 Subject: [PATCH 2/3] remove mutex_debug in the hopes of getting heisen failures again --- source/adios2/toolkit/sst/cp/cp_reader.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index e9acf3bce3..53869154b8 100644
--- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -21,7 +21,6 @@ #include "cp_internal.h" #define gettid() pthread_self() -#define MUTEX_DEBUG #ifdef MUTEX_DEBUG #define STREAM_MUTEX_LOCK(Stream) \ { \ From 7a8b069a4e43e0ab836bb0b4173ff007438a3673 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Tue, 19 May 2020 14:16:54 -0400 Subject: [PATCH 3/3] Re-add LockGeometry test for SST --- testing/adios2/engine/staging-common/CMakeLists.txt | 5 +---- testing/adios2/engine/staging-common/TestSupp.cmake | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index e66a6e0675..9c6c75fea0 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -91,7 +91,7 @@ if(ADIOS2_HAVE_MPI AND MPIEXEC_EXECUTABLE) endforeach() endif() -set (SIMPLE_TESTS "1x1;NoReaderNoWait;TimeoutOnOpen;1x1.NoData;1x1.Modes;1x1.Attrs;1x1.Local;1x1.SharedNothing;1x1.SharedIO;1x1.SharedVar;1x1.SharedNothingSync;1x1.SharedIOSync;1x1.SharedVarSync") +set (SIMPLE_TESTS "1x1;NoReaderNoWait;TimeoutOnOpen;1x1.NoData;1x1.Modes;1x1.Attrs;1x1.Local;1x1.SharedNothing;1x1.SharedIO;1x1.SharedVar;1x1.SharedNothingSync;1x1.SharedIOSync;1x1.SharedVarSync;1x1LockGeometry") set (SIMPLE_FORTRAN_TESTS "") if(ADIOS2_HAVE_Fortran) @@ -147,9 +147,6 @@ LIST (APPEND SST_TESTS ${FFS_SST_TESTS} ${BP_SST_TESTS}) # Zero Data tests are unreliable with SST and BP marshaling list (FILTER SST_TESTS EXCLUDE REGEX "2x1ZeroData.*BP") -# Temporarily exclude SST LockGeometry tests -list (FILTER SST_TESTS EXCLUDE REGEX "LockGeometry") - foreach(test ${SST_TESTS}) add_common_test(${test} SST) endforeach() diff --git a/testing/adios2/engine/staging-common/TestSupp.cmake b/testing/adios2/engine/staging-common/TestSupp.cmake index c2470a6b0f..7184cc6236 100644 --- 
a/testing/adios2/engine/staging-common/TestSupp.cmake +++ b/testing/adios2/engine/staging-common/TestSupp.cmake @@ -69,7 +69,7 @@ set (2x2.NoData_CMD "run_test.py.$ -nw 2 -nr 2 --warg=--no_data --rarg=- set (2x2.HalfNoData_CMD "run_test.py.$ -nw 2 -nr 2 --warg=--no_data --warg=--no_data_node --warg=1 --rarg=--no_data --rarg=--no_data_node --rarg=1" ) set (1x1.ForcePreload_CMD "run_test.py.$ -nw 1 -nr 1 --rarg=PreloadMode=SstPreloadOn,RENGINE_PARAMS") set (1x1Bulk_CMD "run_test.py.$ -nw 1 -nr 1 --warg=--nx --warg=10000 --warg=--num_steps --warg=101 --rarg=--num_steps --rarg=101") -set (1x1BulkLockGeometry_CMD "run_test.py.$ -nw 1 -nr 1 --warg=--num_steps --warg=101 --warg=--nx --warg=10000 --rarg=--num_steps --rarg=101 --warg=--lock_geometry --rarg=--lock_geometry") +set (1x1LockGeometry_CMD "run_test.py.$ -nw 1 -nr 1 --warg=--num_steps --warg=101 --warg=--nx --warg=50 --rarg=--num_steps --rarg=101 --warg=--lock_geometry --rarg=--lock_geometry") set (2x1_CMD "run_test.py.$ -nw 2 -nr 1") set (2x1ZeroDataVar_CMD "run_test.py.$ -nw 2 -nr 1 --warg=--zero_data_var") set (2x1ZeroDataR64_CMD "run_test.py.$ -nw 2 -nr 1 -r $ --warg=--zero_data_var")