BUG: fix pytest hang/lock

Fixes darshan-hpc#851

* the testsuite now always uses `DarshanReport` with a context manager to avoid shenanigans with `__del__` and garbage collection/`pytest`/multiple threads
* this appears to fix the problem with testsuite hangs described in darshan-hpc#839 and darshan-hpc#851; I pushed this commit into darshan-hpc#839 recently, so if the CI there stops hanging with `3.10` on top of my local confirmation, hopefully we're good to go on this annoyance
* if the fix is confirmed by the CI over there, I do suggest we encourage the use of `DarshanReport` with a context manager in our documentation--perhaps we could open an issue for doing that, and maybe for looking for cases in our source (beyond the tests) where we may also consider the switchover
tylerjereddy · Nov 30, 2022 · d430635 · d430635
1 parent e36053e
commit d430635
Show file tree

Hide file tree

Showing 9 changed files with 371 additions and 381 deletions.
diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py
@@ -57,10 +57,10 @@ def test_file_hash_type(log_path):
 
     # additionally check that the dataframes
     # generated are of the correct types
-    report = darshan.DarshanReport(log_path, read_all=True)
-    report.mod_read_all_records("POSIX", dtype="pandas")
-    rec_counters = report.records["POSIX"][0]["counters"]
-    rec_fcounters = report.records["POSIX"][0]["fcounters"]
+    with darshan.DarshanReport(log_path, read_all=True) as report:
+        report.mod_read_all_records("POSIX", dtype="pandas")
+        rec_counters = report.records["POSIX"][0]["counters"]
+        rec_fcounters = report.records["POSIX"][0]["fcounters"]
     # verify the records returned have the correct
     # data type for the ids/hashes
     assert rec_counters["id"].dtype == np.uint64

diff --git a/darshan-util/pydarshan/darshan/tests/test_data_access_by_filesystem.py b/darshan-util/pydarshan/darshan/tests/test_data_access_by_filesystem.py
diff --git a/darshan-util/pydarshan/darshan/tests/test_heatmap_handling.py b/darshan-util/pydarshan/darshan/tests/test_heatmap_handling.py
@@ -203,11 +203,11 @@ def test_get_rd_wr_dfs_no_write(dict_list_no_writes):
 def test_get_single_df_dict(expected_df_dict, ops):
     # regression test for `heatmap_handling.get_single_df_dict()`
 
-    report = darshan.DarshanReport(get_log_path("sample-dxt-simple.darshan"))
+    with darshan.DarshanReport(get_log_path("sample-dxt-simple.darshan")) as report:
 
-    actual_df_dict = heatmap_handling.get_single_df_dict(
-        report=report, mod="DXT_POSIX", ops=ops
-    )
+        actual_df_dict = heatmap_handling.get_single_df_dict(
+            report=report, mod="DXT_POSIX", ops=ops
+        )
 
     # make sure we get the same key(s) ("read", "write")
     assert actual_df_dict.keys() == expected_df_dict.keys()
@@ -300,24 +300,24 @@ def test_get_aggregate_data(log_file, expected_agg_data, mod, ops):
     # regression test for `heatmap_handling.get_aggregate_data()`
 
     log_file = get_log_path(log_file)
-    report = darshan.DarshanReport(log_file)
+    with darshan.DarshanReport(log_file) as report:
 
-    if ops == ["read"]:
-        expected_msg = (
-            "No data available for selected module\\(s\\) and operation\\(s\\)."
-        )
-        with pytest.raises(ValueError, match=expected_msg):
-            # expect an error because there are no read segments
-            # in sample-dxt-simple.darshan
+        if ops == ["read"]:
+            expected_msg = (
+                "No data available for selected module\\(s\\) and operation\\(s\\)."
+            )
+            with pytest.raises(ValueError, match=expected_msg):
+                # expect an error because there are no read segments
+                # in sample-dxt-simple.darshan
+                actual_agg_data = heatmap_handling.get_aggregate_data(
+                    report=report, mod=mod, ops=ops
+                )
+        else:
             actual_agg_data = heatmap_handling.get_aggregate_data(
                 report=report, mod=mod, ops=ops
             )
-    else:
-        actual_agg_data = heatmap_handling.get_aggregate_data(
-            report=report, mod=mod, ops=ops
-        )
-        # for other cases, make sure the value arrays are identically valued
-        assert_allclose(actual_agg_data.values, expected_agg_data)
+            # for other cases, make sure the value arrays are identically valued
+            assert_allclose(actual_agg_data.values, expected_agg_data)
 
 
 @pytest.mark.parametrize(
@@ -559,13 +559,13 @@ def test_get_heatmap_df(
 
     # generate the report and use it to obtain the aggregated data
     filepath = get_log_path(filepath)
-    report = darshan.DarshanReport(filepath)
-    agg_df = heatmap_handling.get_aggregate_data(
-        report=report, mod="DXT_POSIX", ops=ops
-    )
-    nprocs = report.metadata["job"]["nprocs"]
-    # run the aggregated data through the heatmap data code
-    actual_hmap_data = heatmap_handling.get_heatmap_df(agg_df=agg_df, xbins=xbins, nprocs=nprocs)
+    with darshan.DarshanReport(filepath) as report:
+        agg_df = heatmap_handling.get_aggregate_data(
+            report=report, mod="DXT_POSIX", ops=ops
+        )
+        nprocs = report.metadata["job"]["nprocs"]
+        # run the aggregated data through the heatmap data code
+        actual_hmap_data = heatmap_handling.get_heatmap_df(agg_df=agg_df, xbins=xbins, nprocs=nprocs)
 
     if "sample-dxt-simple.darshan" in filepath:
         # check the data is conserved

diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_common_access_table.py b/darshan-util/pydarshan/darshan/tests/test_plot_common_access_table.py
@@ -80,10 +80,10 @@
 def test_common_access_table(filename, mod, expected_df):
     log_path = get_log_path(filename=filename)
     expected_df.columns = ["Access Size", "Count"]
-    report = darshan.DarshanReport(log_path)
-    # collect the number of rows from the expected dataframe
-    n_rows = expected_df.shape[0]
-    actual_df = plot_common_access_table.plot_common_access_table(report=report, mod=mod, n_rows=n_rows).df
+    with darshan.DarshanReport(log_path) as report:
+        # collect the number of rows from the expected dataframe
+        n_rows = expected_df.shape[0]
+        actual_df = plot_common_access_table.plot_common_access_table(report=report, mod=mod, n_rows=n_rows).df
     assert_frame_equal(actual_df, expected_df)
 
 

diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_dxt_heatmap.py b/darshan-util/pydarshan/darshan/tests/test_plot_dxt_heatmap.py
@@ -103,11 +103,11 @@ def test_set_x_axis_ticks_and_labels(
     else:
         filepath = get_log_path(filepath)
         # for all other data sets just load the data from the log file
-        report = darshan.DarshanReport(filepath)
-        agg_df = heatmap_handling.get_aggregate_data(
-            report=report, mod="DXT_POSIX", ops=["read", "write"]
-        )
-        runtime = report.metadata["job"]["run_time"]
+        with darshan.DarshanReport(filepath) as report:
+            agg_df = heatmap_handling.get_aggregate_data(
+                report=report, mod="DXT_POSIX", ops=["read", "write"]
+            )
+            runtime = report.metadata["job"]["run_time"]
 
     tmax_dxt = float(agg_df["end_time"].max())
 
@@ -251,34 +251,34 @@ def test_set_y_axis_ticks_and_labels(
     filepath = get_log_path(filepath)
 
     # load the report and generate the aggregate data dataframe
-    report = darshan.DarshanReport(filepath)
-    agg_df = heatmap_handling.get_aggregate_data(
-        report=report, mod="DXT_POSIX", ops=["read", "write"]
-    )
+    with darshan.DarshanReport(filepath) as report:
+        agg_df = heatmap_handling.get_aggregate_data(
+            report=report, mod="DXT_POSIX", ops=["read", "write"]
+        )
 
-    # x-axis bins are arbitrary
-    xbins = 100
-    nprocs = report.metadata["job"]["nprocs"]
+        # x-axis bins are arbitrary
+        xbins = 100
+        nprocs = report.metadata["job"]["nprocs"]
 
-    # generate the heatmap data
-    data = heatmap_handling.get_heatmap_df(agg_df=agg_df, xbins=xbins, nprocs=nprocs)
+        # generate the heatmap data
+        data = heatmap_handling.get_heatmap_df(agg_df=agg_df, xbins=xbins, nprocs=nprocs)
 
-    # generate a joint plot object, then add the heatmap to it
-    jointgrid = sns.jointplot(kind="hist", bins=[xbins, nprocs])
-    sns.heatmap(data, ax=jointgrid.ax_joint)
+        # generate a joint plot object, then add the heatmap to it
+        jointgrid = sns.jointplot(kind="hist", bins=[xbins, nprocs])
+        sns.heatmap(data, ax=jointgrid.ax_joint)
 
-    # set the x-axis ticks and tick labels
-    plot_dxt_heatmap.set_y_axis_ticks_and_labels(
-        jointgrid=jointgrid, n_ylabels=n_ylabels
-    )
+        # set the x-axis ticks and tick labels
+        plot_dxt_heatmap.set_y_axis_ticks_and_labels(
+            jointgrid=jointgrid, n_ylabels=n_ylabels
+        )
 
-    # collect the actual x-axis tick labels
-    actual_yticks = jointgrid.ax_joint.get_yticks()
-    actual_yticklabels = [tl.get_text() for tl in jointgrid.ax_joint.get_yticklabels()]
-    actual_yticklabels = np.asarray(actual_yticklabels, dtype=float)
+        # collect the actual x-axis tick labels
+        actual_yticks = jointgrid.ax_joint.get_yticks()
+        actual_yticklabels = [tl.get_text() for tl in jointgrid.ax_joint.get_yticklabels()]
+        actual_yticklabels = np.asarray(actual_yticklabels, dtype=float)
 
-    # make sure the figure object gets closed
-    plt.close()
+        # make sure the figure object gets closed
+        plt.close()
 
     # verify the actual ticks/labels match the expected
     assert_allclose(actual_yticks, expected_yticks, atol=1e-14, rtol=1e-17)
@@ -298,11 +298,11 @@ def test_remove_marginal_graph_ticks_and_labels(filepath):
     # not have any x/y tick labels or frames
 
     filepath = get_log_path(filepath)
-    report = darshan.DarshanReport(filepath)
+    with darshan.DarshanReport(filepath) as report:
 
-    jgrid = plot_dxt_heatmap.plot_heatmap(
-        report=report, mod="DXT_POSIX", ops=["read", "write"], xbins=100
-    )
+        jgrid = plot_dxt_heatmap.plot_heatmap(
+            report=report, mod="DXT_POSIX", ops=["read", "write"], xbins=100
+        )
 
     # verify the heatmap axis is on
     assert jgrid.ax_joint.axison
@@ -332,9 +332,9 @@ def test_adjust_for_colorbar(filepath):
     # regression test for `plot_dxt_heatmap.adjust_for_colorbar()`
 
     filepath = get_log_path(filepath)
-    report = darshan.DarshanReport(filepath)
+    with darshan.DarshanReport(filepath) as report:
 
-    jgrid = plot_dxt_heatmap.plot_heatmap(report=report)
+        jgrid = plot_dxt_heatmap.plot_heatmap(report=report)
 
     # the plot positions change based on the number of unique ranks.
     # If there is only 1 rank, there is no horizontal bar graph
@@ -413,56 +413,56 @@ def test_plot_heatmap(filepath, mod, ops):
     # test the primary plotting function, `plot_dxt_heatmap.plot_heatmap()`
 
     filepath = get_log_path(filepath)
-    report = darshan.DarshanReport(filepath)
-
-    if mod == "POSIX":
-        with pytest.raises(NotImplementedError, match="Only DXT and HEATMAP modules are supported."):
-            plot_dxt_heatmap.plot_heatmap(report=report, mod=mod)
-    elif ("dxt.darshan" in filepath) & (mod == "DXT_MPIIO"):
-        # if the input module is not "DXT_POSIX" check
-        # that we raise the appropriate error
-        with pytest.raises(ValueError, match="DXT_MPIIO not found in"):
-            jgrid = plot_dxt_heatmap.plot_heatmap(
-                report=report, mod=mod, ops=ops, xbins=100
+    with darshan.DarshanReport(filepath) as report:
+
+        if mod == "POSIX":
+            with pytest.raises(NotImplementedError, match="Only DXT and HEATMAP modules are supported."):
+                plot_dxt_heatmap.plot_heatmap(report=report, mod=mod)
+        elif ("dxt.darshan" in filepath) & (mod == "DXT_MPIIO"):
+            # if the input module is not "DXT_POSIX" check
+            # that we raise the appropriate error
+            with pytest.raises(ValueError, match="DXT_MPIIO not found in"):
+                jgrid = plot_dxt_heatmap.plot_heatmap(
+                    report=report, mod=mod, ops=ops, xbins=100
+                )
+        elif ("sample-dxt-simple.darshan" in filepath) & (ops == ["read"]):
+            # this log file is known to not have any read data, so
+            # make sure we raise a ValueError here
+            expected_msg = (
+                "No data available for selected module\\(s\\) and operation\\(s\\)."
             )
-    elif ("sample-dxt-simple.darshan" in filepath) & (ops == ["read"]):
-        # this log file is known to not have any read data, so
-        # make sure we raise a ValueError here
-        expected_msg = (
-            "No data available for selected module\\(s\\) and operation\\(s\\)."
-        )
-        with pytest.raises(ValueError, match=expected_msg):
+            with pytest.raises(ValueError, match=expected_msg):
+                jgrid = plot_dxt_heatmap.plot_heatmap(
+                    report=report, mod=mod, ops=ops, xbins=100
+                )
+        else:
             jgrid = plot_dxt_heatmap.plot_heatmap(
                 report=report, mod=mod, ops=ops, xbins=100
             )
-    else:
-        jgrid = plot_dxt_heatmap.plot_heatmap(
-            report=report, mod=mod, ops=ops, xbins=100
-        )
-
-        # verify the margins for all plots
-        assert jgrid.ax_joint.margins() == (0.05, 0.05)
-        assert jgrid.ax_marg_x.margins() == (0.05, 0.05)
-        assert jgrid.ax_marg_y.margins() == (0.05, 0.05)
-
-        # ensure the heatmap spines are all visible
-        for _, spine in jgrid.ax_joint.spines.items():
-            assert spine.get_visible()
-
-        # for single-rank files, check that the
-        # horizontal bar graph does not exist
-        assert jgrid.ax_marg_x.has_data()
-        assert jgrid.ax_joint.has_data()
-        if "dxt.darshan" in filepath:
-            # verify the horizontal bar graph does not contain data since there
-            # is only 1 rank for this case
-            assert not jgrid.ax_marg_y.has_data()
-        else:
-            # verify the horizontal bar graph contains data for multirank cases
-            assert jgrid.ax_marg_y.has_data()
 
-        # check that the axis labels are as expected
-        assert jgrid.ax_joint.get_xlabel() == "Time (s)"
-        assert jgrid.ax_joint.get_ylabel() == "Rank"
+            # verify the margins for all plots
+            assert jgrid.ax_joint.margins() == (0.05, 0.05)
+            assert jgrid.ax_marg_x.margins() == (0.05, 0.05)
+            assert jgrid.ax_marg_y.margins() == (0.05, 0.05)
+
+            # ensure the heatmap spines are all visible
+            for _, spine in jgrid.ax_joint.spines.items():
+                assert spine.get_visible()
+
+            # for single-rank files, check that the
+            # horizontal bar graph does not exist
+            assert jgrid.ax_marg_x.has_data()
+            assert jgrid.ax_joint.has_data()
+            if "dxt.darshan" in filepath:
+                # verify the horizontal bar graph does not contain data since there
+                # is only 1 rank for this case
+                assert not jgrid.ax_marg_y.has_data()
+            else:
+                # verify the horizontal bar graph contains data for multirank cases
+                assert jgrid.ax_marg_y.has_data()
+
+            # check that the axis labels are as expected
+            assert jgrid.ax_joint.get_xlabel() == "Time (s)"
+            assert jgrid.ax_joint.get_ylabel() == "Rank"
 
     plt.close()
diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py
@@ -146,9 +146,9 @@ def test_xticks_and_labels(log_path, func, expected_xticklabels, mod):
     # check the x-axis tick mark locations and
     # labels
     log_path = get_log_path(log_path)
-    report = darshan.DarshanReport(log_path)
+    with darshan.DarshanReport(log_path) as report:
 
-    fig = func(report=report, mod=mod)
+        fig = func(report=report, mod=mod)
 
     # retrieve the x-axis tick mark locations and labels
     # from the output figure object
@@ -382,10 +382,10 @@ def test_xticks_and_labels(log_path, func, expected_xticklabels, mod):
 def test_bar_heights(filename, mod, fig_func, expected_heights):
     # check bar graph heights
     log_path = get_log_path(filename)
-    report = darshan.DarshanReport(log_path)
-    fig, ax = plt.subplots()
+    with darshan.DarshanReport(log_path) as report:
+        fig, ax = plt.subplots()
 
-    fig_func(report=report, mod=mod, ax=ax)
+        fig_func(report=report, mod=mod, ax=ax)
 
     # retrieve the bar graph heights
     actual_heights = []

diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py
@@ -58,8 +58,8 @@
 )
 def test_get_io_cost_df(logname, expected_df):
     # regression test for `plot_io_cost.get_io_cost_df()`
-    report = darshan.DarshanReport(get_log_path(logname))
-    actual_df = get_io_cost_df(report=report)
+    with darshan.DarshanReport(get_log_path(logname)) as report:
+        actual_df = get_io_cost_df(report=report)
     assert_frame_equal(actual_df, expected_df)
 
 
@@ -90,8 +90,8 @@ def test_get_io_cost_df(logname, expected_df):
 def test_plot_io_cost_ylims(logname, expected_ylims):
     # test the y limits for both axes for the IO cost stacked bar graph
 
-    report = darshan.DarshanReport(get_log_path(logname))
-    fig = plot_io_cost(report=report)
+    with darshan.DarshanReport(get_log_path(logname)) as report:
+        fig = plot_io_cost(report=report)
     for i, ax in enumerate(fig.axes):
         # there are only 2 axes, the first being the "raw" data
         # and the second being the normalized data (percent)
@@ -122,8 +122,8 @@ def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks):
     expected_yticklabels = [str(i) for i in expected_yticks]
 
     logpath = get_log_path(logname)
-    report = darshan.DarshanReport(logpath)
-    fig = plot_io_cost(report=report)
+    with darshan.DarshanReport(logpath) as report:
+        fig = plot_io_cost(report=report)
     for i, ax in enumerate(fig.axes):
         # there are only 2 axes, the first being the "raw" data
         # and the second being the normalized data (percent)
@@ -227,8 +227,8 @@ def test_issue_590(filename, expected_df):
     # regression test for issue #590
     # see: https://github.com/darshan-hpc/darshan/issues/590
     log_path = get_log_path(filename)
-    report = darshan.DarshanReport(log_path)
-    actual_df = get_io_cost_df(report=report)
+    with darshan.DarshanReport(log_path) as report:
+        actual_df = get_io_cost_df(report=report)
     assert_frame_equal(actual_df, expected_df)