From 255b6236d564db01c5d9fe22d1f247cd90b8b25e Mon Sep 17 00:00:00 2001 From: Evan Goetz Date: Mon, 12 Jun 2023 14:59:33 -0700 Subject: [PATCH] Added more robust archive file corruption handling This PR adds an additional check when reading archive HDF5 files: each item in the file is visited once up front, so that runtime errors caused by file corruption are caught before any data is loaded. --- gwsumm/archive.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gwsumm/archive.py b/gwsumm/archive.py index 80545f5b..d340a138 100644 --- a/gwsumm/archive.py +++ b/gwsumm/archive.py @@ -200,6 +200,19 @@ def read_data_archive(sourcefile, rm_source_on_fail=True): with File(sourcefile, 'r') as h5file: + # Make sure that each part of the archive file is not corrupted by + # trying to read the data. If any part is broken, delete the file and + # return without loading anything into the gwsumm.globalv variables + try: + # simple lambda function here to do nothing but visit each item + h5file.visititems(lambda name, obj: None) + except RuntimeError as exc: + if not rm_source_on_fail: + raise + warnings.warn(f"failed to read {sourcefile} [{exc}], removing...") + os.remove(sourcefile) + return + # -- channels --------------------------- try: