From a46ad0b8117d708064b7e031a3711d43f31da9e9 Mon Sep 17 00:00:00 2001 From: john-corcoran <24858700+john-corcoran@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:11:08 +0100 Subject: [PATCH] Exception handling for file permissions and file not found events --- ia_downloader.py | 62 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/ia_downloader.py b/ia_downloader.py index 4cd0fff..d92c875 100644 --- a/ia_downloader.py +++ b/ia_downloader.py @@ -133,8 +133,18 @@ def bytes_filesize_to_readable_str(bytes_filesize: int) -> str: def file_paths_in_folder(folder_path: str) -> typing.List[str]: """Return sorted list of paths of files at a directory (and its subdirectories)""" + log = logging.getLogger(__name__) + + def walk_error(os_error: OSError) -> None: + """Log any errors occurring during os.walk""" + log.warning( + "'%s' could not be accessed during folder scanning - any contents will not be" + " processed. Try running script as admin", + os_error.filename, + ) + file_paths = [] - for root, _, file_names in os.walk(folder_path): + for root, _, file_names in os.walk(folder_path, onerror=walk_error): for name in file_names: file_paths.append(os.path.join(root, name)) return sorted(file_paths) @@ -172,6 +182,7 @@ def get_metadata_from_files_in_folder( relative_paths_from_ia_metadata: typing.Optional[typing.List[str]] = None, ) -> typing.Dict[str, str]: """Return dict of file paths and metadata of files at a directory (and its subdirectories)""" + log = logging.getLogger(__name__) results = {} # type: typing.Dict[str, str] if relative_paths_from_ia_metadata is not None: file_paths = [ @@ -182,17 +193,33 @@ def get_metadata_from_files_in_folder( file_paths = file_paths_in_folder(folder_path) if hash_flag: for file_path in tqdm.tqdm(file_paths): - if os.path.isfile(file_path): - md5 = md5_hash_file(file_path) - results[ - os.path.normpath(os.path.relpath(file_path, folder_path)) - ] = md5.lower().strip() + if os.path.isfile(file_path): # We will alert on this elsewhere if the file isn't found + try: + md5 = md5_hash_file(file_path) + results[ + os.path.normpath(os.path.relpath(file_path, folder_path)) + ] = md5.lower().strip() + except (PermissionError, OSError): + log.warning( + "PermissionError/OSError occurred when accessing file '%s' - try running " + "script as admin", + file_path, + ) else: # Return file sizes if we're not checking hash values for file_path in file_paths: - if os.path.isfile(file_path): - file_size = os.path.getsize(file_path) - results[os.path.normpath(os.path.relpath(file_path, folder_path))] = str(file_size) + if os.path.isfile(file_path): # We will alert on this elsewhere if the file isn't found + try: + file_size = os.path.getsize(file_path) + results[os.path.normpath(os.path.relpath(file_path, folder_path))] = str( + file_size + ) + except (PermissionError, OSError): + log.warning( + "PermissionError/OSError occurred when accessing file '%s' - try running " + "script as admin", + file_path, + ) return results @@ -222,7 +249,22 @@ def check_hash(file_path: str, md5_value_from_ia: str) -> typing.Tuple[str, str] hash check of a file """ - md5_value_local = md5_hash_file(file_path) + try: + md5_value_local = md5_hash_file(file_path) + except FileNotFoundError: + return ( + "warning", + "'{}' file seems to have been deleted before hashing could complete".format( + os.path.basename(file_path) + ), + ) + except (PermissionError, OSError): + return ( + "warning", + "PermissionError/OSError when attempting to hash '{}'".format( + os.path.basename(file_path) + ), + ) if md5_value_local.lower().strip() == md5_value_from_ia.lower().strip(): return ( "debug",