From 11d5ca4c817c929e358b8ca77f1973510fb75c76 Mon Sep 17 00:00:00 2001 From: Charles Coulombe Date: Tue, 29 Oct 2024 20:15:27 -0400 Subject: [PATCH] [train][tune] Safely check if the storage filesystem is `pyarrow.fs.S3FileSystem` (#48216) ## Why are these changes needed? When PyArrow does not have support for S3, accessing `pyarrow.fs.S3FileSystem` raises an exception. Fix by checking if the `S3FileSystem` is importable before doing this typecheck. --------- Signed-off-by: Charles Coulombe Signed-off-by: Charles Coulombe --- python/ray/train/_internal/storage.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/ray/train/_internal/storage.py b/python/ray/train/_internal/storage.py index 81f8541692f3..05970988862e 100644 --- a/python/ray/train/_internal/storage.py +++ b/python/ray/train/_internal/storage.py @@ -19,6 +19,12 @@ "pyarrow is a required dependency of Ray Train and Ray Tune. " "Please install with: `pip install pyarrow`" ) from e + +try: + # check if Arrow has S3 support + from pyarrow.fs import S3FileSystem +except ImportError: + S3FileSystem = None # isort: on import fnmatch @@ -98,7 +104,7 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): def _pyarrow_fs_copy_files( source, destination, source_filesystem=None, destination_filesystem=None, **kwargs ): - if isinstance(destination_filesystem, pyarrow.fs.S3FileSystem): + if S3FileSystem and isinstance(destination_filesystem, pyarrow.fs.S3FileSystem): # Workaround multi-threading issue with pyarrow. Note that use_threads=True # is safe for download, just not for uploads, see: # https://github.com/apache/arrow/issues/32372