From 31db3d5bb1e371e7b78ede77f772dc04e5bff19a Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 8 Sep 2021 16:38:28 +0200 Subject: [PATCH 01/41] Add PrefixFileSystem The PrefixFileSystem is a filesystem-wrapper. It assumes every path it is dealing with is relative to the `prefix`. After performing the necessary paths operation it delegates everything to the wrapped filesystem. Resolves #395 --- fsspec/implementations/prefix.py | 135 ++++++++++++++++++++ fsspec/implementations/tests/test_prefix.py | 135 ++++++++++++++++++++ 2 files changed, 270 insertions(+) create mode 100644 fsspec/implementations/prefix.py create mode 100644 fsspec/implementations/tests/test_prefix.py diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py new file mode 100644 index 000000000..3515e592f --- /dev/null +++ b/fsspec/implementations/prefix.py @@ -0,0 +1,135 @@ +import os +from pathlib import Path +from typing import Any, Iterable, Sequence, Union + +import fsspec +from fsspec import AbstractFileSystem +from fsspec.core import split_protocol +from fsspec.utils import stringify_path + + +class PrefixFileSystem(AbstractFileSystem): + def __init__( + self, + prefix: str, + filesystem: fsspec.AbstractFileSystem, + *args, + **storage_options, + ) -> None: + super().__init__(*args, **storage_options) + self.prefix = prefix + self.filesystem = filesystem + + def _add_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: + if isinstance(path, (str, Path)): + path = stringify_path(path) + protocol, path = split_protocol(path) + path = os.path.join(self.prefix, path) + return protocol + "://" + path if protocol is not None else path + elif isinstance(path, Iterable): + return [self._add_fs_prefix(x) for x in path] + assert False + + def _remove_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: + if isinstance(path, (str, Path)): + path = stringify_path(path) + protocol, path = split_protocol(path) + path = os.path.relpath(path, start=self.prefix) + return protocol + "://" + path if protocol is not None else path + elif isinstance(path, Iterable): + return [self._remove_fs_prefix(x) for x in path] + assert False + + def mkdir(self, path: str, create_parents: bool = True, **kwargs) -> None: + path = self._add_fs_prefix(path) + return self.filesystem.mkdir(path=path, create_parents=create_parents, **kwargs) + + def makedirs(self, path: str, exist_ok: bool = False): + path = self._add_fs_prefix(path) + return self.filesystem.mkdirs(path=path, exist_ok=exist_ok) + + def rmdir(self, path: str): + path = self._add_fs_prefix(path) + return self.filesystem.rmdir(path=path) + + def ls( + self, + path: str, + detail=False, + **kwargs, + ) -> Sequence[str]: + path = self._add_fs_prefix(path) + ls_out = self.filesystem.ls(path=path, detail=detail, **kwargs) + if detail: + for out in ls_out: + out["name"] = self._remove_fs_prefix(out["name"]) + return ls_out + return self._remove_fs_prefix(ls_out) + + def glob(self, path: str, **kwargs): + path = self._add_fs_prefix(path) + glob_out = self.filesystem.glob(path=path, **kwargs) + return [self._remove_fs_prefix(x) for x in glob_out] + + def info(self, path: str, **kwargs): + path = self._add_fs_prefix(path) + return self.filesystem.info(path=path, **kwargs) + + def cp_file(self, path1: str, path2: str, **kwargs): + path1 = self._add_fs_prefix(path1) + path2 = self._add_fs_prefix(path2) + return self.filesystem.cp_file(path1, path2, **kwargs) + + def get_file(self, path1: str, path2: str, callback=None, **kwargs): + path1 = self._add_fs_prefix(path1) + path2 = self._add_fs_prefix(path2) + return self.filesystem.get_file(path1, path2, callback, **kwargs) + + def put_file(self, path1: str, path2: str, callback=None, **kwargs): + path1 = self._add_fs_prefix(path1) + path2 = self._add_fs_prefix(path2) + return self.filesystem.put_file(path1, path2, callback, **kwargs) + + def mv_file(self, path1: str, path2: str, **kwargs): + path1 = self._add_fs_prefix(path1) + path2 = self._add_fs_prefix(path2) + return self.filesystem.mv_file(path1, path2, **kwargs) + + def rm_file(self, path: str): + path = self._add_fs_prefix(path) + return self.filesystem.rm_file(path) + + def rm(self, path: str, recursive=False, maxdepth=None): + path = self._add_fs_prefix(path) + return self.filesystem.rm(path, recursive=recursive, maxdepth=maxdepth) + + def touch(self, path: str, **kwargs): + path = self._add_fs_prefix(path) + return self.filesystem.touch(path, **kwargs) + + def created(self, path: str): + path = self._add_fs_prefix(path) + return self.filesystem.created(path) + + def modified(self, path: str): + path = self._add_fs_prefix(path) + return self.filesystem.modified(path) + + def sign(self, path: str, expiration=100, **kwargs): + path = self._add_fs_prefix(path) + return self.filesystem.sign(path, expiration=100, **kwargs) + + def cat( + self, + path: str, + recursive: bool = False, + on_error: str = "raise", + **kwargs: Any, + ): + path = self._add_fs_prefix(path) + return self.filesystem.cat( + path, recursive=recursive, on_error=on_error, **kwargs + ) + + def __repr__(self) -> str: + return f"{self.__class__.__qualname__}(prefix='{self.prefix}', filesystem={self.filesystem})" diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py new file mode 100644 index 000000000..095f470df --- /dev/null +++ b/fsspec/implementations/tests/test_prefix.py @@ -0,0 +1,135 @@ +from __future__ import absolute_import, division, print_function + +import os +import os.path +import tempfile +from contextlib import contextmanager + +import pytest + +import fsspec +from fsspec.core import OpenFile +from fsspec.implementations.local import make_path_posix +from fsspec.implementations.prefix import PrefixFileSystem + +files = { + ".test.accounts.1.json": ( + b'{"amount": 100, "name": "Alice"}\n' + b'{"amount": 200, "name": "Bob"}\n' + b'{"amount": 300, "name": "Charlie"}\n' + b'{"amount": 400, "name": "Dennis"}\n' + ), + ".test.accounts.2.json": ( + b'{"amount": 500, "name": "Alice"}\n' + b'{"amount": 600, "name": "Bob"}\n' + b'{"amount": 700, "name": "Charlie"}\n' + b'{"amount": 800, "name": "Dennis"}\n' + ), +} + + +csv_files = { + ".test.fakedata.1.csv": (b"a,b\n" b"1,2\n"), + ".test.fakedata.2.csv": (b"a,b\n" b"3,4\n"), +} +odir = os.getcwd() + + +@contextmanager +def filetexts(d, open=open, mode="t"): + """Dumps a number of textfiles to disk + + d - dict + a mapping from filename to text like {'a.csv': '1,1\n2,2'} + + Since this is meant for use in tests, this context manager will + automatically switch to a temporary current directory, to avoid + race conditions when running tests in parallel. + """ + dirname = tempfile.mkdtemp() + try: + os.chdir(dirname) + for filename, text in d.items(): + f = open(filename, "w" + mode) + try: + f.write(text) + finally: + try: + f.close() + except AttributeError: + pass + + yield list(d) + + for filename in d: + if os.path.exists(filename): + try: + os.remove(filename) + except (IOError, OSError): + pass + finally: + os.chdir(odir) + + +def test_cats(): + with filetexts(csv_files, mode="b"): + fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + assert fs.cat(".test.fakedata.1.csv") == b"a,b\n" b"1,2\n" + out = set(fs.cat([".test.fakedata.1.csv", ".test.fakedata.2.csv"]).values()) + assert out == {b"a,b\n" b"1,2\n", b"a,b\n" b"3,4\n"} + assert fs.cat(".test.fakedata.1.csv", None, None) == b"a,b\n" b"1,2\n" + assert fs.cat(".test.fakedata.1.csv", start=1, end=6) == b"a,b\n" b"1,2\n"[1:6] + assert fs.cat(".test.fakedata.1.csv", start=-1) == b"a,b\n" b"1,2\n"[-1:] + assert ( + fs.cat(".test.fakedata.1.csv", start=1, end=-2) == b"a,b\n" b"1,2\n"[1:-2] + ) + out = set( + fs.cat( + [".test.fakedata.1.csv", ".test.fakedata.2.csv"], start=1, end=-1 + ).values() + ) + assert out == {b"a,b\n" b"1,2\n"[1:-1], b"a,b\n" b"3,4\n"[1:-1]} + + +def test_not_found(): + fn = "not-a-file" + fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + with pytest.raises((FileNotFoundError, OSError)): + with OpenFile(fs, fn, mode="rb"): + pass + + +def test_isfile(): + fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + with filetexts(files, mode="b"): + for f in files.keys(): + assert fs.isfile(f) + assert fs.isfile("file://" + f) + assert not fs.isfile("not-a-file") + assert not fs.isfile("file://not-a-file") + + +def test_isdir(): + fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + with filetexts(files, mode="b"): + for f in files.keys(): + assert fs.isdir(os.path.dirname(os.path.abspath(f))) + assert not fs.isdir(f) + assert not fs.isdir("not-a-dir") + + +@pytest.mark.parametrize("prefix", ["/", "/tmp"]) +def test_directories(tmpdir, prefix): + tmpdir = make_path_posix(str(tmpdir)) + rel_tmpdir = os.path.relpath(tmpdir, prefix) + + fs = PrefixFileSystem(prefix=prefix, filesystem=fsspec.filesystem("file")) + + fs.mkdir(rel_tmpdir + "/dir") + + assert not fs.ls(tmpdir + "/dir") + + assert rel_tmpdir + "/dir" in fs.ls(rel_tmpdir) + assert fs.ls(rel_tmpdir, True)[0]["type"] == "directory" + fs.rmdir(rel_tmpdir + "/dir") + assert not fs.ls(rel_tmpdir) From e42d5a4c4cc9c8d17019f4db7cce4088b33da9c9 Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 11:59:56 +0200 Subject: [PATCH 02/41] Refactor test_directories to remove linux-specific paths --- fsspec/implementations/tests/test_prefix.py | 27 ++++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 095f470df..b08764a68 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -118,18 +118,21 @@ def test_isdir(): assert not fs.isdir("not-a-dir") -@pytest.mark.parametrize("prefix", ["/", "/tmp"]) -def test_directories(tmpdir, prefix): +@pytest.mark.parametrize("dirname", ["/dir", "dir"]) +@pytest.mark.parametrize("prefix", ["a/b/c/d/e", "a/b/c/d/e/"]) +def test_directories(tmpdir, prefix, dirname): tmpdir = make_path_posix(str(tmpdir)) - rel_tmpdir = os.path.relpath(tmpdir, prefix) + prefix = os.path.join(tmpdir, prefix) fs = PrefixFileSystem(prefix=prefix, filesystem=fsspec.filesystem("file")) - - fs.mkdir(rel_tmpdir + "/dir") - - assert not fs.ls(tmpdir + "/dir") - - assert rel_tmpdir + "/dir" in fs.ls(rel_tmpdir) - assert fs.ls(rel_tmpdir, True)[0]["type"] == "directory" - fs.rmdir(rel_tmpdir + "/dir") - assert not fs.ls(rel_tmpdir) + fs.mkdir(dirname) + assert not os.path.exists(os.path.join(tmpdir, "dir")) + assert os.path.exists(os.path.join(prefix, "dir")) + assert fs.ls(".") == ["./dir"] + fs.rmdir(dirname) + assert not os.path.exists(os.path.join(prefix, "dir")) + + fs = PrefixFileSystem(prefix=f"{tmpdir}/a", filesystem=fsspec.filesystem("file")) + assert fs.ls(".") == ["./b"] + fs.rm("b", recursive=True) + assert fs.ls(".") == [] From d60f884a9a8b7df77c91634eb3ecf999a090dbe5 Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 12:02:11 +0200 Subject: [PATCH 03/41] Fix test_isdir to avoid (failing) folder access in prefix --- fsspec/implementations/tests/test_prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index b08764a68..a653c9649 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -113,7 +113,7 @@ def test_isdir(): fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) with filetexts(files, mode="b"): for f in files.keys(): - assert fs.isdir(os.path.dirname(os.path.abspath(f))) + assert fs.isfile(f) assert not fs.isdir(f) assert not fs.isdir("not-a-dir") From 63ef3b65cad6a642094a2cdb4ca31e873861a4d9 Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 12:02:56 +0200 Subject: [PATCH 04/41] Assume empty prefix indicates the root --- fsspec/implementations/prefix.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 3515e592f..a139f4688 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -17,7 +17,11 @@ def __init__( **storage_options, ) -> None: super().__init__(*args, **storage_options) - self.prefix = prefix + self.prefix = stringify_path(prefix) + + if not self.prefix: + self.prefix = self.sep + self.filesystem = filesystem def _add_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: From f0540096b52b7004ad1413e88042e9b2773cb9bc Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 12:05:01 +0200 Subject: [PATCH 05/41] Refactor the `_add_fs_prefix` without using os.path.join --- fsspec/implementations/prefix.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index a139f4688..5ea8e1a4e 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -24,11 +24,23 @@ def __init__( self.filesystem = filesystem + def _get_relative_path(self, path: str) -> str: + if path[: len(self.sep)] == self.sep: + return path[len(self.sep) :] + return path + def _add_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: if isinstance(path, (str, Path)): path = stringify_path(path) protocol, path = split_protocol(path) - path = os.path.join(self.prefix, path) + + path = self._get_relative_path(path) + + if self.prefix == self.sep: + path = f"{self.sep}{path}" # don't add twice the same sep + else: + path = f"{self.prefix}{self.sep}{path}" + return protocol + "://" + path if protocol is not None else path elif isinstance(path, Iterable): return [self._add_fs_prefix(x) for x in path] From f97f9d1c806a51b42fc7e8b203d365db0faa1d40 Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 12:05:52 +0200 Subject: [PATCH 06/41] Remove `Union[str, Path]` typing in favor of `str` --- fsspec/implementations/prefix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 5ea8e1a4e..3a31e4215 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -29,7 +29,7 @@ def _get_relative_path(self, path: str) -> str: return path[len(self.sep) :] return path - def _add_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: + def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: if isinstance(path, (str, Path)): path = stringify_path(path) protocol, path = split_protocol(path) @@ -46,7 +46,7 @@ def _add_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: return [self._add_fs_prefix(x) for x in path] assert False - def _remove_fs_prefix(self, path: Union[str, Path]) -> Union[str, Sequence[str]]: + def _remove_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: if isinstance(path, (str, Path)): path = stringify_path(path) protocol, path = split_protocol(path) From 2a8ffb4b7901365b4520bf489895b9b1050095ca Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 12:16:15 +0200 Subject: [PATCH 07/41] Refactor the `_remove_fs_prefix` without using os.path.relpath --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 3a31e4215..d40dfeba1 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -50,7 +50,7 @@ def _remove_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: if isinstance(path, (str, Path)): path = stringify_path(path) protocol, path = split_protocol(path) - path = os.path.relpath(path, start=self.prefix) + path = path[len(self.prefix) + 1 :] return protocol + "://" + path if protocol is not None else path elif isinstance(path, Iterable): return [self._remove_fs_prefix(x) for x in path] From 507745ab7e2d978a6a226589d8dc64dc82332898 Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Wed, 22 Sep 2021 13:39:19 +0200 Subject: [PATCH 08/41] Remove unused import --- fsspec/implementations/prefix.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index d40dfeba1..6b6aa78ee 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -1,4 +1,3 @@ -import os from pathlib import Path from typing import Any, Iterable, Sequence, Union From 7b938dd70b422f998891fa07adf3f5116ebbace9 Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 24 Sep 2021 16:55:10 +0200 Subject: [PATCH 09/41] Add prefix on open --- fsspec/implementations/prefix.py | 13 +++++++++++++ fsspec/implementations/tests/test_prefix.py | 18 ++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 6b6aa78ee..4e118b4f0 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -4,9 +4,15 @@ import fsspec from fsspec import AbstractFileSystem from fsspec.core import split_protocol +from fsspec.spec import AbstractBufferedFile from fsspec.utils import stringify_path +class PrefixBufferedFile(AbstractBufferedFile): + def _fetch_range(self, start, end): + pass + + class PrefixFileSystem(AbstractFileSystem): def __init__( self, @@ -148,3 +154,10 @@ def cat( def __repr__(self) -> str: return f"{self.__class__.__qualname__}(prefix='{self.prefix}', filesystem={self.filesystem})" + + def open( + self, + path, + **kwargs, + ): + return self.filesystem.open(self._add_fs_prefix(path), **kwargs) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index a653c9649..20081041c 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -4,6 +4,7 @@ import os.path import tempfile from contextlib import contextmanager +from pathlib import Path import pytest @@ -29,8 +30,9 @@ csv_files = { - ".test.fakedata.1.csv": (b"a,b\n" b"1,2\n"), - ".test.fakedata.2.csv": (b"a,b\n" b"3,4\n"), + ".test.fakedata.1.csv": b"a,b\n1,2\n", + ".test.fakedata.2.csv": b"a,b\n3,4\n", + "a/b/c/.test.fakedata.3.csv": b"a,b\n3,4,5\n", } odir = os.getcwd() @@ -50,6 +52,11 @@ def filetexts(d, open=open, mode="t"): try: os.chdir(dirname) for filename, text in d.items(): + filename = Path(filename) + + if not filename.parent.exists(): + filename.parent.mkdir(parents=True, exist_ok=True) + f = open(filename, "w" + mode) try: f.write(text) @@ -71,6 +78,13 @@ def filetexts(d, open=open, mode="t"): os.chdir(odir) +def test_open(): + with filetexts(csv_files, mode="b"): + fs = PrefixFileSystem(prefix="a", filesystem=fsspec.filesystem("file")) + with fs.open("b/c/.test.fakedata.3.csv") as f: + assert f.read() == b"a,b\n3,4,5\n" + + def test_cats(): with filetexts(csv_files, mode="b"): fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) From 1685e6b3ed5b33c61ac844c312a953817395cc2f Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:46:19 +0200 Subject: [PATCH 10/41] Add PrefixFileSystem behaviour on open() docstring --- fsspec/implementations/prefix.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 4e118b4f0..848b30750 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -160,4 +160,26 @@ def open( path, **kwargs, ): + """ + Return a file-like object from the filesystem + + The file-like object returned ignores an eventual PrefixFileSystem: + - the ``.path`` attribute is always an absolute path + - the ``.fs`` attribute, if present, would be the wrapped file-system + + The resultant instance must function correctly in a context ``with`` + block. + + Parameters + ---------- + path: str + Target file + mode: str like 'rb', 'w' + See builtin ``open()`` + block_size: int + Some indication of buffering - this is a value in bytes + cache_options : dict, optional + Extra arguments to pass through to the cache. + encoding, errors, newline: passed on to TextIOWrapper for text mode + """ return self.filesystem.open(self._add_fs_prefix(path), **kwargs) From 799da25045b5485f4d0e4030515a24bdf6d2c1d6 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:41:23 +0200 Subject: [PATCH 11/41] Remove checks for Path --- fsspec/implementations/prefix.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 848b30750..1f25965b2 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -1,4 +1,3 @@ -from pathlib import Path from typing import Any, Iterable, Sequence, Union import fsspec @@ -35,7 +34,7 @@ def _get_relative_path(self, path: str) -> str: return path def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: - if isinstance(path, (str, Path)): + if isinstance(path, str): path = stringify_path(path) protocol, path = split_protocol(path) @@ -52,7 +51,7 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: assert False def _remove_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: - if isinstance(path, (str, Path)): + if isinstance(path, str): path = stringify_path(path) protocol, path = split_protocol(path) path = path[len(self.prefix) + 1 :] From 687d7e870927a802c3317771fc2e0ebc78a9f5c3 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:48:22 +0200 Subject: [PATCH 12/41] Add PrefixFileSystem to API docs --- docs/source/api.rst | 4 ++++ fsspec/implementations/prefix.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index 9c8afa8f9..1420b8315 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -119,6 +119,7 @@ Built-in Implementations fsspec.implementations.libarchive.LibArchiveFileSystem fsspec.implementations.dbfs.DatabricksFileSystem fsspec.implementations.reference.ReferenceFileSystem + fsspec.implementations.prefix.PrefixFileSystem .. autoclass:: fsspec.implementations.ftp.FTPFileSystem :members: __init__ @@ -183,6 +184,9 @@ Built-in Implementations .. autoclass:: fsspec.implementations.reference.ReferenceFileSystem :members: __init__ +.. autoclass:: fsspec.implementations.prefix.PrefixFileSystem + :members: __init__, open + Other Known Implementations --------------------------- diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 1f25965b2..71f2d5949 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -13,6 +13,8 @@ def _fetch_range(self, start, end): class PrefixFileSystem(AbstractFileSystem): + """A meta-filesystem to add a prefix and delegate to another filesystem""" + def __init__( self, prefix: str, @@ -20,6 +22,16 @@ def __init__( *args, **storage_options, ) -> None: + """ + Parameters + ---------- + prefix: str + The prefix to append to all paths + + fs: fsspec.AbstractFileSystem + An instantiated filesystem to wrap. All operations are delegated to + this filesystem after appending the specified prefix + """ super().__init__(*args, **storage_options) self.prefix = stringify_path(prefix) From 1b31851e57e85341f2c1a2055edf5e3c4ec9b30c Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:50:08 +0200 Subject: [PATCH 13/41] Default to root_marker if prefix is not specified --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 71f2d5949..0ff47e208 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -36,7 +36,7 @@ def __init__( self.prefix = stringify_path(prefix) if not self.prefix: - self.prefix = self.sep + self.prefix = self.root_marker self.filesystem = filesystem From 91b09ec3fca6bd9e67352da458a9062c81b3222b Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:52:04 +0200 Subject: [PATCH 14/41] Rename filesystem variable to fs --- fsspec/implementations/prefix.py | 46 ++++++++++----------- fsspec/implementations/tests/test_prefix.py | 14 +++---- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 0ff47e208..171270def 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -18,7 +18,7 @@ class PrefixFileSystem(AbstractFileSystem): def __init__( self, prefix: str, - filesystem: fsspec.AbstractFileSystem, + fs: fsspec.AbstractFileSystem, *args, **storage_options, ) -> None: @@ -38,7 +38,7 @@ def __init__( if not self.prefix: self.prefix = self.root_marker - self.filesystem = filesystem + self.fs = fs def _get_relative_path(self, path: str) -> str: if path[: len(self.sep)] == self.sep: @@ -74,15 +74,15 @@ def _remove_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: def mkdir(self, path: str, create_parents: bool = True, **kwargs) -> None: path = self._add_fs_prefix(path) - return self.filesystem.mkdir(path=path, create_parents=create_parents, **kwargs) + return self.fs.mkdir(path=path, create_parents=create_parents, **kwargs) def makedirs(self, path: str, exist_ok: bool = False): path = self._add_fs_prefix(path) - return self.filesystem.mkdirs(path=path, exist_ok=exist_ok) + return self.fs.mkdirs(path=path, exist_ok=exist_ok) def rmdir(self, path: str): path = self._add_fs_prefix(path) - return self.filesystem.rmdir(path=path) + return self.fs.rmdir(path=path) def ls( self, @@ -91,7 +91,7 @@ def ls( **kwargs, ) -> Sequence[str]: path = self._add_fs_prefix(path) - ls_out = self.filesystem.ls(path=path, detail=detail, **kwargs) + ls_out = self.fs.ls(path=path, detail=detail, **kwargs) if detail: for out in ls_out: out["name"] = self._remove_fs_prefix(out["name"]) @@ -100,56 +100,56 @@ def ls( def glob(self, path: str, **kwargs): path = self._add_fs_prefix(path) - glob_out = self.filesystem.glob(path=path, **kwargs) + glob_out = self.fs.glob(path=path, **kwargs) return [self._remove_fs_prefix(x) for x in glob_out] def info(self, path: str, **kwargs): path = self._add_fs_prefix(path) - return self.filesystem.info(path=path, **kwargs) + return self.fs.info(path=path, **kwargs) def cp_file(self, path1: str, path2: str, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) - return self.filesystem.cp_file(path1, path2, **kwargs) + return self.fs.cp_file(path1, path2, **kwargs) def get_file(self, path1: str, path2: str, callback=None, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) - return self.filesystem.get_file(path1, path2, callback, **kwargs) + return self.fs.get_file(path1, path2, callback, **kwargs) def put_file(self, path1: str, path2: str, callback=None, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) - return self.filesystem.put_file(path1, path2, callback, **kwargs) + return self.fs.put_file(path1, path2, callback, **kwargs) def mv_file(self, path1: str, path2: str, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) - return self.filesystem.mv_file(path1, path2, **kwargs) + return self.fs.mv_file(path1, path2, **kwargs) def rm_file(self, path: str): path = self._add_fs_prefix(path) - return self.filesystem.rm_file(path) + return self.fs.rm_file(path) def rm(self, path: str, recursive=False, maxdepth=None): path = self._add_fs_prefix(path) - return self.filesystem.rm(path, recursive=recursive, maxdepth=maxdepth) + return self.fs.rm(path, recursive=recursive, maxdepth=maxdepth) def touch(self, path: str, **kwargs): path = self._add_fs_prefix(path) - return self.filesystem.touch(path, **kwargs) + return self.fs.touch(path, **kwargs) def created(self, path: str): path = self._add_fs_prefix(path) - return self.filesystem.created(path) + return self.fs.created(path) def modified(self, path: str): path = self._add_fs_prefix(path) - return self.filesystem.modified(path) + return self.fs.modified(path) def sign(self, path: str, expiration=100, **kwargs): path = self._add_fs_prefix(path) - return self.filesystem.sign(path, expiration=100, **kwargs) + return self.fs.sign(path, expiration=100, **kwargs) def cat( self, @@ -159,12 +159,10 @@ def cat( **kwargs: Any, ): path = self._add_fs_prefix(path) - return self.filesystem.cat( - path, recursive=recursive, on_error=on_error, **kwargs - ) + return self.fs.cat(path, recursive=recursive, on_error=on_error, **kwargs) def __repr__(self) -> str: - return f"{self.__class__.__qualname__}(prefix='{self.prefix}', filesystem={self.filesystem})" + return f"{self.__class__.__qualname__}(prefix='{self.prefix}', fs={self.fs})" def open( self, @@ -172,7 +170,7 @@ def open( **kwargs, ): """ - Return a file-like object from the filesystem + Return a file-like object from the fs The file-like object returned ignores an eventual PrefixFileSystem: - the ``.path`` attribute is always an absolute path @@ -193,4 +191,4 @@ def open( Extra arguments to pass through to the cache. encoding, errors, newline: passed on to TextIOWrapper for text mode """ - return self.filesystem.open(self._add_fs_prefix(path), **kwargs) + return self.fs.open(self._add_fs_prefix(path), **kwargs) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 20081041c..6c93f7c50 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -80,14 +80,14 @@ def filetexts(d, open=open, mode="t"): def test_open(): with filetexts(csv_files, mode="b"): - fs = PrefixFileSystem(prefix="a", filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix="a", fs=fsspec.filesystem("file")) with fs.open("b/c/.test.fakedata.3.csv") as f: assert f.read() == b"a,b\n3,4,5\n" def test_cats(): with filetexts(csv_files, mode="b"): - fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) assert fs.cat(".test.fakedata.1.csv") == b"a,b\n" b"1,2\n" out = set(fs.cat([".test.fakedata.1.csv", ".test.fakedata.2.csv"]).values()) assert out == {b"a,b\n" b"1,2\n", b"a,b\n" b"3,4\n"} @@ -107,14 +107,14 @@ def test_cats(): def test_not_found(): fn = "not-a-file" - fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) with pytest.raises((FileNotFoundError, OSError)): with OpenFile(fs, fn, mode="rb"): pass def test_isfile(): - fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) with filetexts(files, mode="b"): for f in files.keys(): assert fs.isfile(f) @@ -124,7 +124,7 @@ def test_isfile(): def test_isdir(): - fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) with filetexts(files, mode="b"): for f in files.keys(): assert fs.isfile(f) @@ -138,7 +138,7 @@ def test_directories(tmpdir, prefix, dirname): tmpdir = make_path_posix(str(tmpdir)) prefix = os.path.join(tmpdir, prefix) - fs = PrefixFileSystem(prefix=prefix, filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix=prefix, fs=fsspec.filesystem("file")) fs.mkdir(dirname) assert not os.path.exists(os.path.join(tmpdir, "dir")) assert os.path.exists(os.path.join(prefix, "dir")) @@ -146,7 +146,7 @@ def test_directories(tmpdir, prefix, dirname): fs.rmdir(dirname) assert not os.path.exists(os.path.join(prefix, "dir")) - fs = PrefixFileSystem(prefix=f"{tmpdir}/a", filesystem=fsspec.filesystem("file")) + fs = PrefixFileSystem(prefix=f"{tmpdir}/a", fs=fsspec.filesystem("file")) assert fs.ls(".") == ["./b"] fs.rm("b", recursive=True) assert fs.ls(".") == [] From b219d33c88789ea60aa05b0f9d0697f782940334 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:53:03 +0200 Subject: [PATCH 15/41] Delegate to `fs.makedirs` when overriding `makedirs` --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 171270def..11dd40019 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -78,7 +78,7 @@ def mkdir(self, path: str, create_parents: bool = True, **kwargs) -> None: def makedirs(self, path: str, exist_ok: bool = False): path = self._add_fs_prefix(path) - return self.fs.mkdirs(path=path, exist_ok=exist_ok) + return self.fs.makedirs(path=path, exist_ok=exist_ok) def rmdir(self, path: str): path = self._add_fs_prefix(path) From 24ea21f8a8739fb480f3fae4d2d5630d5c1b5d0e Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:53:37 +0200 Subject: [PATCH 16/41] Fix `fs.sign` delegation --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 11dd40019..4e841a813 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -149,7 +149,7 @@ def modified(self, path: str): def sign(self, path: str, expiration=100, **kwargs): path = self._add_fs_prefix(path) - return self.fs.sign(path, expiration=100, **kwargs) + return self.fs.sign(path, expiration=expiration, **kwargs) def cat( self, From 4f78a17fa0949f07bf7c909af3c3fa2ae9965d03 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 5 Oct 2021 15:54:55 +0200 Subject: [PATCH 17/41] Fix `open` to match parent signature --- fsspec/implementations/prefix.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 4e841a813..4cfaaf354 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -164,11 +164,7 @@ def cat( def __repr__(self) -> str: return f"{self.__class__.__qualname__}(prefix='{self.prefix}', fs={self.fs})" - def open( - self, - path, - **kwargs, - ): + def open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs): """ Return a file-like object from the fs @@ -191,4 +187,10 @@ def open( Extra arguments to pass through to the cache. encoding, errors, newline: passed on to TextIOWrapper for text mode """ - return self.fs.open(self._add_fs_prefix(path), **kwargs) + return self.fs.open( + self._add_fs_prefix(path), + mode=mode, + block_size=block_size, + cache_options=cache_options, + **kwargs, + ) From 7ee0107f4006e1f7a2d6b36c127d32260567e91f Mon Sep 17 00:00:00 2001 From: Luca Moschella Date: Tue, 5 Oct 2021 16:39:28 +0200 Subject: [PATCH 18/41] Update fsspec/implementations/prefix.py Co-authored-by: Ruslan Kuprieiev --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 4cfaaf354..6f591a43e 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -18,7 +18,7 @@ class PrefixFileSystem(AbstractFileSystem): def __init__( self, prefix: str, - fs: fsspec.AbstractFileSystem, + fs: AbstractFileSystem, *args, **storage_options, ) -> None: From 7d0bbcd0e83c5363798ce925f30ef40437b81f1a Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:01:15 +0200 Subject: [PATCH 19/41] Raise error on `prefix` ill defined --- fsspec/implementations/prefix.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 6f591a43e..51a7a1a73 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -36,7 +36,7 @@ def __init__( self.prefix = stringify_path(prefix) if not self.prefix: - self.prefix = self.root_marker + raise ValueError(f"empty prefix is not a valid prefix") self.fs = fs @@ -52,8 +52,8 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: path = self._get_relative_path(path) - if self.prefix == self.sep: - path = f"{self.sep}{path}" # don't add twice the same sep + if self.prefix == self.root_marker: + path = f"{self.root_marker}{path}" # don't add twice the same sep else: path = f"{self.prefix}{self.sep}{path}" From 72061bae8c45271a3fc6d909f463d3e6d915e1d9 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:02:03 +0200 Subject: [PATCH 20/41] Remove the `root_marker` not `sep` from user-paths to force them to be relative --- fsspec/implementations/prefix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 51a7a1a73..562b950ef 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -41,8 +41,8 @@ def __init__( self.fs = fs def _get_relative_path(self, path: str) -> str: - if path[: len(self.sep)] == self.sep: - return path[len(self.sep) :] + if path[: len(self.root_marker)] == self.root_marker: + return path[len(self.root_marker) :] return path def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: From 573a41b20a9f3d3b3026244ce93fee896977313b Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:25:31 +0200 Subject: [PATCH 21/41] Add prefix sanity checks, remove trailing sep --- fsspec/implementations/prefix.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 562b950ef..b587232b3 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -33,14 +33,20 @@ def __init__( this filesystem after appending the specified prefix """ super().__init__(*args, **storage_options) - self.prefix = stringify_path(prefix) + self.fs = fs - if not self.prefix: + if not prefix: raise ValueError(f"empty prefix is not a valid prefix") - self.fs = fs + prefix = stringify_path(prefix) + self.prefix = self._remove_trailing_sep(prefix) + + def _remove_trailing_sep(self, prefix: str) -> str: + if prefix[-len(self.fs.sep) :] == self.fs.sep: + return prefix[: -len(self.fs.sep)] + return prefix - def _get_relative_path(self, path: str) -> str: + def _remove_root_marker(self, path: str) -> str: if path[: len(self.root_marker)] == self.root_marker: return path[len(self.root_marker) :] return path @@ -50,7 +56,7 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: path = stringify_path(path) protocol, path = split_protocol(path) - path = self._get_relative_path(path) + path = self._remove_root_marker(path) if self.prefix == self.root_marker: path = f"{self.root_marker}{path}" # don't add twice the same sep From 5f540b2752e7fbb2495a2b5f878ffd75a9a2f562 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:25:49 +0200 Subject: [PATCH 22/41] Use `sep` and `root_marker` of the wrapped fs --- fsspec/implementations/prefix.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index b587232b3..f850f9191 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -42,13 +42,13 @@ def __init__( self.prefix = self._remove_trailing_sep(prefix) def _remove_trailing_sep(self, prefix: str) -> str: - if prefix[-len(self.fs.sep) :] == self.fs.sep: + if prefix[-len(self.fs.sep) :] == self.fs.sep and prefix != self.root_marker: return prefix[: -len(self.fs.sep)] return prefix def _remove_root_marker(self, path: str) -> str: - if path[: len(self.root_marker)] == self.root_marker: - return path[len(self.root_marker) :] + if path[: len(self.fs.root_marker)] == self.fs.root_marker: + return path[len(self.fs.root_marker) :] return path def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: @@ -58,10 +58,10 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: path = self._remove_root_marker(path) - if self.prefix == self.root_marker: - path = f"{self.root_marker}{path}" # don't add twice the same sep + if self.prefix == self.fs.root_marker: + path = f"{self.fs.root_marker}{path}" # don't add twice the same sep else: - path = f"{self.prefix}{self.sep}{path}" + path = f"{self.prefix}{self.fs.sep}{path}" return protocol + "://" + path if protocol is not None else path elif isinstance(path, Iterable): From b62589f440f51572215ad2ed81a0d750b8bba752 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:39:30 +0200 Subject: [PATCH 23/41] Refactor utility functions and add tests --- fsspec/implementations/prefix.py | 28 ++++++----- fsspec/implementations/tests/test_prefix.py | 51 ++++++++++++++++++++- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index f850f9191..7ecd06642 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -12,6 +12,18 @@ def _fetch_range(self, start, end): pass +def remove_trailing_sep(prefix: str, sep: str, root_marker: str) -> str: + if prefix[-len(sep) :] == sep and prefix != root_marker: + return prefix[: -len(sep)] + return prefix + + +def remove_root_marker(path: str, root_marker: str) -> str: + if path[: len(root_marker)] == root_marker: + return path[len(root_marker) :] + return path + + class PrefixFileSystem(AbstractFileSystem): """A meta-filesystem to add a prefix and delegate to another filesystem""" @@ -39,24 +51,16 @@ def __init__( raise ValueError(f"empty prefix is not a valid prefix") prefix = stringify_path(prefix) - self.prefix = self._remove_trailing_sep(prefix) - - def _remove_trailing_sep(self, prefix: str) -> str: - if prefix[-len(self.fs.sep) :] == self.fs.sep and prefix != self.root_marker: - return prefix[: -len(self.fs.sep)] - return prefix - - def _remove_root_marker(self, path: str) -> str: - if path[: len(self.fs.root_marker)] == self.fs.root_marker: - return path[len(self.fs.root_marker) :] - return path + self.prefix = remove_trailing_sep( + prefix, sep=self.fs.sep, root_marker=self.fs.root_marker + ) def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: if isinstance(path, str): path = stringify_path(path) protocol, path = split_protocol(path) - path = self._remove_root_marker(path) + path = remove_root_marker(path, root_marker=self.fs.root_marker) if self.prefix == self.fs.root_marker: path = f"{self.fs.root_marker}{path}" # don't add twice the same sep diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 6c93f7c50..ac85b37ef 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -11,7 +11,11 @@ import fsspec from fsspec.core import OpenFile from fsspec.implementations.local import make_path_posix -from fsspec.implementations.prefix import PrefixFileSystem +from fsspec.implementations.prefix import ( + PrefixFileSystem, + remove_root_marker, + remove_trailing_sep, +) files = { ".test.accounts.1.json": ( @@ -150,3 +154,48 @@ def test_directories(tmpdir, prefix, dirname): assert fs.ls(".") == ["./b"] fs.rm("b", recursive=True) assert fs.ls(".") == [] + + +def test_emtpy_prefix(): + with pytest.raises(ValueError): + PrefixFileSystem(prefix="", fs=fsspec.filesystem("file")) + + with pytest.raises(ValueError): + PrefixFileSystem(prefix=None, fs=fsspec.filesystem("file")) + + +@pytest.mark.parametrize( + "prefix", + [ + ("/", "/"), + ("a", "a"), + ("/a", "/a"), + ("a/", "a"), + ("/a/", "/a"), + ("/a/b/c/", "/a/b/c"), + ], +) +def test_remove_trailing_sep(prefix): + fs = fsspec.filesystem("file") + prefix, normalized_prefix = prefix + assert ( + remove_trailing_sep(prefix, sep=fs.sep, root_marker=fs.root_marker) + == normalized_prefix + ) + + +@pytest.mark.parametrize( + "prefix", + [ + ("/", ""), + ("a", "a"), + ("/a", "a"), + ("a/", "a/"), + ("/a/", "a/"), + ("/a/b/c/", "a/b/c/"), + ], +) +def test_remove_root_marker(prefix): + fs = fsspec.filesystem("file") + prefix, normalized_prefix = prefix + assert remove_root_marker(prefix, root_marker=fs.root_marker) == normalized_prefix From 5c4c7cfdb6c96e39efd007ccb27c6a420c064212 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:49:29 +0200 Subject: [PATCH 24/41] Add test for prefix root --- fsspec/implementations/tests/test_prefix.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index ac85b37ef..06b441418 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -89,6 +89,20 @@ def test_open(): assert f.read() == b"a,b\n3,4,5\n" +def test_prefix_root(): + with filetexts(csv_files, mode="b"): + fs = PrefixFileSystem(prefix="/", fs=fsspec.filesystem("file")) + abs_path_file = os.path.abspath("a/b/c/.test.fakedata.3.csv") + + # Risk double root marker (in path and in prefix) + with fs.open(abs_path_file) as f: + assert f.read() == b"a,b\n3,4,5\n" + + # Without root marker + with fs.open(abs_path_file[1:]) as f: + assert f.read() == b"a,b\n3,4,5\n" + + def test_cats(): with filetexts(csv_files, mode="b"): fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) From 52c148230d02b3b8bc2669566d4c562614ad73e0 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 10:50:33 +0200 Subject: [PATCH 25/41] Fix comment --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 7ecd06642..e041c7cce 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -63,7 +63,7 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: path = remove_root_marker(path, root_marker=self.fs.root_marker) if self.prefix == self.fs.root_marker: - path = f"{self.fs.root_marker}{path}" # don't add twice the same sep + path = f"{self.fs.root_marker}{path}" # don't add twice the root marker else: path = f"{self.prefix}{self.fs.sep}{path}" From dc2d378ed8d0fe5504d168a732cae0be5cc21472 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 11:12:40 +0200 Subject: [PATCH 26/41] Fix docstring --- fsspec/implementations/prefix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index e041c7cce..93f845502 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -40,7 +40,7 @@ def __init__( prefix: str The prefix to append to all paths - fs: fsspec.AbstractFileSystem + fs: AbstractFileSystem An instantiated filesystem to wrap. All operations are delegated to this filesystem after appending the specified prefix """ From bc2d5bc565077df54f284769657b7e4bb8a81619 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 11:45:19 +0200 Subject: [PATCH 27/41] Fix pytest parametrization --- fsspec/implementations/tests/test_prefix.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 06b441418..32d23c86c 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -179,7 +179,7 @@ def test_emtpy_prefix(): @pytest.mark.parametrize( - "prefix", + "prefix, normalized_prefix", [ ("/", "/"), ("a", "a"), @@ -189,9 +189,8 @@ def test_emtpy_prefix(): ("/a/b/c/", "/a/b/c"), ], ) -def test_remove_trailing_sep(prefix): +def test_remove_trailing_sep(prefix, normalized_prefix): fs = fsspec.filesystem("file") - prefix, normalized_prefix = prefix assert ( remove_trailing_sep(prefix, sep=fs.sep, root_marker=fs.root_marker) == normalized_prefix @@ -199,7 +198,7 @@ def test_remove_trailing_sep(prefix): @pytest.mark.parametrize( - "prefix", + "prefix,normalized_prefix", [ ("/", ""), ("a", "a"), @@ -209,7 +208,6 @@ def test_remove_trailing_sep(prefix): ("/a/b/c/", "a/b/c/"), ], ) -def test_remove_root_marker(prefix): +def test_remove_root_marker(prefix, normalized_prefix): fs = fsspec.filesystem("file") - prefix, normalized_prefix = prefix assert remove_root_marker(prefix, root_marker=fs.root_marker) == normalized_prefix From 5be29bb63f7fc3a8f6ee9b78df603793d45bf68b Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 6 Oct 2021 11:52:29 +0200 Subject: [PATCH 28/41] Add tests for current ls behaviour --- fsspec/implementations/tests/test_prefix.py | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 32d23c86c..b64adb4db 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -211,3 +211,26 @@ def test_remove_trailing_sep(prefix, normalized_prefix): def test_remove_root_marker(prefix, normalized_prefix): fs = fsspec.filesystem("file") assert remove_root_marker(prefix, root_marker=fs.root_marker) == normalized_prefix + + +@pytest.mark.parametrize( + "ls_arg, expected_out", + [ + (".", ["./b"]), + ("./", ["./b"]), + ("./b", ["./b/c"]), + ("./b/", ["./b/c"]), + ("b", ["b/c"]), + ("b/", ["b/c"]), + ("./b/c/d", ["./b/c/d/e"]), + ("./b/c/d/", ["./b/c/d/e"]), + ("b/c/d", ["b/c/d/e"]), + ("b/c/d/", ["b/c/d/e"]), + ("b/c/d/e", []), + ("b/c/d/e/", []), + ], +) +def test_ls(tmpdir, ls_arg, expected_out): + os.makedirs(os.path.join(make_path_posix(str(tmpdir)), "a/b/c/d/e/")) + fs = PrefixFileSystem(prefix=f"{tmpdir}/a", fs=fsspec.filesystem("file")) + assert fs.ls(ls_arg) == expected_out From 9fc0545fc4240d2fa7f589c0cb9e5b23b43c100f Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 01:55:42 +0200 Subject: [PATCH 29/41] prefixfs: make helpers private --- fsspec/implementations/prefix.py | 11 +++++------ fsspec/implementations/tests/test_prefix.py | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 93f845502..88d7d655f 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -1,6 +1,5 @@ from typing import Any, Iterable, Sequence, Union -import fsspec from fsspec import AbstractFileSystem from fsspec.core import split_protocol from fsspec.spec import AbstractBufferedFile @@ -12,13 +11,13 @@ def _fetch_range(self, start, end): pass -def remove_trailing_sep(prefix: str, sep: str, root_marker: str) -> str: +def _remove_trailing_sep(prefix: str, sep: str, root_marker: str) -> str: if prefix[-len(sep) :] == sep and prefix != root_marker: return prefix[: -len(sep)] return prefix -def remove_root_marker(path: str, root_marker: str) -> str: +def _remove_root_marker(path: str, root_marker: str) -> str: if path[: len(root_marker)] == root_marker: return path[len(root_marker) :] return path @@ -48,10 +47,10 @@ def __init__( self.fs = fs if not prefix: - raise ValueError(f"empty prefix is not a valid prefix") + raise ValueError("empty prefix is not a valid prefix") prefix = stringify_path(prefix) - self.prefix = remove_trailing_sep( + self.prefix = _remove_trailing_sep( prefix, sep=self.fs.sep, root_marker=self.fs.root_marker ) @@ -60,7 +59,7 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: path = stringify_path(path) protocol, path = split_protocol(path) - path = remove_root_marker(path, root_marker=self.fs.root_marker) + path = _remove_root_marker(path, root_marker=self.fs.root_marker) if self.prefix == self.fs.root_marker: path = f"{self.fs.root_marker}{path}" # don't add twice the root marker diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index b64adb4db..9127d212c 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -13,8 +13,8 @@ from fsspec.implementations.local import make_path_posix from fsspec.implementations.prefix import ( PrefixFileSystem, - remove_root_marker, - remove_trailing_sep, + _remove_root_marker, + _remove_trailing_sep, ) files = { @@ -192,7 +192,7 @@ def test_emtpy_prefix(): def test_remove_trailing_sep(prefix, normalized_prefix): fs = fsspec.filesystem("file") assert ( - remove_trailing_sep(prefix, sep=fs.sep, root_marker=fs.root_marker) + _remove_trailing_sep(prefix, sep=fs.sep, root_marker=fs.root_marker) == normalized_prefix ) @@ -210,7 +210,7 @@ def test_remove_trailing_sep(prefix, normalized_prefix): ) def test_remove_root_marker(prefix, normalized_prefix): fs = fsspec.filesystem("file") - assert remove_root_marker(prefix, root_marker=fs.root_marker) == normalized_prefix + assert _remove_root_marker(prefix, root_marker=fs.root_marker) == normalized_prefix @pytest.mark.parametrize( From 4d6a788e5ea4b9ea0ea10c2705a79d51ed3b6391 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 02:35:57 +0200 Subject: [PATCH 30/41] tests: prefixfs: no root_marker on windows --- fsspec/implementations/tests/test_prefix.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 9127d212c..3941f2076 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -98,9 +98,11 @@ def test_prefix_root(): with fs.open(abs_path_file) as f: assert f.read() == b"a,b\n3,4,5\n" - # Without root marker - with fs.open(abs_path_file[1:]) as f: - assert f.read() == b"a,b\n3,4,5\n" + # no root marker in windows paths + if os.name != "nt": + # Without root marker + with fs.open(abs_path_file[1:]) as f: + assert f.read() == b"a,b\n3,4,5\n" def test_cats(): From b3b80f800a57634ba8e158e70034fdf8b04f9844 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 02:54:07 +0200 Subject: [PATCH 31/41] prefixfs: remove incomplete/broken type annotations --- fsspec/implementations/prefix.py | 68 +++++++++++++++----------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 88d7d655f..c34a72502 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -1,5 +1,3 @@ -from typing import Any, Iterable, Sequence, Union - from fsspec import AbstractFileSystem from fsspec.core import split_protocol from fsspec.spec import AbstractBufferedFile @@ -11,13 +9,13 @@ def _fetch_range(self, start, end): pass -def _remove_trailing_sep(prefix: str, sep: str, root_marker: str) -> str: +def _remove_trailing_sep(prefix, sep, root_marker): if prefix[-len(sep) :] == sep and prefix != root_marker: return prefix[: -len(sep)] return prefix -def _remove_root_marker(path: str, root_marker: str) -> str: +def _remove_root_marker(path, root_marker): if path[: len(root_marker)] == root_marker: return path[len(root_marker) :] return path @@ -28,11 +26,11 @@ class PrefixFileSystem(AbstractFileSystem): def __init__( self, - prefix: str, - fs: AbstractFileSystem, + prefix, + fs, *args, **storage_options, - ) -> None: + ): """ Parameters ---------- @@ -54,7 +52,7 @@ def __init__( prefix, sep=self.fs.sep, root_marker=self.fs.root_marker ) - def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: + def _add_fs_prefix(self, path): if isinstance(path, str): path = stringify_path(path) protocol, path = split_protocol(path) @@ -67,38 +65,34 @@ def _add_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: path = f"{self.prefix}{self.fs.sep}{path}" return protocol + "://" + path if protocol is not None else path - elif isinstance(path, Iterable): - return [self._add_fs_prefix(x) for x in path] - assert False + return [self._add_fs_prefix(x) for x in path] - def _remove_fs_prefix(self, path: str) -> Union[str, Sequence[str]]: + def _remove_fs_prefix(self, path): if isinstance(path, str): path = stringify_path(path) protocol, path = split_protocol(path) path = path[len(self.prefix) + 1 :] return protocol + "://" + path if protocol is not None else path - elif isinstance(path, Iterable): - return [self._remove_fs_prefix(x) for x in path] - assert False + return [self._remove_fs_prefix(x) for x in path] - def mkdir(self, path: str, create_parents: bool = True, **kwargs) -> None: + def mkdir(self, path, create_parents=True, **kwargs): path = self._add_fs_prefix(path) return self.fs.mkdir(path=path, create_parents=create_parents, **kwargs) - def makedirs(self, path: str, exist_ok: bool = False): + def makedirs(self, path, exist_ok=False): path = self._add_fs_prefix(path) return self.fs.makedirs(path=path, exist_ok=exist_ok) - def rmdir(self, path: str): + def rmdir(self, path): path = self._add_fs_prefix(path) return self.fs.rmdir(path=path) def ls( self, - path: str, + path, detail=False, **kwargs, - ) -> Sequence[str]: + ): path = self._add_fs_prefix(path) ls_out = self.fs.ls(path=path, detail=detail, **kwargs) if detail: @@ -107,70 +101,70 @@ def ls( return ls_out return self._remove_fs_prefix(ls_out) - def glob(self, path: str, **kwargs): + def glob(self, path, **kwargs): path = self._add_fs_prefix(path) glob_out = self.fs.glob(path=path, **kwargs) return [self._remove_fs_prefix(x) for x in glob_out] - def info(self, path: str, **kwargs): + def info(self, path, **kwargs): path = self._add_fs_prefix(path) return self.fs.info(path=path, **kwargs) - def cp_file(self, path1: str, path2: str, **kwargs): + def cp_file(self, path1, path2, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) return self.fs.cp_file(path1, path2, **kwargs) - def get_file(self, path1: str, path2: str, callback=None, **kwargs): + def get_file(self, path1, path2, callback=None, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) return self.fs.get_file(path1, path2, callback, **kwargs) - def put_file(self, path1: str, path2: str, callback=None, **kwargs): + def put_file(self, path1, path2, callback=None, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) return self.fs.put_file(path1, path2, callback, **kwargs) - def mv_file(self, path1: str, path2: str, **kwargs): + def mv_file(self, path1, path2, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) return self.fs.mv_file(path1, path2, **kwargs) - def rm_file(self, path: str): + def rm_file(self, path): path = self._add_fs_prefix(path) return self.fs.rm_file(path) - def rm(self, path: str, recursive=False, maxdepth=None): + def rm(self, path, recursive=False, maxdepth=None): path = self._add_fs_prefix(path) return self.fs.rm(path, recursive=recursive, maxdepth=maxdepth) - def touch(self, path: str, **kwargs): + def touch(self, path, **kwargs): path = self._add_fs_prefix(path) return self.fs.touch(path, **kwargs) - def created(self, path: str): + def created(self, path): path = self._add_fs_prefix(path) return self.fs.created(path) - def modified(self, path: str): + def modified(self, path): path = self._add_fs_prefix(path) return self.fs.modified(path) - def sign(self, path: str, expiration=100, **kwargs): + def sign(self, path, expiration=100, **kwargs): path = self._add_fs_prefix(path) return self.fs.sign(path, expiration=expiration, **kwargs) def cat( self, - path: str, - recursive: bool = False, - on_error: str = "raise", - **kwargs: Any, + path, + recursive=False, + on_error="raise", + **kwargs, ): path = self._add_fs_prefix(path) return self.fs.cat(path, recursive=recursive, on_error=on_error, **kwargs) - def __repr__(self) -> str: + def __repr__(self): return f"{self.__class__.__qualname__}(prefix='{self.prefix}', fs={self.fs})" def open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs): From 2e85d5de97ee1fa655f5b0b9f4da6d28ad126e13 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 04:32:19 +0200 Subject: [PATCH 32/41] prefixfs: note that it is experimental --- fsspec/implementations/prefix.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index c34a72502..dfae2eed5 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -22,7 +22,10 @@ def _remove_root_marker(path, root_marker): class PrefixFileSystem(AbstractFileSystem): - """A meta-filesystem to add a prefix and delegate to another filesystem""" + """A meta-filesystem to add a prefix and delegate to another filesystem + + This interface is incomplete and experimental. + """ def __init__( self, From 7b81d655b1492b896179412d8744a9e975b3702d Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 05:24:06 +0200 Subject: [PATCH 33/41] prefixfs: don't handle protocol --- fsspec/implementations/prefix.py | 41 ++++---------------- fsspec/implementations/tests/test_prefix.py | 43 +-------------------- 2 files changed, 8 insertions(+), 76 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index dfae2eed5..590611039 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -1,7 +1,4 @@ -from fsspec import AbstractFileSystem -from fsspec.core import split_protocol -from fsspec.spec import AbstractBufferedFile -from fsspec.utils import stringify_path +from fsspec.spec import AbstractBufferedFile, AbstractFileSystem class PrefixBufferedFile(AbstractBufferedFile): @@ -9,18 +6,6 @@ def _fetch_range(self, start, end): pass -def _remove_trailing_sep(prefix, sep, root_marker): - if prefix[-len(sep) :] == sep and prefix != root_marker: - return prefix[: -len(sep)] - return prefix - - -def _remove_root_marker(path, root_marker): - if path[: len(root_marker)] == root_marker: - return path[len(root_marker) :] - return path - - class PrefixFileSystem(AbstractFileSystem): """A meta-filesystem to add a prefix and delegate to another filesystem @@ -50,32 +35,20 @@ def __init__( if not prefix: raise ValueError("empty prefix is not a valid prefix") - prefix = stringify_path(prefix) - self.prefix = _remove_trailing_sep( - prefix, sep=self.fs.sep, root_marker=self.fs.root_marker - ) + self.prefix = prefix def _add_fs_prefix(self, path): if isinstance(path, str): - path = stringify_path(path) - protocol, path = split_protocol(path) - - path = _remove_root_marker(path, root_marker=self.fs.root_marker) - if self.prefix == self.fs.root_marker: - path = f"{self.fs.root_marker}{path}" # don't add twice the root marker - else: - path = f"{self.prefix}{self.fs.sep}{path}" - - return protocol + "://" + path if protocol is not None else path + return f"{self.fs.root_marker}{path}" + return f"{self.prefix}{self.fs.sep}{path}" return [self._add_fs_prefix(x) for x in path] def _remove_fs_prefix(self, path): if isinstance(path, str): - path = stringify_path(path) - protocol, path = split_protocol(path) - path = path[len(self.prefix) + 1 :] - return protocol + "://" + path if protocol is not None else path + if self.prefix == self.fs.root_marker: + return path[len(self.prefix) :] + return path[len(self.prefix) + 1 :] return [self._remove_fs_prefix(x) for x in path] def mkdir(self, path, create_parents=True, **kwargs): diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 3941f2076..0b770ffba 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -11,11 +11,7 @@ import fsspec from fsspec.core import OpenFile from fsspec.implementations.local import make_path_posix -from fsspec.implementations.prefix import ( - PrefixFileSystem, - _remove_root_marker, - _remove_trailing_sep, -) +from fsspec.implementations.prefix import PrefixFileSystem files = { ".test.accounts.1.json": ( @@ -138,9 +134,7 @@ def test_isfile(): with filetexts(files, mode="b"): for f in files.keys(): assert fs.isfile(f) - assert fs.isfile("file://" + f) assert not fs.isfile("not-a-file") - assert not fs.isfile("file://not-a-file") def test_isdir(): @@ -180,41 +174,6 @@ def test_emtpy_prefix(): PrefixFileSystem(prefix=None, fs=fsspec.filesystem("file")) -@pytest.mark.parametrize( - "prefix, normalized_prefix", - [ - ("/", "/"), - ("a", "a"), - ("/a", "/a"), - ("a/", "a"), - ("/a/", "/a"), - ("/a/b/c/", "/a/b/c"), - ], -) -def test_remove_trailing_sep(prefix, normalized_prefix): - fs = fsspec.filesystem("file") - assert ( - _remove_trailing_sep(prefix, sep=fs.sep, root_marker=fs.root_marker) - == normalized_prefix - ) - - -@pytest.mark.parametrize( - "prefix,normalized_prefix", - [ - ("/", ""), - ("a", "a"), - ("/a", "a"), - ("a/", "a/"), - ("/a/", "a/"), - ("/a/b/c/", "a/b/c/"), - ], -) -def test_remove_root_marker(prefix, normalized_prefix): - fs = fsspec.filesystem("file") - assert _remove_root_marker(prefix, root_marker=fs.root_marker) == normalized_prefix - - @pytest.mark.parametrize( "ls_arg, expected_out", [ From ac4ffd1a795c6ef7ab24ce68b30f55542796466c Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 05:24:59 +0200 Subject: [PATCH 34/41] prefixfs: remove unused PrefixBufferedFile --- fsspec/implementations/prefix.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 590611039..c5acd85ff 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -1,9 +1,4 @@ -from fsspec.spec import AbstractBufferedFile, AbstractFileSystem - - -class PrefixBufferedFile(AbstractBufferedFile): - def _fetch_range(self, start, end): - pass +from fsspec.spec import AbstractFileSystem class PrefixFileSystem(AbstractFileSystem): From b1590d4e7183aa75d425276ba57b42a64253514b Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 05:35:46 +0200 Subject: [PATCH 35/41] prefixfs: preserve method signature --- fsspec/implementations/prefix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index c5acd85ff..3aaf9790e 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -89,12 +89,12 @@ def cp_file(self, path1, path2, **kwargs): def get_file(self, path1, path2, callback=None, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) - return self.fs.get_file(path1, path2, callback, **kwargs) + return self.fs.get_file(path1, path2, callback=callback, **kwargs) def put_file(self, path1, path2, callback=None, **kwargs): path1 = self._add_fs_prefix(path1) path2 = self._add_fs_prefix(path2) - return self.fs.put_file(path1, path2, callback, **kwargs) + return self.fs.put_file(path1, path2, callback=callback, **kwargs) def mv_file(self, path1, path2, **kwargs): path1 = self._add_fs_prefix(path1) From 99b045231c1014766160849c63ce3e0afb80f394 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 05:37:52 +0200 Subject: [PATCH 36/41] prefixfs: open: remove redundant docstring --- fsspec/implementations/prefix.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py index 3aaf9790e..746f1896f 100644 --- a/fsspec/implementations/prefix.py +++ b/fsspec/implementations/prefix.py @@ -139,28 +139,6 @@ def __repr__(self): return f"{self.__class__.__qualname__}(prefix='{self.prefix}', fs={self.fs})" def open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs): - """ - Return a file-like object from the fs - - The file-like object returned ignores an eventual PrefixFileSystem: - - the ``.path`` attribute is always an absolute path - - the ``.fs`` attribute, if present, would be the wrapped file-system - - The resultant instance must function correctly in a context ``with`` - block. - - Parameters - ---------- - path: str - Target file - mode: str like 'rb', 'w' - See builtin ``open()`` - block_size: int - Some indication of buffering - this is a value in bytes - cache_options : dict, optional - Extra arguments to pass through to the cache. - encoding, errors, newline: passed on to TextIOWrapper for text mode - """ return self.fs.open( self._add_fs_prefix(path), mode=mode, From 15c927c7ade64c181ee6b8b20dac78c5b86e4430 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 28 Nov 2021 20:39:24 +0200 Subject: [PATCH 37/41] tests: prefixfs: use posixpath --- fsspec/implementations/tests/test_prefix.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 0b770ffba..7febdb673 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -147,10 +147,11 @@ def test_isdir(): @pytest.mark.parametrize("dirname", ["/dir", "dir"]) -@pytest.mark.parametrize("prefix", ["a/b/c/d/e", "a/b/c/d/e/"]) -def test_directories(tmpdir, prefix, dirname): +def test_directories(tmpdir, dirname): + import posixpath + tmpdir = make_path_posix(str(tmpdir)) - prefix = os.path.join(tmpdir, prefix) + prefix = posixpath.join(tmpdir, "a/b/c/d/e") fs = PrefixFileSystem(prefix=prefix, fs=fsspec.filesystem("file")) fs.mkdir(dirname) From c49b11214185be3fa89974f78be59d44ce1257a7 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Mon, 29 Nov 2021 17:55:39 +0200 Subject: [PATCH 38/41] tests: prefix: remove redundant import --- fsspec/implementations/tests/test_prefix.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_prefix.py index 7febdb673..63a4e5252 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_prefix.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import os import os.path import tempfile From 9398f1fb2f54eea669849e99a22f3df5a4e14179 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 17 Dec 2021 00:02:55 +0200 Subject: [PATCH 39/41] prefixfs -> dirfs --- docs/source/api.rst | 6 +- fsspec/implementations/dirfs.py | 325 ++++++++++++++++++ fsspec/implementations/prefix.py | 148 -------- .../tests/{test_prefix.py => test_dir.py} | 66 ++-- 4 files changed, 372 insertions(+), 173 deletions(-) create mode 100644 fsspec/implementations/dirfs.py delete mode 100644 fsspec/implementations/prefix.py rename fsspec/implementations/tests/{test_prefix.py => test_dir.py} (75%) diff --git a/docs/source/api.rst b/docs/source/api.rst index 1420b8315..a06337c8f 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -119,7 +119,7 @@ Built-in Implementations fsspec.implementations.libarchive.LibArchiveFileSystem fsspec.implementations.dbfs.DatabricksFileSystem fsspec.implementations.reference.ReferenceFileSystem - fsspec.implementations.prefix.PrefixFileSystem + fsspec.implementations.dirfs.DirFileSystem .. autoclass:: fsspec.implementations.ftp.FTPFileSystem :members: __init__ @@ -184,8 +184,8 @@ Built-in Implementations .. autoclass:: fsspec.implementations.reference.ReferenceFileSystem :members: __init__ -.. autoclass:: fsspec.implementations.prefix.PrefixFileSystem - :members: __init__, open +.. autoclass:: fsspec.implementations.dirfs.DirFileSystem + :members: __init__ Other Known Implementations --------------------------- diff --git a/fsspec/implementations/dirfs.py b/fsspec/implementations/dirfs.py new file mode 100644 index 000000000..b43256f7d --- /dev/null +++ b/fsspec/implementations/dirfs.py @@ -0,0 +1,325 @@ +from ..asyn import AsyncFileSystem + + +class DirFileSystem(AsyncFileSystem): + def __init__(self, path, fs, *args, **storage_options): + """ + Parameters + ---------- + path: str + Path to the directory. + fs: AbstractFileSystem + An instantiated filesystem to wrap. + """ + super().__init__(*args, **storage_options) + + if self.asynchronous and not fs.async_impl: + raise ValueError("can't use asynchronous with non-async fs") + + if fs.async_impl and self.asynchronous != fs.asynchronous: + raise ValueError("both dirfs and fs should be in the same sync/async mode") + + self.path = fs._strip_protocol(path) + self.fs = fs + + def _join(self, path): + if isinstance(path, str): + if not self.path: + return path + if not path: + return self.path + return self.fs.sep.join((self.path, path)) + return [self._join(_path) for _path in path] + + def _relpath(self, path): + if isinstance(path, str): + if not self.path: + return path + if path == self.path: + return "" + prefix = self.path + self.fs.sep + assert path.startswith(prefix) + return path[len(prefix) :] + return [self._relpath(_path) for _path in path] + + # Wrappers below + + @property + def sep(self): + return self.fs.sep + + async def set_session(self, *args, **kwargs): + return await self.fs.set_session(*args, **kwargs) + + async def _rm_file(self, path, **kwargs): + return await self.fs._rm_file(self._join(path), **kwargs) + + def rm_file(self, path, **kwargs): + return self.fs.rm_file(self._join(path), **kwargs) + + async def _rm(self, path, *args, **kwargs): + return await self.fs._rm(self._join(path), *args, **kwargs) + + def rm(self, path, *args, **kwargs): + return self.fs.rm(self._join(path), *args, **kwargs) + + async def _cp_file(self, path1, path2, **kwargs): + return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs) + + def cp_file(self, path1, path2, **kwargs): + return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs) + + async def _copy( + self, + path1, + path2, + *args, + **kwargs, + ): + return await self.fs._copy( + self._join(path1), + self._join(path2), + *args, + **kwargs, + ) + + def copy(self, path1, path2, *args, **kwargs): + return self.fs.copy( + self._join(path1), + self._join(path2), + *args, + **kwargs, + ) + + async def _pipe(self, path, *args, **kwargs): + return await self.fs._pipe(self._join(path), *args, **kwargs) + + def pipe(self, path, *args, **kwargs): + return self.fs.pipe(self._join(path), *args, **kwargs) + + async def _cat_file(self, path, *args, **kwargs): + return await self.fs._cat_file(self._join(path), *args, **kwargs) + + def cat_file(self, path, *args, **kwargs): + return self.fs.cat_file(self._join(path), *args, **kwargs) + + async def _cat(self, path, *args, **kwargs): + ret = await self.fs._cat( + self._join(path), + *args, + **kwargs, + ) + + if isinstance(ret, dict): + return {self._relpath(key): value for key, value in ret.items()} + + return ret + + def cat(self, path, *args, **kwargs): + ret = self.fs.cat( + self._join(path), + *args, + **kwargs, + ) + + if isinstance(ret, dict): + return {self._relpath(key): value for key, value in ret.items()} + + return ret + + async def _put_file(self, lpath, rpath, **kwargs): + return await self.fs._put_file(lpath, self._join(rpath), **kwargs) + + def put_file(self, lpath, rpath, **kwargs): + return self.fs.put_file(lpath, self._join(rpath), **kwargs) + + async def _put( + self, + lpath, + rpath, + *args, + **kwargs, + ): + return await self.fs._put( + lpath, + self._join(rpath), + *args, + **kwargs, + ) + + def put(self, lpath, rpath, *args, **kwargs): + return self.fs.put( + lpath, + self._join(rpath), + *args, + **kwargs, + ) + + async def _get_file(self, rpath, lpath, **kwargs): + return await self.fs._get_file(self._join(rpath), lpath, **kwargs) + + def get_file(self, rpath, lpath, **kwargs): + return self.fs.get_file(self._join(rpath), lpath, **kwargs) + + async def _get(self, rpath, *args, **kwargs): + return await self.fs._get(self._join(rpath), *args, **kwargs) + + def get(self, rpath, *args, **kwargs): + return self.fs.get(self._join(rpath), *args, **kwargs) + + async def _isfile(self, path): + return await self.fs._isfile(self._join(path)) + + def isfile(self, path): + return self.fs.isfile(self._join(path)) + + async def _isdir(self, path): + return await self.fs._isdir(self._join(path)) + + def isdir(self, path): + return self.fs.isdir(self._join(path)) + + async def _size(self, path): + return await self.fs._size(self._join(path)) + + def size(self, path): + return self.fs.size(self._join(path)) + + async def _exists(self, path): + return await self.fs._exists(self._join(path)) + + def exists(self, path): + return self.fs.exists(self._join(path)) + + async def _info(self, path, **kwargs): + return await self.fs._info(self._join(path), **kwargs) + + def info(self, path, **kwargs): + return self.fs.info(self._join(path), **kwargs) + + async def _ls(self, path, detail=True, **kwargs): + ret = await self.fs._ls(self._join(path), detail=detail, **kwargs) + if detail: + for entry in ret: + entry["name"] = self._relpath(entry["name"]) + return ret + + return self._relpath(ret) + + def ls(self, path, detail=True, **kwargs): + ret = self.fs.ls(self._join(path), detail=detail, **kwargs) + if detail: + for entry in ret: + entry["name"] = self._relpath(entry["name"]) + return ret + + return self._relpath(ret) + + async def _walk(self, path, *args, **kwargs): + async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs): + yield self._relpath(root), dirs, files + + def walk(self, path, *args, **kwargs): + for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs): + yield self._relpath(root), dirs, files + + async def _glob(self, path, **kwargs): + detail = kwargs.get("detail", False) + ret = await self.fs._glob(self._join(path), **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + def glob(self, path, **kwargs): + detail = kwargs.get("detail", False) + ret = self.fs.glob(self._join(path), **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + async def _du(self, path, *args, **kwargs): + total = kwargs.get("total", True) + ret = await self.fs._du(self._join(path), *args, **kwargs) + if total: + return ret + + return {self._relpath(path): size for path, size in ret.items()} + + def du(self, path, *args, **kwargs): + total = kwargs.get("total", True) + ret = self.fs.du(self._join(path), *args, **kwargs) + if total: + return ret + + return {self._relpath(path): size for path, size in ret.items()} + + async def _find(self, path, *args, **kwargs): + detail = kwargs.get("detail", False) + ret = await self.fs._find(self._join(path), *args, **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + def find(self, path, *args, **kwargs): + detail = kwargs.get("detail", False) + ret = self.fs.find(self._join(path), *args, **kwargs) + if detail: + return {self._relpath(path): info for path, info in ret.items()} + return self._relpath(ret) + + async def _expand_path(self, path, *args, **kwargs): + return self._relpath( + await self.fs._expand_path(self._join(path), *args, **kwargs) + ) + + def expand_path(self, path, *args, **kwargs): + return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs)) + + async def _mkdir(self, path, *args, **kwargs): + return await self.fs._mkdir(self._join(path), *args, **kwargs) + + def mkdir(self, path, *args, **kwargs): + return self.fs.mkdir(self._join(path), *args, **kwargs) + + async def _makedirs(self, path, *args, **kwargs): + return await self.fs._makedirs(self._join(path), *args, **kwargs) + + def makedirs(self, path, *args, **kwargs): + return self.fs.makedirs(self._join(path), *args, **kwargs) + + def rmdir(self, path): + return self.fs.rmdir(self._join(path)) + + def mv_file(self, path1, path2, **kwargs): + return self.fs.mv_file( + self._join(path1), + self._join(path2), + **kwargs, + ) + + def touch(self, path, **kwargs): + return self.fs.touch(self._join(path), **kwargs) + + def created(self, path): + return self.fs.created(self._join(path)) + + def modified(self, path): + return self.fs.modified(self._join(path)) + + def sign(self, path, *args, **kwargs): + return self.fs.sign(self._join(path), *args, **kwargs) + + def __repr__(self): + return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})" + + def open( + self, + path, + *args, + **kwargs, + ): + return self.fs.open( + self._join(path), + *args, + **kwargs, + ) diff --git a/fsspec/implementations/prefix.py b/fsspec/implementations/prefix.py deleted file mode 100644 index 746f1896f..000000000 --- a/fsspec/implementations/prefix.py +++ /dev/null @@ -1,148 +0,0 @@ -from fsspec.spec import AbstractFileSystem - - -class PrefixFileSystem(AbstractFileSystem): - """A meta-filesystem to add a prefix and delegate to another filesystem - - This interface is incomplete and experimental. - """ - - def __init__( - self, - prefix, - fs, - *args, - **storage_options, - ): - """ - Parameters - ---------- - prefix: str - The prefix to append to all paths - - fs: AbstractFileSystem - An instantiated filesystem to wrap. All operations are delegated to - this filesystem after appending the specified prefix - """ - super().__init__(*args, **storage_options) - self.fs = fs - - if not prefix: - raise ValueError("empty prefix is not a valid prefix") - - self.prefix = prefix - - def _add_fs_prefix(self, path): - if isinstance(path, str): - if self.prefix == self.fs.root_marker: - return f"{self.fs.root_marker}{path}" - return f"{self.prefix}{self.fs.sep}{path}" - return [self._add_fs_prefix(x) for x in path] - - def _remove_fs_prefix(self, path): - if isinstance(path, str): - if self.prefix == self.fs.root_marker: - return path[len(self.prefix) :] - return path[len(self.prefix) + 1 :] - return [self._remove_fs_prefix(x) for x in path] - - def mkdir(self, path, create_parents=True, **kwargs): - path = self._add_fs_prefix(path) - return self.fs.mkdir(path=path, create_parents=create_parents, **kwargs) - - def makedirs(self, path, exist_ok=False): - path = self._add_fs_prefix(path) - return self.fs.makedirs(path=path, exist_ok=exist_ok) - - def rmdir(self, path): - path = self._add_fs_prefix(path) - return self.fs.rmdir(path=path) - - def ls( - self, - path, - detail=False, - **kwargs, - ): - path = self._add_fs_prefix(path) - ls_out = self.fs.ls(path=path, detail=detail, **kwargs) - if detail: - for out in ls_out: - out["name"] = self._remove_fs_prefix(out["name"]) - return ls_out - return self._remove_fs_prefix(ls_out) - - def glob(self, path, **kwargs): - path = self._add_fs_prefix(path) - glob_out = self.fs.glob(path=path, **kwargs) - return [self._remove_fs_prefix(x) for x in glob_out] - - def info(self, path, **kwargs): - path = self._add_fs_prefix(path) - return self.fs.info(path=path, **kwargs) - - def cp_file(self, path1, path2, **kwargs): - path1 = self._add_fs_prefix(path1) - path2 = self._add_fs_prefix(path2) - return self.fs.cp_file(path1, path2, **kwargs) - - def get_file(self, path1, path2, callback=None, **kwargs): - path1 = self._add_fs_prefix(path1) - path2 = self._add_fs_prefix(path2) - return self.fs.get_file(path1, path2, callback=callback, **kwargs) - - def put_file(self, path1, path2, callback=None, **kwargs): - path1 = self._add_fs_prefix(path1) - path2 = self._add_fs_prefix(path2) - return self.fs.put_file(path1, path2, callback=callback, **kwargs) - - def mv_file(self, path1, path2, **kwargs): - path1 = self._add_fs_prefix(path1) - path2 = self._add_fs_prefix(path2) - return self.fs.mv_file(path1, path2, **kwargs) - - def rm_file(self, path): - path = self._add_fs_prefix(path) - return self.fs.rm_file(path) - - def rm(self, path, recursive=False, maxdepth=None): - path = self._add_fs_prefix(path) - return self.fs.rm(path, recursive=recursive, maxdepth=maxdepth) - - def touch(self, path, **kwargs): - path = self._add_fs_prefix(path) - return self.fs.touch(path, **kwargs) - - def created(self, path): - path = self._add_fs_prefix(path) - return self.fs.created(path) - - def modified(self, path): - path = self._add_fs_prefix(path) - return self.fs.modified(path) - - def sign(self, path, expiration=100, **kwargs): - path = self._add_fs_prefix(path) - return self.fs.sign(path, expiration=expiration, **kwargs) - - def cat( - self, - path, - recursive=False, - on_error="raise", - **kwargs, - ): - path = self._add_fs_prefix(path) - return self.fs.cat(path, recursive=recursive, on_error=on_error, **kwargs) - - def __repr__(self): - return f"{self.__class__.__qualname__}(prefix='{self.prefix}', fs={self.fs})" - - def open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs): - return self.fs.open( - self._add_fs_prefix(path), - mode=mode, - block_size=block_size, - cache_options=cache_options, - **kwargs, - ) diff --git a/fsspec/implementations/tests/test_prefix.py b/fsspec/implementations/tests/test_dir.py similarity index 75% rename from fsspec/implementations/tests/test_prefix.py rename to fsspec/implementations/tests/test_dir.py index 63a4e5252..738f68bfa 100644 --- a/fsspec/implementations/tests/test_prefix.py +++ b/fsspec/implementations/tests/test_dir.py @@ -1,5 +1,7 @@ +import asyncio import os import os.path +import sys import tempfile from contextlib import contextmanager from pathlib import Path @@ -8,8 +10,10 @@ import fsspec from fsspec.core import OpenFile +from fsspec.implementations.dirfs import DirFileSystem from fsspec.implementations.local import make_path_posix -from fsspec.implementations.prefix import PrefixFileSystem + +from .test_http import data, realfile, server # noqa: F401 files = { ".test.accounts.1.json": ( @@ -78,14 +82,14 @@ def filetexts(d, open=open, mode="t"): def test_open(): with filetexts(csv_files, mode="b"): - fs = PrefixFileSystem(prefix="a", fs=fsspec.filesystem("file")) + fs = DirFileSystem("a", fs=fsspec.filesystem("file")) with fs.open("b/c/.test.fakedata.3.csv") as f: assert f.read() == b"a,b\n3,4,5\n" def test_prefix_root(): with filetexts(csv_files, mode="b"): - fs = PrefixFileSystem(prefix="/", fs=fsspec.filesystem("file")) + fs = DirFileSystem("/", fs=fsspec.filesystem("file")) abs_path_file = os.path.abspath("a/b/c/.test.fakedata.3.csv") # Risk double root marker (in path and in prefix) @@ -101,7 +105,7 @@ def test_prefix_root(): def test_cats(): with filetexts(csv_files, mode="b"): - fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) + fs = DirFileSystem(".", fs=fsspec.filesystem("file")) assert fs.cat(".test.fakedata.1.csv") == b"a,b\n" b"1,2\n" out = set(fs.cat([".test.fakedata.1.csv", ".test.fakedata.2.csv"]).values()) assert out == {b"a,b\n" b"1,2\n", b"a,b\n" b"3,4\n"} @@ -121,23 +125,23 @@ def test_cats(): def test_not_found(): fn = "not-a-file" - fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) + fs = DirFileSystem(".", fs=fsspec.filesystem("file")) with pytest.raises((FileNotFoundError, OSError)): with OpenFile(fs, fn, mode="rb"): pass def test_isfile(): - fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) with filetexts(files, mode="b"): + fs = DirFileSystem(os.getcwd(), fs=fsspec.filesystem("file")) for f in files.keys(): assert fs.isfile(f) assert not fs.isfile("not-a-file") def test_isdir(): - fs = PrefixFileSystem(prefix=".", fs=fsspec.filesystem("file")) with filetexts(files, mode="b"): + fs = DirFileSystem(os.getcwd(), fs=fsspec.filesystem("file")) for f in files.keys(): assert fs.isfile(f) assert not fs.isdir(f) @@ -151,26 +155,18 @@ def test_directories(tmpdir, dirname): tmpdir = make_path_posix(str(tmpdir)) prefix = posixpath.join(tmpdir, "a/b/c/d/e") - fs = PrefixFileSystem(prefix=prefix, fs=fsspec.filesystem("file")) + fs = DirFileSystem(prefix, fs=fsspec.filesystem("file")) fs.mkdir(dirname) assert not os.path.exists(os.path.join(tmpdir, "dir")) assert os.path.exists(os.path.join(prefix, "dir")) - assert fs.ls(".") == ["./dir"] + assert fs.ls(".", detail=False) == ["./dir"] fs.rmdir(dirname) assert not os.path.exists(os.path.join(prefix, "dir")) - fs = PrefixFileSystem(prefix=f"{tmpdir}/a", fs=fsspec.filesystem("file")) - assert fs.ls(".") == ["./b"] + fs = DirFileSystem(f"{tmpdir}/a", fs=fsspec.filesystem("file")) + assert fs.ls(".", detail=False) == ["./b"] fs.rm("b", recursive=True) - assert fs.ls(".") == [] - - -def test_emtpy_prefix(): - with pytest.raises(ValueError): - PrefixFileSystem(prefix="", fs=fsspec.filesystem("file")) - - with pytest.raises(ValueError): - PrefixFileSystem(prefix=None, fs=fsspec.filesystem("file")) + assert fs.ls(".", detail=False) == [] @pytest.mark.parametrize( @@ -192,5 +188,31 @@ def test_emtpy_prefix(): ) def test_ls(tmpdir, ls_arg, expected_out): os.makedirs(os.path.join(make_path_posix(str(tmpdir)), "a/b/c/d/e/")) - fs = PrefixFileSystem(prefix=f"{tmpdir}/a", fs=fsspec.filesystem("file")) - assert fs.ls(ls_arg) == expected_out + fs = DirFileSystem(f"{tmpdir}/a", fs=fsspec.filesystem("file")) + assert fs.ls(ls_arg, detail=False) == expected_out + + +def test_async_fs_list(server): # noqa: F811 + h = fsspec.filesystem("http") + fs = DirFileSystem(server + "/index", fs=h) + out = fs.glob("*") + assert out == ["realfile"] + + +@pytest.mark.skipif(sys.version_info < (3, 7), reason="no asyncio.run in py36") +def test_async_this_thread(server): # noqa: F811 + async def _test(): + h = fsspec.filesystem("http", asynchronous=True) + fs = DirFileSystem(server + "/index", fs=h) + + session = await fs.set_session() # creates client + + url = "realfile" + with pytest.raises((NotImplementedError, RuntimeError)): + fs.cat([url]) + out = await fs._cat([url]) + del fs + assert out == {url: data} + await session.close() + + asyncio.run(_test()) From 47dd357615181bbef5c8e84e1ae8509f6ca037bf Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 19 Dec 2021 21:09:26 +0200 Subject: [PATCH 40/41] remove tests --- fsspec/implementations/tests/test_dir.py | 218 ----------------------- 1 file changed, 218 deletions(-) delete mode 100644 fsspec/implementations/tests/test_dir.py diff --git a/fsspec/implementations/tests/test_dir.py b/fsspec/implementations/tests/test_dir.py deleted file mode 100644 index 738f68bfa..000000000 --- a/fsspec/implementations/tests/test_dir.py +++ /dev/null @@ -1,218 +0,0 @@ -import asyncio -import os -import os.path -import sys -import tempfile -from contextlib import contextmanager -from pathlib import Path - -import pytest - -import fsspec -from fsspec.core import OpenFile -from fsspec.implementations.dirfs import DirFileSystem -from fsspec.implementations.local import make_path_posix - -from .test_http import data, realfile, server # noqa: F401 - -files = { - ".test.accounts.1.json": ( - b'{"amount": 100, "name": "Alice"}\n' - b'{"amount": 200, "name": "Bob"}\n' - b'{"amount": 300, "name": "Charlie"}\n' - b'{"amount": 400, "name": "Dennis"}\n' - ), - ".test.accounts.2.json": ( - b'{"amount": 500, "name": "Alice"}\n' - b'{"amount": 600, "name": "Bob"}\n' - b'{"amount": 700, "name": "Charlie"}\n' - b'{"amount": 800, "name": "Dennis"}\n' - ), -} - - -csv_files = { - ".test.fakedata.1.csv": b"a,b\n1,2\n", - ".test.fakedata.2.csv": b"a,b\n3,4\n", - "a/b/c/.test.fakedata.3.csv": b"a,b\n3,4,5\n", -} -odir = os.getcwd() - - -@contextmanager -def filetexts(d, open=open, mode="t"): - """Dumps a number of textfiles to disk - - d - dict - a mapping from filename to text like {'a.csv': '1,1\n2,2'} - - Since this is meant for use in tests, this context manager will - automatically switch to a temporary current directory, to avoid - race conditions when running tests in parallel. - """ - dirname = tempfile.mkdtemp() - try: - os.chdir(dirname) - for filename, text in d.items(): - filename = Path(filename) - - if not filename.parent.exists(): - filename.parent.mkdir(parents=True, exist_ok=True) - - f = open(filename, "w" + mode) - try: - f.write(text) - finally: - try: - f.close() - except AttributeError: - pass - - yield list(d) - - for filename in d: - if os.path.exists(filename): - try: - os.remove(filename) - except (IOError, OSError): - pass - finally: - os.chdir(odir) - - -def test_open(): - with filetexts(csv_files, mode="b"): - fs = DirFileSystem("a", fs=fsspec.filesystem("file")) - with fs.open("b/c/.test.fakedata.3.csv") as f: - assert f.read() == b"a,b\n3,4,5\n" - - -def test_prefix_root(): - with filetexts(csv_files, mode="b"): - fs = DirFileSystem("/", fs=fsspec.filesystem("file")) - abs_path_file = os.path.abspath("a/b/c/.test.fakedata.3.csv") - - # Risk double root marker (in path and in prefix) - with fs.open(abs_path_file) as f: - assert f.read() == b"a,b\n3,4,5\n" - - # no root marker in windows paths - if os.name != "nt": - # Without root marker - with fs.open(abs_path_file[1:]) as f: - assert f.read() == b"a,b\n3,4,5\n" - - -def test_cats(): - with filetexts(csv_files, mode="b"): - fs = DirFileSystem(".", fs=fsspec.filesystem("file")) - assert fs.cat(".test.fakedata.1.csv") == b"a,b\n" b"1,2\n" - out = set(fs.cat([".test.fakedata.1.csv", ".test.fakedata.2.csv"]).values()) - assert out == {b"a,b\n" b"1,2\n", b"a,b\n" b"3,4\n"} - assert fs.cat(".test.fakedata.1.csv", None, None) == b"a,b\n" b"1,2\n" - assert fs.cat(".test.fakedata.1.csv", start=1, end=6) == b"a,b\n" b"1,2\n"[1:6] - assert fs.cat(".test.fakedata.1.csv", start=-1) == b"a,b\n" b"1,2\n"[-1:] - assert ( - fs.cat(".test.fakedata.1.csv", start=1, end=-2) == b"a,b\n" b"1,2\n"[1:-2] - ) - out = set( - fs.cat( - [".test.fakedata.1.csv", ".test.fakedata.2.csv"], start=1, end=-1 - ).values() - ) - assert out == {b"a,b\n" b"1,2\n"[1:-1], b"a,b\n" b"3,4\n"[1:-1]} - - -def test_not_found(): - fn = "not-a-file" - fs = DirFileSystem(".", fs=fsspec.filesystem("file")) - with pytest.raises((FileNotFoundError, OSError)): - with OpenFile(fs, fn, mode="rb"): - pass - - -def test_isfile(): - with filetexts(files, mode="b"): - fs = DirFileSystem(os.getcwd(), fs=fsspec.filesystem("file")) - for f in files.keys(): - assert fs.isfile(f) - assert not fs.isfile("not-a-file") - - -def test_isdir(): - with filetexts(files, mode="b"): - fs = DirFileSystem(os.getcwd(), fs=fsspec.filesystem("file")) - for f in files.keys(): - assert fs.isfile(f) - assert not fs.isdir(f) - assert not fs.isdir("not-a-dir") - - -@pytest.mark.parametrize("dirname", ["/dir", "dir"]) -def test_directories(tmpdir, dirname): - import posixpath - - tmpdir = make_path_posix(str(tmpdir)) - prefix = posixpath.join(tmpdir, "a/b/c/d/e") - - fs = DirFileSystem(prefix, fs=fsspec.filesystem("file")) - fs.mkdir(dirname) - assert not os.path.exists(os.path.join(tmpdir, "dir")) - assert os.path.exists(os.path.join(prefix, "dir")) - assert fs.ls(".", detail=False) == ["./dir"] - fs.rmdir(dirname) - assert not os.path.exists(os.path.join(prefix, "dir")) - - fs = DirFileSystem(f"{tmpdir}/a", fs=fsspec.filesystem("file")) - assert fs.ls(".", detail=False) == ["./b"] - fs.rm("b", recursive=True) - assert fs.ls(".", detail=False) == [] - - -@pytest.mark.parametrize( - "ls_arg, expected_out", - [ - (".", ["./b"]), - ("./", ["./b"]), - ("./b", ["./b/c"]), - ("./b/", ["./b/c"]), - ("b", ["b/c"]), - ("b/", ["b/c"]), - ("./b/c/d", ["./b/c/d/e"]), - ("./b/c/d/", ["./b/c/d/e"]), - ("b/c/d", ["b/c/d/e"]), - ("b/c/d/", ["b/c/d/e"]), - ("b/c/d/e", []), - ("b/c/d/e/", []), - ], -) -def test_ls(tmpdir, ls_arg, expected_out): - os.makedirs(os.path.join(make_path_posix(str(tmpdir)), "a/b/c/d/e/")) - fs = DirFileSystem(f"{tmpdir}/a", fs=fsspec.filesystem("file")) - assert fs.ls(ls_arg, detail=False) == expected_out - - -def test_async_fs_list(server): # noqa: F811 - h = fsspec.filesystem("http") - fs = DirFileSystem(server + "/index", fs=h) - out = fs.glob("*") - assert out == ["realfile"] - - -@pytest.mark.skipif(sys.version_info < (3, 7), reason="no asyncio.run in py36") -def test_async_this_thread(server): # noqa: F811 - async def _test(): - h = fsspec.filesystem("http", asynchronous=True) - fs = DirFileSystem(server + "/index", fs=h) - - session = await fs.set_session() # creates client - - url = "realfile" - with pytest.raises((NotImplementedError, RuntimeError)): - fs.cat([url]) - out = await fs._cat([url]) - del fs - assert out == {url: data} - await session.close() - - asyncio.run(_test()) From 8b6512cb117da4ee386d6b507f79e531cdd5c8f1 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 13 Jan 2022 17:57:40 +0200 Subject: [PATCH 41/41] tests: dirfs: add unit tests --- ci/environment-win.yml | 2 + fsspec/implementations/.dirfs.py.swp | Bin 0 -> 16384 bytes fsspec/implementations/tests/test_dirfs.py | 566 +++++++++++++++++++++ tox.ini | 2 + 4 files changed, 570 insertions(+) create mode 100644 fsspec/implementations/.dirfs.py.swp create mode 100644 fsspec/implementations/tests/test_dirfs.py diff --git a/ci/environment-win.yml b/ci/environment-win.yml index af9671ea9..5a09b61e9 100644 --- a/ci/environment-win.yml +++ b/ci/environment-win.yml @@ -12,8 +12,10 @@ dependencies: - pyftpdlib - cloudpickle - pytest + - pytest-asyncio - pytest-benchmark - pytest-cov + - pytest-mock - pytest-vcr - python-libarchive-c - numpy diff --git a/fsspec/implementations/.dirfs.py.swp b/fsspec/implementations/.dirfs.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..afe78095e97a1f4d2a74b766c82e4f79675e920b GIT binary patch literal 16384 zcmeI2e~2DO8OJ9!O`6u!604#9VdmbTdEd+Bz2-!e!;_}bMros^jTKVkx_RH(yG{1p z-R|t0%cWfUhpmXeAcBGwtB8Nuf};I_PzsF*N+AUW1=~Pd+CS6?f?BLX_4~}s&i;7c zyW3Z-RoR8l-RMdVV0}icH18k+JB_dQ$!J2XP<s$MySY-6e!BAfRrCC)NxxKiZf=`jjRK7V zjRK7VjRK7VjRK7VjRK7VjRK7VjRK7VuY&?^pRrx2cPG67!2kcV`2XUY7<(2x10De@ zU;vJSL*U(D2l(UFjQtoq2JQu)1|J1mz~$ilcE(NsAG{Cj0q+D?f#1B5v8TZIz*oT# zydP`>SAgGM#n?~5cfprH0B!^C0Drxbv0s3%gZsgK;9hVF^uUL}A@COP%r?e;4ITm~ zfDdj2Tfr9a%PScBGB^j8z+Ny7{_&zuX&VnWINpKA81ut!8>_zY+a0aY^gW#3R82bzO8Tbsi4;%q= z;99T={0?LIJa`T~0qz1ZI0ile<^YZJTfqhwH&-H3zQCmjye_`L=(%Z|(UZRLINi{L z86Lao(hQ$=lSPSN(eq7Ab%yzlqYVjDU(GZo5OIa5wmT_`2# zrk#lVuuYBY48Kc1++Hh=x9Dt)yUChMmyR-@v7kMqqHybnICy)O5P z{e{P7W@V!GqIjh+bLlSk$o;wlbL&pw*Q*DWvC2DwxlJ0-H9DE~TEx^-{GgdTm(N_f z-JjnoB~;#`7c0fAg{q!$?&1Zn!3b0b1!HF0r!`cQv`}8FUl@e77tpF01uK)bqS)1f znF$(``BpNRWO*{VYs?uVd8NVTmB+YB)v|1%III*a*3+8l1H~ET| zl`k~ES~`r(v7=nFd=jE(Sd!AjXimdm71U!%S&HYRvfAp%F_F}Tk?*R~dUen(QI(aq z66&g{Qx&Bw`?v&XArdWYfnq-~{bw@U6h(A$q4xOW=$qClS;b9Yu9c6}-M z9dyL~V3I^hyLCK*BdPD4xf}Ljoy(ue^eFnh=b&Qa(hBOO|Mg@GuJZ>E?K{+?dg- zsbl1}S^cV+h1%s1)4$#6Iu1T0PaP*;m}D9y?xJv_SbY$lce+)XRtG|;MPWedyQzSV z_5*XCMtm77OxF&ZC+GQr;}|Trm)?YYx2q4)?PJp7RyZ9MB>6EoqK9^@5Jg8$5@Y{sN$Q|4}di z0XPV*0~Zn3KLs8JKLlCq{~r8(CHM*A`X2xR4uTuOPC&8#L*Q%R7BCGu;17u3e-1ti z&Vp5NGk6(s`=7yia5p#uJ_4==+rh68zkd;Y0UQHw1J{995xYMPE`V=>d%&l_0k9WL z0S+j>KM$S+-vJU(Tz>?#z!Z2H@%m%nQSbn`1>6i4z$};oI{*W(K;LJ<6W~Gc6+oJQ z4m4MzK%;$T%xW4mkU1|hZs$5~b9f`vUg5A7tYduFgTF6abLu6Fc^QEw87h!gCH z)Q2&rwXAE>UkoRS#%%s<CldmJ1Hqm0@a{fS?h&CHdx63 zra0QJt<&Jj#w+4Sd8=JtuL0bB4_8DHC?R|`cGQ3K9;KKQz}}&0hxS* zljG!Dufw)L{mQDnwI(XB11hp9k&9im1-YKWg71p(m$jB>7_kG-BvwA`7n(nUB=Jwi>Z73z>9!1O5HUbra!DP sovk%xs^Tpw4bE~T85VrS*!^#opyC>@g%Sm?m{^eVI*c)W?bnol0K3#E^Z)<= literal 0 HcmV?d00001 diff --git a/fsspec/implementations/tests/test_dirfs.py b/fsspec/implementations/tests/test_dirfs.py new file mode 100644 index 000000000..e9afbf17a --- /dev/null +++ b/fsspec/implementations/tests/test_dirfs.py @@ -0,0 +1,566 @@ +import sys + +import pytest + +from fsspec.asyn import AsyncFileSystem +from fsspec.implementations.dirfs import DirFileSystem +from fsspec.spec import AbstractFileSystem + +PATH = "path/to/dir" +ARGS = ["foo", "bar"] +KWARGS = {"baz": "baz", "qux": "qux"} + + +@pytest.fixture +def make_fs(mocker): + def _make_fs(async_impl=False, asynchronous=False): + attrs = { + "sep": "/", + "async_impl": async_impl, + "_strip_protocol": lambda path: path, + } + + if async_impl: + if asynchronous and sys.version_info < (3, 8): + pytest.skip("no AsyncMock before Python 3.8") + + attrs["asynchronous"] = asynchronous + cls = AsyncFileSystem + else: + cls = AbstractFileSystem + + fs = mocker.MagicMock(spec=cls, **attrs) + + return fs + + return _make_fs + + +@pytest.fixture( + params=[ + pytest.param(False, id="sync"), + pytest.param(True, id="async"), + ] +) +def fs(make_fs, request): + return make_fs(async_impl=request.param) + + +@pytest.fixture +def asyncfs(make_fs): + return make_fs(async_impl=True, asynchronous=True) + + +@pytest.fixture +def make_dirfs(): + def _make_dirfs(fs, asynchronous=False): + return DirFileSystem(PATH, fs, asynchronous=asynchronous) + + return _make_dirfs + + +@pytest.fixture +def dirfs(make_dirfs, fs): + return make_dirfs(fs) + + +@pytest.fixture +def adirfs(make_dirfs, asyncfs): + return make_dirfs(asyncfs, asynchronous=True) + + +def test_dirfs(fs, asyncfs): + DirFileSystem("path", fs) + DirFileSystem("path", asyncfs, asynchronous=True) + + with pytest.raises(ValueError): + DirFileSystem("path", asyncfs) + + with pytest.raises(ValueError): + DirFileSystem("path", fs, asynchronous=True) + + +@pytest.mark.parametrize( + "root, rel, full", + [ + ("", "", ""), + ("", "foo", "foo"), + ("root", "", "root"), + ("root", "foo", "root/foo"), + ], +) +def test_path(fs, root, rel, full): + dirfs = DirFileSystem(root, fs) + assert dirfs._join(rel) == full + assert dirfs._relpath(full) == rel + + +def test_sep(mocker, dirfs): + sep = mocker.Mock() + dirfs.fs.sep = sep + assert dirfs.sep == sep + + +@pytest.mark.asyncio +async def test_set_session(mocker, adirfs): + adirfs.fs.set_session = mocker.AsyncMock() + assert ( + await adirfs.set_session(*ARGS, **KWARGS) == adirfs.fs.set_session.return_value + ) + adirfs.fs.set_session.assert_called_once_with(*ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_rm_file(adirfs): + await adirfs._rm_file("file", **KWARGS) + adirfs.fs._rm_file.assert_called_once_with(f"{PATH}/file", **KWARGS) + + +def test_rm_file(dirfs): + dirfs.rm_file("file", **KWARGS) + dirfs.fs.rm_file.assert_called_once_with("path/to/dir/file", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_rm(adirfs): + await adirfs._rm("file", *ARGS, **KWARGS) + adirfs.fs._rm.assert_called_once_with("path/to/dir/file", *ARGS, **KWARGS) + + +def test_rm(dirfs): + dirfs.rm("file", *ARGS, **KWARGS) + dirfs.fs.rm.assert_called_once_with("path/to/dir/file", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_cp_file(adirfs): + await adirfs._cp_file("one", "two", **KWARGS) + adirfs.fs._cp_file.assert_called_once_with(f"{PATH}/one", f"{PATH}/two", **KWARGS) + + +def test_cp_file(dirfs): + dirfs.cp_file("one", "two", **KWARGS) + dirfs.fs.cp_file.assert_called_once_with(f"{PATH}/one", f"{PATH}/two", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_copy(adirfs): + await adirfs._copy("one", "two", *ARGS, **KWARGS) + adirfs.fs._copy.assert_called_once_with( + f"{PATH}/one", f"{PATH}/two", *ARGS, **KWARGS + ) + + +def test_copy(dirfs): + dirfs.copy("one", "two", *ARGS, **KWARGS) + dirfs.fs.copy.assert_called_once_with(f"{PATH}/one", f"{PATH}/two", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_pipe(adirfs): + await adirfs._pipe("file", *ARGS, **KWARGS) + adirfs.fs._pipe.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +def test_pipe(dirfs): + dirfs.pipe("file", *ARGS, **KWARGS) + dirfs.fs.pipe.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_cat_file(adirfs): + assert ( + await adirfs._cat_file("file", *ARGS, **KWARGS) + == adirfs.fs._cat_file.return_value + ) + adirfs.fs._cat_file.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +def test_cat_file(dirfs): + assert dirfs.cat_file("file", *ARGS, **KWARGS) == dirfs.fs.cat_file.return_value + dirfs.fs.cat_file.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_cat(adirfs): + assert await adirfs._cat("file", *ARGS, **KWARGS) == adirfs.fs._cat.return_value + adirfs.fs._cat.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +def test_cat(dirfs): + assert dirfs.cat("file", *ARGS, **KWARGS) == dirfs.fs.cat.return_value + dirfs.fs.cat.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_cat_list(adirfs): + adirfs.fs._cat.return_value = {f"{PATH}/one": "foo", f"{PATH}/two": "bar"} + assert await adirfs._cat(["one", "two"], *ARGS, **KWARGS) == { + "one": "foo", + "two": "bar", + } + adirfs.fs._cat.assert_called_once_with( + [f"{PATH}/one", f"{PATH}/two"], *ARGS, **KWARGS + ) + + +def test_cat_list(dirfs): + dirfs.fs.cat.return_value = {f"{PATH}/one": "foo", f"{PATH}/two": "bar"} + assert dirfs.cat(["one", "two"], *ARGS, **KWARGS) == {"one": "foo", "two": "bar"} + dirfs.fs.cat.assert_called_once_with( + [f"{PATH}/one", f"{PATH}/two"], *ARGS, **KWARGS + ) + + +@pytest.mark.asyncio +async def test_async_put_file(adirfs): + await adirfs._put_file("local", "file", **KWARGS) + adirfs.fs._put_file.assert_called_once_with("local", f"{PATH}/file", **KWARGS) + + +def test_put_file(dirfs): + dirfs.put_file("local", "file", **KWARGS) + dirfs.fs.put_file.assert_called_once_with("local", f"{PATH}/file", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_put(adirfs): + await adirfs._put("local", "file", **KWARGS) + adirfs.fs._put.assert_called_once_with("local", f"{PATH}/file", **KWARGS) + + +def test_put(dirfs): + dirfs.put("local", "file", **KWARGS) + dirfs.fs.put.assert_called_once_with("local", f"{PATH}/file", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_get_file(adirfs): + await adirfs._get_file("file", "local", **KWARGS) + adirfs.fs._get_file.assert_called_once_with(f"{PATH}/file", "local", **KWARGS) + + +def test_get_file(dirfs): + dirfs.get_file("file", "local", **KWARGS) + dirfs.fs.get_file.assert_called_once_with(f"{PATH}/file", "local", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_get(adirfs): + await adirfs._get("file", "local", **KWARGS) + adirfs.fs._get.assert_called_once_with(f"{PATH}/file", "local", **KWARGS) + + +def test_get(dirfs): + dirfs.get("file", "local", **KWARGS) + dirfs.fs.get.assert_called_once_with(f"{PATH}/file", "local", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_isfile(adirfs): + assert await adirfs._isfile("file") == adirfs.fs._isfile.return_value + adirfs.fs._isfile.assert_called_once_with(f"{PATH}/file") + + +def test_isfile(dirfs): + assert dirfs.isfile("file") == dirfs.fs.isfile.return_value + dirfs.fs.isfile.assert_called_once_with(f"{PATH}/file") + + +@pytest.mark.asyncio +async def test_async_isdir(adirfs): + assert await adirfs._isdir("file") == adirfs.fs._isdir.return_value + adirfs.fs._isdir.assert_called_once_with(f"{PATH}/file") + + +def test_isdir(dirfs): + assert dirfs.isdir("file") == dirfs.fs.isdir.return_value + dirfs.fs.isdir.assert_called_once_with(f"{PATH}/file") + + +@pytest.mark.asyncio +async def test_async_size(adirfs): + assert await adirfs._size("file") == adirfs.fs._size.return_value + adirfs.fs._size.assert_called_once_with(f"{PATH}/file") + + +def test_size(dirfs): + assert dirfs.size("file") == dirfs.fs.size.return_value + dirfs.fs.size.assert_called_once_with(f"{PATH}/file") + + +@pytest.mark.asyncio +async def test_async_exists(adirfs): + assert await adirfs._exists("file") == adirfs.fs._exists.return_value + adirfs.fs._exists.assert_called_once_with(f"{PATH}/file") + + +def test_exists(dirfs): + assert dirfs.exists("file") == dirfs.fs.exists.return_value + dirfs.fs.exists.assert_called_once_with(f"{PATH}/file") + + +@pytest.mark.asyncio +async def test_async_info(adirfs): + assert await adirfs._info("file", **KWARGS) == adirfs.fs._info.return_value + adirfs.fs._info.assert_called_once_with(f"{PATH}/file", **KWARGS) + + +def test_info(dirfs): + assert dirfs.info("file", **KWARGS) == dirfs.fs.info.return_value + dirfs.fs.info.assert_called_once_with(f"{PATH}/file", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_ls(adirfs): + adirfs.fs._ls.return_value = [f"{PATH}/file"] + assert await adirfs._ls("file", detail=False, **KWARGS) == ["file"] + adirfs.fs._ls.assert_called_once_with(f"{PATH}/file", detail=False, **KWARGS) + + +def test_ls(dirfs): + dirfs.fs.ls.return_value = [f"{PATH}/file"] + assert dirfs.ls("file", detail=False, **KWARGS) == ["file"] + dirfs.fs.ls.assert_called_once_with(f"{PATH}/file", detail=False, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_ls_detail(adirfs): + adirfs.fs._ls.return_value = [{"name": f"{PATH}/file", "foo": "bar"}] + assert await adirfs._ls("file", detail=True, **KWARGS) == [ + {"name": "file", "foo": "bar"} + ] + adirfs.fs._ls.assert_called_once_with(f"{PATH}/file", detail=True, **KWARGS) + + +def test_ls_detail(dirfs): + dirfs.fs.ls.return_value = [{"name": f"{PATH}/file", "foo": "bar"}] + assert dirfs.ls("file", detail=True, **KWARGS) == [{"name": "file", "foo": "bar"}] + dirfs.fs.ls.assert_called_once_with(f"{PATH}/file", detail=True, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_walk(adirfs, mocker): + async def _walk(path, *args, **kwargs): + yield (f"{PATH}/root", ["foo", "bar"], ["baz", "qux"]) + + adirfs.fs._walk = mocker.MagicMock() + adirfs.fs._walk.side_effect = _walk + + actual = [] + async for entry in adirfs._walk("root", *ARGS, **KWARGS): + actual.append(entry) + assert actual == [("root", ["foo", "bar"], ["baz", "qux"])] + adirfs.fs._walk.assert_called_once_with(f"{PATH}/root", *ARGS, **KWARGS) + + +def test_walk(dirfs): + dirfs.fs.walk.return_value = iter( + [(f"{PATH}/root", ["foo", "bar"], ["baz", "qux"])] + ) + assert list(dirfs.walk("root", *ARGS, **KWARGS)) == [ + ("root", ["foo", "bar"], ["baz", "qux"]) + ] + dirfs.fs.walk.assert_called_once_with(f"{PATH}/root", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_glob(adirfs): + adirfs.fs._glob.return_value = [f"{PATH}/one", f"{PATH}/two"] + assert await adirfs._glob("*", **KWARGS) == ["one", "two"] + adirfs.fs._glob.assert_called_once_with(f"{PATH}/*", **KWARGS) + + +def test_glob(dirfs): + dirfs.fs.glob.return_value = [f"{PATH}/one", f"{PATH}/two"] + assert dirfs.glob("*", **KWARGS) == ["one", "two"] + dirfs.fs.glob.assert_called_once_with(f"{PATH}/*", **KWARGS) + + +@pytest.mark.asyncio +async def test_async_glob_detail(adirfs): + adirfs.fs._glob.return_value = { + f"{PATH}/one": {"foo": "bar"}, + f"{PATH}/two": {"baz": "qux"}, + } + assert await adirfs._glob("*", detail=True, **KWARGS) == { + "one": {"foo": "bar"}, + "two": {"baz": "qux"}, + } + adirfs.fs._glob.assert_called_once_with(f"{PATH}/*", detail=True, **KWARGS) + + +def test_glob_detail(dirfs): + dirfs.fs.glob.return_value = { + f"{PATH}/one": {"foo": "bar"}, + f"{PATH}/two": {"baz": "qux"}, + } + assert dirfs.glob("*", detail=True, **KWARGS) == { + "one": {"foo": "bar"}, + "two": {"baz": "qux"}, + } + dirfs.fs.glob.assert_called_once_with(f"{PATH}/*", detail=True, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_du(adirfs): + adirfs.fs._du.return_value = 1234 + assert await adirfs._du("file", *ARGS, **KWARGS) == 1234 + adirfs.fs._du.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +def test_du(dirfs): + dirfs.fs.du.return_value = 1234 + assert dirfs.du("file", *ARGS, **KWARGS) == 1234 + dirfs.fs.du.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_du_granular(adirfs): + adirfs.fs._du.return_value = {f"{PATH}/dir/one": 1, f"{PATH}/dir/two": 2} + assert await adirfs._du("dir", *ARGS, total=False, **KWARGS) == { + "dir/one": 1, + "dir/two": 2, + } + adirfs.fs._du.assert_called_once_with(f"{PATH}/dir", *ARGS, total=False, **KWARGS) + + +def test_du_granular(dirfs): + dirfs.fs.du.return_value = {f"{PATH}/dir/one": 1, f"{PATH}/dir/two": 2} + assert dirfs.du("dir", *ARGS, total=False, **KWARGS) == {"dir/one": 1, "dir/two": 2} + dirfs.fs.du.assert_called_once_with(f"{PATH}/dir", *ARGS, total=False, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_find(adirfs): + adirfs.fs._find.return_value = [f"{PATH}/dir/one", f"{PATH}/dir/two"] + assert await adirfs._find("dir", *ARGS, **KWARGS) == ["dir/one", "dir/two"] + adirfs.fs._find.assert_called_once_with(f"{PATH}/dir", *ARGS, **KWARGS) + + +def test_find(dirfs): + dirfs.fs.find.return_value = [f"{PATH}/dir/one", f"{PATH}/dir/two"] + assert dirfs.find("dir", *ARGS, **KWARGS) == ["dir/one", "dir/two"] + dirfs.fs.find.assert_called_once_with(f"{PATH}/dir", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_find_detail(adirfs): + adirfs.fs._find.return_value = { + f"{PATH}/dir/one": {"foo": "bar"}, + f"{PATH}/dir/two": {"baz": "qux"}, + } + assert await adirfs._find("dir", *ARGS, detail=True, **KWARGS) == { + "dir/one": {"foo": "bar"}, + "dir/two": {"baz": "qux"}, + } + adirfs.fs._find.assert_called_once_with(f"{PATH}/dir", *ARGS, detail=True, **KWARGS) + + +def test_find_detail(dirfs): + dirfs.fs.find.return_value = { + f"{PATH}/dir/one": {"foo": "bar"}, + f"{PATH}/dir/two": {"baz": "qux"}, + } + assert dirfs.find("dir", *ARGS, detail=True, **KWARGS) == { + "dir/one": {"foo": "bar"}, + "dir/two": {"baz": "qux"}, + } + dirfs.fs.find.assert_called_once_with(f"{PATH}/dir", *ARGS, detail=True, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_expand_path(adirfs): + adirfs.fs._expand_path.return_value = [f"{PATH}/file"] + assert await adirfs._expand_path("*", *ARGS, **KWARGS) == ["file"] + adirfs.fs._expand_path.assert_called_once_with(f"{PATH}/*", *ARGS, **KWARGS) + + +def test_expand_path(dirfs): + dirfs.fs.expand_path.return_value = [f"{PATH}/file"] + assert dirfs.expand_path("*", *ARGS, **KWARGS) == ["file"] + dirfs.fs.expand_path.assert_called_once_with(f"{PATH}/*", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_expand_path_list(adirfs): + adirfs.fs._expand_path.return_value = [f"{PATH}/1file", f"{PATH}/2file"] + assert await adirfs._expand_path(["1*", "2*"], *ARGS, **KWARGS) == [ + "1file", + "2file", + ] + adirfs.fs._expand_path.assert_called_once_with( + [f"{PATH}/1*", f"{PATH}/2*"], *ARGS, **KWARGS + ) + + +def test_expand_path_list(dirfs): + dirfs.fs.expand_path.return_value = [f"{PATH}/1file", f"{PATH}/2file"] + assert dirfs.expand_path(["1*", "2*"], *ARGS, **KWARGS) == ["1file", "2file"] + dirfs.fs.expand_path.assert_called_once_with( + [f"{PATH}/1*", f"{PATH}/2*"], *ARGS, **KWARGS + ) + + +@pytest.mark.asyncio +async def test_async_mkdir(adirfs): + await adirfs._mkdir("dir", *ARGS, **KWARGS) + adirfs.fs._mkdir.assert_called_once_with(f"{PATH}/dir", *ARGS, **KWARGS) + + +def test_mkdir(dirfs): + dirfs.mkdir("dir", *ARGS, **KWARGS) + dirfs.fs.mkdir.assert_called_once_with(f"{PATH}/dir", *ARGS, **KWARGS) + + +@pytest.mark.asyncio +async def test_async_makedirs(adirfs): + await adirfs._makedirs("dir", *ARGS, **KWARGS) + adirfs.fs._makedirs.assert_called_once_with(f"{PATH}/dir", *ARGS, **KWARGS) + + +def test_makedirs(dirfs): + dirfs.makedirs("dir", *ARGS, **KWARGS) + dirfs.fs.makedirs.assert_called_once_with(f"{PATH}/dir", *ARGS, **KWARGS) + + +def test_rmdir(mocker, dirfs): + dirfs.fs.rmdir = mocker.Mock() + dirfs.rmdir("dir") + dirfs.fs.rmdir.assert_called_once_with(f"{PATH}/dir") + + +def test_mv_file(mocker, dirfs): + dirfs.fs.mv_file = mocker.Mock() + dirfs.mv_file("one", "two", **KWARGS) + dirfs.fs.mv_file.assert_called_once_with(f"{PATH}/one", f"{PATH}/two", **KWARGS) + + +def test_touch(mocker, dirfs): + dirfs.fs.touch = mocker.Mock() + dirfs.touch("file", **KWARGS) + dirfs.fs.touch.assert_called_once_with(f"{PATH}/file", **KWARGS) + + +def test_created(mocker, dirfs): + dirfs.fs.created = mocker.Mock(return_value="date") + assert dirfs.created("file") == "date" + dirfs.fs.created.assert_called_once_with(f"{PATH}/file") + + +def test_modified(mocker, dirfs): + dirfs.fs.modified = mocker.Mock(return_value="date") + assert dirfs.modified("file") == "date" + dirfs.fs.modified.assert_called_once_with(f"{PATH}/file") + + +def test_sign(mocker, dirfs): + dirfs.fs.sign = mocker.Mock(return_value="url") + assert dirfs.sign("file", *ARGS, **KWARGS) == "url" + dirfs.fs.sign.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) + + +def test_open(mocker, dirfs): + dirfs.fs.open = mocker.Mock() + assert dirfs.open("file", *ARGS, **KWARGS) == dirfs.fs.open.return_value + dirfs.fs.open.assert_called_once_with(f"{PATH}/file", *ARGS, **KWARGS) diff --git a/tox.ini b/tox.ini index e6edbc4c7..edb847e14 100644 --- a/tox.ini +++ b/tox.ini @@ -25,8 +25,10 @@ conda_deps= pyftpdlib cloudpickle pytest + pytest-asyncio pytest-benchmark pytest-cov + pytest-mock pytest-vcr fusepy tomli < 2