From 2dd9355a4276f44b08f841e550f73495710710ff Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Wed, 10 Apr 2024 22:14:26 +0200 Subject: [PATCH] Fix missing folders http glob (#1516) --- fsspec/implementations/http.py | 18 +++++++++--------- fsspec/implementations/tests/test_http.py | 9 +++++++++ fsspec/tests/conftest.py | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 2ffb7ed41..4580764ce 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -451,7 +451,7 @@ async def _glob(self, path, maxdepth=None, **kwargs): ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash path = self._strip_protocol(path) - append_slash_to_dirname = ends_with_slash or path.endswith("/**") + append_slash_to_dirname = ends_with_slash or path.endswith(("/**", "/*")) idx_star = path.find("*") if path.find("*") >= 0 else len(path) idx_brace = path.find("[") if path.find("[") >= 0 else len(path) @@ -494,15 +494,15 @@ async def _glob(self, path, maxdepth=None, **kwargs): pattern = re.compile(pattern) out = { - p: info + ( + p.rstrip("/") + if not append_slash_to_dirname + and info["type"] == "directory" + and p.endswith("/") + else p + ): info for p, info in sorted(allpaths.items()) - if pattern.match( - ( - p + "/" - if append_slash_to_dirname and info["type"] == "directory" - else p - ) - ) + if pattern.match(p.rstrip("/")) } if detail: diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py index af1164480..fdae51ff5 100644 --- a/fsspec/implementations/tests/test_http.py +++ b/fsspec/implementations/tests/test_http.py @@ -129,6 +129,15 @@ def test_list_cache_with_skip_instance_cache(server): assert out == [server + "/index/realfile"] +def test_glob_return_subfolders(server): + h = fsspec.filesystem("http") + out = h.glob(server + "/simple/*") + assert set(out) == { + server +
"/simple/dir/", + server + "/simple/file", + } + + def test_isdir(server): h = fsspec.filesystem("http") assert h.isdir(server + "/index/") diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py index d305e212b..fb1efb041 100644 --- a/fsspec/tests/conftest.py +++ b/fsspec/tests/conftest.py @@ -19,6 +19,13 @@ win = os.name == "nt" +def _make_listing(*paths): + return "\n".join( + f'<a href="http://127.0.0.1:{port}{f}">Link_{i}</a>' + for i, f in enumerate(paths) + ).encode() + + @pytest.fixture def reset_files(): yield @@ -34,6 +41,10 @@ class HTTPTestHandler(BaseHTTPRequestHandler): "/index/otherfile": data, "/index": index, "/data/20020401": listing, + "/simple/": _make_listing("/simple/file", "/simple/dir/"), + "/simple/file": data, + "/simple/dir/": _make_listing("/simple/dir/file"), + "/simple/dir/file": data, } dynamic_files = {} @@ -53,7 +64,9 @@ def _respond(self, code=200, headers=None, data=b""): self.wfile.write(data) def do_GET(self): - file_path = self.path.rstrip("/") + file_path = self.path + if file_path.endswith("/") and file_path.rstrip("/") in self.files: + file_path = file_path.rstrip("/") file_data = self.files.get(file_path) if "give_path" in self.headers: return self._respond(200, data=json.dumps({"path": self.path}).encode())