Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix windows path (#660) #673

Merged
merged 10 commits into from
Jul 3, 2024
20 changes: 11 additions & 9 deletions databricks/sdk/mixins/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,9 @@ def __repr__(self) -> str:
class _Path(ABC):

def __init__(self, path: str):
self._path = pathlib.Path(str(path).replace('dbfs:', '').replace('file:', ''))
"""Posix Path for API Compatibility in windows and OS Specific Path(System Path) for using pathlib.Path functions."""
self._path = pathlib.PurePosixPath(str(path).replace('dbfs:', '').replace('file:', ''))
self._system_path = pathlib.Path(self._path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to add a comment explaining why self._path needs to be a PurePosixPath and why a separate self._system_path is needed.


@property
def is_local(self) -> bool:
Expand Down Expand Up @@ -337,18 +339,18 @@ def child(self, path: str) -> Self:
return _LocalPath(str(self._path / path))

def _is_dir(self) -> bool:
return self._path.is_dir()
return self._system_path.is_dir()

def mkdir(self):
self._path.mkdir(mode=0o755, parents=True, exist_ok=True)
self._system_path.mkdir(mode=0o755, parents=True, exist_ok=True)

def exists(self) -> bool:
return self._path.exists()
return self._system_path.exists()

def open(self, *, read=False, write=False, overwrite=False):
# make local fs follow the similar semantics as DBFS
self._path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
return self._path.open(mode='wb' if overwrite else 'rb' if read else 'xb')
self._system_path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
return self._system_path.open(mode='wb' if overwrite else 'rb' if read else 'xb')

def list(self, recursive=False) -> Generator[files.FileInfo, None, None]:
if not self.is_dir:
Expand All @@ -359,7 +361,7 @@ def list(self, recursive=False) -> Generator[files.FileInfo, None, None]:
modification_time=int(st.st_mtime_ns / 1e6),
)
return
queue = deque([self._path])
queue = deque([self._system_path])
while queue:
path = queue.popleft()
for leaf in path.iterdir():
Expand All @@ -379,12 +381,12 @@ def delete(self, *, recursive=False):
if recursive:
for leaf in self.list(recursive=True):
_LocalPath(leaf.path).delete()
self._path.rmdir()
self._system_path.rmdir()
else:
kw = {}
if sys.version_info[:2] > (3, 7):
kw['missing_ok'] = True
self._path.unlink(**kw)
self._system_path.unlink(**kw)

def __repr__(self) -> str:
return f'<_LocalPath {self._path}>'
Expand Down
8 changes: 8 additions & 0 deletions tests/test_dbfs_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,11 @@ def test_fs_path_invalid(config):
with pytest.raises(ValueError) as e:
dbfs_ext._path('s3://path/to/file')
assert 'unsupported scheme "s3"' in str(e.value)


def test_dbfs_local_path_mkdir(config, tmp_path):
    """Check that mkdir() on a ``file:`` path creates a directory that ``dbfs.exists`` reports."""
    from databricks.sdk import WorkspaceClient

    # Build the local-scheme path once so creation and lookup use the same string.
    target = f'file:{tmp_path}/test_dir'
    client = WorkspaceClient(config=config)
    client.dbfs._path(target).mkdir()
    assert client.dbfs.exists(target)
Loading