From 990ab5975f8999e23e4f2bc0c96f2a6877e24029 Mon Sep 17 00:00:00 2001
From: Serge Smertin <259697+nfx@users.noreply.github.com>
Date: Mon, 19 Jun 2023 17:08:22 +0200
Subject: [PATCH] Increase DBFS copy buffer size (#185)

Fix #131
---
 databricks/sdk/mixins/files.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/databricks/sdk/mixins/files.py b/databricks/sdk/mixins/files.py
index a00247853..fc56a1d8a 100644
--- a/databricks/sdk/mixins/files.py
+++ b/databricks/sdk/mixins/files.py
@@ -316,7 +316,7 @@ def open(self, path: str, *, read: bool = False, write: bool = False, overwrite:
 
     def upload(self, path: str, src: BinaryIO, *, overwrite: bool = False):
         with self.open(path, write=True, overwrite=overwrite) as dst:
-            shutil.copyfileobj(src, dst)
+            shutil.copyfileobj(src, dst, length=_DbfsIO.MAX_CHUNK_SIZE)
 
     def download(self, path: str) -> BinaryIO:
         return self.open(path, read=True)
@@ -370,14 +370,12 @@ def copy(self, src: str, dst: str, *, recursive=False, overwrite=False):
             # copy single file
             with src.open(read=True) as reader:
                 with dst.open(write=True, overwrite=overwrite) as writer:
-                    for chunk in reader:
-                        writer.write(chunk)
+                    shutil.copyfileobj(reader, writer, length=_DbfsIO.MAX_CHUNK_SIZE)
             return
         # iterate through files
         for child, reader in src.list_opened_handles(recursive=recursive):
             with dst.child(child).open(write=True, overwrite=overwrite) as writer:
-                for chunk in reader:
-                    writer.write(chunk)
+                shutil.copyfileobj(reader, writer, length=_DbfsIO.MAX_CHUNK_SIZE)
 
     def move_(self, src: str, dst: str, *, recursive=False, overwrite=False):
         source = self._path(src)