Skip to content

Commit

Permalink
add download command to gcs-cli (#90)
Browse files Browse the repository at this point in the history
* add download command to gcs-cli

* address review

* fix glob directory listing

* improve gcs-cli download test cases
  • Loading branch information
relud authored Feb 12, 2025
1 parent 2629da8 commit d3c5905
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 3 deletions.
63 changes: 60 additions & 3 deletions obs_common/gcs_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,11 @@ def list_objects(bucket_name, details):
def upload(source, destination):
"""Upload files to a bucket
SOURCE is a path to a file or directory of files. will recurse on directory trees
SOURCE is a path to a file or directory of files. will recurse on directory trees.
DESTINATION is a path to a file or directory in the bucket. If SOURCE is a
directory or DESTINATION ends with "/", then DESTINATION is treated as a directory.
DESTINATION is a path to a file or directory in the bucket, for example
"gs://bucket/dir/" or "gs://bucket/path/to/file". If SOURCE is a directory or DESTINATION
ends with "/", then DESTINATION is treated as a directory.
"""

client = get_client()
Expand Down Expand Up @@ -166,5 +167,61 @@ def upload(source, destination):
click.echo(f"Uploaded gs://{bucket_name}/{key}")


@gcs_group.command()
@click.argument("source")
@click.argument("destination")
def download(source, destination):
    """Download files from a bucket

    SOURCE is a path to a file or directory in the bucket, for example
    "gs://bucket/path/to/file" or "gs://bucket/dir/". Must end in "/" to indicate a
    directory. Will recurse on directory trees.

    DESTINATION is a path to a file or directory on the local filesystem. If SOURCE is a
    directory or DESTINATION ends with "/", then DESTINATION is treated as a directory.
    """

    client = get_client()

    # remove protocol from source if present, then separate bucket and prefix
    bucket_name, _, prefix = source.split("://", 1)[-1].partition("/")
    prefix_path = PurePosixPath(prefix)

    try:
        bucket = client.get_bucket(bucket_name)
    except NotFound as e:
        raise click.ClickException(f"GCS bucket {bucket_name!r} does not exist.") from e

    # an empty prefix is the bucket root, which is always treated as a directory
    source_is_dir = not prefix or prefix.endswith("/")
    if source_is_dir:
        sources = [
            # NOTE(relud): blob.download_to_filename hangs for blobs returned by
            # list_blobs, so create a new blob object
            bucket.blob(blob.name)
            for blob in bucket.list_blobs(prefix=prefix)
        ]
        if not sources:
            raise click.ClickException(f"No keys in {source!r}.")
    else:
        sources = [bucket.blob(prefix)]

    destination_path = Path(destination)
    # Path() discards a trailing "/", so inspect the raw argument as well; otherwise
    # a destination directory that does not exist yet would be written as a file.
    destination_is_dir = destination.endswith("/") or destination_path.is_dir()
    for blob in sources:
        if source_is_dir:
            # source is a directory so treat destination as a directory
            path = destination_path / PurePosixPath(blob.name).relative_to(prefix_path)
            if blob.name.endswith("/"):
                # "folder" placeholder object: mirror it as a local directory rather
                # than writing a file that would block the blobs nested beneath it
                path.mkdir(parents=True, exist_ok=True)
                continue
        elif destination_is_dir:
            # source is a file but destination is a directory, preserve file name
            path = destination_path / prefix_path.name
        else:
            path = destination_path
        path.parent.mkdir(parents=True, exist_ok=True)
        try:
            blob.download_to_filename(str(path))
        except NotFound as e:
            raise click.ClickException(f"GCS blob does not exist: {source!r}") from e
        click.echo(f"Downloaded gs://{bucket_name}/{blob.name}")


# Invoke the gcs_group command group when this module is executed as a script.
if __name__ == "__main__":
    gcs_group()
88 changes: 88 additions & 0 deletions tests/test_gcs_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,91 @@ def test_upload_dir_to_dir(gcs_helper, tmp_path):
assert gcs_helper.download(bucket, f"{path.name}/{path.name}") == path.name.encode(
"utf-8"
)


@REQUIRE_EMULATOR
def test_download_file_to_file(gcs_helper, tmp_path):
    """Download a single object onto an existing local file with a different name."""
    bucket_name = "test"
    blob_key = uuid4().hex
    gcs_helper.upload(bucket_name, blob_key, blob_key)

    # Seed the target with its own (different) name so a successful download
    # is observable as a change in the file's content.
    target = tmp_path / uuid4().hex
    target.write_text(target.name)

    cli_args = ["download", f"gs://{bucket_name}/{blob_key}", str(target.absolute())]
    outcome = CliRunner().invoke(gcs_group, cli_args)

    assert outcome.exit_code == 0
    assert target.read_text() == blob_key


@REQUIRE_EMULATOR
def test_download_file_to_dir(gcs_helper, tmp_path):
    """Download a single object into an existing directory, keeping its name."""
    bucket_name = "test"
    blob_key = uuid4().hex
    gcs_helper.upload(bucket_name, blob_key, blob_key)

    cli_args = ["download", f"gs://{bucket_name}/{blob_key}", str(tmp_path.absolute())]
    outcome = CliRunner().invoke(gcs_group, cli_args)

    assert outcome.exit_code == 0
    # The file is expected inside the directory under the object's own name.
    expected_file = tmp_path / blob_key
    assert expected_file.read_text() == blob_key


@REQUIRE_EMULATOR
def test_download_root_to_dir(gcs_helper, tmp_path):
    """Download an entire bucket (no key given) into a local directory."""
    bucket_name = "test"
    blob_key = uuid4().hex
    nested_key = f"{blob_key}/{blob_key}"
    gcs_helper.upload(bucket_name, nested_key, blob_key)

    # No trailing slash and no key: the bucket root is passed as the source.
    cli_args = ["download", f"gs://{bucket_name}", str(tmp_path.absolute())]
    outcome = CliRunner().invoke(gcs_group, cli_args)

    assert outcome.exit_code == 0
    expected_file = tmp_path / blob_key / blob_key
    assert expected_file.read_text() == blob_key


@REQUIRE_EMULATOR
def test_download_dir_to_dir(gcs_helper, tmp_path):
    """Download one bucket directory into a local directory."""
    bucket_name = "test"
    blob_key = uuid4().hex
    inside_dir = f"{blob_key}/{blob_key}"
    sibling = f"{blob_key}_{blob_key}"

    # One object lives under the requested directory and must be downloaded;
    # the other merely shares the name prefix and must be left alone.
    gcs_helper.upload(bucket_name, inside_dir, blob_key)
    gcs_helper.upload(bucket_name, sibling, blob_key)

    local_dir = tmp_path.absolute() / blob_key
    cli_args = ["download", f"gs://{bucket_name}/{blob_key}/", str(local_dir)]
    outcome = CliRunner().invoke(gcs_group, cli_args)

    assert outcome.exit_code == 0
    assert (tmp_path / blob_key / blob_key).read_text() == blob_key
    assert not (tmp_path / sibling).exists()


@REQUIRE_EMULATOR
def test_download_missing_file(gcs_helper, tmp_path):
    """Test downloading a file that doesn't exist."""
    bucket = "test"
    key = uuid4().hex
    # file that should not be downloaded because the source should be a file
    gcs_helper.upload(bucket, f"{key}/{key}", key)
    source = f"gs://{bucket}/{key}"
    result = CliRunner().invoke(
        gcs_group,
        ["download", source, str(tmp_path.absolute())],
    )
    assert result.exit_code == 1
    # ClickException messages are written to stderr. Result.output contains the
    # combined stream on every Click version, whereas Result.stdout excludes
    # stderr on Click 8.2+.
    assert result.output == f"Error: GCS blob does not exist: {source!r}\n"


@REQUIRE_EMULATOR
def test_download_missing_dir(gcs_helper, tmp_path):
    """Test downloading a directory that doesn't exist."""
    bucket = "test"
    key = uuid4().hex
    # file that should not be downloaded because the source should be a directory
    gcs_helper.upload(bucket, f"{key}", key)
    source = f"gs://{bucket}/{key}/"
    result = CliRunner().invoke(
        gcs_group,
        ["download", source, str(tmp_path.absolute())],
    )
    assert result.exit_code == 1
    # ClickException messages are written to stderr. Result.output contains the
    # combined stream on every Click version, whereas Result.stdout excludes
    # stderr on Click 8.2+.
    assert result.output == f"Error: No keys in {source!r}.\n"

0 comments on commit d3c5905

Please sign in to comment.