Skip to content

Commit

Permalink
Do not check for S3 key before attempting download (#19504)
Browse files Browse the repository at this point in the history
S3Hook.download_file first checks object existence then downloads.
This resolves creds 2 times.  We don't need to check existence.
Just ask for the key and if it's not there you'll know from the error.
And if it is there, you'll only have resolved creds once.

Co-authored-by: Kaxil Naik <kaxilnaik@gmail.com>
  • Loading branch information
dstandish and kaxil authored Nov 10, 2021
1 parent cb6137f commit 9053de7
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
13 changes: 9 additions & 4 deletions airflow/providers/amazon/aws/hooks/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,10 +806,15 @@ def download_file(
"""
self.log.info('Downloading source S3 file from Bucket %s with path %s', bucket_name, key)

if not self.check_for_key(key, bucket_name):
raise AirflowException(f'The source file in Bucket {bucket_name} with path {key} does not exist')

s3_obj = self.get_key(key, bucket_name)
try:
s3_obj = self.get_key(key, bucket_name)
except ClientError as e:
if e.response.get('Error', {}).get('Code') == 404:
raise AirflowException(
f'The source file in Bucket {bucket_name} with path {key} does not exist'
)
else:
raise e

with NamedTemporaryFile(dir=local_path, prefix='airflow_tmp_', delete=False) as local_tmp_file:
s3_obj.download_fileobj(local_tmp_file)
Expand Down
1 change: 0 additions & 1 deletion tests/providers/amazon/aws/hooks/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,6 @@ def test_download_file(self, mock_temp_file):

s3_hook.download_file(key=key, bucket_name=bucket)

s3_hook.check_for_key.assert_called_once_with(key, bucket)
s3_hook.get_key.assert_called_once_with(key, bucket)
s3_obj.download_fileobj.assert_called_once_with(mock_temp_file)

Expand Down

0 comments on commit 9053de7

Please sign in to comment.