From 2a03258fa926516b180d53e17e1bb38aaf6fad2b Mon Sep 17 00:00:00 2001 From: Daniel Standish <15932138+dstandish@users.noreply.github.com> Date: Tue, 9 Nov 2021 22:23:10 -0800 Subject: [PATCH 1/2] Do not check for key before attempting download S3Hook.download_file first checks object existence then downloads. This resolves creds 2 times. We don't need to check existence. Just ask for the key and if it's not there you'll know from the error. And if it is there, you'll only have resolved creds once. --- airflow/providers/amazon/aws/hooks/s3.py | 13 +++++++++---- tests/providers/amazon/aws/hooks/test_s3.py | 1 - 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 4a2a15b7a9393..9883e43d2669f 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -806,10 +806,15 @@ def download_file( """ self.log.info('Downloading source S3 file from Bucket %s with path %s', bucket_name, key) - if not self.check_for_key(key, bucket_name): - raise AirflowException(f'The source file in Bucket {bucket_name} with path {key} does not exist') - - s3_obj = self.get_key(key, bucket_name) + try: + s3_obj = self.get_key(key, bucket_name) + except ClientError as e: + if e.response.get('Error', {}).get('Code', {}) == 404: + raise AirflowException( + f'The source file in Bucket {bucket_name} with path {key} does not exist' + ) + else: + raise e with NamedTemporaryFile(dir=local_path, prefix='airflow_tmp_', delete=False) as local_tmp_file: s3_obj.download_fileobj(local_tmp_file) diff --git a/tests/providers/amazon/aws/hooks/test_s3.py b/tests/providers/amazon/aws/hooks/test_s3.py index d4d20b530da1b..934993c040566 100644 --- a/tests/providers/amazon/aws/hooks/test_s3.py +++ b/tests/providers/amazon/aws/hooks/test_s3.py @@ -444,7 +444,6 @@ def test_download_file(self, mock_temp_file): s3_hook.download_file(key=key, bucket_name=bucket) - s3_hook.check_for_key.assert_called_once_with(key, bucket) s3_hook.get_key.assert_called_once_with(key, bucket) s3_obj.download_fileobj.assert_called_once_with(mock_temp_file) From 8bf1f996fd925c6c4f68dc1357d1708d5c12060b Mon Sep 17 00:00:00 2001 From: Daniel Standish <15932138+dstandish@users.noreply.github.com> Date: Wed, 10 Nov 2021 09:46:44 -0800 Subject: [PATCH 2/2] Update airflow/providers/amazon/aws/hooks/s3.py Co-authored-by: Kaxil Naik --- airflow/providers/amazon/aws/hooks/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 9883e43d2669f..53b2747104380 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -809,7 +809,7 @@ def download_file( try: s3_obj = self.get_key(key, bucket_name) except ClientError as e: - if e.response.get('Error', {}).get('Code', {}) == 404: + if e.response.get('Error', {}).get('Code') == 404: raise AirflowException( f'The source file in Bucket {bucket_name} with path {key} does not exist' )