Skip to content

Commit

Permalink
Release s3 resources in a timely manner
Browse files Browse the repository at this point in the history
This commit ensures that the entire log file is not loaded into memory
and is instead streamed directly from the S3 bucket.

Signed-off-by: Yashvardhan Nanavati <yashn@bu.edu>
  • Loading branch information
yashvardhannanavati committed Jul 30, 2024
1 parent 48e52f3 commit c82bb8f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 40 deletions.
8 changes: 5 additions & 3 deletions iib/web/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ def get_object_from_s3_bucket(
"""
file_name = f'{s3_key_prefix}/{s3_file_name}'
log.info('getting file from s3 : %s', file_name)
s3 = boto3.resource(service_name='s3')
try:
response = s3.meta.client.get_object(Bucket=bucket_name, Key=file_name)
s3_client = boto3.client('s3')
response = s3_client.get_object(Bucket=bucket_name, Key=file_name)
return response['Body'].read()
except Exception as error:
log.exception('Unable to fetch object %s from bucket %s: %s', file_name, bucket_name, error)
return None
return response['Body']
finally:
s3_client.close()
49 changes: 12 additions & 37 deletions tests/test_web/test_s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,34 @@
from unittest import mock

import botocore
from botocore.response import StreamingBody

from iib.web import s3_utils


# Success-path test for s3_utils.get_object_from_s3_bucket with the boto3 module
# patched out inside iib.web.s3_utils.
#
# NOTE(review): this span is a rendered diff whose +/- markers and indentation
# have been stripped. It appears to interleave two versions of the test body:
# one that mocks boto3.resource(...).meta.client and one that mocks
# boto3.client('s3'). The two `assert response == ...` lines expect different
# values, so the span cannot pass as a single function as written. Confirm
# against the repository which lines are current before reusing it.
@mock.patch('iib.web.s3_utils.boto3')
def test_get_object_from_s3_bucket(mock_boto3):
# --- Variant using boto3.resource (presumably the removed, pre-commit lines;
# TODO confirm) ---
# get_object is stubbed to return a full S3-style response dict whose 'Body'
# is a plain string.
my_mock = mock.MagicMock()
mock_boto3.resource.return_value = my_mock
my_mock.meta.client.get_object.return_value = {
'ResponseMetadata': {
'RequestId': 'CK2VG4V5ZQXAAM5B',
'HostId': 'q4Wp/tsvjnl/eBeN0dHvHYi6xUl9U149BdN6IAXjaFJnnQX+=',
'HTTPStatusCode': 200,
'HTTPHeaders': {
'x-amz-id-2': 'q4Wp/tsvjnl/eBeN0dHvHYi6xUl9U149BdN6IAXjaFJnnQX+=',
'x-amz-request-id': 'CK2VG4V5ZQXAAM5B',
'date': 'Sun, 05 Dec 2021 03:35:14 GMT',
'last-modified': 'Sun, 05 Dec 2021 03:29:26 GMT',
'etag': '"69fefda897b58ced9d5f88df1804564c"',
'x-amz-server-side-encryption': 'AES256',
'accept-ranges': 'bytes',
'content-type': 'binary/octet-stream',
'server': 'AmazonS3',
'content-length': '21179',
},
'RetryAttempts': 0,
},
'AcceptRanges': 'bytes',
'ContentLength': 21179,
'ETag': '"69fefda897b58ced9d5f88df1804564c"',
'ContentType': 'binary/octet-stream',
'ServerSideEncryption': 'AES256',
'Metadata': {},
'Body': 'lots of data present here',
}
# --- Variant using boto3.client (presumably the added, post-commit lines;
# TODO confirm) ---
# The response is reduced to just the 'Body' key, holding a StreamingBody.
mock_client = mock.Mock()
mock_boto3.client.return_value = mock_client
mock_body = StreamingBody('lots of data', 0)
# read() is replaced with a Mock, so the bytes compared below come from this
# Mock's return value, not from the StreamingBody constructor arguments.
mock_body.read = mock.Mock(return_value=b'lots of data')
mock_client.get_object.return_value = {'Body': mock_body}

# Call under test: positional arguments are 'prefix', 'file', 's3-bucket'; the
# assertions below expect them to reach get_object as Key='prefix/file' and
# Bucket='s3-bucket'.
response = s3_utils.get_object_from_s3_bucket('prefix', 'file', 's3-bucket')

# Assertions for the boto3.resource variant: the string body is returned as-is.
assert response == 'lots of data present here'
mock_boto3.resource.assert_called_once_with(service_name='s3')
my_mock.meta.client.get_object.assert_called_once_with(Bucket='s3-bucket', Key='prefix/file')
# Assertions for the boto3.client variant.
# NOTE(review): this compares the helper's return value to bytes, while the
# mocked 'Body' is a StreamingBody object; verify whether the helper is meant to
# return the stream itself or the result of read().
assert response == b'lots of data'
mock_boto3.client.assert_called_once_with('s3')
mock_client.get_object.assert_called_once_with(Bucket='s3-bucket', Key='prefix/file')


# Failure-path test for s3_utils.get_object_from_s3_bucket: get_object is made
# to raise botocore.exceptions.ClientError and the helper is expected to return
# None instead of propagating the error.
#
# NOTE(review): this span is a rendered diff whose +/- markers and indentation
# have been stripped. It appears to interleave two versions of the mock setup
# (boto3.resource(...).meta.client vs boto3.client); confirm against the
# repository which lines are current before reusing it.
@mock.patch('iib.web.s3_utils.boto3')
def test_get_object_from_s3_bucket_failure(mock_boto3):
# Variant using boto3.resource (presumably the removed lines; TODO confirm).
my_mock = mock.MagicMock()
mock_boto3.resource.return_value = my_mock
# Variant using boto3.client (presumably the added lines; TODO confirm).
mock_client = mock.Mock()
mock_boto3.client.return_value = mock_client
# Error payload passed to ClientError as its error-response argument.
error_msg = {
'Error': {'Code': 'SomeServiceException', 'Message': 'Something went horribly wrong'}
}
# Side effect wired onto the boto3.resource variant's get_object.
my_mock.meta.client.get_object.side_effect = botocore.exceptions.ClientError(
error_msg, 'get_object'
)
# Same side effect wired onto the boto3.client variant's get_object.
mock_client.get_object.side_effect = botocore.exceptions.ClientError(error_msg, 'get_object')

# The helper is expected to return None when get_object raises.
response = s3_utils.get_object_from_s3_bucket('prefix', 'file', 's3-bucket')
assert response is None

0 comments on commit c82bb8f

Please sign in to comment.