From 5ffa0fab0be8754e4ea23b59b5c1e7f14ef8d48c Mon Sep 17 00:00:00 2001 From: ravi-mosaicml Date: Tue, 5 Jul 2022 16:50:35 -0700 Subject: [PATCH] Create a new `boto3.Session()` per `S3ObjectStore` instance (#1260) `boto3` sessions are not thread safe. When used in the object store logger with `use_procs: False`, the default session was shared across threads, which caused us to run into https://github.com/boto/boto3/issues/1592. To fix, this PR creates a new session within each `S3ObjectStore` instance. Closes https://mosaicml.atlassian.net/browse/CO-651 --- .../utils/object_store/s3_object_store.py | 2 +- .../object_store/test_s3_object_store.py | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tests/utils/object_store/test_s3_object_store.py diff --git a/composer/utils/object_store/s3_object_store.py b/composer/utils/object_store/s3_object_store.py index f2d210e750..ab9f20fae7 100644 --- a/composer/utils/object_store/s3_object_store.py +++ b/composer/utils/object_store/s3_object_store.py @@ -85,7 +85,7 @@ def __init__( if client_config is None: client_config = {} config = Config(**client_config) - self.client = boto3.client( + self.client = boto3.Session().client( 's3', config=config, region_name=region_name, diff --git a/tests/utils/object_store/test_s3_object_store.py b/tests/utils/object_store/test_s3_object_store.py new file mode 100644 index 0000000000..c82e6be17d --- /dev/null +++ b/tests/utils/object_store/test_s3_object_store.py @@ -0,0 +1,38 @@ +# Copyright 2022 MosaicML Composer authors +# SPDX-License-Identifier: Apache-2.0 + +import os +import pathlib +import threading + +import pytest + +from composer.utils.object_store import S3ObjectStore + + +def _worker(bucket: str, tmp_path: pathlib.Path, tid: int): + object_store = S3ObjectStore(bucket=bucket) + os.makedirs(tmp_path / str(tid)) + with pytest.raises(FileNotFoundError): + object_store.download_object('this_key_should_not_exist', filename=tmp_path / str(tid) / 'dummy_file') + + +@pytest.mark.timeout(15) +# This test requires properly configured aws credentials; otherwise the s3 client would hit a NoCredentialsError +# when constructing the Session, which occurs before the bug this test checks +@pytest.mark.remote +def test_s3_object_store_multi_threads(tmp_path: pathlib.Path): + """Test to verify that we do not hit https://github.com/boto/boto3/issues/1592.""" + pytest.importorskip('boto3') + # TODO(Bandish) -- once we have integration tests configured, change the bucket below + # to an integration test bucket + bucket = 'allenai-c4' + + threads = [] + # Manually tried fewer threads; it seems that 100 is needed to reliably re-produce the bug + for i in range(100): + t = threading.Thread(target=_worker, kwargs={'bucket': bucket, 'tid': i, 'tmp_path': tmp_path}) + t.start() + threads.append(t) + for t in threads: + t.join()