From fc9d7b3a68d7872d7c9f5bd0d64faab466e5466d Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 29 Jul 2024 14:48:57 +0000 Subject: [PATCH 1/8] DSEGOG-321 Add function to delete directories in Echo --- .../src/records/echo_interface.py | 22 ++++++++ test/records/test_echo_interface.py | 53 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/operationsgateway_api/src/records/echo_interface.py b/operationsgateway_api/src/records/echo_interface.py index f334a148..bdd75562 100644 --- a/operationsgateway_api/src/records/echo_interface.py +++ b/operationsgateway_api/src/records/echo_interface.py @@ -137,3 +137,25 @@ def delete_file_object(self, object_path: str) -> None: raise EchoS3Error( f"Deletion of {object_path} was unsuccessful", ) from exc + + + def delete_directory(self, dir_path: str) -> None: + """ + Given a path, delete an entire 'directory' from Echo. This is used to delete + waveforms and images when deleting a record by its ID + """ + + log.info("Deleting directory from %s", dir_path) + + try: + self.bucket.objects.filter(Prefix=dir_path).delete() + except ClientError as exc: + log.error( + "%s: %s", + exc.response["Error"]["Code"], + exc.response["Error"].get("Message"), + ) + raise EchoS3Error( + f"{exc.response['Error']['Code']} when deleting file at" + f" '{dir_path}'", + ) from exc diff --git a/test/records/test_echo_interface.py b/test/records/test_echo_interface.py index d711cdfc..69c31950 100644 --- a/test/records/test_echo_interface.py +++ b/test/records/test_echo_interface.py @@ -276,3 +276,56 @@ def test_invalid_delete_file_object(self, _): test_echo = EchoInterface() with pytest.raises(EchoS3Error): test_echo.delete_file_object(self.test_image_path) + + @patch( + "operationsgateway_api.src.config.Config.config.echo.url", + config_echo_url, + ) + @patch( + "operationsgateway_api.src.config.Config.config.echo.access_key", + config_echo_access_key, + ) + @patch( + "operationsgateway_api.src.config.Config.config.echo.secret_key", + config_echo_secret_key, + ) + @patch( + "operationsgateway_api.src.config.Config.config.echo.bucket_name", + config_image_bucket_name, + ) + @patch("boto3.resource") + def test_valid_delete_directory(self, _): + test_echo = EchoInterface() + test_echo.delete_directory("test/image/19900202143000/") + assert test_echo.bucket.objects.filter.return_value.delete.call_count == 1 + + @patch( + "operationsgateway_api.src.config.Config.config.echo.url", + config_echo_url, + ) + @patch( + "operationsgateway_api.src.config.Config.config.echo.access_key", + config_echo_access_key, + ) + @patch( + "operationsgateway_api.src.config.Config.config.echo.secret_key", + config_echo_secret_key, + ) + @patch( + "operationsgateway_api.src.config.Config.config.echo.bucket_name", + config_image_bucket_name, + ) + @patch("boto3.resource") + def test_invalid_delete_directory(self, _): + with patch("boto3.resource") as mock_resource: + mock_bucket = mock_resource.return_value.Bucket.return_value + + mock_bucket.objects.filter.return_value.delete.side_effect = ClientError( + {"Error": {"Code": "AccessDenied", "Message": "Access Denied"}}, + "delete", + ) + + test_echo = EchoInterface() + + with pytest.raises(EchoS3Error, match="when deleting file at"): + test_echo.delete_directory("test/image/19900202143000/") From aa4dc1aeed6f32fab9bd316d8a34ddc5d44c5164 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 29 Jul 2024 14:49:38 +0000 Subject: [PATCH 2/8] DSEGOG-321 Add functionality to `DELETE /records/{_id}` to remove associated images and waveforms --- operationsgateway_api/src/routes/records.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/operationsgateway_api/src/routes/records.py b/operationsgateway_api/src/routes/records.py index 38a2773f..fc62572c 100644 --- a/operationsgateway_api/src/routes/records.py +++ b/operationsgateway_api/src/routes/records.py @@ -12,8 +12,10 @@ ) from operationsgateway_api.src.error_handling import endpoint_error_handling from operationsgateway_api.src.exceptions import QueryParameterError +from operationsgateway_api.src.records.echo_interface import EchoInterface from operationsgateway_api.src.records.image import Image from operationsgateway_api.src.records.record import Record as Record +from operationsgateway_api.src.records.waveform import Waveform from operationsgateway_api.src.routes.common_parameters import ParameterHandler @@ -248,11 +250,13 @@ async def delete_record_by_id( ], access_token: Annotated[str, Depends(authorise_route)], ): - # TODO 2 - full implementation will require searching through waveform channels to - # remove the documents in the waveforms collection. The images will need to be - # removed from disk too log.info("Deleting record by ID: %s", id_) await Record.delete_record(id_) + echo = EchoInterface() + log.info("Deleting waveforms for record ID '%s'", id_) + echo.delete_directory(f"{Waveform.echo_prefix}/{id_}/") + log.info("Deleting images for record ID '%s'", id_) + echo.delete_directory(f"{Image.echo_prefix}/{id_}/") return Response(status_code=HTTPStatus.NO_CONTENT.value) From e7ca92f64b7cf56c44f7f990ee964b2524be4a3d Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 29 Jul 2024 14:51:15 +0000 Subject: [PATCH 3/8] DSEGOG-321 Add to delete record test to test for image and waveform deletion - I've moved the setup & teardown of the data into a fixture and added an image and waveform channel --- test/endpoints/conftest.py | 59 ++++++++++++++++++++++++++++ test/endpoints/test_delete_record.py | 39 +++++++++--------- 2 files changed, 79 insertions(+), 19 deletions(-) diff --git a/test/endpoints/conftest.py b/test/endpoints/conftest.py index 1393aac2..d2421e9e 100644 --- a/test/endpoints/conftest.py +++ b/test/endpoints/conftest.py @@ -4,8 +4,12 @@ import pymongo import pytest_asyncio +from operationsgateway_api.src.models import WaveformModel from operationsgateway_api.src.mongo.interface import MongoDBInterface from operationsgateway_api.src.records.echo_interface import EchoInterface +from operationsgateway_api.src.records.image import Image +from operationsgateway_api.src.records.record import Record +from operationsgateway_api.src.records.waveform import Waveform async def add_user(auth_type): @@ -116,3 +120,58 @@ async def remove_experiment_fixture(): "start_date": datetime(1920, 4, 30, 10, 0), }, ) + + +@pytest_asyncio.fixture(scope="function") +async def data_for_delete_records(): + record_id = "19000000000011" + test_record = { + "_id": record_id, + "metadata": { + "epac_ops_data_version": "1.0", + "shotnum": 423648000000, + "timestamp": "2023-06-05T08:00:00", + }, + "channels": { + "test-scalar-channel-id": { + "metadata": {"channel_dtype": "scalar", "units": "µm"}, + "data": 5.126920467610521, + }, + "test-image-channel-id": { + "metadata": {"channel_dtype": "image"}, + "image_path": f"{record_id}/test-image-channel-id.png", + "thumbnail": "i5~9=", + }, + "test-waveform-channel-id": { + "metadata": {"channel_dtype": "waveform"}, + "waveform_path": f"{record_id}/test-waveform-channel-id.json", + "thumbnail": "i5~9=", + }, + }, + } + + record_instance = Record(test_record) + await record_instance.insert() + + echo = EchoInterface() + with open("test/records/original_image.png", "rb") as f: + echo.upload_file_object( + f, + f"{Image.echo_prefix}/{record_id}/test-image-channel-id.png", + ) + waveform = Waveform(WaveformModel(x=[1.0, 2.0, 3.0], y=[1.0, 2.0, 3.0])) + waveform_bytes = waveform.to_json() + echo.upload_file_object( + waveform_bytes, + f"{Waveform.echo_prefix}/{record_id}/test-waveform-channel-id.json", + ) + + yield + + await Record.delete_record(record_id) + echo.delete_file_object( + f"{Image.echo_prefix}/{record_id}/test-image-channel-id.png", + ) + echo.delete_file_object( + f"{Waveform.echo_prefix}/{record_id}/test-waveform-channel-id.json", + ) diff --git a/test/endpoints/test_delete_record.py b/test/endpoints/test_delete_record.py index e4e17703..1da819bf 100644 --- a/test/endpoints/test_delete_record.py +++ b/test/endpoints/test_delete_record.py @@ -1,7 +1,11 @@ from fastapi.testclient import TestClient import pytest +from operationsgateway_api.src.exceptions import MissingDocumentError +from operationsgateway_api.src.records.echo_interface import EchoInterface +from operationsgateway_api.src.records.image import Image from operationsgateway_api.src.records.record import Record +from operationsgateway_api.src.records.waveform import Waveform class TestDeleteRecordById: @@ -10,30 +14,27 @@ async def test_delete_record_success( self, test_app: TestClient, login_and_get_token, + data_for_delete_records, ): record_id = 19000000000011 - test_record = { - "_id": "19000000000011", - "metadata": { - "epac_ops_data_version": "1.0", - "shotnum": 423648000000, - "timestamp": "2023-06-05T08:00:00", - }, - "channels": { - "test-scalar-channel-id": { - "metadata": {"channel_dtype": "scalar", "units": "µm"}, - "data": 5.126920467610521, - }, - }, - } - - record_instance = Record(test_record) - await record_instance.insert() - delete_response = test_app.delete( f"/records/{record_id}", headers={"Authorization": f"Bearer {login_and_get_token}"}, ) assert delete_response.status_code == 204 - assert (await record_instance.find_existing_record()) is None + # Checks the record has been deleted from the database + with pytest.raises(MissingDocumentError): + await Record.find_record_by_id(record_id, {}) + + # Check that waveform and image have been removed from Echo + echo = EchoInterface() + waveform_query = echo.bucket.objects.filter( + Prefix=f"{Waveform.echo_prefix}/{record_id}/", + ) + assert list(waveform_query) == [] + + image_query = echo.bucket.objects.filter( + Prefix=f"{Image.echo_prefix}/{record_id}/", + ) + assert list(image_query) == [] From ac9289077946ab3f695d7f531c24e32078a98f82 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 29 Jul 2024 14:52:18 +0000 Subject: [PATCH 4/8] DSEGOG-321 Improve type hint on `Record.__init__` - `record` can be a `RecordModel` or a dictionary (whose contents will be put into a `RecordModel` in the init) --- operationsgateway_api/src/records/record.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operationsgateway_api/src/records/record.py b/operationsgateway_api/src/records/record.py index 6d23e403..24c477dc 100644 --- a/operationsgateway_api/src/records/record.py +++ b/operationsgateway_api/src/records/record.py @@ -29,7 +29,7 @@ class Record: - def __init__(self, record: RecordModel) -> None: + def __init__(self, record: Union[RecordModel, dict]) -> None: """ Store a record within the object. If the input is a dictionary, it will be converted into a `RecordModel` object From 95321bacc835cbf73e6b76b95fc030f662d91cd2 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 29 Jul 2024 14:53:46 +0000 Subject: [PATCH 5/8] DSEGOG-321 Improve fixture for `TestSubmitHDF` so it removes waveforms and images on teardown - I noticed a bug where after running the tests in `test_submit_hdf.py`, images and waveforms for `20200407142816` wouldn't be deleted from Echo - The fixture was trying to remove waveforms from the database so it was obvious that wasn't going to work - Deletion of images wasn't working because `images/` wasn't being prefixed to the path, meaning the files couldn't be found and therefore deleted - Renamed fixture from `reset_databases()` to `reset_record_storage()` - this seems to be a more appropriate name for what the function is doing --- test/endpoints/conftest.py | 25 +++++++------------------ test/endpoints/test_submit_hdf.py | 12 ++++++------ 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/test/endpoints/conftest.py b/test/endpoints/conftest.py index d2421e9e..c630d18b 100644 --- a/test/endpoints/conftest.py +++ b/test/endpoints/conftest.py @@ -73,27 +73,16 @@ async def remove_record(timestamp_id): ) -async def remove_image(images): +@pytest_asyncio.fixture(scope="function") +async def reset_record_storage(): + yield + record_id = "20200407142816" + await remove_record(record_id) echo = EchoInterface() - for image_path in images: - echo.delete_file_object(image_path) - -async def remove_waveform(): - await MongoDBInterface.delete_one( - "waveforms", - filter_={"_id": "20200407142816_PM-201-HJ-PD"}, - ) + echo.delete_directory(f"{Waveform.echo_prefix}/{record_id}/") + echo.delete_directory(f"{Image.echo_prefix}/{record_id}/") - -@pytest_asyncio.fixture(scope="function") -async def reset_databases(): - yield - await remove_record("20200407142816") - await remove_image( - ["20200407142816/PM-201-FE-CAM-1.png", "20200407142816/PM-201-FE-CAM-2.png"], - ) - await remove_waveform() if os.path.exists("test.h5"): os.remove("test.h5") diff --git a/test/endpoints/test_submit_hdf.py b/test/endpoints/test_submit_hdf.py index 390db557..e5d990a5 100644 --- a/test/endpoints/test_submit_hdf.py +++ b/test/endpoints/test_submit_hdf.py @@ -76,7 +76,7 @@ class TestSubmitHDF: @pytest.mark.asyncio async def test_ingest_data_success( self, - reset_databases, + reset_record_storage, test_app: TestClient, login_and_get_token, ): @@ -121,7 +121,7 @@ async def test_ingest_data_success( @pytest.mark.asyncio async def test_merge_record_success( self, - reset_databases, + reset_record_storage, test_app: TestClient, login_and_get_token, ): @@ -194,7 +194,7 @@ async def test_hdf_rejects( data_version, active_area, expected_response, - reset_databases, + reset_record_storage, test_app: TestClient, login_and_get_token, ): @@ -218,7 +218,7 @@ async def test_hdf_rejects( @pytest.mark.asyncio async def test_partial_import_record_reject( self, - reset_databases, + reset_record_storage, test_app: TestClient, login_and_get_token, ): @@ -249,7 +249,7 @@ async def test_partial_import_record_reject( @pytest.mark.asyncio async def test_channel_all_fail( self, - reset_databases, + reset_record_storage, test_app: TestClient, login_and_get_token, ): @@ -289,7 +289,7 @@ async def test_channel_all_fail( @pytest.mark.asyncio async def test_channel_multiple_reject_types( self, - reset_databases, + reset_record_storage, test_app: TestClient, login_and_get_token, ): From d666f05d6b138ee61697026828baf7562401b086 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Wed, 14 Aug 2024 13:40:53 +0000 Subject: [PATCH 6/8] DSEGOG-321 Fix linting errors --- .../src/records/echo_interface.py | 1 - test/endpoints/conftest.py | 42 +++++++++---------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/operationsgateway_api/src/records/echo_interface.py b/operationsgateway_api/src/records/echo_interface.py index bdd75562..82c8414d 100644 --- a/operationsgateway_api/src/records/echo_interface.py +++ b/operationsgateway_api/src/records/echo_interface.py @@ -138,7 +138,6 @@ def delete_file_object(self, object_path: str) -> None: f"Deletion of {object_path} was unsuccessful", ) from exc - def delete_directory(self, dir_path: str) -> None: """ Given a path, delete an entire 'directory' from Echo. This is used to delete diff --git a/test/endpoints/conftest.py b/test/endpoints/conftest.py index c630d18b..c5936ab3 100644 --- a/test/endpoints/conftest.py +++ b/test/endpoints/conftest.py @@ -115,29 +115,29 @@ async def remove_experiment_fixture(): async def data_for_delete_records(): record_id = "19000000000011" test_record = { - "_id": record_id, - "metadata": { - "epac_ops_data_version": "1.0", - "shotnum": 423648000000, - "timestamp": "2023-06-05T08:00:00", + "_id": record_id, + "metadata": { + "epac_ops_data_version": "1.0", + "shotnum": 423648000000, + "timestamp": "2023-06-05T08:00:00", + }, + "channels": { + "test-scalar-channel-id": { + "metadata": {"channel_dtype": "scalar", "units": "µm"}, + "data": 5.126920467610521, + }, + "test-image-channel-id": { + "metadata": {"channel_dtype": "image"}, + "image_path": f"{record_id}/test-image-channel-id.png", + "thumbnail": "i5~9=", }, - "channels": { - "test-scalar-channel-id": { - "metadata": {"channel_dtype": "scalar", "units": "µm"}, - "data": 5.126920467610521, - }, - "test-image-channel-id": { - "metadata": {"channel_dtype": "image"}, - "image_path": f"{record_id}/test-image-channel-id.png", - "thumbnail": "i5~9=", - }, - "test-waveform-channel-id": { - "metadata": {"channel_dtype": "waveform"}, - "waveform_path": f"{record_id}/test-waveform-channel-id.json", - "thumbnail": "i5~9=", - }, + "test-waveform-channel-id": { + "metadata": {"channel_dtype": "waveform"}, + "waveform_path": f"{record_id}/test-waveform-channel-id.json", + "thumbnail": "i5~9=", }, - } + }, + } record_instance = Record(test_record) await record_instance.insert() From aa7522cf3ac3ca48cd17a153ae27d22c72a39f30 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Wed, 14 Aug 2024 13:42:22 +0000 Subject: [PATCH 7/8] DSEGOG-321 Fix path of test image --- test/endpoints/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/endpoints/conftest.py b/test/endpoints/conftest.py index c5936ab3..c48dac75 100644 --- a/test/endpoints/conftest.py +++ b/test/endpoints/conftest.py @@ -143,7 +143,7 @@ async def data_for_delete_records(): await record_instance.insert() echo = EchoInterface() - with open("test/records/original_image.png", "rb") as f: + with open("test/images/original_image.png", "rb") as f: echo.upload_file_object( f, f"{Image.echo_prefix}/{record_id}/test-image-channel-id.png", From d94fa1ed67613fe5f06c3943ac6e67e771cc6810 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Thu, 15 Aug 2024 11:05:54 +0000 Subject: [PATCH 8/8] DSEGOG-321 Mock Echo storage check to fix test failures - I've added some comments in the test and the docstring of the source code to indicate the check and why it's being mocked in the test --- .../records/ingestion/partial_import_checks.py | 9 ++++++--- test/records/ingestion/test_partial_import.py | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/operationsgateway_api/src/records/ingestion/partial_import_checks.py b/operationsgateway_api/src/records/ingestion/partial_import_checks.py index f10e55ff..1f4f25c9 100644 --- a/operationsgateway_api/src/records/ingestion/partial_import_checks.py +++ b/operationsgateway_api/src/records/ingestion/partial_import_checks.py @@ -89,10 +89,13 @@ def metadata_checks(self): def channel_checks(self): """ - Checks if any of the incoming channels exist in the stored record + Checks if any of the incoming channels exist in the stored record. If the + channels exist, they're rejected, otherwise they become an accepted channel. - if they do they are rejected and a channel response similar to the main channel - checks is returned + For image and waveform channels, a check is conducted to determine whether the + associated image/waveform is stored in Echo; there could be a situation where + there's an image channel in the record, but the image isn't stored in Echo + (perhaps due to a failure in ingestion or someone's manually deleted it) """ ingested_channels = self.ingested_record.channels stored_channels = self.stored_record.channels diff --git a/test/records/ingestion/test_partial_import.py b/test/records/ingestion/test_partial_import.py index bf706c97..78e95082 100644 --- a/test/records/ingestion/test_partial_import.py +++ b/test/records/ingestion/test_partial_import.py @@ -1,4 +1,5 @@ import copy +from unittest.mock import patch import pytest @@ -219,4 +220,16 @@ async def test_import_channel_checks(self, remove_hdf_file, test_type, response) stored_record, ) - assert partial_import_checker.channel_checks() == response + # This test doesn't use any data stored in the database/Echo, it provides + # instances of RecordModel as inputs to PartialImportChecks. For image and + # waveform channels, a check is conducted to make sure the associated + # image/waveform is actually on Echo and because we're not using stored data for + # this test, we need to mock that check + with patch.object( + partial_import_checker, + "_is_image_or_waveform_stored", + ) as mock_is_stored: + mock_is_stored.return_value = True + partial_import_channel_checks = partial_import_checker.channel_checks() + + assert partial_import_channel_checks == response