From 834e9cedb3a0882354eeb6b8f3783e5fe81f21f8 Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Mon, 28 Mar 2022 11:38:05 +0200 Subject: [PATCH 1/7] Added function to delete dataset by pid --- pyscicat/client.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pyscicat/client.py b/pyscicat/client.py index 3056850..8316a09 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -538,6 +538,33 @@ def get_proposal(self, pid: str = None) -> dict: return response.json() + def delete_dataset(self, pid: str = None) -> dict: + """ + Delete dataset by pid + + Parameters + ---------- + pid : str + The pid of the dataset to be deleted + + Returns + ------- + dict + response from SciCat backend + """ + + encoded_pid = urllib.parse.quote_plus(pid) + endpoint = "/Datasets/{}".format(encoded_pid) + url = self._base_url + endpoint + response = self._send_to_scicat(url, cmd='delete') + if not response.ok: + err = response.json()["error"] + logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') + return None + return response.json() + + + def get_file_size(pathobj): filesize = pathobj.lstat().st_size return filesize From 832b476f9921304e7054dc1fb8d70165d7545536 Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Tue, 19 Apr 2022 15:48:54 +0200 Subject: [PATCH 2/7] added functions to load dataset by id and related orig_datablocks --- pyscicat/client.py | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 8316a09..c3b30a2 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -437,6 +437,24 @@ def get_datasets(self, filter_fields=None) -> List[Dataset]: return None return response.json() + def get_dataset_by_pid(self, pid=None) -> Dataset: + """Gets dataset with the pid provided. + + Parameters + ---------- + pid : string + pid of the dataset requested. + """ + + encode_pid = urllib.parse.quote_plus(pid) + url = f"{self._base_url}/Datasets/{encode_pid}" + response = self._send_to_scicat(url, cmd="get") + if not response.ok: + err = response.json()["error"] + logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') + return None + return response.json() + # this method is future, needs testing. # def update_dataset(self, pid, fields: Dict): # response = self._send_to_scicat( @@ -537,6 +555,29 @@ def get_proposal(self, pid: str = None) -> dict: return None return response.json() + def get_dataset_origdatablocks(self, pid: str = None) -> dict: + """ + Get dataset orig datablocks by dataset pid. + + Parameters + ---------- + pid : str + The pid of the dataset + + Returns + ------- + dict + The orig_datablocks of the dataset with the requested pid + """ + + encoded_pid = urllib.parse.quote_plus(pid) + url = f"{self._base_url}/Datasets/{encoded_pid}/origdatablocks" + response = self._send_to_scicat(url, cmd="get") + if not response.ok: + err = response.json()["error"] + logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') + return None + return response.json() def delete_dataset(self, pid: str = None) -> dict: """ @@ -556,7 +597,7 @@ def delete_dataset(self, pid: str = None) -> dict: encoded_pid = urllib.parse.quote_plus(pid) endpoint = "/Datasets/{}".format(encoded_pid) url = self._base_url + endpoint - response = self._send_to_scicat(url, cmd='delete') + response = self._send_to_scicat(url, cmd="delete") if not response.ok: err = response.json()["error"] logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') @@ -564,7 +605,6 @@ def delete_dataset(self, pid: str = None) -> dict: return response.json() - def get_file_size(pathobj): filesize = pathobj.lstat().st_size return filesize From e0c27ecfc21e8b2bcc9bc88d4a63410ae7cf9c5c Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Tue, 26 Apr 2022 16:42:33 +0200 Subject: [PATCH 3/7] Added model and method for PublishedData --- pyscicat/client.py | 30 ++++++++++++++++++++++++++++++ pyscicat/model.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/pyscicat/client.py b/pyscicat/client.py index c3b30a2..1b8b923 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -17,6 +17,7 @@ OrigDatablock, RawDataset, DerivedDataset, + PublishedData, ) logger = logging.getLogger("splash_ingest") @@ -437,6 +438,35 @@ def get_datasets(self, filter_fields=None) -> List[Dataset]: return None return response.json() + def get_published_data(self, filter=None) -> List[PublishedData]: + """Gets published data using the simple fiter mechanism. This + is appropriate when you do not require paging or text search, but + want to be able to limit results based on items in the Dataset object. + + For example, a search for published data of a given doi would have + ```python + filter = {"doi": "1234"} + ``` + + Parameters + ---------- + filter : dict + Dictionary of filtering fields. Must be json serializable. + """ + if not filter: + filter = None + else: + filter = json.dumps(filter) + + url = f'{self._base_url}/PublishedData' + f'?filter={{"where":{filter}}}' if filter else '' + response = self._send_to_scicat(url, cmd="get") + if not response.ok: + err = response.json()["error"] + logger.error(f'{err["name"]}, {err["statusCode"]}: {err["message"]}') + return None + return response.json() + + def get_dataset_by_pid(self, pid=None) -> Dataset: """Gets dataset with the pid provided. diff --git a/pyscicat/model.py b/pyscicat/model.py index 6de34c4..d354f36 100644 --- a/pyscicat/model.py +++ b/pyscicat/model.py @@ -219,3 +219,31 @@ class Attachment(Ownable): thumbnail: str caption: Optional[str] datasetId: str + + +class PublishedData(): + """ + Published Data with registered DOI + """ + + doi: str + affiliation: str + creator: List[str] + publisher: str + publicationYear: int + title: str + url: Optional[str] + abstract: str + dataDescription: str + resourceType: str + numberOfFiles: Optional[int] + sizeOfArchive: Optional[int] + pidArray: List[str] + authors: List[str] + registeredTime: str + status: str + thumbnail: Optional[str] + createdBy: str + updatedBy: str + createdAt: str + updatedAt: str From 85877f36ee95cb36532b97f043b1c08c3c4e9a52 Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Tue, 26 Apr 2022 16:48:47 +0200 Subject: [PATCH 4/7] Fixing bugs in published data functions --- pyscicat/client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 1b8b923..89397f1 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -458,7 +458,11 @@ def get_published_data(self, filter=None) -> List[PublishedData]: else: filter = json.dumps(filter) - url = f'{self._base_url}/PublishedData' + f'?filter={{"where":{filter}}}' if filter else '' + url = f'{self._base_url}/PublishedData' + ( + f'?filter={{"where":{filter}}}' + if filter + else '' + ) response = self._send_to_scicat(url, cmd="get") if not response.ok: err = response.json()["error"] From cb0fa2ca47afe11aca2716b6ff50304307d14dbe Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Fri, 3 Jun 2022 10:30:16 +0200 Subject: [PATCH 5/7] wip right before finalizing th ecode for the PR --- ...imulation_dataset_ess_derived_dataset.json | 257 ++++++++++++++++++ ...n_simulation_dataset_ess_raw_dataset.json} | 2 +- pyscicat/client.py | 223 +++++++++++++-- pyscicat/model.py | 2 +- pyscicat/tests/test_client.py | 2 +- pyscicat/tests/test_new_dataset.py | 116 ++++++-- 6 files changed, 562 insertions(+), 40 deletions(-) create mode 100644 examples/data/ingestion_simulation_dataset_ess_derived_dataset.json rename examples/data/{ingestion_simulation_dataset_ess_dataset.json => ingestion_simulation_dataset_ess_raw_dataset.json} (99%) diff --git a/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json b/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json new file mode 100644 index 0000000..1b0bcea --- /dev/null +++ b/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json @@ -0,0 +1,257 @@ +{ + "id": "9be3bd96-e256-11ec-bd08-f32122965a87", + "dataset": { + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE derived", + "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "investigator": "Max Novelli", + "inputDatasets" : ["0275d813-be6b-444f-812f-b8311d129361"], + "usedSoftware" : ["python","My software"], + "jobParameters" : { + "parameter-1" : "value-1", + "parameter-2" : "value-2" + }, + "jobLogData" : "Some jebrish about the dataset", + "owner": "Massimiliano Novelli", + "ownerEmail": "max.novelli@ess.eu", + "contactEmail": "max.novelli@ess.eu", + "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", + "creationTime": "2022-03-07T15:44:59.000Z", + "type": "derived", + "scientificMetadata": { + "sample_width": { "value": 0.015, "unit": "m" }, + "sample_height": { "value": 0.015, "unit": "m" }, + "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, + "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, + "guide_sample_distance": { "value": 0.6, "unit": "m" }, + "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, + "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, + "moderator_width": { "value": 0.12, "unit": "m" }, + "moderator_height": { "value": 0.03, "unit": "m" }, + "moderator_sample_distance": { "value": 170, "unit": "m" }, + "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, + "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, + "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, + "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, + "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, + "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, + "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, + "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, + "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, + "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, + "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, + "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, + "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, + "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, + "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, + "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, + "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, + "optimization_name": { "value": "PGESKSE", "unit": "" }, + "configuration_summary": { "value": "PGESKSE", "unit": "" }, + "best_figure_of_merit": { "value": "0.25293", "unit": "" }, + "brilliance_transfer": { "value": "0.47344", "unit": "" }, + "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "number_of_parameters": { "value": 2, "unit": "" }, + "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, + "event_writen_present": { "value": true, "unit": "" }, + "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, + "event_done_present": { "value": true, "unit": "" }, + "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, + "event_analysis_present": { "value": true, "unit": "" }, + "event_analysis_file": { "value": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, + "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, + "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, + "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, + "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, + "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, + "Hsize": { "value": 4, "unit": "cm" }, + "moderator_size_y": { "value": 3, "unit": "m" } + }, + "techniques": [ + { + "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", + "name": "Simulation" + } + ], + "size": 68386784, + "instrumentId": "" + }, + "orig_datablock": { + "size": 68386784, + "dataFileList": [ + { + "path": "launch_all.sh", + "size": 10171, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "suggested_reruns-fails.sh", + "size": 448, + "time": "2014-01-23T19:53:04.000Z" + }, + { + "path": "compile_all_py.sh", + "size": 273, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "clean3.sh", + "size": 354, + "time": "2014-01-25T10:44:54.000Z" + }, + { + "path": "master_record-done_4Hsize_3moderator_size_y.txt", + "size": 579, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "master_record-writen_4Hsize_3moderator_size_y.txt", + "size": 561, + "time": "2014-01-23T19:52:38.000Z" + }, + { + "path": "compile_all.sh", + "size": 259, + "time": "2014-01-23T19:52:37.000Z" + }, + { + "path": "output/brill_ref/brilliance_ref_4Hsize_3moderator_size_y.mat", + "size": 11624010, + "time": "2014-01-24T07:56:45.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_acceptance_ess.png", + "size": 521132, + "time": "2014-01-27T11:38:06.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_acceptance_pure.png", + "size": 518423, + "time": "2014-01-27T11:37:52.000Z" + }, + { + "path": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", + "size": 587, + "time": "2014-01-28T14:03:02.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_overall_pure.png", + "size": 144605, + "time": "2014-01-27T11:37:49.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_posdiv_ess.png", + "size": 336496, + "time": "2014-01-27T11:38:04.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y_all.mat", + "size": 34321077, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_overall_ess.png", + "size": 127660, + "time": "2014-01-27T11:38:02.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_geometry.dat", + "size": 2175, + "time": "2014-01-25T00:23:10.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y_ifit_analyse.m", + "size": 19482, + "time": "2014-01-23T19:52:40.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_geometry.png", + "size": 76259, + "time": "2014-01-27T11:38:09.000Z" + }, + { + "path": "output/analysis/PGESKSE_4Hsize_3moderator_size_y1_posdiv_pure.png", + "size": 353828, + "time": "2014-01-27T11:37:50.000Z" + }, + { + "path": "brilliance_refference/brilliance_ifit_4Hsize_3moderator_size_y.m", + "size": 3048, + "time": "2014-01-23T19:52:33.000Z" + }, + { + "path": "brilliance_refference/brilliance_4Hsize_3moderator_size_y1.mat", + "size": 11626979, + "time": "2014-01-24T07:56:42.000Z" + }, + { + "path": "brilliance_refference/brilliance_4Hsize_3moderator_size_y.batch", + "size": 671, + "time": "2014-01-23T19:52:32.000Z" + }, + { + "path": "brilliance_refference/input_used_4Hsize_3moderator_size_y.txt", + "size": 358, + "time": "2014-01-23T19:52:35.000Z" + }, + { + "path": "brilliance_refference/run_brilliance_ifit_4Hsize_3moderator_size_y.m", + "size": 53, + "time": "2014-01-23T19:52:36.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y.batch", + "size": 734, + "time": "2014-01-23T19:52:48.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y_ifit.m", + "size": 11101, + "time": "2014-01-23T19:52:48.000Z" + }, + { + "path": "PGESKSE/err_PGESKSE_4Hsize_3moderator_size_y.txt", + "size": 0, + "time": "2014-01-24T21:13:29.000Z" + }, + { + "path": "PGESKSE/run_PGESKSE_4Hsize_3moderator_size_y_ifit.m", + "size": 50, + "time": "2014-01-23T19:52:51.000Z" + }, + { + "path": "PGESKSE/out_PGESKSE_4Hsize_3moderator_size_y.txt", + "size": 8681220, + "time": "2014-01-25T00:35:58.000Z" + }, + { + "path": "PGESKSE/compile_PGESKSE_py.sh", + "size": 558, + "time": "2014-01-23T19:52:45.000Z" + }, + { + "path": "PGESKSE/compile_PGESKSE.sh", + "size": 540, + "time": "2014-01-23T19:52:45.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y1.par", + "size": 918, + "time": "2014-01-25T00:35:55.000Z" + }, + { + "path": "PGESKSE/PGESKSE_4Hsize_3moderator_size_y1_geometry.dat", + "size": 2175, + "time": "2014-01-25T00:23:10.000Z" + } + ] + }, + "ownable": { + "ownerGroup": "ess", + "accessGroups": ["dmsc"] + } +} + \ No newline at end of file diff --git a/examples/data/ingestion_simulation_dataset_ess_dataset.json b/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json similarity index 99% rename from examples/data/ingestion_simulation_dataset_ess_dataset.json rename to examples/data/ingestion_simulation_dataset_ess_raw_dataset.json index ebd88bd..ae2977e 100644 --- a/examples/data/ingestion_simulation_dataset_ess_dataset.json +++ b/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json @@ -1,7 +1,7 @@ { "id": "0275d813-be6b-444f-812f-b8311d129361", "dataset": { - "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE raw", "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "creationLocation": "DMSC", "principalInvestigator": "Max Novelli", diff --git a/pyscicat/client.py b/pyscicat/client.py index 89397f1..7f96f2f 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -152,8 +152,12 @@ def _send_to_scicat(self, url, dataDict=None, cmd="post"): # err = resp.json()["error"] # raise ScicatCommError(f"Error creating Sample {err}") + def upload_dataset(self, dataset: Dataset) -> str: - """Upload a raw or derived dataset (method is autosensing) + """ + Upload a raw or derived dataset (method is autosensing) + This function has been renamed as upsert. + WE are keeping this implementation for backward compatibility Parameters ---------- @@ -170,6 +174,25 @@ def upload_dataset(self, dataset: Dataset) -> str: ScicatCommError Raises if a non-20x message is returned """ + return self.upsert_dataset(dataset) + + + def upsert_dataset(self, dataset: Dataset) -> str: + """ + Create a new dataset or update an existing one + + + Parameters + ---------- + dataset : Dataset + Dataset to create or update + + Returns + ------- + str + pid of the dataset + """ + if isinstance(dataset, RawDataset): dataset_url = self._base_url + "RawDataSets/replaceOrCreate" elif isinstance(dataset, DerivedDataset): @@ -186,10 +209,39 @@ def upload_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid + + def upload_new_dataset(self, dataset: Dataset) -> str: """ Upload a new dataset. Uses the generic dataset endpoint. - Relys on the endpoint to sense wthe dataset type + Relys on the endpoint to sense the dataset type + This function has been renamed. + We are keeping this implementation for backward compatibility + + Parameters + ---------- + dataset : Dataset + Dataset to create + + Returns + ------- + dataset : Dataset + Dataset created including the pid (or unique identifier) of the newly created dataset + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + return self.create_dataset(dataset) + + + def create_dataset(self, dataset: Dataset) -> str: + """ + Upload a new dataset. Uses the generic dataset endpoint. + Relys on the endpoint to sense the dataset type + This function has been renamed. + We are keeping this implementation for backward compatibility Parameters ---------- @@ -218,8 +270,34 @@ def upload_new_dataset(self, dataset: Dataset) -> str: return resp.json() + def upload_raw_dataset(self, dataset: Dataset) -> str: - """Upload a raw dataset + """ + Upload a raw dataset + This function has been renamed. + We are keeping this implementation for backward compatibility + + Parameters + ---------- + dataset : Dataset + Dataset to load + + Returns + ------- + str + pid (or unique identifier) of the newly created dataset + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + return self.upsert_raw_dataset(dataset) + + + def upsert_raw_dataset(self, dataset: Dataset) -> str: + """ + Create a new raw dataset or update an existing one Parameters ---------- @@ -245,8 +323,34 @@ def upload_raw_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid + def upload_derived_dataset(self, dataset: Dataset) -> str: - """Upload a derived dataset + """ + Upload a derived dataset + This function has been renamed. + We are keeping this implementation for backward compatibility + + Parameters + ---------- + dataset : Dataset + Dataset to upload + + Returns + ------- + str + pid (or unique identifier) of the newly created dataset + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + return self.upsert_derived_dataset(dataset) + + + def upsert_derived_dataset(self, dataset: Dataset) -> str: + """ + Create a new derived dataset or update an existing one Parameters ---------- @@ -274,8 +378,12 @@ def upload_derived_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid + def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): - """Upload a Datablock + """ + Upload a Datablock + This function has been renamed + We are keeping this implementation for backward compatibility Parameters ---------- @@ -292,7 +400,29 @@ def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets ScicatCommError Raises if a non-20x message is returned """ + return self.create_dataset_datablock(datablock,datasetType) + + + def create_dataset_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): + """ + create a new datablock for a dataset. + The dataset can be both Raw or Derived + + Parameters + ---------- + datablock : Datablock + Datablock to upload + + Returns + ------- + datablock : Datablock + The created Datablock with id + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ url = ( self._base_url + f"{datasetType}/{urllib.parse.quote_plus(datablock.datasetId)}/origdatablocks" @@ -304,10 +434,11 @@ def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets return resp.json() - def upload_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: - """ - Post SciCat Dataset OrigDatablock + #def upload_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: + """ + Create a new SciCat Dataset OrigDatablock + Parameters ---------- origdatablock : @@ -324,7 +455,32 @@ def upload_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: Raises if a non-20x message is returned """ + return self.create_dataset_origdatabloack(origdatablock) + + + def create_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: + """ + Create a new SciCat Dataset OrigDatablock + This function has been renamed. + It is still accessible with the original name for backward compatibility + The original name is upload_dataset_origdatablock + + Parameters + ---------- + origdatablock : + The OrigDatablock to create + Returns + ------- + dict + The created OrigDatablock with id + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + + """ encoded_pid = urllib.parse.quote_plus(origdatablock.datasetId) endpoint = "Datasets/" + encoded_pid + "/origdatablocks" url = self._base_url + endpoint @@ -336,10 +492,47 @@ def upload_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: return resp.json() + """ + Create a new SciCat Dataset OrigDatablock + Original name, kept for for backward compatibility + """ + upload_dataset_origdatablock = create_dataset_origdatablock + + def upload_attachment( - self, attachment: Attachment, datasetType: str = "RawDatasets" + self, + attachment: Attachment, + datasetType: str = "RawDatasets" ): - """Upload an Attachment. Note that datasetType can be provided to determine the type of dataset + """ + Upload an Attachment. + Note that datasetType can be provided to determine the type of dataset + that this attachment is attached to. This is required for creating the url that SciCat uses. + THis function has been renamed. + WE are kleeping this implementation for backward compatibility + + Parameters + ---------- + attachment : Attachment + Attachment to upload + + datasetType : str + Type of dataset to upload to, default is `RawDatasets` + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + return self.create_dataset_attachment(attachment,datasetType) + + def create_dataset_attachment( + self, + attachment: Attachment, + datasetType: str = "RawDatasets" + ): + """ + Create a new Attachment for a dataset. + Note that datasetType can be provided to determine the type of dataset that this attachment is attached to. This is required for creating the url that SciCat uses. Parameters @@ -371,6 +564,7 @@ def upload_attachment( err = resp.json()["error"] raise ScicatCommError(f"Error uploading thumbnail. {err}") + def get_datasets_full_query(self, skip=0, limit=25, query_fields=None): """Gets datasets using the fullQuery mechanism of SciCat. This is appropriate for cases where might want paging and cases where you want to perform @@ -447,7 +641,7 @@ def get_published_data(self, filter=None) -> List[PublishedData]: ```python filter = {"doi": "1234"} ``` - + Parameters ---------- filter : dict @@ -458,10 +652,8 @@ def get_published_data(self, filter=None) -> List[PublishedData]: else: filter = json.dumps(filter) - url = f'{self._base_url}/PublishedData' + ( - f'?filter={{"where":{filter}}}' - if filter - else '' + url = f"{self._base_url}/PublishedData" + ( + f'?filter={{"where":{filter}}}' if filter else "" ) response = self._send_to_scicat(url, cmd="get") if not response.ok: @@ -470,7 +662,6 @@ def get_published_data(self, filter=None) -> List[PublishedData]: return None return response.json() - def get_dataset_by_pid(self, pid=None) -> Dataset: """Gets dataset with the pid provided. diff --git a/pyscicat/model.py b/pyscicat/model.py index d354f36..bc9451a 100644 --- a/pyscicat/model.py +++ b/pyscicat/model.py @@ -221,7 +221,7 @@ class Attachment(Ownable): datasetId: str -class PublishedData(): +class PublishedData: """ Published Data with registered DOI """ diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py index 39ba2ca..8ce83fc 100644 --- a/pyscicat/tests/test_client.py +++ b/pyscicat/tests/test_client.py @@ -40,7 +40,7 @@ def add_mock_requests(mock_request): mock_request.post(local_url + "Datasets", json={"pid": "17"}) -def test_scicate_ingest(): +def test_scicat_ingest(): with requests_mock.Mocker() as mock_request: add_mock_requests(mock_request) scicat = from_credentials( diff --git a/pyscicat/tests/test_new_dataset.py b/pyscicat/tests/test_new_dataset.py index e530db4..dafbc6a 100644 --- a/pyscicat/tests/test_new_dataset.py +++ b/pyscicat/tests/test_new_dataset.py @@ -12,46 +12,117 @@ Ownable, ) -global test_dataset +global test_datasets local_url = "http://localhost:3000/api/v3/" -test_dataset_file = "../../examples/data/ingestion_simulation_dataset_ess_dataset.json" -test_dataset = None +test_dataset_files = { + 'raw' : "../../examples/data/ingestion_simulation_dataset_ess_raw_dataset.json", + 'derived' : "../../examples/data/ingestion_simulation_dataset_ess_derived_dataset.json" +} +test_datasets = {} def set_up_test_environment(mock_request): - global test_dataset + global test_datasets # load test data - data_file_path = Path(__file__).parent.joinpath(test_dataset_file).resolve() - with open(data_file_path, "r") as fh: - test_dataset = json.load(fh) + for name, path in test_dataset_files.items(): + data_file_path = Path(__file__).parent.joinpath(path).resolve() + with open(data_file_path, "r") as fh: + test_datasets[name] = json.load(fh) mock_request.post( local_url + "Users/login", json={"id": "a_token"}, ) +def set_up_mock_raw_dataset(mock_request): + data = test_datasets['raw'] + mock_request.post( local_url + "Datasets", - json={**{"pid": test_dataset["id"]}, **test_dataset["dataset"]}, + json={**{"pid": data["id"]}, **data["dataset"]}, ) - encoded_pid = urllib.parse.quote_plus(test_dataset["id"]) + encoded_pid = urllib.parse.quote_plus(data["id"]) mock_request.post( local_url + "Datasets/" + encoded_pid + "/origdatablocks", json={ - "size": test_dataset["orig_datablock"]["size"], - "datasetId": test_dataset["id"], - "dataFileList": test_dataset["orig_datablock"]["dataFileList"], + "size": data["orig_datablock"]["size"], + "datasetId": data["id"], + "dataFileList": data["orig_datablock"]["dataFileList"], }, ) + return data + + +def set_up_mock_derived_dataset(mock_request): + data = test_datasets['derived'] + + mock_request.post( + local_url + "Datasets", + json={**{"pid": data["id"]}, **data["dataset"]}, + ) + + encoded_pid = urllib.parse.quote_plus(data["id"]) + mock_request.post( + local_url + "Datasets/" + encoded_pid + "/origdatablocks", + json={ + "size": data["orig_datablock"]["size"], + "datasetId": data["id"], + "dataFileList": data["orig_datablock"]["dataFileList"], + }, + ) + + return data + + +def test_scicat_ingest_raw_dataset(): + with requests_mock.Mocker() as mock_request: + set_up_test_environment(mock_request) + data = set_up_mock_raw_dataset(mock_request) + scicat = ScicatClient( + base_url=local_url, + username="Zaphod", + password="heartofgold", + ) + assert ( + scicat._token == "a_token" + ), "scicat client set the token given by the server" + + ownable = Ownable(**data['ownable']) + + # Create Dataset + dataset = RawDataset( + **data["dataset"], + **ownable.dict() + ) + created_dataset = scicat.create_dataset(dataset) + + assert created_dataset["pid"] == data["id"] -def test_scicate_ingest_raw_dataset(): + # origDatablock with DataFiles + origDataBlock = OrigDatablock( + size=data["orig_datablock"]["size"], + datasetId=created_dataset["pid"], + dataFileList=[ + DataFile(**file) + for file in data["orig_datablock"]["dataFileList"] + ], + **ownable.dict() + ) + created_origdatablock = scicat.create_dataset_origdatablock(origDataBlock) + assert len(created_origdatablock["dataFileList"]) == len( + data["orig_datablock"]["dataFileList"] + ) + + +def test_scicat_ingest_derived_dataset(): with requests_mock.Mocker() as mock_request: set_up_test_environment(mock_request) + data = set_up_mock_derived_dataset(mock_request) scicat = ScicatClient( base_url=local_url, username="Zaphod", @@ -61,25 +132,28 @@ def test_scicate_ingest_raw_dataset(): scicat._token == "a_token" ), "scicat client set the token given by the server" - ownable = Ownable(ownerGroup="magrathea", accessGroups=["deep_though"]) + ownable = Ownable(**data['ownable']) # Create Dataset - dataset = RawDataset(**test_dataset["dataset"], **ownable.dict()) - created_dataset = scicat.upload_new_dataset(dataset) + dataset = RawDataset( + **data["dataset"], + **ownable.dict() + ) + created_dataset = scicat.create_dataset(dataset) - assert created_dataset["pid"] == test_dataset["id"] + assert created_dataset["pid"] == data["id"] # origDatablock with DataFiles origDataBlock = OrigDatablock( - size=test_dataset["orig_datablock"]["size"], + size=data["orig_datablock"]["size"], datasetId=created_dataset["pid"], dataFileList=[ DataFile(**file) - for file in test_dataset["orig_datablock"]["dataFileList"] + for file in data["orig_datablock"]["dataFileList"] ], **ownable.dict() ) - created_origdatablock = scicat.upload_dataset_origdatablock(origDataBlock) + created_origdatablock = scicat.create_dataset_origdatablock(origDataBlock) assert len(created_origdatablock["dataFileList"]) == len( - test_dataset["orig_datablock"]["dataFileList"] + data["orig_datablock"]["dataFileList"] ) From a9edeab88d09303f8fa8b9d3d9263715929d2693 Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Fri, 3 Jun 2022 13:47:35 +0200 Subject: [PATCH 6/7] Fixed naming convention on few functions, added test on published data --- examples/data/published_data.json | 56 ++++ pyscicat/client.py | 270 ++++++------------ .../{test_new_dataset.py => test_suite_2.py} | 37 ++- 3 files changed, 185 insertions(+), 178 deletions(-) create mode 100644 examples/data/published_data.json rename pyscicat/tests/{test_new_dataset.py => test_suite_2.py} (82%) diff --git a/examples/data/published_data.json b/examples/data/published_data.json new file mode 100644 index 0000000..ab2680d --- /dev/null +++ b/examples/data/published_data.json @@ -0,0 +1,56 @@ +[ + { + "doi": "10.17199/03dd9804-1b04-4d36-b0fb-cf66e9891e7d", + "affiliation": "ESS", + "creator": [ + "Oliver Lohmann" + ], + "publisher": "ESS", + "publicationYear": 2019, + "title": "SANS/Reflectometry", + "url": "", + "abstract": "SANS/Reflectometry", + "dataDescription": "https://github.com/ess-dmsc/ess_file_formats/wiki/NeXus", + "resourceType": "NeXus HDF5", + "numberOfFiles": null, + "sizeOfArchive": null, + "pidArray": [ + "20.500.12269/0a269002-83e2-4f18-bb98-36c01836d66a" + ], + "authors": [ + "Oliver Lohmann" + ], + "registeredTime": "2020-09-01T14:16:15.552Z", + "status": "registered", + "thumbnail": "", + "createdBy": "admin", + "updatedBy": "admin", + "createdAt": "2020-01-03T19:38:34.203Z", + "updatedAt": "2020-09-09T09:37:58.023Z" + }, + { + "doi": "10.17199/165f8a52-c15d-4c96-ad7d-fb0cbe969f66", + "creator": [ + "Peter Kadletz" + ], + "publisher": "ESS", + "publicationYear": 2020, + "title": "Final bte", + "url": "", + "abstract": "Peter Kadletz, Tobias Richter", + "dataDescription": "https://github.com/ess-dmsc/ess_file_formats/wiki/NeXus", + "resourceType": "raw", + "numberOfFiles": null, + "sizeOfArchive": null, + "pidArray": [ + "20.500.12269/2511nicos_00002511.hdf" + ], + "registeredTime": "2020-09-01T14:16:17.272Z", + "status": "registered", + "scicatUser": "ingestor", + "thumbnail": "", + "updatedBy": "admin", + "createdAt": "2022-06-03T11:16:09.681Z", + "updatedAt": "2020-09-09T09:37:58.094Z" + } +] \ No newline at end of file diff --git a/pyscicat/client.py b/pyscicat/client.py index 7f96f2f..492cde5 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -153,34 +153,13 @@ def _send_to_scicat(self, url, dataDict=None, cmd="post"): # raise ScicatCommError(f"Error creating Sample {err}") - def upload_dataset(self, dataset: Dataset) -> str: - """ - Upload a raw or derived dataset (method is autosensing) - This function has been renamed as upsert. - WE are keeping this implementation for backward compatibility - - Parameters - ---------- - dataset : Dataset - Dataset to load - - Returns - ------- - str - pid (or unique identifier) of the newly created dataset - - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - return self.upsert_dataset(dataset) - - def upsert_dataset(self, dataset: Dataset) -> str: """ Create a new dataset or update an existing one - + This function was renamed. + It is still accessible with the original name for backward compatibility + The original name was upload_dataset + Parameters ---------- @@ -209,39 +188,20 @@ def upsert_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid + """ + Upload or create a new dataset + Original name, kept for for backward compatibility + """ + upload_dataset = upsert_dataset - def upload_new_dataset(self, dataset: Dataset) -> str: - """ - Upload a new dataset. Uses the generic dataset endpoint. - Relys on the endpoint to sense the dataset type - This function has been renamed. - We are keeping this implementation for backward compatibility - - Parameters - ---------- - dataset : Dataset - Dataset to create - - Returns - ------- - dataset : Dataset - Dataset created including the pid (or unique identifier) of the newly created dataset - - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - return self.create_dataset(dataset) - - def create_dataset(self, dataset: Dataset) -> str: """ Upload a new dataset. Uses the generic dataset endpoint. - Relys on the endpoint to sense the dataset type - This function has been renamed. - We are keeping this implementation for backward compatibility + Relies on the endpoint to sense the dataset type + This function was renamed. + It is still accessible with the original name for backward compatibility + The original name was upload_new_dataset Parameters ---------- @@ -270,34 +230,21 @@ def create_dataset(self, dataset: Dataset) -> str: return resp.json() - - def upload_raw_dataset(self, dataset: Dataset) -> str: - """ - Upload a raw dataset - This function has been renamed. - We are keeping this implementation for backward compatibility - - Parameters - ---------- - dataset : Dataset - Dataset to load - - Returns - ------- - str - pid (or unique identifier) of the newly created dataset - - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - return self.upsert_raw_dataset(dataset) + """ + Upload a new dataset + Original name, kept for for backward compatibility + """ + upload_new_dataset = create_dataset + + def upsert_raw_dataset(self, dataset: Dataset) -> str: """ Create a new raw dataset or update an existing one + This function was renamed. + It is still accessible with the original name for backward compatibility + The original name was upload_raw_dataset Parameters ---------- @@ -324,33 +271,20 @@ def upsert_raw_dataset(self, dataset: Dataset) -> str: return new_pid - def upload_derived_dataset(self, dataset: Dataset) -> str: - """ - Upload a derived dataset - This function has been renamed. - We are keeping this implementation for backward compatibility - - Parameters - ---------- - dataset : Dataset - Dataset to upload - - Returns - ------- - str - pid (or unique identifier) of the newly created dataset + """ + Upload a raw dataset + Original name, kept for for backward compatibility + """ + upload_raw_dataset = upsert_raw_dataset - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - return self.upsert_derived_dataset(dataset) def upsert_derived_dataset(self, dataset: Dataset) -> str: """ Create a new derived dataset or update an existing one + This function was renamed. + It is still accessible with the original name for backward compatibility + The original name was upsert_derived_dataset Parameters ---------- @@ -378,35 +312,19 @@ def upsert_derived_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid - - def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): - """ - Upload a Datablock - This function has been renamed - We are keeping this implementation for backward compatibility - - Parameters - ---------- - datablock : Datablock - Datablock to upload - - Returns - ------- - datablock : Datablock - The created Datablock with id - - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - return self.create_dataset_datablock(datablock,datasetType) + """ + Upload a derived dataset + Original name, kept for for backward compatibility + """ + upload_derived_dataset = upsert_derived_dataset def create_dataset_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): """ - create a new datablock for a dataset. - The dataset can be both Raw or Derived + Create a new datablock for a dataset. + The dataset can be both Raw or Derived. + It is still accessible with the original name for backward compatibility + The original name was upload_datablock Parameters ---------- @@ -434,36 +352,20 @@ def create_dataset_datablock(self, datablock: Datablock, datasetType: str = "Raw return resp.json() - - #def upload_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: - """ - Create a new SciCat Dataset OrigDatablock - - Parameters - ---------- - origdatablock : - The OrigDatablock to create - - Returns - ------- - dict - The created OrigDatablock with id - - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - - """ - return self.create_dataset_origdatabloack(origdatablock) + """ + Upload a Datablock + Original name, kept for for backward compatibility + """ + upload_datablock = create_dataset_datablock + def create_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: """ Create a new SciCat Dataset OrigDatablock This function has been renamed. It is still accessible with the original name for backward compatibility - The original name is upload_dataset_origdatablock + The original name was upload_dataset_origdatablock Parameters ---------- @@ -499,31 +401,6 @@ def create_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: upload_dataset_origdatablock = create_dataset_origdatablock - def upload_attachment( - self, - attachment: Attachment, - datasetType: str = "RawDatasets" - ): - """ - Upload an Attachment. - Note that datasetType can be provided to determine the type of dataset - that this attachment is attached to. This is required for creating the url that SciCat uses. - THis function has been renamed. - WE are kleeping this implementation for backward compatibility - - Parameters - ---------- - attachment : Attachment - Attachment to upload - - datasetType : str - Type of dataset to upload to, default is `RawDatasets` - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - return self.create_dataset_attachment(attachment,datasetType) def create_dataset_attachment( self, @@ -534,6 +411,9 @@ def create_dataset_attachment( Create a new Attachment for a dataset. Note that datasetType can be provided to determine the type of dataset that this attachment is attached to. This is required for creating the url that SciCat uses. + This function has been renamed. + It is still accessible with the original name for backward compatibility + The original name was upload_attachment Parameters ---------- @@ -564,9 +444,16 @@ def create_dataset_attachment( err = resp.json()["error"] raise ScicatCommError(f"Error uploading thumbnail. {err}") + """ + Create a new attachement for a dataset + Original name, kept for for backward compatibility + """ + upload_attachment = create_dataset_attachment + - def get_datasets_full_query(self, skip=0, limit=25, query_fields=None): - """Gets datasets using the fullQuery mechanism of SciCat. This is + def find_datasets_full_query(self, skip=0, limit=25, query_fields=None): + """ + Gets datasets using the fullQuery mechanism of SciCat. This is appropriate for cases where might want paging and cases where you want to perform a text search on the Datasets collection. The full features of fullQuery search are beyond this document. @@ -576,6 +463,10 @@ def get_datasets_full_query(self, skip=0, limit=25, query_fields=None): To query based on the full text search, send `{"text": " List[Dataset]: - """Gets datasets using the simple fiter mechanism. This + """ + find a set of datasets according the full query provided + Original name, kept for for backward compatibility + """ + get_datasets_full_query = find_datasets_full_query + + + + def find_datasets(self, filter_fields=None) -> List[Dataset]: + """ + Gets datasets using the simple fiter mechanism. This is appropriate when you do not require paging or text search, but want to be able to limit results based on items in the Dataset object. @@ -632,8 +532,16 @@ def get_datasets(self, filter_fields=None) -> List[Dataset]: return None return response.json() - def get_published_data(self, filter=None) -> List[PublishedData]: - """Gets published data using the simple fiter mechanism. This + """ + find a set of datasets according to the simple filter provided + Original name, kept for for backward compatibility + """ + get_datasets = find_datasets + + + def find_published_data(self, filter=None) -> List[PublishedData]: + """ + retrieve all the published data using the simple fiter mechanism. This is appropriate when you do not require paging or text search, but want to be able to limit results based on items in the Dataset object. @@ -652,9 +560,10 @@ def get_published_data(self, filter=None) -> List[PublishedData]: else: filter = json.dumps(filter) - url = f"{self._base_url}/PublishedData" + ( + url = f"{self._base_url}PublishedData" + ( f'?filter={{"where":{filter}}}' if filter else "" ) + print(url) response = self._send_to_scicat(url, cmd="get") if not response.ok: err = response.json()["error"] @@ -662,6 +571,13 @@ def get_published_data(self, filter=None) -> List[PublishedData]: return None return response.json() + """ + find a set of published data according to the simple filter provided + Original name, kept for for backward compatibility + """ + get_published_data = find_published_data + + def get_dataset_by_pid(self, pid=None) -> Dataset: """Gets dataset with the pid provided. diff --git a/pyscicat/tests/test_new_dataset.py b/pyscicat/tests/test_suite_2.py similarity index 82% rename from pyscicat/tests/test_new_dataset.py rename to pyscicat/tests/test_suite_2.py index dafbc6a..1a36106 100644 --- a/pyscicat/tests/test_new_dataset.py +++ b/pyscicat/tests/test_suite_2.py @@ -17,7 +17,8 @@ local_url = "http://localhost:3000/api/v3/" test_dataset_files = { 'raw' : "../../examples/data/ingestion_simulation_dataset_ess_raw_dataset.json", - 'derived' : "../../examples/data/ingestion_simulation_dataset_ess_derived_dataset.json" + 'derived' : "../../examples/data/ingestion_simulation_dataset_ess_derived_dataset.json", + 'published_data' : "../../examples/data/published_data.json" } test_datasets = {} @@ -37,6 +38,7 @@ def set_up_test_environment(mock_request): json={"id": "a_token"}, ) + def set_up_mock_raw_dataset(mock_request): data = test_datasets['raw'] @@ -79,6 +81,19 @@ def set_up_mock_derived_dataset(mock_request): return data +def set_up_mock_published_data(mock_request): + data = test_datasets['published_data'] + + mock_url = local_url + "PublishedData" + print("Mock : " + mock_url) + mock_request.get( + mock_url, + json=data, + ) + + return data + + def test_scicat_ingest_raw_dataset(): with requests_mock.Mocker() as mock_request: set_up_test_environment(mock_request) @@ -157,3 +172,23 @@ def test_scicat_ingest_derived_dataset(): assert len(created_origdatablock["dataFileList"]) == len( data["orig_datablock"]["dataFileList"] ) + + +def test_scicat_find_published_data(): + with requests_mock.Mocker() as mock_request: + set_up_test_environment(mock_request) + data = set_up_mock_published_data(mock_request) + scicat = ScicatClient( + base_url=local_url, + username="Zaphod", + password="heartofgold", + ) + assert ( + scicat._token == "a_token" + ), "scicat client set the token given by the server" + + returned_data = scicat.find_published_data() + + assert len(data) == len(returned_data) + assert data == returned_data + From 479d41dc069bf95e20cca0dead136173e9b68e09 Mon Sep 17 00:00:00 2001 From: Massimiliano Novelli Date: Fri, 3 Jun 2022 14:34:37 +0200 Subject: [PATCH 7/7] fixed linting and code styling --- ...imulation_dataset_ess_derived_dataset.json | 131 +++++++++-------- ...on_simulation_dataset_ess_raw_dataset.json | 137 +++++++++--------- examples/data/published_data.json | 2 +- pyscicat/client.py | 30 +--- pyscicat/tests/test_suite_2.py | 33 ++--- requirements-hdf5.txt | 2 +- requirements.txt | 2 +- 7 files changed, 155 insertions(+), 182 deletions(-) diff --git a/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json b/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json index 1b0bcea..53c18b4 100644 --- a/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json +++ b/examples/data/ingestion_simulation_dataset_ess_derived_dataset.json @@ -1,9 +1,9 @@ { - "id": "9be3bd96-e256-11ec-bd08-f32122965a87", + "id": "9be3bd96-e256-11ec-bd08-f32122965a87", "dataset": { - "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE derived", - "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", - "investigator": "Max Novelli", + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE derived", + "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "investigator": "Max Novelli", "inputDatasets" : ["0275d813-be6b-444f-812f-b8311d129361"], "usedSoftware" : ["python","My software"], "jobParameters" : { @@ -11,76 +11,76 @@ "parameter-2" : "value-2" }, "jobLogData" : "Some jebrish about the dataset", - "owner": "Massimiliano Novelli", - "ownerEmail": "max.novelli@ess.eu", - "contactEmail": "max.novelli@ess.eu", - "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", - "creationTime": "2022-03-07T15:44:59.000Z", - "type": "derived", + "owner": "Massimiliano Novelli", + "ownerEmail": "max.novelli@ess.eu", + "contactEmail": "max.novelli@ess.eu", + "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", + "creationTime": "2022-03-07T15:44:59.000Z", + "type": "derived", "scientificMetadata": { - "sample_width": { "value": 0.015, "unit": "m" }, - "sample_height": { "value": 0.015, "unit": "m" }, - "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, - "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, - "guide_sample_distance": { "value": 0.6, "unit": "m" }, - "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, - "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, - "moderator_width": { "value": 0.12, "unit": "m" }, - "moderator_height": { "value": 0.03, "unit": "m" }, - "moderator_sample_distance": { "value": 170, "unit": "m" }, - "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, - "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, - "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, - "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, - "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, - "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, - "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, - "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, - "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, - "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, - "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, - "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, - "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, - "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, - "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, - "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, - "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, - "optimization_name": { "value": "PGESKSE", "unit": "" }, - "configuration_summary": { "value": "PGESKSE", "unit": "" }, - "best_figure_of_merit": { "value": "0.25293", "unit": "" }, - "brilliance_transfer": { "value": "0.47344", "unit": "" }, - "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, - "number_of_parameters": { "value": 2, "unit": "" }, - "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, - "event_writen_present": { "value": true, "unit": "" }, - "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, - "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, - "event_done_present": { "value": true, "unit": "" }, - "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, - "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, - "event_analysis_present": { "value": true, "unit": "" }, + "sample_width": { "value": 0.015, "unit": "m" }, + "sample_height": { "value": 0.015, "unit": "m" }, + "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, + "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, + "guide_sample_distance": { "value": 0.6, "unit": "m" }, + "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, + "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, + "moderator_width": { "value": 0.12, "unit": "m" }, + "moderator_height": { "value": 0.03, "unit": "m" }, + "moderator_sample_distance": { "value": 170, "unit": "m" }, + "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, + "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, + "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, + "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, + "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, + "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, + "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, + "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, + "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, + "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, + "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, + "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, + "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, + "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, + "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, + "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, + "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, + "optimization_name": { "value": "PGESKSE", "unit": "" }, + "configuration_summary": { "value": "PGESKSE", "unit": "" }, + "best_figure_of_merit": { "value": "0.25293", "unit": "" }, + "brilliance_transfer": { "value": "0.47344", "unit": "" }, + "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "number_of_parameters": { "value": 2, "unit": "" }, + "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, + "event_writen_present": { "value": true, "unit": "" }, + "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, + "event_done_present": { "value": true, "unit": "" }, + "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, + "event_analysis_present": { "value": true, "unit": "" }, "event_analysis_file": { "value": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", "unit": "" }, - "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, - "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, - "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, - "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, - "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, - "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, - "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, - "Hsize": { "value": 4, "unit": "cm" }, + "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, + "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, + "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, + "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, + "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, + "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, + "Hsize": { "value": 4, "unit": "cm" }, "moderator_size_y": { "value": 3, "unit": "m" } - }, + }, "techniques": [ { - "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", + "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", "name": "Simulation" } - ], - "size": 68386784, + ], + "size": 68386784, "instrumentId": "" - }, + }, "orig_datablock": { - "size": 68386784, + "size": 68386784, "dataFileList": [ { "path": "launch_all.sh", @@ -254,4 +254,3 @@ "accessGroups": ["dmsc"] } } - \ No newline at end of file diff --git a/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json b/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json index ae2977e..c2b5817 100644 --- a/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json +++ b/examples/data/ingestion_simulation_dataset_ess_raw_dataset.json @@ -1,82 +1,82 @@ { - "id": "0275d813-be6b-444f-812f-b8311d129361", + "id": "0275d813-be6b-444f-812f-b8311d129361", "dataset": { - "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE raw", - "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", - "creationLocation": "DMSC", - "principalInvestigator": "Max Novelli", - "owner": "Massimiliano Novelli", - "ownerEmail": "max.novelli@ess.eu", - "contactEmail": "max.novelli@ess.eu", - "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", - "creationTime": "2022-03-07T15:44:59.000Z", - "type": "raw", + "datasetName": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE raw", + "description": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", + "creationLocation": "DMSC", + "principalInvestigator": "Max Novelli", + "owner": "Massimiliano Novelli", + "ownerEmail": "max.novelli@ess.eu", + "contactEmail": "max.novelli@ess.eu", + "sourceFolder": "/mnt/data/simulation/CAMEA/CAMEA31", + "creationTime": "2022-03-07T15:44:59.000Z", + "type": "raw", "scientificMetadata": { - "sample_width": { "value": 0.015, "unit": "m" }, - "sample_height": { "value": 0.015, "unit": "m" }, - "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, - "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, - "guide_sample_distance": { "value": 0.6, "unit": "m" }, - "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, - "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, - "moderator_width": { "value": 0.12, "unit": "m" }, - "moderator_height": { "value": 0.03, "unit": "m" }, - "moderator_sample_distance": { "value": 170, "unit": "m" }, - "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, - "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, - "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, - "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, - "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, - "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, - "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, - "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, - "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, - "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, - "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, - "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, - "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, - "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, - "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, - "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, - "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, - "optimization_name": { "value": "PGESKSE", "unit": "" }, - "configuration_summary": { "value": "PGESKSE", "unit": "" }, - "best_figure_of_merit": { "value": "0.25293", "unit": "" }, - "brilliance_transfer": { "value": "0.47344", "unit": "" }, - "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, - "number_of_parameters": { "value": 2, "unit": "" }, - "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, - "event_writen_present": { "value": true, "unit": "" }, - "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, - "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, - "event_done_present": { "value": true, "unit": "" }, - "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, - "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, - "event_analysis_present": { "value": true, "unit": "" }, + "sample_width": { "value": 0.015, "unit": "m" }, + "sample_height": { "value": 0.015, "unit": "m" }, + "divergence_requirement_horizontal": { "value": 0.75, "unit": "deg" }, + "divergence_requirement_vertical": { "value": 1, "unit": "deg" }, + "guide_sample_distance": { "value": 0.6, "unit": "m" }, + "lower_wavelength_limit": { "value": 1, "unit": "\u00c5" }, + "upper_wavelength_limit": { "value": 3.6, "unit": "\u00c5" }, + "moderator_width": { "value": 0.12, "unit": "m" }, + "moderator_height": { "value": 0.03, "unit": "m" }, + "moderator_sample_distance": { "value": 170, "unit": "m" }, + "parsing_variables": { "value": "guide_start , startx1 , starty1 , length1", "unit": "" }, + "parsing_min_guide_start": { "value": 2.000035881054106, "unit": "m" }, + "parsing_max_guide_start": { "value": 5.407538318585075, "unit": "m" }, + "parsing_mean_guide_start": { "value": 2.3475508029429557, "unit": "m" }, + "parsing_std_guide_start": { "value": 0.5522363822422368, "unit": "m" }, + "parsing_min_startx1": { "value": 0.006706596967962139, "unit": "m" }, + "parsing_max_startx1": { "value": 0.1460959338571846, "unit": "m" }, + "parsing_mean_startx1": { "value": 0.08885675463366878, "unit": "m" }, + "parsing_std_startx1": { "value": 0.017699812942929365, "unit": "m" }, + "parsing_min_starty1": { "value": 0.011762187831963904, "unit": "m" }, + "parsing_max_starty1": { "value": 0.14999127413576652, "unit": "m" }, + "parsing_mean_starty1": { "value": 0.13009670276273638, "unit": "m" }, + "parsing_std_starty1": { "value": 0.011522927034872269, "unit": "m" }, + "parsing_min_length1": { "value": 28.915197821153896, "unit": "" }, + "parsing_max_length1": { "value": 95.07944574028325, "unit": "" }, + "parsing_mean_length1": { "value": 64.23126877070395, "unit": "" }, + "parsing_std_length1": { "value": 10.210341803833671, "unit": "" }, + "optimization_name": { "value": "PGESKSE", "unit": "" }, + "configuration_summary": { "value": "PGESKSE", "unit": "" }, + "best_figure_of_merit": { "value": "0.25293", "unit": "" }, + "brilliance_transfer": { "value": "0.47344", "unit": "" }, + "event_file_name_suffix": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "number_of_parameters": { "value": 2, "unit": "" }, + "parameters_name": { "value": "Hsize , moderator_size_y", "unit": "" }, + "event_writen_present": { "value": true, "unit": "" }, + "event_writen_file": { "value": "master_record-writen_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_writen_timestamp": { "value": "2014-01-23T19:52:38", "unit": "" }, + "event_done_present": { "value": true, "unit": "" }, + "event_done_file": { "value": "master_record-done_4Hsize_3moderator_size_y.txt", "unit": "" }, + "event_done_timestamp": { "value": "2014-01-25T00:35:55", "unit": "" }, + "event_analysis_present": { "value": true, "unit": "" }, "event_analysis_file": { "value": "output/analysis/master_record-analyzed_4Hsize_3moderator_size_y.txt", "unit": "" }, - "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, - "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, - "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, - "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, - "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, - "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, - "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, - "Hsize": { "value": 4, "unit": "cm" }, + "event_analysis_timestamp": { "value": "2014-01-28T14:03:02", "unit": "" }, + "dataset_name": { "value": "CAMEA CAMEA31 Hsize 4 moderator_size_y 3 PGESKSE", "unit": "" }, + "run_name": { "value": "CAMEA CAMEA31", "unit": "" }, + "scan_name": { "value": "4Hsize_3moderator_size_y", "unit": "" }, + "output_file_name_base": { "value": "PGESKSE_4Hsize_3moderator_size_y", "unit": "" }, + "dataset_access_path": { "value": "/mnt/data/simulation/CAMEA/CAMEA31", "unit": "" }, + "parameters_structure": { "value": "[{\"name\": \"Hsize\", \"value\": \"1.5\", \"index\": \"4\"}, {\"name\": \"moderator_size_y\", \"value\": \"0.03\", \"index\": \"3\"}]", "unit": "" }, + "Hsize": { "value": 4, "unit": "cm" }, "moderator_size_y": { "value": 3, "unit": "m" } - }, + }, "techniques": [ { - "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", + "pid": "fe888574-5cc0-11ec-90c3-bf82943dec35", "name": "Simulation" } - ], - "size": 68386784, - "instrumentId": "", - "sampleId": "", + ], + "size": 68386784, + "instrumentId": "", + "sampleId": "", "proposalId": "" - }, + }, "orig_datablock": { - "size": 68386784, + "size": 68386784, "dataFileList": [ { "path": "launch_all.sh", @@ -250,4 +250,3 @@ "accessGroups": ["dmsc"] } } - \ No newline at end of file diff --git a/examples/data/published_data.json b/examples/data/published_data.json index ab2680d..54a573f 100644 --- a/examples/data/published_data.json +++ b/examples/data/published_data.json @@ -53,4 +53,4 @@ "createdAt": "2022-06-03T11:16:09.681Z", "updatedAt": "2020-09-09T09:37:58.094Z" } -] \ No newline at end of file +] diff --git a/pyscicat/client.py b/pyscicat/client.py index 1f4d5c3..1d2063a 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -152,7 +152,6 @@ def _send_to_scicat(self, url, dataDict=None, cmd="post"): # err = resp.json()["error"] # raise ScicatCommError(f"Error creating Sample {err}") - def replace_dataset(self, dataset: Dataset) -> str: """ Create a new dataset or update an existing one @@ -171,7 +170,7 @@ def replace_dataset(self, dataset: Dataset) -> str: str pid of the dataset """ - + if isinstance(dataset, RawDataset): dataset_url = self._base_url + "RawDataSets/replaceOrCreate" elif isinstance(dataset, DerivedDataset): @@ -194,7 +193,6 @@ def replace_dataset(self, dataset: Dataset) -> str: """ upload_dataset = replace_dataset - def create_dataset(self, dataset: Dataset) -> str: """ Upload a new dataset. Uses the generic dataset endpoint. @@ -235,8 +233,7 @@ def create_dataset(self, dataset: Dataset) -> str: Original name, kept for for backward compatibility """ upload_new_dataset = create_dataset - - + def replace_raw_dataset(self, dataset: Dataset) -> str: """ Create a new raw dataset or update an existing one @@ -268,14 +265,12 @@ def replace_raw_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid - """ Upload a raw dataset Original name, kept for for backward compatibility """ upload_raw_dataset = replace_raw_dataset - def replace_derived_dataset(self, dataset: Dataset) -> str: """ Create a new derived dataset or update an existing one @@ -315,7 +310,6 @@ def replace_derived_dataset(self, dataset: Dataset) -> str: """ upload_derived_dataset = replace_derived_dataset - def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: """ Upsert a raw dataset @@ -348,7 +342,6 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: logger.info(f"dataset upserted {new_pid}") return new_pid - def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: """ Upsert a derived dataset @@ -382,8 +375,9 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: logger.info(f"dataset upserted {new_pid}") return new_pid - - def create_dataset_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): + def create_dataset_datablock( + self, datablock: Datablock, datasetType: str = "RawDatasets" + ): """ Create a new datablock for a dataset. The dataset can be both Raw or Derived. @@ -422,7 +416,6 @@ def create_dataset_datablock(self, datablock: Datablock, datasetType: str = "Raw """ upload_datablock = create_dataset_datablock - def create_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: """ Create a new SciCat Dataset OrigDatablock @@ -463,15 +456,11 @@ def create_dataset_origdatablock(self, origdatablock: OrigDatablock) -> dict: """ upload_dataset_origdatablock = create_dataset_origdatablock - - def create_dataset_attachment( - self, - attachment: Attachment, - datasetType: str = "RawDatasets" + self, attachment: Attachment, datasetType: str = "RawDatasets" ): """ - Create a new Attachment for a dataset. + Create a new Attachment for a dataset. Note that datasetType can be provided to determine the type of dataset that this attachment is attached to. This is required for creating the url that SciCat uses. This function has been renamed. @@ -513,7 +502,6 @@ def create_dataset_attachment( """ upload_attachment = create_dataset_attachment - def find_datasets_full_query(self, skip=0, limit=25, query_fields=None): """ Gets datasets using the fullQuery mechanism of SciCat. This is @@ -561,8 +549,6 @@ def find_datasets_full_query(self, skip=0, limit=25, query_fields=None): """ get_datasets_full_query = find_datasets_full_query - - def find_datasets(self, filter_fields=None) -> List[Dataset]: """ Gets datasets using the simple fiter mechanism. This @@ -601,7 +587,6 @@ def find_datasets(self, filter_fields=None) -> List[Dataset]: """ get_datasets = find_datasets - def find_published_data(self, filter=None) -> List[PublishedData]: """ retrieve all the published data using the simple fiter mechanism. This @@ -640,7 +625,6 @@ def find_published_data(self, filter=None) -> List[PublishedData]: """ get_published_data = find_published_data - def get_dataset_by_pid(self, pid=None) -> Dataset: """Gets dataset with the pid provided. diff --git a/pyscicat/tests/test_suite_2.py b/pyscicat/tests/test_suite_2.py index 1a36106..5575b2c 100644 --- a/pyscicat/tests/test_suite_2.py +++ b/pyscicat/tests/test_suite_2.py @@ -16,9 +16,9 @@ local_url = "http://localhost:3000/api/v3/" test_dataset_files = { - 'raw' : "../../examples/data/ingestion_simulation_dataset_ess_raw_dataset.json", - 'derived' : "../../examples/data/ingestion_simulation_dataset_ess_derived_dataset.json", - 'published_data' : "../../examples/data/published_data.json" + "raw": "../../examples/data/ingestion_simulation_dataset_ess_raw_dataset.json", + "derived": "../../examples/data/ingestion_simulation_dataset_ess_derived_dataset.json", + "published_data": "../../examples/data/published_data.json", } test_datasets = {} @@ -40,7 +40,7 @@ def set_up_test_environment(mock_request): def set_up_mock_raw_dataset(mock_request): - data = test_datasets['raw'] + data = test_datasets["raw"] mock_request.post( local_url + "Datasets", @@ -61,7 +61,7 @@ def set_up_mock_raw_dataset(mock_request): def set_up_mock_derived_dataset(mock_request): - data = test_datasets['derived'] + data = test_datasets["derived"] mock_request.post( local_url + "Datasets", @@ -82,7 +82,7 @@ def set_up_mock_derived_dataset(mock_request): def set_up_mock_published_data(mock_request): - data = test_datasets['published_data'] + data = test_datasets["published_data"] mock_url = local_url + "PublishedData" print("Mock : " + mock_url) @@ -107,13 +107,10 @@ def test_scicat_ingest_raw_dataset(): scicat._token == "a_token" ), "scicat client set the token given by the server" - ownable = Ownable(**data['ownable']) + ownable = Ownable(**data["ownable"]) # Create Dataset - dataset = RawDataset( - **data["dataset"], - **ownable.dict() - ) + dataset = RawDataset(**data["dataset"], **ownable.dict()) created_dataset = scicat.create_dataset(dataset) assert created_dataset["pid"] == data["id"] @@ -123,8 +120,7 @@ def test_scicat_ingest_raw_dataset(): size=data["orig_datablock"]["size"], datasetId=created_dataset["pid"], dataFileList=[ - DataFile(**file) - for file in data["orig_datablock"]["dataFileList"] + DataFile(**file) for file in data["orig_datablock"]["dataFileList"] ], **ownable.dict() ) @@ -147,13 +143,10 @@ def test_scicat_ingest_derived_dataset(): scicat._token == "a_token" ), "scicat client set the token given by the server" - ownable = Ownable(**data['ownable']) + ownable = Ownable(**data["ownable"]) # Create Dataset - dataset = RawDataset( - **data["dataset"], - **ownable.dict() - ) + dataset = RawDataset(**data["dataset"], **ownable.dict()) created_dataset = scicat.create_dataset(dataset) assert created_dataset["pid"] == data["id"] @@ -163,8 +156,7 @@ def test_scicat_ingest_derived_dataset(): size=data["orig_datablock"]["size"], datasetId=created_dataset["pid"], dataFileList=[ - DataFile(**file) - for file in data["orig_datablock"]["dataFileList"] + DataFile(**file) for file in data["orig_datablock"]["dataFileList"] ], **ownable.dict() ) @@ -191,4 +183,3 @@ def test_scicat_find_published_data(): assert len(data) == len(returned_data) assert data == returned_data - diff --git a/requirements-hdf5.txt b/requirements-hdf5.txt index c3b2f48..d03a9f6 100644 --- a/requirements-hdf5.txt +++ b/requirements-hdf5.txt @@ -1,2 +1,2 @@ hdf5plugin -h5py \ No newline at end of file +h5py diff --git a/requirements.txt b/requirements.txt index 76aa8db..903705e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ pydantic -requests \ No newline at end of file +requests