diff --git a/dandischema/metadata.py b/dandischema/metadata.py index bd437edf..417917ae 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -1,3 +1,4 @@ +from copy import deepcopy import json from pathlib import Path @@ -130,7 +131,9 @@ def validate(obj, schema_version=None, schema_key=None): klass(**obj) -def migrate(obj, to_version=DANDI_SCHEMA_VERSION): +def migrate(obj: dict, to_version: str = DANDI_SCHEMA_VERSION) -> dict: + """Migrate dandiset metadata object to new schema""" + obj = deepcopy(obj) if to_version not in ALLOWED_TARGET_SCHEMAS: raise ValueError(f"Current target schemas: {ALLOWED_TARGET_SCHEMAS}.") schema_version = obj.get("schemaVersion") @@ -141,13 +144,24 @@ def migrate(obj, to_version=DANDI_SCHEMA_VERSION): if version2tuple(schema_version) > version2tuple(to_version): raise ValueError(f"Cannot migrate from {schema_version} to lower {to_version}.") if version2tuple(schema_version) < (0, 3, 2): + if obj.get("schemaKey") is None: + obj["schemaKey"] = "Dandiset" + id = obj.get("id") + if not id.startswith("DANDI:"): + obj["id"] = f'DANDI:{obj["id"]}' for contrib in obj["contributor"]: contrib["roleName"] = [ val.replace("dandi:", "dcite:") for val in contrib["roleName"] ] + for affiliation in contrib.get("affiliation", []): + affiliation["schemaKey"] = "Affiliation" for contrib in obj["relatedResource"]: contrib["relation"] = contrib["relation"].replace("dandi:", "dcite:") for access in obj["access"]: access["status"] = "dandi:OpenAccess" - obj["schemaVersion"] = to_version + if obj.get("assetsSummary") is None: + obj["assetsSummary"] = {"numberOfFiles": 0, "numberOfBytes": 0} + if obj.get("manifestLocation") is None: + obj["manifestLocation"] = [] + obj["schemaVersion"] = to_version return obj diff --git a/dandischema/models.py b/dandischema/models.py index d181a431..dd21b870 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -336,6 +336,22 @@ class Organization(Contributor): } +class Affiliation(DandiBaseModel): + identifier: Optional[RORID] = Field( + None, + title="A ror.org identifier", + description="Use an ror.org identifier for institutions", + regex=r"^https://ror.org/[a-z0-9]+$", + nskey="schema", + ) + name: str = Field(None, nskey="schema") + + _ldmeta = { + "rdfs:subClassOf": ["schema:Organization", "prov:Organization"], + "nskey": "dandi", + } + + class Person(Contributor): identifier: Optional[ORCID] = Field( None, @@ -350,7 +366,7 @@ class Person(Contributor): nskey="schema", examples=["Lovelace, Augusta Ada", "Smith, John", "Chan, Kong-sang"], ) - affiliation: List[Organization] = Field( + affiliation: List[Affiliation] = Field( None, description="An organization that this person is affiliated with.", nskey="schema", diff --git a/dandischema/tests/data/metadata/meta_000004.json b/dandischema/tests/data/metadata/meta_000004.json index 9c04d905..817db4b4 100644 --- a/dandischema/tests/data/metadata/meta_000004.json +++ b/dandischema/tests/data/metadata/meta_000004.json @@ -1,7 +1,7 @@ { "id": "DANDI:000004/draft", "schemaKey": "Dandiset", - "schemaVersion": "0.3.2", + "schemaVersion": "0.4.0", "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", "description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy re-use of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.", "contributor": [ @@ -30,9 +30,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, @@ -50,9 +49,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA" } ] }, @@ -70,9 +68,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" } ] }, @@ -90,9 +87,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, @@ -109,9 +105,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, @@ -127,9 +122,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, @@ -145,9 +139,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, @@ -164,14 +157,12 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA" }, { - "schemaKey": "Organization", - "name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA" } ] }, @@ -188,14 +179,12 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA" }, { - "schemaKey": "Organization", - "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA" } ] }, @@ -211,14 +200,12 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada" }, { - "schemaKey": "Organization", - "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" } ] }, @@ -234,14 +221,12 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" }, { - "schemaKey": "Organization", - "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada" } ] }, @@ -257,9 +242,8 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, @@ -281,29 +265,24 @@ "includeInCitation": true, "affiliation": [ { - "schemaKey": "Organization", - "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" }, { - "schemaKey": "Organization", - "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" }, { - "schemaKey": "Organization", - "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA" }, { - "schemaKey": "Organization", - "name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA" }, { - "schemaKey": "Organization", - "name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA", - "includeInCitation": false + "schemaKey": "Affiliation", + "name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA" } ] }, diff --git a/dandischema/tests/data/metadata/meta_000004old.json b/dandischema/tests/data/metadata/meta_000004old.json index b120e162..b2f7d13b 100644 --- a/dandischema/tests/data/metadata/meta_000004old.json +++ b/dandischema/tests/data/metadata/meta_000004old.json @@ -1,5 +1,5 @@ { - "id": "DANDI:000004/draft", + "id": "000004/draft", "url": "https://dandiarchive.org/000004/draft", "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", "about": [ @@ -411,33 +411,5 @@ "relation": "dandi:IsDescribedBy", "identifier": "DOI:10.1038/s41597-020-0415-9" } - ], - "assetsSummary": { - "numberOfBytes": 10, - "numberOfFiles": 1, - "dataStandard": [ - { - "name": "NWB" - } - ], - "approach": [ - { - "name": "electrophysiology" - } - ], - "measurementTechnique": [ - { - "name": "two-photon microscopy technique" - } - ], - "species": [ - { - "name": "Human" - } - ] - }, - "manifestLocation": [ - "https://api.dandiarchive.org/api/dandisets/000004/versions/draft/assets/" - ], - "schemaKey": "Dandiset" + ] } diff --git a/dandischema/tests/data/metadata/meta_000008.json b/dandischema/tests/data/metadata/meta_000008.json index 95047741..93c5de07 100644 --- a/dandischema/tests/data/metadata/meta_000008.json +++ b/dandischema/tests/data/metadata/meta_000008.json @@ -1,7 +1,7 @@ { "id": "DANDI:000008/draft", "schemaKey": "Dandiset", - "schemaVersion": "0.3.2", + "schemaVersion": "0.4.0", "name": "Phenotypic variation within and across transcriptomic cell types in mouse motor cortex", "description": "Data from the Tolias Lab shared in the BICCN project", "contributor": [ diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 29cdf6c3..a660f81d 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -229,6 +229,7 @@ def test_requirements(obj, schema_key, missingfields): @pytest.mark.parametrize( "obj, target", [ + ({}, "0.3.2"), ({"schemaVersion": "0.2.2"}, None), ({"schemaVersion": "0.3.0"}, "0.3.2"), ({"schemaVersion": "0.3.1"}, "0.3.0"), @@ -242,9 +243,19 @@ def test_migrate_errors(obj, target): def test_migrate_040(schema_dir): with (METADATA_DIR / "meta_000004old.json").open() as fp: data_as_dict = json.load(fp) + with pytest.raises(ValueError) as exc: + validate(data_as_dict) + data_as_dict["schemaKey"] = "Dandiset" with pytest.raises(ValidationError) as exc: validate(data_as_dict) - badfields = {"contributor", "access", "relatedResource"} + badfields = { + "contributor", + "access", + "relatedResource", + "id", + "manifestLocation", + "assetsSummary", + } assert set([el["loc"][0] for el in exc.value.errors()]) == badfields newmeta = migrate(data_as_dict, to_version=DANDI_SCHEMA_VERSION) assert newmeta["schemaVersion"] == DANDI_SCHEMA_VERSION