Skip to content

Commit

Permalink
Add Affiliation class to minimize Organization details and update mig…
Browse files Browse the repository at this point in the history
…ration
  • Loading branch information
satra committed May 26, 2021
1 parent da1c7bf commit edd44f6
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 99 deletions.
18 changes: 16 additions & 2 deletions dandischema/metadata.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from copy import deepcopy
import json
from pathlib import Path

Expand Down Expand Up @@ -130,7 +131,9 @@ def validate(obj, schema_version=None, schema_key=None):
klass(**obj)


def migrate(obj, to_version=DANDI_SCHEMA_VERSION):
def migrate(obj: dict, to_version: str = DANDI_SCHEMA_VERSION) -> dict:
"""Migrate dandiset metadata object to new schema"""
obj = deepcopy(obj)
if to_version not in ALLOWED_TARGET_SCHEMAS:
raise ValueError(f"Current target schemas: {ALLOWED_TARGET_SCHEMAS}.")
schema_version = obj.get("schemaVersion")
Expand All @@ -141,13 +144,24 @@ def migrate(obj, to_version=DANDI_SCHEMA_VERSION):
if version2tuple(schema_version) > version2tuple(to_version):
raise ValueError(f"Cannot migrate from {schema_version} to lower {to_version}.")
if version2tuple(schema_version) < (0, 3, 2):
if obj.get("schemaKey") is None:
obj["schemaKey"] = "Dandiset"
id = obj.get("id")
if not id.startswith("DANDI:"):
obj["id"] = f'DANDI:{obj["id"]}'
for contrib in obj["contributor"]:
contrib["roleName"] = [
val.replace("dandi:", "dcite:") for val in contrib["roleName"]
]
for affiliation in contrib.get("affiliation", []):
affiliation["schemaKey"] = "Affiliation"
for contrib in obj["relatedResource"]:
contrib["relation"] = contrib["relation"].replace("dandi:", "dcite:")
for access in obj["access"]:
access["status"] = "dandi:OpenAccess"
obj["schemaVersion"] = to_version
if obj.get("assetsSummary") is None:
obj["assetsSummary"] = {"numberOfFiles": 0, "numberOfBytes": 0}
if obj.get("manifestLocation") is None:
obj["manifestLocation"] = []
obj["schemaVersion"] = to_version
return obj
18 changes: 17 additions & 1 deletion dandischema/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,22 @@ class Organization(Contributor):
}


class Affiliation(DandiBaseModel):
    # Slimmed-down organization record used for a person's affiliations:
    # it carries only an optional ror.org identifier and a name.
    identifier: Optional[RORID] = Field(
        None,
        title="A ror.org identifier",
        description="Use an ror.org identifier for institutions",
        regex=r"^https://ror.org/[a-z0-9]+$",
        nskey="schema",
    )
    # The default is None, so the annotation is Optional to match it.
    name: Optional[str] = Field(None, nskey="schema")

    # NOTE(review): presumably linked-data metadata (superclasses and the
    # namespace key) consumed by the schema/context generation -- confirm.
    _ldmeta = {
        "rdfs:subClassOf": ["schema:Organization", "prov:Organization"],
        "nskey": "dandi",
    }


class Person(Contributor):
identifier: Optional[ORCID] = Field(
None,
Expand All @@ -350,7 +366,7 @@ class Person(Contributor):
nskey="schema",
examples=["Lovelace, Augusta Ada", "Smith, John", "Chan, Kong-sang"],
)
affiliation: List[Organization] = Field(
affiliation: List[Affiliation] = Field(
None,
description="An organization that this person is affiliated with.",
nskey="schema",
Expand Down
107 changes: 43 additions & 64 deletions dandischema/tests/data/metadata/meta_000004.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "DANDI:000004/draft",
"schemaKey": "Dandiset",
"schemaVersion": "0.3.2",
"schemaVersion": "0.4.0",
"name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task",
"description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy re-use of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.",
"contributor": [
Expand Down Expand Up @@ -30,9 +30,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand All @@ -50,9 +49,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA"
}
]
},
Expand All @@ -70,9 +68,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada"
}
]
},
Expand All @@ -90,9 +87,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand All @@ -109,9 +105,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand All @@ -127,9 +122,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand All @@ -145,9 +139,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand All @@ -164,14 +157,12 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA"
},
{
"schemaKey": "Organization",
"name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA"
}
]
},
Expand All @@ -188,14 +179,12 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA"
},
{
"schemaKey": "Organization",
"name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA"
}
]
},
Expand All @@ -211,14 +200,12 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada"
},
{
"schemaKey": "Organization",
"name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada"
}
]
},
Expand All @@ -234,14 +221,12 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada"
},
{
"schemaKey": "Organization",
"name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada"
}
]
},
Expand All @@ -257,9 +242,8 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand All @@ -281,29 +265,24 @@
"includeInCitation": true,
"affiliation": [
{
"schemaKey": "Organization",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
},
{
"schemaKey": "Organization",
"name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
},
{
"schemaKey": "Organization",
"name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA"
},
{
"schemaKey": "Organization",
"name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA"
},
{
"schemaKey": "Organization",
"name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA",
"includeInCitation": false
"schemaKey": "Affiliation",
"name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA"
}
]
},
Expand Down
32 changes: 2 additions & 30 deletions dandischema/tests/data/metadata/meta_000004old.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"id": "DANDI:000004/draft",
"id": "000004/draft",
"url": "https://dandiarchive.org/000004/draft",
"name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task",
"about": [
Expand Down Expand Up @@ -411,33 +411,5 @@
"relation": "dandi:IsDescribedBy",
"identifier": "DOI:10.1038/s41597-020-0415-9"
}
],
"assetsSummary": {
"numberOfBytes": 10,
"numberOfFiles": 1,
"dataStandard": [
{
"name": "NWB"
}
],
"approach": [
{
"name": "electrophysiology"
}
],
"measurementTechnique": [
{
"name": "two-photon microscopy technique"
}
],
"species": [
{
"name": "Human"
}
]
},
"manifestLocation": [
"https://api.dandiarchive.org/api/dandisets/000004/versions/draft/assets/"
],
"schemaKey": "Dandiset"
]
}
2 changes: 1 addition & 1 deletion dandischema/tests/data/metadata/meta_000008.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "DANDI:000008/draft",
"schemaKey": "Dandiset",
"schemaVersion": "0.3.2",
"schemaVersion": "0.4.0",
"name": "Phenotypic variation within and across transcriptomic cell types in mouse motor cortex",
"description": "Data from the Tolias Lab shared in the BICCN project",
"contributor": [
Expand Down
13 changes: 12 additions & 1 deletion dandischema/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def test_requirements(obj, schema_key, missingfields):
@pytest.mark.parametrize(
"obj, target",
[
({}, "0.3.2"),
({"schemaVersion": "0.2.2"}, None),
({"schemaVersion": "0.3.0"}, "0.3.2"),
({"schemaVersion": "0.3.1"}, "0.3.0"),
Expand All @@ -242,9 +243,19 @@ def test_migrate_errors(obj, target):
def test_migrate_040(schema_dir):
with (METADATA_DIR / "meta_000004old.json").open() as fp:
data_as_dict = json.load(fp)
with pytest.raises(ValueError) as exc:
validate(data_as_dict)
data_as_dict["schemaKey"] = "Dandiset"
with pytest.raises(ValidationError) as exc:
validate(data_as_dict)
badfields = {"contributor", "access", "relatedResource"}
badfields = {
"contributor",
"access",
"relatedResource",
"id",
"manifestLocation",
"assetsSummary",
}
assert set([el["loc"][0] for el in exc.value.errors()]) == badfields
newmeta = migrate(data_as_dict, to_version=DANDI_SCHEMA_VERSION)
assert newmeta["schemaVersion"] == DANDI_SCHEMA_VERSION
Expand Down

0 comments on commit edd44f6

Please sign in to comment.