Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 1st set of Datacite field methods #178

Merged
merged 4 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,6 @@ def datacite_record_all_fields():
return Datacite("cool-repo", source_records)


@pytest.fixture
def datacite_record_optional_fields_blank():
source_records = XMLTransformer.parse_source_file(
"tests/fixtures/datacite/datacite_record_optional_fields_blank.xml"
)
return next(source_records)


@pytest.fixture
def datacite_record_optional_fields_missing():
source_records = XMLTransformer.parse_source_file(
"tests/fixtures/datacite/datacite_record_optional_fields_missing.xml"
)
return next(source_records)


# marc ##########################


Expand Down
58 changes: 31 additions & 27 deletions tests/sources/xml/test_datacite.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,16 +396,16 @@ def test_get_alternate_titles_success():
]


def test_get_alternate_titles_transforms_correctly_if_fields_blank(
datacite_record_optional_fields_blank,
):
assert Datacite.get_alternate_titles(datacite_record_optional_fields_blank) == []
def test_get_alternate_titles_transforms_correctly_if_fields_blank():
source_record = create_datacite_source_record_stub(
'<titles><title titleType="AlternativeTitle"></title></titles>'
)
assert Datacite.get_alternate_titles(source_record) is None


def test_get_alternate_titles_transforms_correctly_if_fields_missing(
datacite_record_optional_fields_missing,
):
assert Datacite.get_alternate_titles(datacite_record_optional_fields_missing) == []
def test_get_alternate_titles_transforms_correctly_if_fields_missing():
source_record = create_datacite_source_record_stub("")
assert Datacite.get_alternate_titles(source_record) is None


def test_get_content_type_success():
Expand All @@ -417,20 +417,18 @@ def test_get_content_type_success():
assert Datacite.get_content_type(source_record, "abc123") == ["Dataset"]


def test_get_content_type_transforms_correctly_if_fields_blank(
datacite_record_optional_fields_blank,
):
assert (
Datacite.get_content_type(datacite_record_optional_fields_blank, "abc123") == []
def test_get_content_type_transforms_correctly_if_fields_blank():
source_record = create_datacite_source_record_stub(
"""
<resourceType resourceTypeGeneral=""></resourceType>
"""
)
assert Datacite.get_content_type(source_record, "abc123") is None


def test_get_content_type_transforms_correctly_if_fields_missing(
datacite_record_optional_fields_missing,
):
assert (
Datacite.get_content_type(datacite_record_optional_fields_missing, "abc123") == []
)
def test_get_content_type_transforms_correctly_if_fields_missing():
source_record = create_datacite_source_record_stub("")
assert Datacite.get_content_type(source_record, "abc123") is None


def test_get_contributors_success():
ehanson8 marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -498,16 +496,22 @@ def test_get_contributors_success():
]


def test_get_contributors_transforms_correctly_if_fields_blank(
datacite_record_optional_fields_blank,
):
assert Datacite.get_contributors(datacite_record_optional_fields_blank) == []
def test_get_contributors_transforms_correctly_if_fields_blank():
source_record = create_datacite_source_record_stub(
"""
<creators>
<creator />
<contributors>
<contributor />
</contributors>
"""
)
assert Datacite.get_contributors(source_record) is None


def test_get_contributors_transforms_correctly_if_fields_missing(
datacite_record_optional_fields_missing,
):
assert Datacite.get_contributors(datacite_record_optional_fields_missing) == []
def test_get_contributors_transforms_correctly_if_fields_missing():
source_record = create_datacite_source_record_stub("")
assert Datacite.get_contributors(source_record) is None


def test_generate_name_identifier_url_orcid_scheme(datacite_record_all_fields):
Expand Down
24 changes: 13 additions & 11 deletions transmogrifier/sources/xml/datacite.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
source_record_id = self.get_source_record_id(source_record)

# alternate_titles
fields["alternate_titles"] = self.get_alternate_titles(source_record) or None
fields["alternate_titles"] = self.get_alternate_titles(source_record)

# content_type
fields["content_type"] = (
self.get_content_type(source_record, source_record_id) or None
)
fields["content_type"] = self.get_content_type(source_record, source_record_id)

# contributors
fields["contributors"] = self.get_contributors(source_record) or None
fields["contributors"] = self.get_contributors(source_record)

# dates
if publication_year := source_record.metadata.find(
Expand Down Expand Up @@ -248,7 +246,9 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
return fields

@classmethod
def get_alternate_titles(cls, source_record: Tag) -> list[timdex.AlternateTitle]:
def get_alternate_titles(
cls, source_record: Tag
) -> list[timdex.AlternateTitle] | None:
alternate_titles = []
alternate_titles.extend(
[
Expand All @@ -261,7 +261,7 @@ def get_alternate_titles(cls, source_record: Tag) -> list[timdex.AlternateTitle]
]
)
alternate_titles.extend(list(cls._get_additional_titles(source_record)))
return alternate_titles
return alternate_titles or None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this time, I do like this full encapsulation, thanks for making these changes.

Relevant here and elsewhere...

Once we get into orchestration, which I think touches on the instantiation of the TIMDEXRecord instance, it might be worth considering if maybe it's okay to return an empty list? If so, and that was filtered out during serialization to JSON, then we could probably drop a bunch of these conversions to None. Just noting for the future, but again, think this is great for now, and in the spirit of the incremental approach.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am most likely onboard with that approach when we get there!


@classmethod
def _get_additional_titles(
Expand All @@ -273,7 +273,9 @@ def _get_additional_titles(
yield timdex.AlternateTitle(value=title)

@classmethod
def get_content_type(cls, source_record: Tag, source_record_id: str) -> list[str]:
def get_content_type(
cls, source_record: Tag, source_record_id: str
) -> list[str] | None:
content_types = []
if resource_type := source_record.metadata.find("resourceType"):
if content_type := resource_type.get("resourceTypeGeneral"):
Expand All @@ -287,14 +289,14 @@ def get_content_type(cls, source_record: Tag, source_record_id: str) -> list[str
"Datacite record %s missing required Datacite field resourceType",
source_record_id,
)
return content_types
return content_types or None

@classmethod
def get_contributors(cls, source_record: Tag) -> list[timdex.Contributor]:
def get_contributors(cls, source_record: Tag) -> list[timdex.Contributor] | None:
contributors = []
contributors.extend(list(cls._get_creators(source_record)))
contributors.extend(list(cls._get_contributors(source_record)))
return contributors
return contributors or None

@classmethod
def _get_creators(cls, source_record: Tag) -> Iterator[timdex.Contributor]:
Expand Down
Loading