Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete Aardvark field method refactor #199

Merged
merged 3 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 220 additions & 62 deletions tests/sources/json/test_aardvark.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,14 +285,72 @@ def test_parse_solr_date_range_invalid_date_range_string_raises_error():
MITAardvark.parse_solr_date_range_string("Invalid", "123")


def test_aardvark_get_identifiers_success(aardvark_record_all_fields):
assert MITAardvark.get_identifiers(next(aardvark_record_all_fields)) == [
def test_aardvark_get_format_type_success():
    """get_format returns ["Shapefile"] when dct_format_s is ["Shapefile"]."""
    record = create_aardvark_source_record_stub()
    record["dct_format_s"] = ["Shapefile"]
    expected = ["Shapefile"]
    assert MITAardvark.get_format(record) == expected


def test_aardvark_get_format_transforms_correctly_if_fields_blank():
    """get_format returns None when dct_format_s is an empty list."""
    record = create_aardvark_source_record_stub()
    record["dct_format_s"] = []
    result = MITAardvark.get_format(record)
    assert result is None


def test_aardvark_get_format_transforms_correctly_if_fields_missing():
    """get_format returns None when given an empty record dict."""
    assert MITAardvark.get_format({}) is None


def test_aardvark_get_identifiers_success():
    """get_identifiers maps "abc123" to an Identifier of kind "Not specified"."""
    record = create_aardvark_source_record_stub()
    record["dct_identifier_sm"] = ["abc123"]
    expected = [timdex.Identifier(value="abc123", kind="Not specified")]
    assert MITAardvark.get_identifiers(record) == expected


def test_aardvark_get_links_success(aardvark_record_all_fields):
assert MITAardvark.get_links(next(aardvark_record_all_fields), "123") == [
def test_aardvark_get_identifiers_transforms_correctly_if_fields_blank():
    """get_identifiers returns None when dct_identifier_sm is an empty list."""
    record = create_aardvark_source_record_stub()
    record["dct_identifier_sm"] = []
    result = MITAardvark.get_identifiers(record)
    assert result is None


def test_aardvark_get_identifiers_transforms_correctly_if_fields_missing():
    """get_identifiers returns None when given an empty record dict."""
    assert MITAardvark.get_identifiers({}) is None


def test_aardvark_get_languages_success():
    """get_languages returns ["eng"] when dct_language_sm is ["eng"]."""
    record = create_aardvark_source_record_stub()
    record["dct_language_sm"] = ["eng"]
    expected = ["eng"]
    assert MITAardvark.get_languages(record) == expected


def test_aardvark_get_languages_transforms_correctly_if_fields_blank():
    """get_languages returns None when dct_language_sm is an empty list."""
    record = create_aardvark_source_record_stub()
    record["dct_language_sm"] = []
    result = MITAardvark.get_languages(record)
    assert result is None


def test_aardvark_get_languages_transforms_correctly_if_fields_missing():
    """get_languages returns None when given an empty record dict."""
    assert MITAardvark.get_languages({}) is None


def test_aardvark_get_links_success():
source_record = create_aardvark_source_record_stub()
source_record["dct_references_s"] = (
'{"http://schema.org/downloadUrl": [{"label": "Source Metadata", "url": '
'"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95'
'.source.fgdc.xml"}, {"label": "Aardvark Metadata", "url": '
'"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95'
'.normalized.aardvark.json"}, {"label": "Data", "url": '
'"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95'
'.zip"}], "http://schema.org/url": "https://geodata.libraries.mit.edu/record/'
'gismit:GISPORTAL_GISOWNER01_BOSTONWATER95"}'
)
assert MITAardvark.get_links(source_record) == [
timdex.Link(
url="https://cdn.dev1.mitlibrary.net/geo/public"
"/GISPORTAL_GISOWNER01_BOSTONWATER95.source.fgdc.xml",
Expand Down Expand Up @@ -321,29 +379,55 @@ def test_aardvark_get_links_success(aardvark_record_all_fields):
]


def test_aardvark_get_links_transforms_correctly_if_fields_blank():
    """get_links returns None when dct_references_s is an empty string."""
    record = create_aardvark_source_record_stub()
    record["dct_references_s"] = ""
    result = MITAardvark.get_links(record)
    assert result is None


def test_aardvark_get_links_transforms_correctly_if_fields_missing():
    """get_links returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry a links field.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_links(unmodified_stub)
    assert result is None


def test_aardvark_get_links_logs_warning_for_invalid_json(caplog):
assert MITAardvark.get_links({"dct_references_s": "Invalid"}, "123") == []
source_record = create_aardvark_source_record_stub()
source_record["dct_references_s"] = "Invalid"
assert MITAardvark.get_links(source_record) is None
assert (
"Record ID '123': Unable to parse links string 'Invalid' as JSON" in caplog.text
)


def test_aardvark_get_locations_success(aardvark_record_all_fields):
assert MITAardvark.get_locations(next(aardvark_record_all_fields), "123") == [
def test_aardvark_get_locations_success():
    """get_locations turns ENVELOPE strings into Bounding Box and Geometry locations."""
    envelope = "ENVELOPE(-111.1, -104.0, 45.0, 40.9)"
    record = create_aardvark_source_record_stub()
    record["dcat_bbox"] = envelope
    record["locn_geometry"] = envelope
    expected = [
        timdex.Location(
            kind="Bounding Box", geoshape="BBOX (-111.1, -104.0, 45.0, 40.9)"
        ),
        timdex.Location(kind="Geometry", geoshape="BBOX (-111.1, -104.0, 45.0, 40.9)"),
    ]
    assert MITAardvark.get_locations(record) == expected


def test_parse_get_locations_string_invalid_geostring_logs_warning(
aardvark_record_all_fields, caplog
):
aardvark_record = next(aardvark_record_all_fields)
aardvark_record["dcat_bbox"] = "Invalid"
aardvark_record["locn_geometry"] = "Invalid"
assert MITAardvark.get_locations(aardvark_record, "123") == []
def test_aardvark_get_locations_transforms_correctly_if_fields_blank():
    """get_locations returns None when dcat_bbox and locn_geometry are empty strings."""
    record = create_aardvark_source_record_stub()
    for geo_field in ("dcat_bbox", "locn_geometry"):
        record[geo_field] = ""
    result = MITAardvark.get_locations(record)
    assert result is None


def test_aardvark_get_locations_transforms_correctly_if_fields_missing():
    """get_locations returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry location fields.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_locations(unmodified_stub)
    assert result is None


def test_parse_get_locations_string_invalid_geostring_logs_warning(caplog):
source_record = create_aardvark_source_record_stub()
source_record["dcat_bbox"] = "Invalid"
source_record["locn_geometry"] = "Invalid"
assert MITAardvark.get_locations(source_record) is None
assert (
"Record ID '123': Unable to parse geodata string 'Invalid' in 'dcat_bbox'"
in caplog.text
Expand All @@ -354,8 +438,16 @@ def test_parse_get_locations_string_invalid_geostring_logs_warning(
)


def test_aardvark_get_notes_success(aardvark_record_all_fields):
assert MITAardvark.get_notes(next(aardvark_record_all_fields)) == [
def test_aardvark_get_notes_success():
source_record = create_aardvark_source_record_stub()
source_record["gbl_displayNote_sm"] = [
"Danger: This text will be displayed in a red box",
"Info: This text will be displayed in a blue box",
"Tip: This text will be displayed in a green box",
"Warning: This text will be displayed in a yellow box",
"This is text without a tag and it will be assigned default 'note' style",
]
assert MITAardvark.get_notes(source_record) == [
timdex.Note(
value=["Danger: This text will be displayed in a red box"],
kind="Display note",
Expand All @@ -381,14 +473,66 @@ def test_aardvark_get_notes_success(aardvark_record_all_fields):
]


def test_aardvark_get_publishers(aardvark_record_all_fields):
assert MITAardvark.get_publishers(next(aardvark_record_all_fields)) == [
def test_aardvark_get_notes_transforms_correctly_if_fields_blank():
    """get_notes returns None when gbl_displayNote_sm is an empty list."""
    record = create_aardvark_source_record_stub()
    record["gbl_displayNote_sm"] = []
    result = MITAardvark.get_notes(record)
    assert result is None


def test_aardvark_get_notes_transforms_correctly_if_fields_missing():
    """get_notes returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry a display-note field.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_notes(unmodified_stub)
    assert result is None


def test_aardvark_get_provider_success():
    """get_provider returns "MIT" when schema_provider_s is "MIT"."""
    record = create_aardvark_source_record_stub()
    record["schema_provider_s"] = "MIT"
    expected = "MIT"
    assert MITAardvark.get_provider(record) == expected


def test_aardvark_get_provider_transforms_correctly_if_fields_blank():
    """get_provider returns None when schema_provider_s is an empty string."""
    record = create_aardvark_source_record_stub()
    record["schema_provider_s"] = ""
    result = MITAardvark.get_provider(record)
    assert result is None


def test_aardvark_get_provider_transforms_correctly_if_fields_missing():
    """get_provider returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry a provider field.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_provider(unmodified_stub)
    assert result is None


def test_aardvark_get_publishers_success():
    """get_publishers maps "ML InfoMap (Firm)" to a timdex.Publisher with that name."""
    record = create_aardvark_source_record_stub()
    record["dct_publisher_sm"] = ["ML InfoMap (Firm)"]
    expected = [timdex.Publisher(name="ML InfoMap (Firm)")]
    assert MITAardvark.get_publishers(record) == expected


def test_aardvark_get_rights_success(aardvark_record_all_fields):
assert MITAardvark.get_rights("source", next(aardvark_record_all_fields)) == [
def test_aardvark_get_publishers_transforms_correctly_if_fields_blank():
    """get_publishers returns None when dct_publisher_sm is an empty list."""
    record = create_aardvark_source_record_stub()
    record["dct_publisher_sm"] = []
    result = MITAardvark.get_publishers(record)
    assert result is None


def test_aardvark_get_publishers_transforms_correctly_if_fields_missing():
    """get_publishers returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry a publisher field.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_publishers(unmodified_stub)
    assert result is None


def test_aardvark_get_rights_success():
source_record = create_aardvark_source_record_stub()
source_record["dct_accessRights_s"] = "Access note"
source_record["dct_license_sm"] = [
"http://license.license",
"http://another_license.another_license",
]
source_record["dct_rights_sm"] = ["Some person has the rights"]
source_record["dct_rightsHolder_sm"] = [
"The person with the rights",
"Another person with the rights",
]
assert MITAardvark.get_rights(source_record, "source") == [
timdex.Rights(description="Access note", kind="Access rights"),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
Expand All @@ -399,76 +543,90 @@ def test_aardvark_get_rights_success(aardvark_record_all_fields):
]


def test_aardvark_get_rights_mit_restricted_success(aardvark_record_all_fields):
aardvark_record = next(aardvark_record_all_fields)
aardvark_record["dct_accessRights_s"] = "Restricted"
assert MITAardvark.get_rights("gismit", aardvark_record) == [
def test_aardvark_get_rights_mit_restricted_success():
source_record = create_aardvark_source_record_stub()
source_record["dct_accessRights_s"] = "Restricted"
assert MITAardvark.get_rights(source_record, "gismit") == [
timdex.Rights(description="Restricted", kind="Access rights"),
timdex.Rights(description="MIT authentication required", kind="Access to files"),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
timdex.Rights(description="Some person has the rights"),
timdex.Rights(
description="The person with the rights. Another person with the rights"
),
]


def test_aardvark_get_rights_mit_public_success(aardvark_record_all_fields):
aardvark_record = next(aardvark_record_all_fields)
aardvark_record["dct_accessRights_s"] = "Public"
assert MITAardvark.get_rights("gismit", aardvark_record) == [
def test_aardvark_get_rights_mit_public_success():
source_record = create_aardvark_source_record_stub()
source_record["dct_accessRights_s"] = "Public"
assert MITAardvark.get_rights(source_record, "gismit") == [
timdex.Rights(description="Public", kind="Access rights"),
timdex.Rights(description="no authentication required", kind="Access to files"),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
timdex.Rights(description="Some person has the rights"),
timdex.Rights(
description="The person with the rights. Another person with the rights"
),
]


def test_aardvark_get_rights_external_restricted_success(aardvark_record_all_fields):
aardvark_record = next(aardvark_record_all_fields)
aardvark_record["dct_accessRights_s"] = "Restricted"
assert MITAardvark.get_rights("gisogm", aardvark_record) == [
def test_aardvark_get_rights_external_restricted_success():
source_record = create_aardvark_source_record_stub()
source_record["dct_accessRights_s"] = "Restricted"
assert MITAardvark.get_rights(source_record, "gisogm") == [
timdex.Rights(description="Restricted", kind="Access rights"),
timdex.Rights(
description="unknown: check with owning institution", kind="Access to files"
),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
timdex.Rights(description="Some person has the rights"),
timdex.Rights(
description="The person with the rights. Another person with the rights"
),
]


def test_aardvark_get_rights_external_public_success(aardvark_record_all_fields):
aardvark_record = next(aardvark_record_all_fields)
aardvark_record["dct_accessRights_s"] = "Public"
assert MITAardvark.get_rights("gisogm", aardvark_record) == [
def test_aardvark_get_rights_external_public_success():
source_record = create_aardvark_source_record_stub()
source_record["dct_accessRights_s"] = "Public"
assert MITAardvark.get_rights(source_record, "gisogm") == [
timdex.Rights(description="Public", kind="Access rights"),
timdex.Rights(
description="unknown: check with owning institution", kind="Access to files"
),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
timdex.Rights(description="Some person has the rights"),
timdex.Rights(
description="The person with the rights. Another person with the rights"
),
]


def test_aardvark_get_subjects_success(aardvark_record_all_fields):
assert MITAardvark.get_subjects(next(aardvark_record_all_fields)) == [
def test_aardvark_get_subjects_success():
    """get_subjects emits one timdex.Subject per value across the five subject fields."""
    record = create_aardvark_source_record_stub()
    subject_fields = {
        "dcat_keyword_sm": ["Country"],
        "dcat_theme_sm": ["Political boundaries"],
        "dct_spatial_sm": ["Some city, Some country"],
        "dct_subject_sm": ["Geography", "Earth"],
        "gbl_resourceClass_sm": ["Dataset"],
    }
    # Assign in the same order as the dict literal above.
    for field_name, values in subject_fields.items():
        record[field_name] = values
    expected = [
        timdex.Subject(value=["Country"], kind="DCAT; Keyword"),
        timdex.Subject(value=["Political boundaries"], kind="DCAT; Theme"),
        timdex.Subject(value=["Some city, Some country"], kind="Dublin Core; Spatial"),
        timdex.Subject(value=["Geography"], kind="Dublin Core; Subject"),
        timdex.Subject(value=["Earth"], kind="Dublin Core; Subject"),
        timdex.Subject(value=["Dataset"], kind="Subject scheme not provided"),
    ]
    assert MITAardvark.get_subjects(record) == expected


def test_aardvark_get_subjects_transforms_correctly_if_fields_blank():
    """get_subjects returns None when all five subject fields are empty lists."""
    record = create_aardvark_source_record_stub()
    subject_field_names = (
        "dcat_keyword_sm",
        "dcat_theme_sm",
        "dct_spatial_sm",
        "dct_subject_sm",
        "gbl_resourceClass_sm",
    )
    for field_name in subject_field_names:
        # A fresh list per field, matching separate `[]` literals.
        record[field_name] = []
    result = MITAardvark.get_subjects(record)
    assert result is None


def test_aardvark_get_subjects_transforms_correctly_if_fields_missing():
    """get_subjects returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry subject fields.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_subjects(unmodified_stub)
    assert result is None


def test_aardvark_get_summary_success():
    """get_summary returns ["A description"] when dct_description_sm holds it."""
    record = create_aardvark_source_record_stub()
    record["dct_description_sm"] = ["A description"]
    expected = ["A description"]
    assert MITAardvark.get_summary(record) == expected


def test_aardvark_get_summary_transforms_correctly_if_fields_blank():
    """get_summary returns None when dct_description_sm is an empty list."""
    record = create_aardvark_source_record_stub()
    record["dct_description_sm"] = []
    result = MITAardvark.get_summary(record)
    assert result is None


def test_aardvark_get_summary_transforms_correctly_if_fields_missing():
    """get_summary returns None for the unmodified stub record.

    Per the test name, the stub is expected not to carry a description field.
    """
    unmodified_stub = create_aardvark_source_record_stub()
    result = MITAardvark.get_summary(unmodified_stub)
    assert result is None
Loading
Loading