Skip to content

Commit

Permalink
Merge branch 'Health-RI-healthdcat_ap'
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Jan 9, 2025
2 parents 868b81e + d334bb3 commit f9cd102
Show file tree
Hide file tree
Showing 30 changed files with 2,072 additions and 217 deletions.
1 change: 1 addition & 0 deletions ckanext/dcat/profiles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@
from .euro_dcat_ap_3 import EuropeanDCATAP3Profile
from .dcat_us_3 import DCATUS3Profile
from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
from .euro_health_dcat_ap import EuropeanHealthDCATAPProfile
from .schemaorg import SchemaOrgProfile
14 changes: 7 additions & 7 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
import json
from urllib.parse import quote

from ckan.lib.helpers import resource_formats
from ckan.model.license import LicenseRegister
from ckantoolkit import ObjectNotFound, asbool, aslist, config, get_action, url_for
from dateutil.parser import parse as parse_date
from rdflib import term, URIRef, BNode, Literal
from rdflib.namespace import Namespace, RDF, XSD, SKOS, RDFS, ORG
from geomet import wkt, InvalidGeoJSONException
from geomet import InvalidGeoJSONException, wkt
from rdflib import BNode, Literal, URIRef, term
from rdflib.namespace import ORG, RDF, RDFS, SKOS, XSD, Namespace

from ckantoolkit import config, url_for, asbool, aslist, get_action, ObjectNotFound
from ckan.model.license import LicenseRegister
from ckan.lib.helpers import resource_formats
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
from ckanext.dcat.validators import is_year, is_year_month, is_date
from ckanext.dcat.validators import is_date, is_year, is_year_month

CNT = Namespace("http://www.w3.org/2011/content#")
DCT = Namespace("http://purl.org/dc/terms/")
Expand Down
86 changes: 85 additions & 1 deletion ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from rdflib import URIRef, BNode, Literal
from rdflib import URIRef, BNode, Literal, term
from .base import RDFProfile, CleanedURIRef, URIRefOrLiteral
from .base import (
RDF,
Expand All @@ -10,6 +10,7 @@
FOAF,
SKOS,
LOCN,
RDFS,
)


Expand Down Expand Up @@ -118,6 +119,11 @@ def _parse_list_value(data_dict, field_name):
if agents:
dataset_dict[key] = agents

# Add any qualifiedRelations
qual_relations = self._relationship_details(dataset_ref, DCAT.qualifiedRelation)
if qual_relations:
dataset_dict["qualified_relation"] = qual_relations

# Repeating subfields: resources
for schema_field in self._dataset_schema["resource_fields"]:
if "repeating_subfields" in schema_field:
Expand Down Expand Up @@ -227,6 +233,10 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
spatial_ref, field[1], item[field[0]]
)

self._add_relationship(
dataset_ref, dataset_dict, "qualified_relation", DCAT.qualifiedRelation
)

resources = dataset_dict.get("resources", [])
for resource in resources:
if resource.get("access_services"):
Expand Down Expand Up @@ -292,6 +302,80 @@ def _add_agents(
_type=URIRefOrLiteral,
)

def _relationship_details(self, subject, predicate):
    """
    Collects the details of every relationship node linked to a subject
    through the given predicate (e.g. dcat:qualifiedRelation).

    :param subject: node whose relationships are looked up (rdflib URIRef
        or BNode)
    :param predicate: property pointing to the relationship nodes
    :returns: a list with one dict per relationship found, each holding
        the keys "uri" (an empty string unless the node is a URIRef),
        "role" (looked up via dcat:hadRole) and "relation" (looked up via
        dct:relation). The last two hold whatever self._object_value
        returns for the node.
    """
    return [
        {
            "uri": str(node) if isinstance(node, term.URIRef) else "",
            "role": self._object_value(node, DCAT.hadRole),
            "relation": self._object_value(node, DCT.relation),
        }
        for node in self.g.objects(subject, predicate)
    ]

def _add_relationship(
    self,
    dataset_ref,
    dataset_dict,
    relation_key,
    rdf_predicate,
):
    """
    Serializes the relationships stored in a dataset field as
    dcat:Relationship nodes attached to the dataset.

    Nothing is added unless the field holds a non-empty list whose first
    item contains at least one truthy value.

    :param dataset_ref: The RDF reference of the dataset
    :param dataset_dict: The dataset dictionary holding the relationships
    :param relation_key: field name in the CKAN dict
        (e.g. "qualified_relation")
    :param rdf_predicate: The RDF predicate linking the dataset to each
        relationship (e.g. DCAT.qualifiedRelation)
    """
    entries = dataset_dict.get(relation_key)
    has_content = (
        isinstance(entries, list)
        and len(entries)
        and self._not_empty_dict(entries[0])
    )
    if not has_content:
        return

    # (key in the relationship dict, predicate, class) for each property
    properties = (
        ("relation", DCT.relation, RDFS.Resource),
        ("role", DCAT.hadRole, DCAT.Role),
    )

    for entry in entries:
        # Relationships without their own URI become blank nodes
        uri = entry.get("uri")
        node = CleanedURIRef(uri) if uri else BNode()

        self.g.add((node, RDF.type, DCAT.Relationship))
        self.g.add((dataset_ref, rdf_predicate, node))

        for key, prop, rdf_class in properties:
            self._add_triple_from_dict(
                entry,
                node,
                prop,
                key,
                _type=URIRefOrLiteral,
                _class=rdf_class,
            )

@staticmethod
def _not_empty_dict(data_dict):
return any(data_dict.values())
152 changes: 152 additions & 0 deletions ckanext/dcat/profiles/euro_health_dcat_ap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from rdflib import XSD, Literal, URIRef
from rdflib.namespace import Namespace

from ckanext.dcat.profiles.base import URIRefOrLiteral
from ckanext.dcat.profiles.euro_dcat_ap_3 import EuropeanDCATAP3Profile

# Namespace for the HealthDCAT-AP terms. Note: not finalized yet
HEALTHDCATAP = Namespace("http://healthdataportal.eu/ns/health#")

# Namespace for the Data Privacy Vocabulary (DPV) terms
DPV = Namespace("https://w3id.org/dpv#")

# Prefix -> namespace pairs that get bound to the graph on serialization
namespaces = dict(healthdcatap=HEALTHDCATAP, dpv=DPV)


class EuropeanHealthDCATAPProfile(EuropeanDCATAP3Profile):
    """
    A profile implementing HealthDCAT-AP, a health-related extension of the DCAT
    application profile for sharing information about Catalogues containing Datasets
    and Data Services descriptions in Europe.

    The DCAT-AP 3 properties are handled by the parent profile; this class
    only adds the health-specific ones on top.
    """

    def parse_dataset(self, dataset_dict, dataset_ref):
        """
        Parses a dataset from the graph into a CKAN dataset dict.

        :param dataset_dict: the dataset dict being built
        :param dataset_ref: the RDF reference of the dataset
        :returns: the dataset dict, extended with the HealthDCAT-AP fields
        """
        # Call super method for DCAT-AP 3 properties
        dataset_dict = super().parse_dataset(dataset_dict, dataset_ref)

        return self._parse_health_fields(dataset_dict, dataset_ref)

    def _parse_health_fields(self, dataset_dict, dataset_ref):
        """
        Adds the HealthDCAT-AP specific fields found in the graph to
        dataset_dict (in place) and returns it.
        """
        self.__parse_healthdcat_stringvalues(dataset_dict, dataset_ref)

        self.__parse_healthdcat_intvalues(dataset_dict, dataset_ref)

        # Add the HDAB. There should only ever be one but you never know
        agents = self._agents_details(dataset_ref, HEALTHDCATAP.hdab)
        if agents:
            dataset_dict["hdab"] = agents

        # Retention period, stored as a single-item list holding a dict
        # with whichever of "start" / "end" could be found
        retention_start, retention_end = self._time_interval(
            dataset_ref, HEALTHDCATAP.retentionPeriod, dcat_ap_version=2
        )
        retention_dict = {}
        if retention_start is not None:
            retention_dict["start"] = retention_start
        if retention_end is not None:
            retention_dict["end"] = retention_end
        if retention_dict:
            dataset_dict["retention_period"] = [retention_dict]

        return dataset_dict

    def __parse_healthdcat_intvalues(self, dataset_dict, dataset_ref):
        """Copies the integer-valued HealthDCAT-AP properties to dataset_dict."""
        for key, predicate in (
            ("min_typical_age", HEALTHDCATAP.minTypicalAge),
            ("max_typical_age", HEALTHDCATAP.maxTypicalAge),
            ("number_of_records", HEALTHDCATAP.numberOfRecords),
            ("number_of_unique_individuals", HEALTHDCATAP.numberOfUniqueIndividuals),
        ):
            value = self._object_value_int(dataset_ref, predicate)
            # A zero value evaluates as False but is definitely not a None
            if value is not None:
                dataset_dict[key] = value

    def __parse_healthdcat_stringvalues(self, dataset_dict, dataset_ref):
        """Copies the list-valued HealthDCAT-AP properties to dataset_dict."""
        for key, predicate in (
            ("analytics", HEALTHDCATAP.analytics),
            ("code_values", HEALTHDCATAP.hasCodeValues),
            ("coding_system", HEALTHDCATAP.hasCodingSystem),
            ("health_category", HEALTHDCATAP.healthCategory),
            ("health_theme", HEALTHDCATAP.healthTheme),
            ("legal_basis", DPV.hasLegalBasis),
            ("personal_data", DPV.hasPersonalData),
            ("population_coverage", HEALTHDCATAP.populationCoverage),
            ("publisher_note", HEALTHDCATAP.publisherNote),
            ("publisher_type", HEALTHDCATAP.publisherType),
            ("purpose", DPV.hasPurpose),
        ):
            values = self._object_value_list(dataset_ref, predicate)
            if values:
                dataset_dict[key] = values

    def graph_from_dataset(self, dataset_dict, dataset_ref):
        """
        Serializes a CKAN dataset dict to the graph.

        The parent profile is run first, then the HealthDCAT-AP specific
        properties are added.

        :param dataset_dict: the CKAN dataset dict to serialize
        :param dataset_ref: the RDF reference of the dataset
        """
        super().graph_from_dataset(dataset_dict, dataset_ref)
        for prefix, namespace in namespaces.items():
            self.g.bind(prefix, namespace)

        # key, predicate, fallbacks, _type
        # The predicates must mirror the ones read back when parsing,
        # otherwise values do not survive a serialize / parse round trip
        items = [
            ("analytics", HEALTHDCATAP.analytics, None, URIRefOrLiteral),
            ("code_values", HEALTHDCATAP.hasCodeValues, None, URIRefOrLiteral),
            ("coding_system", HEALTHDCATAP.hasCodingSystem, None, URIRefOrLiteral),
            ("health_category", HEALTHDCATAP.healthCategory, None, URIRefOrLiteral),
            ("health_theme", HEALTHDCATAP.healthTheme, None, URIRefOrLiteral),
            ("legal_basis", DPV.hasLegalBasis, None, URIRefOrLiteral),
            (
                "population_coverage",
                HEALTHDCATAP.populationCoverage,
                None,
                URIRefOrLiteral,
            ),
            ("personal_data", DPV.hasPersonalData, None, URIRef),
            ("publisher_note", HEALTHDCATAP.publisherNote, None, URIRefOrLiteral),
            ("publisher_type", HEALTHDCATAP.publisherType, None, URIRefOrLiteral),
            ("purpose", DPV.hasPurpose, None, URIRefOrLiteral),
        ]
        self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

        items = [
            ("min_typical_age", HEALTHDCATAP.minTypicalAge),
            ("max_typical_age", HEALTHDCATAP.maxTypicalAge),
            ("number_of_records", HEALTHDCATAP.numberOfRecords),
            ("number_of_unique_individuals", HEALTHDCATAP.numberOfUniqueIndividuals),
        ]
        for key, predicate in items:
            self._add_nonneg_integer_triple(dataset_dict, dataset_ref, key, predicate)

        self._add_agents(dataset_ref, dataset_dict, "hdab", HEALTHDCATAP.hdab)

        # NOTE(review): "retention_period" is filled in when parsing but is
        # not written to the graph here, so it does not round trip - confirm
        # whether that is intended

    def _add_nonneg_integer_triple(self, dataset_dict, dataset_ref, key, predicate):
        """
        Adds non-negative integers to the Dataset graph (xsd:nonNegativeInteger)

        Values that can not be read as a non-negative integer are added as
        plain literals instead, so the information is not lost. Missing or
        empty values are skipped.

        dataset_ref: subject of Graph
        key: scheming key in CKAN
        predicate: predicate to use
        """
        value = self._get_dict_value(dataset_dict, key)

        # A numeric zero evaluates as False but is a perfectly valid value
        is_zero = (
            isinstance(value, (int, float))
            and not isinstance(value, bool)
            and value == 0
        )
        if not value and not is_zero:
            return

        try:
            number = int(value)
        except (ValueError, TypeError):
            number = None

        if number is not None and number >= 0:
            obj = Literal(number, datatype=XSD.nonNegativeInteger)
        else:
            obj = Literal(value)

        self.g.add((dataset_ref, predicate, obj))

    def graph_from_catalog(self, catalog_dict, catalog_ref):
        """
        Serializes the catalog to the graph. HealthDCAT-AP adds nothing at
        the catalog level here, the parent profile does all the work.
        """
        super().graph_from_catalog(catalog_dict, catalog_ref)
17 changes: 17 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,23 @@ dataset_fields:
help_inline: true
help_text: This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset.

- field_name: qualified_relation
label: Qualified relation
repeating_label: Relationship
repeating_subfields:

- field_name: uri
label: URI

- field_name: relation
label: Relation
help_text: The resource related to the source resource.

- field_name: role
label: Role
help_text: The function of an entity or agent with respect to another entity or resource.
help_text: A description of a relationship with another resource.

#- field_name: hvd_category
# label: HVD Category
# preset: multiple_text
Expand Down
17 changes: 17 additions & 0 deletions ckanext/dcat/schemas/dcat_us_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,23 @@ dataset_fields:
- field_name: license
label: License

- field_name: qualified_relation
label: Qualified relation
repeating_label: Relationship
repeating_subfields:

- field_name: uri
label: URI

- field_name: relation
label: Relation
help_text: The resource related to the source resource.

- field_name: role
label: Role
help_text: The function of an entity or agent with respect to another entity or resource.
help_text: A description of a relationship with another resource.

# Note: if not provided, this will be autogenerated
- field_name: uri
label: URI
Expand Down
Loading

0 comments on commit f9cd102

Please sign in to comment.