From f23669878ea9d87a4929d483217f4a61959fd9ea Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sun, 15 Dec 2024 19:00:44 +0100
Subject: [PATCH 01/59] Updated dataset, including the following changes:

- Allow adding other types of entries to the triplestore that are not
  datasets. Ex: samples, models, instruments, people, projects...
- Renamed list_dataset_iris() to search_iris(). It can now be used to
  search for all types of entries.
- Renamed prepare() to as_jsonld() and made it part of the public API
---
 tests/dataset/test_dataset.py    |  14 ++--
 tests/input/semdata.yaml         |  12 ++--
 tripper/context/0.2/context.json |   4 +-
 tripper/dataset/__init__.py      |   3 +-
 tripper/dataset/dataset.py       | 120 +++++++++++++++++++------------
 5 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4aa8fbdb..105db426 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -116,12 +116,7 @@ def test_datadoc():
     # pylint: disable=too-many-statements
     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
-    from tripper.dataset import (
-        list_dataset_iris,
-        load_dict,
-        save_datadoc,
-        save_dict,
-    )
+    from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris

     pytest.importorskip("dlite")
     pytest.importorskip("rdflib")
@@ -188,20 +183,19 @@ def test_datadoc():

     # Test searching the triplestore
     SAMPLE = ts.namespaces["sample"]
-    datasets = list_dataset_iris(ts)
+    datasets = search_iris(ts)
     named_datasets = {
         SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"],
         SEMDATA["SEM_cement_batch2/77600-23-001"],
         SEMDATA["SEM_cement_batch2"],
-        SAMPLE["SEM_cement_batch2/77600-23-001"],
     }
     assert not named_datasets.difference(datasets)
-    assert set(list_dataset_iris(ts, creator="Sigurd Wenner")) == {
+    assert set(search_iris(ts, creator="Sigurd Wenner")) == {
         SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"],
         SEMDATA["SEM_cement_batch2/77600-23-001"],
         SEMDATA["SEM_cement_batch2"],
     }
-    assert set(list_dataset_iris(ts, _type=CHAMEO.Sample)) == {
+    assert set(search_iris(ts, type=CHAMEO.Sample)) == {
         SAMPLE["SEM_cement_batch2/77600-23-001"],
     }

diff --git a/tests/input/semdata.yaml b/tests/input/semdata.yaml
index 0e99919f..2d1da201 100644
--- a/tests/input/semdata.yaml
+++ b/tests/input/semdata.yaml
@@ -1,4 +1,5 @@
 ---
+
 # This extends the list of prefixes that are already defined in the context
 prefixes:
   sem: https://w3id.com/emmo/domain/sem/0.1#
@@ -62,10 +63,6 @@ datasets:
       downloadURL: sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2
       mediaType: inode/directory

-  - "@id": sample:SEM_cement_batch2/77600-23-001
-    "@type": chameo:Sample
-    title: Series for SEM images for sample 77600-23-001.
-
 parsers:
   - "@id": parser:sem_hitachi
@@ -81,3 +78,10 @@ generators:
     generatorType: application/vnd.dlite-generate
     configuration:
       driver: hitachi
+
+
+# Other entities, like samples, instruments, persons, models etc...
+other_entries:
+  - "@id": sample:SEM_cement_batch2/77600-23-001
+    "@type": chameo:Sample
+    title: Series for SEM images for sample 77600-23-001.
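Example usage of the updated API, mirroring the test changes above (a
minimal sketch; it assumes the optional PyYAML, rdflib and dlite
dependencies used by the tests, and the "sample:my-sample" IRI in the
as_jsonld() call is purely illustrative):

    from tripper import CHAMEO, Triplestore
    from tripper.dataset import as_jsonld, save_datadoc, search_iris

    ts = Triplestore(backend="rdflib")
    save_datadoc(ts, "tests/input/semdata.yaml")

    # search_iris() is no longer restricted to datasets; any type of
    # entry documented in the triplestore can be matched.
    samples = search_iris(ts, type=CHAMEO.Sample)

    # as_jsonld() is now public and returns the documentation as a
    # JSON-LD dict without touching the triplestore.
    doc = as_jsonld(
        {"@id": "sample:my-sample", "@type": CHAMEO.Sample}, type="entry"
    )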
diff --git a/tripper/context/0.2/context.json b/tripper/context/0.2/context.json index d5903ba9..3f658c0d 100644 --- a/tripper/context/0.2/context.json +++ b/tripper/context/0.2/context.json @@ -32,8 +32,8 @@ "hasCurrentVersion": "dcat:hasCurrentVersion", "hasVersion": "dcat:hasVersion", "inSeries": { - "@id" : "dcat:inSeries", - "@type" : "@id" + "@id": "dcat:inSeries", + "@type": "@id" }, "keyword": "dcat:keyword", "landingPage": "dcat:landingPage", diff --git a/tripper/dataset/__init__.py b/tripper/dataset/__init__.py index 0a3a5088..d6435b8d 100644 --- a/tripper/dataset/__init__.py +++ b/tripper/dataset/__init__.py @@ -2,12 +2,13 @@ from .dataaccess import load, save from .dataset import ( + as_jsonld, get_jsonld_context, get_partial_pipeline, get_prefixes, - list_dataset_iris, load_dict, read_datadoc, save_datadoc, save_dict, + search_iris, ) diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 0387328d..ba4a0fde 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -9,12 +9,13 @@ - `save_datadoc()`: Save documentation from YAML file to the triplestore. Functions for searching the triplestore: - - `list_dataset_iris()`: Get IRIs of matching datasets. + - `search_iris()`: Get IRIs of matching entries in the triplestore. Functions for working with the dict-representation: - `read_datadoc()`: Read documentation from YAML file and return it as dict. - `save_dict()`: Save dict documentation to the triplestore. - `load_dict()`: Load dict documentation from the triplestore. + - `as_jsonld()`: Return the dict as JSON-LD (represented as a Python dict) Functions for interaction with OTEAPI: - `get_partial_pipeline()`: Returns a OTELib partial pipeline. @@ -28,7 +29,6 @@ """ -# pylint: enable=line-too-long # pylint: disable=invalid-name,redefined-builtin,import-outside-toplevel import functools import io @@ -41,7 +41,7 @@ import requests import yaml # type: ignore -from tripper import DCAT, EMMO, OTEIO, RDF, Triplestore +from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore from tripper.utils import AttrDict, as_python if TYPE_CHECKING: # pragma: no cover @@ -90,6 +90,12 @@ "datadoc_label": "datasets", "@type": [DCAT.Dataset, EMMO.DataSet], }, + "entry": { + # General datacatalog entry that is not one of the above + # Ex: samples, instruments, models, people, projects, ... + "datadoc_label": "other_entries", # XXX better label? + "@type": OWL.NamedIndividual, + }, } @@ -120,14 +126,15 @@ def save_dict( Notes: The keys in `dct` and `kwargs` may be either properties defined in the - [JSON-LD context](https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json) - or one of the following special keywords: + [JSON-LD context] or one of the following special keywords: - "@id": Dataset IRI. Must always be given. - "@type": IRI of the ontology class for this type of data. For datasets, it is typically used to refer to a specific subclass of `emmo:DataSet` that provides a semantic description of this dataset. 
+    References:
+    [JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json
     """
     if "@id" not in dct:
         raise ValueError("`dct` must have an '@id' key")
@@ -136,7 +143,7 @@
     if prefixes:
         all_prefixes.update(prefixes)

-    d = prepare(type=type, dct=dct, prefixes=all_prefixes, **kwargs)
+    d = as_jsonld(dct=dct, type=type, prefixes=all_prefixes, **kwargs)

     # Bind prefixes
     for prefix, ns in all_prefixes.items():
@@ -199,8 +206,7 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None:
         except (
             dlite.DLiteMissingInstanceError  # pylint: disable=no-member
         ):
-            # __FIXME__: check session whether want to warn or re-reise
-            # in this case
+            # __FIXME__: check session whether to warn or re-raise
             warnings.warn(f"cannot load datamodel: {uri}")
         else:
             add_dataset(ts, dm)
@@ -476,7 +482,7 @@ def save_datadoc(
     for spec in dicttypes.values():
         label = spec["datadoc_label"]
         for dct in get(d, label):
-            dct = prepare(types[label], dct, prefixes=prefixes)
+            dct = as_jsonld(dct=dct, type=types[label], prefixes=prefixes)
             f = io.StringIO(json.dumps(dct))
             with Triplestore(backend="rdflib") as ts2:
                 ts2.parse(f, format="json-ld")
@@ -505,52 +511,65 @@ def prepare_datadoc(datadoc: dict) -> dict:
     for type, spec in dicttypes.items():
         label = spec["datadoc_label"]
         for i, dct in enumerate(get(d, label)):
-            d[label][i] = prepare(type, dct, prefixes=d.prefixes)
+            d[label][i] = as_jsonld(dct=dct, type=type, prefixes=d.prefixes)

     return d


-def prepare(
-    type: str, dct: dict, prefixes: dict, _recur: bool = False, **kwargs
+def as_jsonld(
+    dct: dict,
+    type: "Optional[str]" = "dataset",
+    prefixes: "Optional[dict]" = None,
+    _entryid: "Optional[str]" = None,
+    **kwargs,
 ) -> dict:
-    """Return an updated copy of dict `dct` with additional key-value
-    pairs needed for serialisation to RDF.
+    """Return an updated copy of dict `dct` as valid JSON-LD.

     Arguments:
-        type: Type of dict to prepare. Should be one of: "dataset",
-            "distribution", "parser" or "generator".
         dct: Dict to return an updated copy of.
+        type: Type of dict to prepare. Should either be one of the
+            pre-defined names: "dataset", "distribution", "accessService",
+            "parser" and "generator" or an IRI to a class in an ontology.
+            Defaults to "dataset".
         prefixes: Dict with prefixes in addition to those included in the
             JSON-LD context. Should map namespace prefixes to IRIs.
-        _recur: Whether this function is called recursively. Intended for
-            internal use.
+        _entryid: ID of the base entry that is documented. Intended for
+            internal use only.
         kwargs: Additional keyword arguments to add to the returned dict.
            A leading underscore in a key will be translated to a
-            leading "@"-sign. For example, "@id=..." may be provided
-            as "_id=...".
+            leading "@"-sign. For example, "@id" or "@context" may be
+            provided as "_id" or "_context", respectively.
+
     Returns:
-        An updated copy of `dct`.
+        An updated copy of `dct` as valid JSON-LD.

     """
     # pylint: disable=too-many-branches
-    if type not in dicttypes:
-        raise ValueError(
-            f"`type` must be one of: {', '.join(dicttypes.keys())}. "
-            f"Got: '{type}'"
-        )
-    spec = dicttypes[type]

     d = AttrDict()
-    if not _recur:
+    if not _entryid:
         d["@context"] = CONTEXT_URL
-    add(d, "@type", spec["@type"])  # get type at top
-    d.update(dct)
-    add(d, "@type", spec["@type"])  # readd type if overwritten
+
+    if type:
+        t = dicttypes[type]["@type"] if type in dicttypes else type
+        add(d, "@type", t)  # get type at top
+        d.update(dct)
+        add(d, "@type", t)  # re-add type if overwritten
+    else:
+        d.update(dct)

     for k, v in kwargs.items():
         key = f"@{k[1:]}" if re.match("^_([^_]|([^_].*[^_]))$", k) else k
         add(d, key, v)

+    if "@id" not in d and not _entryid:
+        raise ValueError("Missing '@id' in dict to document")
+
+    if not _entryid:
+        _entryid = d["@id"]
+
+    if "@type" not in d:
+        warnings.warn(f"Missing '@type' in dict to document: {_entryid}")
+
     all_prefixes = get_prefixes()
     if prefixes:
         all_prefixes.update(prefixes)
@@ -584,9 +603,11 @@
                 if isinstance(e, str):
                     v[i] = expand_iri(e, all_prefixes)
                 elif isinstance(e, dict) and k in nested:
-                    v[i] = prepare(k, e, prefixes=prefixes)
+                    v[i] = as_jsonld(
+                        e, k, _entryid=_entryid, prefixes=prefixes
+                    )
             elif isinstance(v, dict) and k in nested:
-                d[k] = prepare(k, v, prefixes=prefixes)
+                d[k] = as_jsonld(v, k, _entryid=_entryid, prefixes=prefixes)

     return d
@@ -711,31 +732,42 @@ def get_partial_pipeline(
     return pipeline


-def list_dataset_iris(ts: Triplestore, **kwargs):
-    """Return a list of IRIs for all datasets matching a set of criterias
-    specified by `kwargs`.
+def search_iris(ts: Triplestore, type=DCAT.Dataset, **kwargs):
+    """Return a list of IRIs for all entries of the given type.
+
+    Additional matching criteria can be specified by `kwargs`.

     Arguments:
         ts: Triplestore to search.
+        type: Search for entries that are individuals of the class with
+            this IRI. The default is `dcat:Dataset`.
         kwargs: Match criterias.

     Examples:
         List all dataset IRIs:

-            list_dataset_iris(ts)
+            search_iris(ts)

         List IRIs of all datasets with John Doe as `contactPoint`:

-            list_dataset_iris(ts, contactPoint="John Doe")
+            search_iris(ts, contactPoint="John Doe")
+
+        List IRIs of all samples:

-        List IRIs of all datasets with John Doe as `contactPoint` AND that are
+            search_iris(ts, type=CHAMEO.Sample)
+
+        List IRIs of all datasets with John Doe as `contactPoint` AND are
         measured on a given sample:

-            list_dataset_iris(
+            search_iris(
                 ts, contactPoint="John Doe", fromSample=SAMPLE.batch2/sample3
             )

    """
    crit = []
+
+    if type:
+        crit.append(f"  ?iri rdf:type <{type}> .")
+
    expanded = {v: k for k, v in get_shortnames().items()}
    for k, v in kwargs.items():
        key = f"@{k[1:]}" if k.startswith("_") else k
            )
        else:
            value = v
-        crit.append(f"  ?dataset <{predicate}> {value} .")
+        crit.append(f"  ?iri <{predicate}> {value} .")
    criterias = "\n".join(crit)
    query = f"""
    PREFIX rdf: <{RDF}>
-    PREFIX dcat: <{DCAT}>
-    SELECT ?dataset
+    SELECT ?iri
    WHERE {{
-        ?dataset rdf:type dcat:Dataset .
        {criterias}
    }}
    """

From 94fa59a0788f49a33964aad14d0008b91ea4cf18 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 00:18:00 +0100
Subject: [PATCH 02/59] Added new TableDoc class providing a table interface
 for data documentation.
--- docs/api_reference/dataset/tabledoc.md | 3 + pyproject.toml | 7 ++- tests/dataset/dataset_paths.py | 12 ++++ tests/dataset/test_dataaccess.py | 16 ++---- tests/dataset/test_dataset.py | 47 +++++++++------ tests/dataset/test_tabledoc.py | 79 ++++++++++++++++++++++++++ tripper/dataset/__init__.py | 1 + tripper/dataset/dataaccess.py | 4 +- tripper/dataset/dataset.py | 79 +++++++++++++++++++++++--- tripper/dataset/tabledoc.py | 68 ++++++++++++++++++++++ 10 files changed, 275 insertions(+), 41 deletions(-) create mode 100644 docs/api_reference/dataset/tabledoc.md create mode 100644 tests/dataset/dataset_paths.py create mode 100644 tests/dataset/test_tabledoc.py create mode 100644 tripper/dataset/tabledoc.py diff --git a/docs/api_reference/dataset/tabledoc.md b/docs/api_reference/dataset/tabledoc.md new file mode 100644 index 00000000..f3a73929 --- /dev/null +++ b/docs/api_reference/dataset/tabledoc.md @@ -0,0 +1,3 @@ +# tabledoc + +::: tripper.dataset.tabledoc diff --git a/pyproject.toml b/pyproject.toml index d5f7f94a..21196860 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,8 @@ max-public-methods = 25 max-locals = 20 disable = [ "fixme", - "too-many-positional-arguments", + "invalid-name", + #"too-many-positional-arguments", ] good-names = [ # Default @@ -115,8 +116,8 @@ good-names = [ "s", "p", "o", # Namespaces "EX", - # dict, value, file, ... - "d", "v", "f", + # dict, value, file, keyword... + "d", "v", "f", "kw", ] [tool.pytest.ini_options] diff --git a/tests/dataset/dataset_paths.py b/tests/dataset/dataset_paths.py new file mode 100644 index 00000000..e84b2f47 --- /dev/null +++ b/tests/dataset/dataset_paths.py @@ -0,0 +1,12 @@ +"""Defines paths for tests. + +It defines some directories and some utility functions that can be used +with or without conftest. +""" + +from pathlib import Path + +testdir = Path(__file__).resolve().parent.parent +ontodir = testdir / "ontologies" +indir = testdir / "input" +outdir = testdir / "output" diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py index c3a7b75d..bdc0ef45 100644 --- a/tests/dataset/test_dataaccess.py +++ b/tests/dataset/test_dataaccess.py @@ -2,18 +2,12 @@ # pylint: disable=invalid-name,too-many-locals,duplicate-code -from pathlib import Path - import pytest +from dataset_paths import outdir pytest.importorskip("yaml") pytest.importorskip("requests") -thisdir = Path(__file__).resolve().parent -testdir = thisdir.parent -inputdir = testdir / "input" -outputdir = testdir / "output" - # if True: def test_save_and_load(): @@ -38,7 +32,6 @@ def test_save_and_load(): # Test save dict save_dict( ts, - type="dataset", dct={ "@id": SEMDATA.img1, "distribution": { @@ -49,6 +42,7 @@ def test_save_and_load(): "format": "tiff", }, }, + type="dataset", ) newdistr = load_dict(ts, SEMDATA.img1) assert newdistr["@type"] == [DCAT.Dataset, EMMO.DataSet] @@ -57,12 +51,12 @@ def test_save_and_load(): save_dict( ts, - type="generator", dct={ "@id": GEN.sem_hitachi, "generatorType": "application/vnd.dlite-generate", "configuration": {"driver": "hitachi"}, }, + type="generator", ) # Test load dataset (this downloads an actual image from github) @@ -70,7 +64,7 @@ def test_save_and_load(): assert len(data) == 53502 # Test save dataset with anonymous distribution - newfile = outputdir / "newimage.tiff" + newfile = outdir / "newimage.tiff" newfile.unlink(missing_ok=True) buf = b"some bytes..." 
save( @@ -94,7 +88,7 @@ def test_save_and_load(): assert newimage.distribution.downloadURL == f"file:{newfile}" # Test save dataset with named distribution - newfile2 = outputdir / "newimage.png" + newfile2 = outdir / "newimage.png" newfile2.unlink(missing_ok=True) save( ts, diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 105db426..4e43cd10 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -2,18 +2,12 @@ # pylint: disable=invalid-name,too-many-locals,duplicate-code -from pathlib import Path - import pytest +from dataset_paths import indir pytest.importorskip("yaml") pytest.importorskip("requests") -thisdir = Path(__file__).resolve().parent -testdir = thisdir.parent -inputdir = testdir / "input" -outputdir = testdir / "output" - def test_get_jsonld_context(): """Test get_jsonld_context().""" @@ -73,12 +67,31 @@ def test_add(): from tripper.dataset.dataset import add d = {} - add(d, "a", 1) - add(d, "b", 1) - add(d, "b", 1) - add(d, "a", 2) - add(d, "a", 1) - assert d == {"a": [1, 2], "b": 1} + add(d, "a", "1") + add(d, "b", "1") + add(d, "b", "1") + add(d, "a", "2") + add(d, "a", "1") + add(d, "a", {"c": "3"}) + assert d == {"a": ["1", "2", {"c": "3"}], "b": "1"} + + +def test_addnested(): + """Test help-function addnested().""" + from tripper.dataset.dataset import addnested + from tripper.utils import AttrDict + + d = AttrDict() + addnested(d, "a.b", "1") + assert d == {"a": {"b": "1"}} + + addnested(d, "a", "2") + assert d == {"a": ["2", {"b": "1"}]} + + addnested(d, "a.b.c", {"d": "3"}) + assert d.a[0] == "2" + assert d.a[1].b[1].c == {"d": "3"} + assert d == {"a": ["2", {"b": ["1", {"c": {"d": "3"}}]}]} def test_get(): @@ -124,7 +137,7 @@ def test_datadoc(): ts = Triplestore("rdflib") # Load data documentation into triplestore - datadoc = save_datadoc(ts, inputdir / "semdata.yaml") + datadoc = save_datadoc(ts, indir / "semdata.yaml") assert isinstance(datadoc, dict) assert "@context" in datadoc @@ -167,8 +180,8 @@ def test_datadoc(): # Test save dict save_dict( ts, - "distribution", - {"@id": SEMDATA.newdistr, "format": "txt"}, + dct={"@id": SEMDATA.newdistr, "format": "txt"}, + type="distribution", prefixes={"echem": "https://w3id.org/emmo/domain/electrochemistry"}, ) newdistr = load_dict(ts, SEMDATA.newdistr) @@ -210,7 +223,7 @@ def test_pipeline(): # Prepare triplestore ts = Triplestore("rdflib") - save_datadoc(ts, inputdir / "semdata.yaml") + save_datadoc(ts, indir / "semdata.yaml") SEMDATA = ts.namespaces["semdata"] diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py new file mode 100644 index 00000000..77e81dcc --- /dev/null +++ b/tests/dataset/test_tabledoc.py @@ -0,0 +1,79 @@ +"""Test the dataset module.""" + +from tripper import Triplestore +from tripper.dataset import TableDoc + + +# if True: +def test_as_dicts(): + """Test the as_dicts() method.""" + + from tripper import DCAT, EMMO, Namespace + + ONTO = Namespace("http:/example.com/onto#") + DS = Namespace("http:/example.com/datasets#") + + td = TableDoc( + header=[ + "@id", + "@type", + "@type", + "inSeries", + "distribution.downloadURL", + ], + data=[ + ("ds:s1", "onto:T1", "onto:T2", None, "file:///data/"), + ("ds:d1", "onto:T1", None, "ds:s1", "file:///data/d1.txt"), + ("ds:d2", "onto:T2", None, "ds:s1", "file:///data/d2.txt"), + ], + prefixes={ + "onto": "http:/example.com/onto#", + "ds": "http:/example.com/datasets#", + }, + # context={ + # "ds": "http:/example.com/datasets#", + # }, + ) + + s1, d1, d2 = td.asdicts() # pylint: 
disable=unbalanced-tuple-unpacking + + assert s1["@id"] == DS.s1 + assert set(s1["@type"]) == { + DCAT.Dataset, + EMMO.DataSet, + ONTO.T1, + ONTO.T2, + } + assert "inSeries" not in s1 + assert s1.distribution == { + "@type": DCAT.Distribution, + "downloadURL": "file:///data/", + } + + assert d1["@id"] == DS.d1 + assert set(d1["@type"]) == { + DCAT.Dataset, + EMMO.DataSet, + ONTO.T1, + } + assert d1.inSeries == DS.s1 + assert d1.distribution == { + "@type": DCAT.Distribution, + "downloadURL": "file:///data/d1.txt", + } + + assert d2["@id"] == DS.d2 + assert set(d2["@type"]) == { + DCAT.Dataset, + EMMO.DataSet, + ONTO.T2, + } + assert d2.inSeries == DS.s1 + assert d2.distribution == { + "@type": DCAT.Distribution, + "downloadURL": "file:///data/d2.txt", + } + + ts = Triplestore(backend="rdflib") + td.save(ts) + print(ts.serialize()) diff --git a/tripper/dataset/__init__.py b/tripper/dataset/__init__.py index d6435b8d..e0b53d58 100644 --- a/tripper/dataset/__init__.py +++ b/tripper/dataset/__init__.py @@ -12,3 +12,4 @@ save_dict, search_iris, ) +from .tabledoc import TableDoc diff --git a/tripper/dataset/dataaccess.py b/tripper/dataset/dataaccess.py index 672b2a59..3e248e36 100644 --- a/tripper/dataset/dataaccess.py +++ b/tripper/dataset/dataaccess.py @@ -175,9 +175,9 @@ def save( # Update triplestore ts.add_triples(triples) if save_dataset: - save_dict(ts, "dataset", dataset, prefixes=prefixes) + save_dict(ts, dataset, "dataset", prefixes=prefixes) elif save_distribution: - save_dict(ts, "distribution", distribution, prefixes=prefixes) + save_dict(ts, distribution, "distribution", prefixes=prefixes) return dataset["@id"] diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index ba4a0fde..2bb5e6a1 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -101,8 +101,8 @@ def save_dict( ts: Triplestore, - type: str, dct: dict, + type: str = "dataset", prefixes: "Optional[dict]" = None, **kwargs, ) -> dict: @@ -111,9 +111,11 @@ def save_dict( Arguments: ts: Triplestore to save to. - type: Type of dict to save. Should be one of: "dataset", - "distribution", "parser" or "generator". dct: Dict with data to save. + type: Type of data to save. Should either be one of the + pre-defined names: "dataset", "distribution", "accessService", + "parser" and "generator" or an IRI to a class in an ontology. + Defaults to "dataset". prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. kwargs: Additional keyword arguments to add to the returned dict. @@ -333,6 +335,9 @@ def get_values( return values +# TODO: update this function to take an initial argument `context`, +# which can be an URL (string), dict with raw context or a list of +# strings or dicts. @cache # type: ignore def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: """Returns the JSON-LD context as a dict. @@ -355,6 +360,8 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: return context +# TODO: update this to take an initial argument `context`. +# See get_jsonld_context() def get_prefixes(timeout: float = 5) -> dict: """Loads the JSON-LD context and returns a dict mapping prefixes to their namespace URL.""" @@ -367,6 +374,8 @@ def get_prefixes(timeout: float = 5) -> dict: return prefixes +# TODO: update this to take an initial argument `context`. 
+# See get_jsonld_context() def get_shortnames(timeout: float = 5) -> dict: """Loads the JSON-LD context and returns a dict mapping IRIs to their short names defined in the context.""" @@ -407,9 +416,61 @@ def add(d: dict, key: str, value: "Any") -> None: d[key] = value else: klst = d[key] if isinstance(d[key], list) else [d[key]] - vlst = value if isinstance(value, list) else [value] - v = list(set(klst).union(vlst)) - d[key] = v[0] if len(v) == 1 else sorted(v) + if isinstance(value, dict): + v = klst if value in klst else klst + [value] + else: + vlst = value if isinstance(value, list) else [value] + try: + v = list(set(klst).union(vlst)) + except TypeError: # klst contains unhashable dicts + v = klst + [x for x in vlst if x not in klst] + d[key] = ( + v[0] + if len(v) == 1 + else sorted( + # Sort dicts at end, by representing them with a huge + # unicode character + v, + key=lambda x: "\uffff" if isinstance(x, dict) else x, + ) + ) + + +def addnested(d: "Union[dict, list]", key: str, value: "Any"): + """Like add(), but allows `key` to be a dot-separated list of sub-keys. + + Each sub-key will be added to `d` as a corresponding sub-dict. + + Example: + + >>> d = {} + >>> addnested(d, "a.b.c", "val") + {'a': {'b': {'c': 'val'}}} + + """ + if "." in key: + first, rest = key.split(".", 1) + if isinstance(d, list): + for ele in d: + if isinstance(ele, dict): + addnested(ele, key, value) + break + else: + d.append(addnested({}, key, value)) + elif first in d and isinstance(d[first], (dict, list)): + addnested(d[first], rest, value) + else: + addnested(d, first, addnested(AttrDict(), rest, value)) + elif isinstance(d, list): + for ele in d: + if isinstance(ele, dict): + add(ele, key, value) + break + else: + d.append({key: value}) + else: + add(d, key, value) + return d def get( @@ -516,6 +577,8 @@ def prepare_datadoc(datadoc: dict) -> dict: return d +# TODO: update this function to correctly handle multiple contexts +# provided with the `_context` keyword argument. def as_jsonld( dct: dict, type: "Optional[str]" = "dataset", @@ -526,8 +589,8 @@ def as_jsonld( """Return an updated copy of dict `dct` as valid JSON-LD. Arguments: - dct: Dict to return an updated copy of. - type: Type of dict to prepare. Should either be one of the + dct: Dict with data documentation represent as JSON-LD. + type: Type of data to document. Should either be one of the pre-defined names: "dataset", "distribution", "accessService", "parser" and "generator" or an IRI to a class in an ontology. Defaults to "dataset". diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py new file mode 100644 index 00000000..b1a8ef51 --- /dev/null +++ b/tripper/dataset/tabledoc.py @@ -0,0 +1,68 @@ +"""Basic interface for tabular documentation of datasets.""" + +from typing import TYPE_CHECKING + +from tripper import Triplestore +from tripper.dataset.dataset import addnested, as_jsonld, save_dict +from tripper.utils import AttrDict + +if TYPE_CHECKING: # pragma: no cover + from typing import List, Optional, Sequence, Union + + +class TableDoc: + """Representation of tabular documentation of datasets. + + Arguments: + header: Sequence of column header labels. Nested data can + be represented by dot-separated label strings (e.g. + "distribution.downloadURL") + data: Sequence of rows of data. Each row documents an entry. + type: Type of data to save (applies to all rows). 
Should + either be one of the pre-defined names: "dataset", + "distribution", "accessService", "parser" and "generator" + or an IRI to a class in an ontology. Defaults to + "dataset". + prefixes: Dict with prefixes in addition to those included in the + JSON-LD context. Should map namespace prefixes to IRIs. + context: Dict with user-defined JSON-LD context. + + """ + + # pylint: disable=redefined-builtin,too-few-public-methods + + def __init__( + self, + header: "Sequence[str]", + data: "Sequence[Sequence[str]]", + type: "Optional[str]" = "dataset", + prefixes: "Optional[dict]" = None, + context: "Optional[Union[dict, list]]" = None, + ): + self.header = header + self.data = data + self.type = type + self.prefixes = prefixes + self.context = context + + def asdicts(self) -> "List[dict]": + """Return the table as a list of dicts.""" + kw = {"_context": self.context} if self.context else {} + + results = [] + for row in self.data: + d = AttrDict() + for i, colname in enumerate(self.header): + cell = row[i] + if cell: + addnested(d, colname, cell) + jsonld = as_jsonld( + d, type=self.type, prefixes=self.prefixes, **kw # type: ignore + ) + results.append(jsonld) + return results + + def save(self, ts: Triplestore) -> None: + """Save tabular datadocumentation to triplestore.""" + for d in self.asdicts(): + save_dict(ts, d) From 028054fad4ae6a7e0055ad746b90f200c807e965 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 00:39:21 +0100 Subject: [PATCH 03/59] Import indir/outdir inside test functions --- tests/dataset/test_dataaccess.py | 3 ++- tests/dataset/test_dataset.py | 5 ++++- tripper/dataset/dataset.py | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py index bdc0ef45..0cbc7727 100644 --- a/tests/dataset/test_dataaccess.py +++ b/tests/dataset/test_dataaccess.py @@ -3,7 +3,6 @@ # pylint: disable=invalid-name,too-many-locals,duplicate-code import pytest -from dataset_paths import outdir pytest.importorskip("yaml") pytest.importorskip("requests") @@ -14,6 +13,8 @@ def test_save_and_load(): """Test save() and load().""" # pylint: disable=too-many-statements + from dataset_paths import outdir + from tripper import DCAT, DCTERMS, EMMO, Triplestore from tripper.dataset import load, load_dict, save, save_dict diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 4e43cd10..1a0cffbd 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -3,7 +3,6 @@ # pylint: disable=invalid-name,too-many-locals,duplicate-code import pytest -from dataset_paths import indir pytest.importorskip("yaml") pytest.importorskip("requests") @@ -128,6 +127,8 @@ def test_datadoc(): """Test save_datadoc() and load_dict()/save_dict().""" # pylint: disable=too-many-statements + from dataset_paths import indir + from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris @@ -219,6 +220,8 @@ def test_pipeline(): from tripper import Triplestore otelib = pytest.importorskip("otelib") + from dataset_paths import indir + from tripper.dataset import get_partial_pipeline, save_datadoc # Prepare triplestore diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 2bb5e6a1..13fdb935 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -445,7 +445,8 @@ def addnested(d: "Union[dict, list]", key: str, value: "Any"): >>> d = {} >>> addnested(d, "a.b.c", "val") - {'a': {'b': 
{'c': 'val'}}} + >>> d == {'a': {'b': {'c': 'val'}}} + True """ if "." in key: From ef5239ad7e96f586c5ce970cec834cf7881864ff Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 00:56:02 +0100 Subject: [PATCH 04/59] Fixed doctest issue --- tripper/dataset/dataset.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 13fdb935..8db5b497 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -38,9 +38,6 @@ from pathlib import Path from typing import TYPE_CHECKING -import requests -import yaml # type: ignore - from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore from tripper.utils import AttrDict, as_python @@ -171,6 +168,8 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None: - data models (require that DLite is installed) """ + import requests + # Save statements and mappings statements = get_values(dct, "statements") statements.extend(get_values(dct, "mappings")) @@ -351,6 +350,8 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: fromfile: Whether to load the context from local file. """ + import requests + if fromfile: with open(CONTEXT_PATH[7:], "r", encoding="utf-8") as f: context = json.load(f)["@context"] @@ -436,16 +437,18 @@ def add(d: dict, key: str, value: "Any") -> None: ) -def addnested(d: "Union[dict, list]", key: str, value: "Any"): +def addnested( + d: "Union[dict, list]", key: str, value: "Any" +) -> "Union[dict, list]": """Like add(), but allows `key` to be a dot-separated list of sub-keys. + Returns the updated `d`. Each sub-key will be added to `d` as a corresponding sub-dict. Example: >>> d = {} - >>> addnested(d, "a.b.c", "val") - >>> d == {'a': {'b': {'c': 'val'}}} + >>> addnested(d, "a.b.c", "val") == {'a': {'b': {'c': 'val'}}} True """ @@ -508,6 +511,8 @@ def expand_iri(iri: str, prefixes: dict) -> str: def read_datadoc(filename: "Union[str, Path]") -> dict: """Read YAML data documentation and return it as a dict.""" + import yaml # type: ignore + with open(filename, "r", encoding="utf-8") as f: d = yaml.safe_load(f) return prepare_datadoc(d) From 331878a1756ed225d48fc3a2e0acdd567ccc4774 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 01:01:22 +0100 Subject: [PATCH 05/59] Skip test_tabledoc if rdflib isn't available --- tests/dataset/test_tabledoc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 77e81dcc..49902b69 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -1,5 +1,7 @@ """Test the dataset module.""" +import pytest + from tripper import Triplestore from tripper.dataset import TableDoc @@ -10,6 +12,8 @@ def test_as_dicts(): from tripper import DCAT, EMMO, Namespace + pytest.importorskip("rdflib") + ONTO = Namespace("http:/example.com/onto#") DS = Namespace("http:/example.com/datasets#") From 5fe9cf7c387a9fb3ccd02856c0878ef8dceb8eba Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 01:04:02 +0100 Subject: [PATCH 06/59] More pylint fixes... 
--- tests/dataset/test_dataaccess.py | 2 +- tests/dataset/test_dataset.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py index 0cbc7727..ecf98dba 100644 --- a/tests/dataset/test_dataaccess.py +++ b/tests/dataset/test_dataaccess.py @@ -13,7 +13,7 @@ def test_save_and_load(): """Test save() and load().""" # pylint: disable=too-many-statements - from dataset_paths import outdir + from dataset_paths import outdir # pytest: disable=import-error from tripper import DCAT, DCTERMS, EMMO, Triplestore from tripper.dataset import load, load_dict, save, save_dict diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 1a0cffbd..4fb5ec09 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -127,7 +127,7 @@ def test_datadoc(): """Test save_datadoc() and load_dict()/save_dict().""" # pylint: disable=too-many-statements - from dataset_paths import indir + from dataset_paths import indir # pytest: disable=import-error from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris @@ -220,7 +220,7 @@ def test_pipeline(): from tripper import Triplestore otelib = pytest.importorskip("otelib") - from dataset_paths import indir + from dataset_paths import indir # pytest: disable=import-error from tripper.dataset import get_partial_pipeline, save_datadoc From 4aaeed8551b56ece4c7c9af151ce3152a2779077 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 01:08:20 +0100 Subject: [PATCH 07/59] Placed importskip before importing EMMO --- tests/dataset/test_tabledoc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 49902b69..4a1c0613 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -2,18 +2,16 @@ import pytest -from tripper import Triplestore -from tripper.dataset import TableDoc - # if True: def test_as_dicts(): """Test the as_dicts() method.""" - from tripper import DCAT, EMMO, Namespace - pytest.importorskip("rdflib") + from tripper import DCAT, EMMO, Namespace, Triplestore + from tripper.dataset import TableDoc + ONTO = Namespace("http:/example.com/onto#") DS = Namespace("http:/example.com/datasets#") From 0f21fbbde6f6b6eaaf6583145f403fb7f841c0a7 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 01:12:12 +0100 Subject: [PATCH 08/59] typo --- tests/dataset/test_dataaccess.py | 2 +- tests/dataset/test_dataset.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py index ecf98dba..af058440 100644 --- a/tests/dataset/test_dataaccess.py +++ b/tests/dataset/test_dataaccess.py @@ -13,7 +13,7 @@ def test_save_and_load(): """Test save() and load().""" # pylint: disable=too-many-statements - from dataset_paths import outdir # pytest: disable=import-error + from dataset_paths import outdir # pylint: disable=import-error from tripper import DCAT, DCTERMS, EMMO, Triplestore from tripper.dataset import load, load_dict, save, save_dict diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 4fb5ec09..9bdec2c6 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -127,7 +127,7 @@ def test_datadoc(): """Test save_datadoc() and load_dict()/save_dict().""" # pylint: disable=too-many-statements - from dataset_paths import 
indir # pytest: disable=import-error + from dataset_paths import indir # pylint: disable=import-error from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris @@ -220,7 +220,7 @@ def test_pipeline(): from tripper import Triplestore otelib = pytest.importorskip("otelib") - from dataset_paths import indir # pytest: disable=import-error + from dataset_paths import indir # pylint: disable=import-error from tripper.dataset import get_partial_pipeline, save_datadoc From 4cc88cb0fb119697f35184177e9dbfb697141ac4 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 15:53:34 +0100 Subject: [PATCH 09/59] Fixed pylint errors --- tripper/dataset/dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index b98eaeaa..bbb4a178 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -42,9 +42,6 @@ from pathlib import Path from typing import TYPE_CHECKING -import requests -import yaml # type: ignore - from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore from tripper.utils import AttrDict, as_python From 92b213d7b2a292b04ddbaf0921d4e046a27e95db Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 19 Dec 2024 10:10:35 +0100 Subject: [PATCH 10/59] added csv file --- tests/input/semdata.csv | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/input/semdata.csv diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv new file mode 100644 index 00000000..631d9e69 --- /dev/null +++ b/tests/input/semdata.csv @@ -0,0 +1,5 @@ +@id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf +semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1 +semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;; +semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;…;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;; +mple:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;; From ae20a0a3dc36926c90511f522b47dbbce5b04259 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 19 Dec 2024 10:54:48 +0100 Subject: [PATCH 11/59] Added csv parser --- tests/dataset/test_tabledoc.py | 28 ++++++++++++++++++++++++++++ tests/input/semdata.csv | 8 ++++---- tripper/dataset/tabledoc.py | 25 +++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 4a1c0613..52ea9236 
100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -79,3 +79,31 @@ def test_as_dicts(): ts = Triplestore(backend="rdflib") td.save(ts) print(ts.serialize()) + + +if True: + # def test_parse_csv(): + """Test parsing a csv file.""" + from dataset_paths import indir # pylint: disable=import-error + + from tripper.dataset import TableDoc + + td = TableDoc.parse_csv( + indir / "semdata.csv", + delimiter=";", + prefixes={ + "sem": "https://w3id.com/emmo/domain/sem/0.1#", + "semdata": "https://he-matchmaker.eu/data/sem/", + "sample": "https://he-matchmaker.eu/sample/", + "mat": "https://he-matchmaker.eu/material/", + "dm": "http://onto-ns.com/meta/characterisation/0.1/SEMImage#", + "parser": "http://sintef.no/dlite/parser#", + "gen": "http://sintef.no/dlite/generator#", + }, + ) + + img, series, batch, sample = td.asdicts() + assert img["@id"] == ( + "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/" + "77600-23-001/77600-23-001_5kV_400x_m001" + ) diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv index 631d9e69..4df732ef 100644 --- a/tests/input/semdata.csv +++ b/tests/input/semdata.csv @@ -1,5 +1,5 @@ @id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf -semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1 -semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;; -semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;…;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;; -mple:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;; +semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1 +semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;; 
+semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;; +sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;; diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index b1a8ef51..50fe86b7 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -1,5 +1,7 @@ """Basic interface for tabular documentation of datasets.""" +import csv +from pathlib import Path from typing import TYPE_CHECKING from tripper import Triplestore @@ -66,3 +68,26 @@ def save(self, ts: Triplestore) -> None: """Save tabular datadocumentation to triplestore.""" for d in self.asdicts(): save_dict(ts, d) + + @classmethod + def parse_csv( + self, + csvfile: "Union[Path, str]", + type: "Optional[str]" = "dataset", + prefixes: "Optional[dict]" = None, + context: "Optional[Union[dict, list]]" = None, + dialect="excel", + **kwargs, + ) -> None: + """Parse a csv file.""" + with open(csvfile, newline="") as f: + reader = csv.reader(f, dialect=dialect, **kwargs) + header = next(reader)[0].split(reader.dialect.delimiter) + data = [row for row in reader] + return TableDoc( + header=header, + data=data, + type=type, + prefixes=prefixes, + context=context, + ) From 543e99e743876f68eaa8913c02d9df33ffc3ca07 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 19 Dec 2024 10:59:01 +0100 Subject: [PATCH 12/59] Updated the test --- tests/dataset/test_tabledoc.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 52ea9236..ae7ca348 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -81,13 +81,16 @@ def test_as_dicts(): print(ts.serialize()) -if True: - # def test_parse_csv(): +#if True: +def test_parse_csv(): """Test parsing a csv file.""" from dataset_paths import indir # pylint: disable=import-error + from tripper import Triplestore from tripper.dataset import TableDoc + pytest.importorskip("rdflib") + td = TableDoc.parse_csv( indir / "semdata.csv", delimiter=";", @@ -107,3 +110,7 @@ def test_as_dicts(): "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/" "77600-23-001/77600-23-001_5kV_400x_m001" ) + + ts = Triplestore(backend="rdflib") + td.save(ts) + print(ts.serialize()) From b3e3d0723f879547099aabcc45f9148e4a959700 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Dec 2024 09:59:28 +0000 Subject: [PATCH 13/59] [pre-commit.ci] auto fixes from pre-commit hooks For more information, see https://pre-commit.ci --- tests/dataset/test_tabledoc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index ae7ca348..00179810 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -81,7 +81,7 @@ def test_as_dicts(): print(ts.serialize()) -#if True: +# if True: def test_parse_csv(): """Test parsing a csv file.""" from dataset_paths import indir # pylint: disable=import-error From 700c514282a8374f32dd5d858be87e466be1eb4a Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 19 Dec 2024 13:17:46 +0100 Subject: [PATCH 14/59] Fixed failing tests --- tests/dataset/test_tabledoc.py | 8 +++++--- tripper/dataset/tabledoc.py | 35 +++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 10 
deletions(-) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index ae7ca348..f1640dbc 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -81,16 +81,16 @@ def test_as_dicts(): print(ts.serialize()) -#if True: +# if True: def test_parse_csv(): """Test parsing a csv file.""" from dataset_paths import indir # pylint: disable=import-error + pytest.importorskip("rdflib") + from tripper import Triplestore from tripper.dataset import TableDoc - pytest.importorskip("rdflib") - td = TableDoc.parse_csv( indir / "semdata.csv", delimiter=";", @@ -105,7 +105,9 @@ def test_parse_csv(): }, ) + # pylint: disable=unused-variable,unbalanced-tuple-unpacking img, series, batch, sample = td.asdicts() + assert img["@id"] == ( "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/" "77600-23-001/77600-23-001_5kV_400x_m001" diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 50fe86b7..1dc37098 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -69,21 +69,42 @@ def save(self, ts: Triplestore) -> None: for d in self.asdicts(): save_dict(ts, d) - @classmethod + @staticmethod def parse_csv( - self, csvfile: "Union[Path, str]", type: "Optional[str]" = "dataset", prefixes: "Optional[dict]" = None, context: "Optional[Union[dict, list]]" = None, - dialect="excel", + dialect: "Union[csv.Dialect, str]" = "excel", **kwargs, - ) -> None: - """Parse a csv file.""" - with open(csvfile, newline="") as f: + ) -> "TableDoc": + # pylint: disable=line-too-long + """Parse a csv file using the standard library csv module. + + Arguments: + csvfile: CSV file to parse. + type: Type of data to save (applies to all rows). Should + either be one of the pre-defined names: "dataset", + "distribution", "accessService", "parser" and "generator" + or an IRI to a class in an ontology. Defaults to + "dataset". + prefixes: Dict with prefixes in addition to those included in the + JSON-LD context. Should map namespace prefixes to IRIs. + context: Dict with user-defined JSON-LD context. + dialect: A subclass of csv.Dialect, or the name of the dialect, + specifying how the `csvfile` is formatted. For more details, + see [Dialects and Formatting Parameters]. + kwargs: Additional keyword arguments overriding individual + formatting parameters. For more details, see + [Dialects and Formatting Parameters]. 
+ + References: + [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters + """ + with open(csvfile, encoding="utf-8") as f: reader = csv.reader(f, dialect=dialect, **kwargs) header = next(reader)[0].split(reader.dialect.delimiter) - data = [row for row in reader] + data = list(reader) return TableDoc( header=header, data=data, From 4d7d77adc9233b39ad05dfd6fb4222feef814b94 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 19 Dec 2024 13:33:10 +0100 Subject: [PATCH 15/59] Added encoding to keyword arguments --- tests/input/semdata.csv | 4 ++-- tripper/dataset/tabledoc.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv index 4df732ef..c3cf536c 100644 --- a/tests/input/semdata.csv +++ b/tests/input/semdata.csv @@ -1,5 +1,5 @@ @id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf -semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1 -semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;; +semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 μm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1 +semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 μm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;; semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;; sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;; diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 1dc37098..46ad4313 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -75,6 +75,7 @@ def parse_csv( type: "Optional[str]" = "dataset", prefixes: "Optional[dict]" = None, context: "Optional[Union[dict, 
list]]" = None,
+        encoding: str = "utf-8",
         dialect: "Union[csv.Dialect, str]" = "excel",
         **kwargs,
     ) -> "TableDoc":
@@ -91,6 +92,8 @@
             prefixes: Dict with prefixes in addition to those included in the
                 JSON-LD context. Should map namespace prefixes to IRIs.
             context: Dict with user-defined JSON-LD context.
+            encoding: The encoding of the csv file. Note that Excel may
+                encode as "ISO-8859" (commonly used in the 1990s).
             dialect: A subclass of csv.Dialect, or the name of the dialect,
                 specifying how the `csvfile` is formatted. For more details,
                 see [Dialects and Formatting Parameters].
@@ -101,7 +104,7 @@
         References:
         [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
         """
-        with open(csvfile, encoding="utf-8") as f:
+        with open(csvfile, encoding=encoding) as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
             header = next(reader)[0].split(reader.dialect.delimiter)
             data = list(reader)

From 80048677f0c4612154dcca687565c90423a56eb0 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 20 Dec 2024 15:30:39 +0100
Subject: [PATCH 16/59] Strip off blanks when parsing a table.
---
 tripper/dataset/tabledoc.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 46ad4313..75edb181 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -28,6 +28,7 @@ class TableDoc:
         prefixes: Dict with prefixes in addition to those included in the
             JSON-LD context. Should map namespace prefixes to IRIs.
         context: Dict with user-defined JSON-LD context.
+        strip: Whether to strip leading and trailing whitespace from cells.

     """

     def __init__(
         self,
         header: "Sequence[str]",
         data: "Sequence[Sequence[str]]",
         type: "Optional[str]" = "dataset",
         prefixes: "Optional[dict]" = None,
         context: "Optional[Union[dict, list]]" = None,
+        strip: bool = True,
     ):
         self.header = header
         self.data = data
         self.type = type
         self.prefixes = prefixes
         self.context = context
+        self.strip = strip

     def asdicts(self) -> "List[dict]":
         """Return the table as a list of dicts."""
@@ -55,9 +58,11 @@
         for row in self.data:
             d = AttrDict()
             for i, colname in enumerate(self.header):
-                cell = row[i]
+                cell = row[i].strip() if self.strip else row[i]
                 if cell:
-                    addnested(d, colname, cell)
+                    addnested(
+                        d, colname.strip() if self.strip else colname, cell
+                    )
             jsonld = as_jsonld(
                 d, type=self.type, prefixes=self.prefixes, **kw  # type: ignore
             )
         with open(csvfile, encoding=encoding) as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
-            header = next(reader)[0].split(reader.dialect.delimiter)
+            header = next(reader)
             data = list(reader)
+
         return TableDoc(

From 731253cd16ba58525b739166892251a0d88ca8af Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 20 Dec 2024 15:39:14 +0100
Subject: [PATCH 17/59] Added extra test to ensure that all properties are
 parsed correctly
---
 tests/dataset/test_tabledoc.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index f1640dbc..e9fff0c1 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -112,6 +112,10 @@ def test_parse_csv():
         "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
         "77600-23-001/77600-23-001_5kV_400x_m001"
     )
+    assert img.distribution.downloadURL == (
+        "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/"
+        "tests/input/77600-23-001_5kV_400x_m001.tif"
+    )
    ts = Triplestore(backend="rdflib")
    td.save(ts)
    print(ts.serialize())

From 60b0c6d2657242d36c5cac1f979ba7172783fba6 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 20 Dec 2024 15:57:11 +0100
Subject: [PATCH 18/59] Added write_csv() method to TableDoc
---
 tests/dataset/test_tabledoc.py |  9 +++++++--
 tripper/dataset/tabledoc.py    | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index e9fff0c1..5020989c 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -82,15 +82,16 @@

 # if True:
-def test_parse_csv():
+def test_csv():
     """Test parsing a csv file."""
-    from dataset_paths import indir  # pylint: disable=import-error
+    from dataset_paths import indir, outdir  # pylint: disable=import-error

     pytest.importorskip("rdflib")

     from tripper import Triplestore
     from tripper.dataset import TableDoc

+    # Read csv file
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -117,6 +118,10 @@
         "tests/input/77600-23-001_5kV_400x_m001.tif"
     )

+    # Write the table to a new csv file
+    td.write_csv(outdir / "semdata.csv")
+
+    # Print serialised KB
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 75edb181..65337d83 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -109,7 +109,7 @@
         References:
         [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
         """
-        with open(csvfile, encoding=encoding) as f:
+        with open(csvfile, mode="rt", encoding=encoding) as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
             header = next(reader)
             data = list(reader)
@@ -121,3 +121,32 @@
             prefixes=prefixes,
             context=context,
         )
+
+    def write_csv(
+        self,
+        csvfile: "Union[Path, str]",
+        encoding: str = "utf-8",
+        dialect: "Union[csv.Dialect, str]" = "excel",
+        **kwargs,
+    ) -> None:
+        # pylint: disable=line-too-long
+        """Write the table to a csv file using the standard library csv module.
+
+        Arguments:
+            csvfile: CSV file to write to.
+            encoding: The encoding of the csv file.
+            dialect: A subclass of csv.Dialect, or the name of the dialect,
+                specifying how the `csvfile` is formatted. For more details,
+                see [Dialects and Formatting Parameters].
+            kwargs: Additional keyword arguments overriding individual
+                formatting parameters. For more details, see
+                [Dialects and Formatting Parameters].
+
+        References:
+        [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
+        """
+        with open(csvfile, mode="wt", encoding=encoding) as f:
+            writer = csv.writer(f, dialect=dialect, **kwargs)
+            writer.writerow(self.header)
+            for row in self.data:
+                writer.writerow(row)

From d26d92fdabfc27090558a66ed5c9362de026bb11 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:14:49 +0100
Subject: [PATCH 19/59] Save serialised documentation to turtle file.
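Together with parse_csv() and the new write_csv() method, this completes
the round trip exercised by test_csv() (a minimal sketch; the file
names, the ";" delimiter and the rdflib backend are illustrative):

    from tripper import Triplestore
    from tripper.dataset import TableDoc

    td = TableDoc.parse_csv("semdata.csv", delimiter=";")
    td.write_csv("semdata_copy.csv")  # write the table back out

    ts = Triplestore(backend="rdflib")
    td.save(ts)                  # document every row in the triplestore
    ts.serialize("semdata.ttl")  # serialise the documentation to turtle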
---
 tests/dataset/test_tabledoc.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 5020989c..2d3c8779 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -124,4 +124,5 @@ def test_csv():
     # Print serialised KB
     ts = Triplestore(backend="rdflib")
     td.save(ts)
+    ts.serialize(outdir / "semdata.ttl")
     print(ts.serialize())

From 66b9dd75d0291359f9507033b9a8159cf0a8d320 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:23:13 +0100
Subject: [PATCH 20/59] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 pyproject.toml                 | 1 -
 tests/dataset/test_tabledoc.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 21196860..0398f0a0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,7 +105,6 @@ max-locals = 20
 disable = [
     "fixme",
     "invalid-name",
-    #"too-many-positional-arguments",
 ]
 good-names = [
     # Default

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 4a1c0613..da74203c 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -1,4 +1,4 @@
-"""Test the dataset module."""
+"""Test the TableDoc class."""

 import pytest

From 575f09d2b13deb60ce8ae9addeb734650223ca31 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:30:22 +0100
Subject: [PATCH 21/59] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 tripper/dataset/tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index b1a8ef51..9fd5d988 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -47,7 +47,7 @@ def __init__(

     def asdicts(self) -> "List[dict]":
         """Return the table as a list of dicts."""
-        kw = {"_context": self.context} if self.context else {}
+        kw = {"@context": self.context} if self.context else {}

         results = []
         for row in self.data:

From f45376db770b151d6ea7eb4de901fb646ef3dc43 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:49:06 +0100
Subject: [PATCH 22/59] Added a clarifying comment as a response to review
 comment by @torhaugl.

---
 tests/dataset/test_tabledoc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index da74203c..278e7881 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -32,6 +32,8 @@ def test_as_dicts():
             "onto": "http:/example.com/onto#",
             "ds": "http:/example.com/datasets#",
         },
+        # Replace the "ds" prefix above with this, once the "context" keyword
+        # argument is fully implemented.
# context={ # "ds": "http:/example.com/datasets#", # }, From 1752db0016a521ad6f0b3e9a2bd4ab5997482181 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 30 Dec 2024 15:43:00 +0100 Subject: [PATCH 23/59] Fix test failure --- tripper/dataset/tabledoc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 0711565b..6dbf8b32 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -58,7 +58,7 @@ def asdicts(self) -> "List[dict]": for row in self.data: d = AttrDict() for i, colname in enumerate(self.header): - cell = row[i].strip() if self.strip else row[i] + cell = row[i].strip() if row[i] and self.strip else row[i] if cell: addnested( d, colname.strip() if self.strip else colname, cell From 9b53f5e3eccfe6c7c326b18207a49dd53fdd4c15 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 30 Dec 2024 18:30:53 +0100 Subject: [PATCH 24/59] Added `context` argument to get_jsonld_context() --- tests/dataset/test_dataset.py | 9 +++++++ tripper/dataset/dataset.py | 48 ++++++++++++++++++++++------------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 9bdec2c6..240d1bf7 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -11,6 +11,7 @@ def test_get_jsonld_context(): """Test get_jsonld_context().""" from tripper.dataset import get_jsonld_context + from tripper.dataset.dataset import CONTEXT_URL context = get_jsonld_context() assert isinstance(context, dict) @@ -21,6 +22,14 @@ def test_get_jsonld_context(): online_context = get_jsonld_context(fromfile=False) assert online_context == context + # Test context argument + context2 = get_jsonld_context(context=CONTEXT_URL) + assert context2 == context + + assert "newkey" not in context + context3 = get_jsonld_context(context={"newkey": "onto:newkey"}) + assert context3["newkey"] == "onto:newkey" + def test_get_prefixes(): """Test get_prefixes().""" diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index bbb4a178..4e779170 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -26,15 +26,12 @@ --- -__TODO__: Update the URL to the JSON-LD context when merged to master - [DCAT]: https://www.w3.org/TR/vocab-dcat-3/ [JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/dataset/tripper/context/0.2/context.json """ # pylint: disable=invalid-name,redefined-builtin,import-outside-toplevel -import functools import io import json import re @@ -50,18 +47,12 @@ from tripper.utils import Triple -# Cache decorator -cache = ( - functools.cache # new in Python 3.9, smaller and faster than lru_cache() - if hasattr(functools, "cache") - else functools.lru_cache(maxsize=1) -) # Local path (for fast loading) and URL to the JSON-LD context CONTEXT_PATH = ( Path(__file__).parent.parent / "context" / "0.2" / "context.json" ).as_uri() -CONTEXT_URL = ( # __TODO__: Update URL when merged to master +CONTEXT_URL = ( "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/" "master/tripper/context/0.2/context.json" ) @@ -339,11 +330,11 @@ def get_values( return values -# TODO: update this function to take an initial argument `context`, -# which can be an URL (string), dict with raw context or a list of -# strings or dicts. 
-@cache # type: ignore -def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: +def get_jsonld_context( + context: "Optional[Union[str, dict, Sequence[Union[str, dict]]]]" = None, + timeout: float = 5, + fromfile: bool = True, +) -> dict: """Returns the JSON-LD context as a dict. The JSON-LD context maps all the keywords that can be used as keys @@ -351,6 +342,10 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: common vocabularies and ontologies. Arguments: + context: Additional user-defined context that should be returned + on top of the default context. It may be a string with an URL + to the user-defined context, a dict with the user-defined context + or a sequence of strings and dicts. timeout: Number of seconds before timing out. fromfile: Whether to load the context from local file. @@ -359,11 +354,28 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: if fromfile: with open(CONTEXT_PATH[7:], "r", encoding="utf-8") as f: - context = json.load(f)["@context"] + ctx = json.load(f)["@context"] else: r = requests.get(CONTEXT_URL, allow_redirects=True, timeout=timeout) - context = json.loads(r.content)["@context"] - return context + ctx = json.loads(r.content)["@context"] + + if isinstance(context, (str, dict)): + context = [context] + + if context: + for token in context: + if isinstance(token, str): + r = requests.get(token, allow_redirects=True, timeout=timeout) + ctx.update(json.loads(r.content)["@context"]) + elif isinstance(token, dict): + ctx.update(token) + else: + raise TypeError( + "`context` must be a string (URL), dict or a sequence of " + f"strings and dicts. Not '{type(token)}'" + ) + + return ctx # TODO: update this to take an initial argument `context`. From 26ee5188d73577455014de06dbff96a8fc115bdc Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 30 Dec 2024 18:41:01 +0100 Subject: [PATCH 25/59] Added `context` argument to get_prefixes() --- tests/dataset/test_dataset.py | 4 ++++ tripper/dataset/dataset.py | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 240d1bf7..4d833a9b 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -39,6 +39,10 @@ def test_get_prefixes(): assert prefixes["dcat"] == "http://www.w3.org/ns/dcat#" assert prefixes["emmo"] == "https://w3id.org/emmo#" + # Test context argument + prefixes2 = get_prefixes(context={"onto": "http://example.com/onto#"}) + assert prefixes2["onto"] == "http://example.com/onto#" + def test_get_shortnames(): """Test get_shortnames().""" diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 4e779170..2e4a3398 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -378,15 +378,22 @@ def get_jsonld_context( return ctx -# TODO: update this to take an initial argument `context`. -# See get_jsonld_context() -def get_prefixes(timeout: float = 5) -> dict: +def get_prefixes( + context: "Optional[Union[str, dict, Sequence[Union[str, dict]]]]" = None, + timeout: float = 5, + fromfile: bool = True, +) -> dict: """Loads the JSON-LD context and returns a dict mapping prefixes to - their namespace URL.""" - context = get_jsonld_context(timeout=timeout) + their namespace URL. + + For arguments, see get_jsonld_context(). 
+    """
+    ctx = get_jsonld_context(
+        context=context, timeout=timeout, fromfile=fromfile
+    )
     prefixes = {
         k: v
-        for k, v in context.items()
+        for k, v in ctx.items()
         if isinstance(v, str) and v.endswith(("#", "/"))
     }
     return prefixes

From 568abd79517bd9147f7f01bbb07a4ad65db535ea Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 18:47:49 +0100
Subject: [PATCH 26/59] Added `context` argument to get_shortnames()

---
 tripper/dataset/dataset.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 2e4a3398..022bcc3e 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -386,7 +386,7 @@ def get_prefixes(
     """Loads the JSON-LD context and returns a dict mapping prefixes to
     their namespace URL.

-    For arguments, see get_jsonld_context().
+    Arguments are passed to `get_jsonld_context()`.
     """
     ctx = get_jsonld_context(
         context=context, timeout=timeout, fromfile=fromfile
@@ -399,16 +399,23 @@ def get_prefixes(
     return prefixes


-# TODO: update this to take an initial argument `context`.
-# See get_jsonld_context()
-def get_shortnames(timeout: float = 5) -> dict:
+def get_shortnames(
+    context: "Optional[Union[str, dict, Sequence[Union[str, dict]]]]" = None,
+    timeout: float = 5,
+    fromfile: bool = True,
+) -> dict:
     """Loads the JSON-LD context and returns a dict mapping IRIs to their
-    short names defined in the context."""
+    short names defined in the context.
+
+    Arguments are passed to `get_jsonld_context()`.
+    """
+    ctx = get_jsonld_context(
+        context=context, timeout=timeout, fromfile=fromfile
+    )
+    prefixes = get_prefixes(context=ctx)
     shortnames = {
         expand_iri(v["@id"] if isinstance(v, dict) else v, prefixes): k
-        for k, v in context.items()
+        for k, v in ctx.items()
         if (
             (isinstance(v, str) and not v.endswith(("#", "/")))
             or isinstance(v, dict)

From 2988a324bc612691a7462a912f9a94ef2806c1f0 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 18:52:44 +0100
Subject: [PATCH 27/59] Updated .gitignore files

---
 .gitignore              | 1 +
 tests/output/.gitignore | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index c872f80e..9a0e7df5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ dist/

 # Test output
 route.svg
+coverage.xml

diff --git a/tests/output/.gitignore b/tests/output/.gitignore
index c26b5163..613dbf65 100644
--- a/tests/output/.gitignore
+++ b/tests/output/.gitignore
@@ -3,3 +3,4 @@
 *.ttl
 *.png
 *.tiff
+*.csv

From 841a74d05faa846a5676c1dc689232fcf9844529 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 2 Jan 2025 11:34:15 +0100
Subject: [PATCH 28/59] Added documentation for the dataset sub-package

---
 README.md                        |  15 +-
 docs/dataset/datadoc-keywords.md | 222 ++++++++++++++++++++++++++++
 docs/dataset/datadoc-prefixes.md |  26 ++++
 docs/dataset/datadoc.md          | 241 +++++++++++++++++++++++++++++++
 docs/index.md                    |  15 +-
 mkdocs.yml                       |   4 +
 pyproject.toml                   |   2 +-
 tests/dataset/test_dataset.py    |  19 ++-
 tests/input/openfile.txt         |   1 +
 tests/input/semdata.yaml         |   2 +-
 tests/test_utils.py              |  81 ++++++++---
 tripper/context/0.2/context.json |  44 ++++--
 tripper/convert/convert.py       |   2 +
 tripper/dataset/dataset.py       |  14 +-
 tripper/dataset/tabledoc.py      |   4 +-
 tripper/utils.py                 |  49 ++++++-
 16 files changed, 679 insertions(+), 62 deletions(-)
 create mode 100644 docs/dataset/datadoc-keywords.md
 create mode 100644 docs/dataset/datadoc-prefixes.md
 create mode 100644
docs/dataset/datadoc.md
 create mode 100644 tests/input/openfile.txt

diff --git a/README.md b/README.md
index 61e9828f..8c791e31 100644
--- a/README.md
+++ b/README.md
@@ -38,11 +38,13 @@ New namespaces can be defined with the [`tripper.Namespace`][Namespace] class.

 A triplestore wrapper is created with the [`tripper.Triplestore`][Triplestore] class.

-Advanced features
------------------
-The submodules `mappings` and `convert` provide additional functionality beyond interfacing triplestore backends:
-- **tripper.mappings**: traverse mappings stored in the triplestore and find possible mapping routes.
-- **tripper.convert**: convert between RDF and other data representations.
+Sub-packages
+------------
+Additional functionality beyond interfacing triplestore backends is provided by specialised sub-packages:
+
+* [tripper.dataset]: An API for data documentation.
+* [tripper.mappings]: Traverse mappings stored in the triplestore and find possible mapping routes.
+* [tripper.convert]: Convert between RDF and other data representations.


 Available backends
@@ -104,6 +106,9 @@ We gratefully acknowledge the following projects for supporting the development

 [Tutorial]: https://emmc-asbl.github.io/tripper/latest/tutorial/
+[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset
+[tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings
+[tripper.convert]: https://emmc-asbl.github.io/tripper/latest/api_reference/convert/convert/
 [Discovery of custom backends]: https://emmc-asbl.github.io/tripper/latest/backend_discovery/
 [Reference manual]: https://emmc-asbl.github.io/tripper/latest/api_reference/triplestore/
 [Known issues]: https://emmc-asbl.github.io/tripper/latest/known-issues/

diff --git a/docs/dataset/datadoc-keywords.md b/docs/dataset/datadoc-keywords.md
new file mode 100644
index 00000000..f8a27852
--- /dev/null
+++ b/docs/dataset/datadoc-keywords.md
@@ -0,0 +1,222 @@
+Predefined keywords
+===================
+All keywords listed on this page (except for the special "@"-prefixed keywords) are defined in the [default JSON-LD context].
+
+
+Special keywords for JSON-LD
+----------------------------
+See the [JSON-LD documentation] for a complete list of "@"-prefixed keywords.
+Here we only list those that are commonly used for data documentation with Tripper.
+
+- **@context**: URL to or dict with user-defined JSON-LD context.
+  Used to extend the keywords listed on this page with domain- or application-specific keywords.
+- **@id**: IRI of the documented resource.
+- **@type**: IRI of ontological class that the resource is an individual of.
+
+
+General properties on resources used by DCAT
+--------------------------------------------
+These can also be used on datasets and distributions.
+See the DCAT documentation for [dcat:Dataset] and [dcat:Distribution] for recommendations.
+
+- **[accessRights]**: Information about who can access the resource or an indication of its security status.
+- **[conformsTo]**: An established standard to which the described resource conforms.
+- **[contactPoint]**: Relevant contact information for the cataloged resource. Use of [vCard] is recommended.
+- **[creator]**: The entity responsible for producing the resource.
+- **[description]**: A free-text account of the resource.
+- **[hasCurrentVersion]**: This resource has a more specific, versioned resource with equivalent content.
+- **[hasPart]**: A related resource that is included either physically or logically in the described resource.
+- **[hasPolicy]**: An ODRL conformant policy expressing the rights associated with the resource.
+- **[hasVersion]**: This resource has a more specific, versioned resource.
+- **[identifier]**: A unique identifier of the resource being described or cataloged.
+- **[isReferencedBy]**: A related resource, such as a publication, that references, cites, or otherwise points to the cataloged resource.
+- **[issued]**: Date of formal issuance (e.g., publication) of the resource.
+- **[keyword]**: A keyword or tag describing the resource.
+- **[landingPage]**: A Web page that can be navigated to in a Web browser to gain access to the catalog, a dataset, its distributions and/or additional information.
+- **[language]**: A language of the resource. This refers to the natural language used for textual metadata (i.e., titles, descriptions, etc.) of a cataloged resource (i.e., dataset or service) or the textual values of a dataset distribution.
+- **[license]**: A legal document under which the resource is made available.
+- **[modified]**: Most recent date on which the resource was changed, updated or modified.
+- **[publisher]**: The entity responsible for making the resource available.
+- **[qualifiedAttribution]**: Link to an Agent having some form of responsibility for the resource.
+- **[qualifiedRelation]**: Link to a description of a relationship with another resource.
+- **[relation]**: A resource with an unspecified relationship to the cataloged resource.
+- **[replaces]**: A related resource that is supplanted, displaced, or superseded by the described resource.
+- **[rights]**: A statement that concerns all rights not addressed with `license` or `accessRights`, such as copyright statements.
+- **[status]**: The status of the resource in the context of a particular workflow process.
+- **[theme]**: A main category of the resource. A resource can have multiple themes.
+- **[title]**: A name given to the resource.
+- **[type]**: The nature or genre of the resource.
+- **[version]**: The version indicator (name or identifier) of a resource.
+- **[versionNotes]**: A description of changes between this version and the previous version of the resource.
+
+
+Other general properties on resources
+-------------------------------------
+
+- **[abstract]**: A summary of the resource.
+- **[bibliographicCitation]**: A bibliographic reference for the resource. Recommended practice is to include sufficient bibliographic detail to identify the resource as unambiguously as possible.
+- **[deprecated]**: The annotation property that indicates that a given entity has been deprecated. It should be equal to `"true"^^xsd:boolean`.
+- **[isDefinedBy]**: Indicates a resource defining the subject resource. This property may be used to indicate an RDF vocabulary in which a resource is described.
+- **[label]**: Provides a human-readable version of a resource's name.
+- **[seeAlso]**: Indicates a resource that might provide additional information about the subject resource.
+- **[source]**: A related resource from which the described resource is derived.
+- **[statements]**: A list of subject-predicate-object triples with additional RDF statements documenting the resource.
+
+
+Properties specific for datasets
+--------------------------------
+
+- **[datamodel]**: URI of DLite datamodel for the dataset.
+- **[datamodelStorage]**: URL to DLite storage plugin where the datamodel is stored.
+- **[distribution]**: An available distribution of the dataset.
+- **[hasDatum]**: Relates a dataset to its datum parts.
+  `hasDatum` relations are normally not specified manually, since they are generated from the DLite data model.
+- **[inSeries]**: A dataset series of which the dataset is part.
+- **[isInputOf]**: A process that this dataset is the input to.
+- **[isOutputOf]**: A process that this dataset is the output of.
+- **[mappings]**: A list of subject-predicate-object triples mapping the datamodel to ontological concepts.
+- **[mappingURL]**: URL to a document defining the mappings of the datamodel.
+  The file format is given by `mappingFormat`.
+  Defaults to turtle.
+- **[mappingFormat]**: File format for `mappingURL`. Defaults to turtle.
+- **[spatial]**: The geographical area covered by the dataset.
+- **[spatialResolutionMeters]**: Minimum spatial separation resolvable in a dataset, measured in meters.
+- **[temporal]**: The temporal period that the dataset covers.
+- **[temporalResolution]**: Minimum time period resolvable in the dataset.
+- **[wasGeneratedBy]**: An activity that generated, or provides the business context for, the creation of the dataset.
+
+
+
+Properties specific for distributions
+-------------------------------------
+- **[accessService]**: A data service that gives access to the distribution of the dataset.
+- **[accessURL]**: A URL of the resource that gives access to a distribution of the dataset. E.g., landing page, feed, SPARQL endpoint.
+- **[byteSize]**: The size of a distribution in bytes.
+- **[checksum]**: The checksum property provides a mechanism that can be used to verify that the contents of a file or package have not changed.
+- **[compressFormat]**: The compression format of the distribution in which the data is contained in a compressed form, e.g., to reduce the size of the downloadable file.
+- **[downloadURL]**: The URL of the downloadable file in a given format. E.g., CSV file or RDF file. The format is indicated by the distribution's `format` and/or `mediaType`.
+- **[format]**: The file format of the distribution.
+  Use `mediaType` instead if the type of the distribution is defined by [IANA].
+- **[generator]**: A generator that can create the distribution.
+- **[mediaType]**: The media type of the distribution as defined by [IANA].
+- **[packageFormat]**: The package format of the distribution in which one or more data files are grouped together, e.g., to enable a set of related files to be downloaded together.
+- **[parser]**: A parser that can parse the distribution.
+
+
+Properties for parsers and generators
+-------------------------------------
+- **[configuration]**: A JSON string with configurations specific to the parser or generator.
+- **[generatorType]**: Generator type. Ex: `application/vnd.dlite-generate`.
+- **[parserType]**: Parser type. Ex: `application/vnd.dlite-parse`.
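+
+As a minimal illustration (not part of the keyword definitions themselves), a
+parser could combine these keywords in a dict like the following, where the
+IRI and driver name are examples borrowed from the test input:
+
+```python
+parser = {
+    "@id": "parser:sem_hitachi",
+    "parserType": "application/vnd.dlite-parse",  # example parser type
+    "configuration": {"driver": "hitachi"},       # parser-specific settings
+}
+```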
+ + + +[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json +[JSON-LD documentation]: https://www.w3.org/TR/json-ld/#syntax-tokens-and-keywords + +[accessRights]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_access_rights +[conformsTo]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_conforms_to +[contactPoint]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_contact_point +[creator]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_creator +[description]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_description +[hasCurrentVersion]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_has_current_version +[hasPart]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_has_part +[hasPolicy]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_has_policy +[hasVersion]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_has_version +[identifier]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_identifier +[isReferencedBy]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_is_referenced_by +[issued]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_release_date +[keyword]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_keyword +[landingPage]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_landing_page +[language]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_language +[license]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_license +[modified]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_update_date +[publisher]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_publisher +[qualifiedAttribution]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_qualified_attribution +[qualifiedRelation]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_qualified_relation +[relation]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_relation +[replaces]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_replaces +[rights]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_rights +[status]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_status +[theme]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_theme +[title]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_title +[type]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_type +[version]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_version +[versionNotes]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_version_notes + + +[abstract]: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/abstract +[bibliographicCitation]: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/bibliographicCitation +[deprecated]: https://www.w3.org/TR/owl2-quick-reference/ +[isDefinedBy]: https://www.w3.org/TR/rdf12-schema/#ch_isdefinedby +[label]: https://www.w3.org/TR/rdf12-schema/#ch_label +[seeAlso]: https://www.w3.org/TR/rdf12-schema/#ch_seealso +[source]: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/source + + +[datamodel]: https://w3id.org/emmo/domain/oteio#hasDatamodel +[datamodelStorage]: https://w3id.org/emmo/domain/oteio#hasDatamodelStorage +[distribution]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_distribution +[hasDatum]: https://w3id.org/emmo#EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e +[inSeries]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_in_series +[isInputOf]: 
https://w3id.org/emmo#EMMO_1494c1a9_00e1_40c2_a9cc_9bbf302a1cac +[isOutputOf]: https://w3id.org/emmo#EMMO_2bb50428_568d_46e8_b8bf_59a4c5656461 +[mappings]: https://w3id.org/emmo/domain/oteio#mapping +[mappingFormat]: https://w3id.org/emmo/domain/oteio#mappingFormat +[mappingURL]: https://w3id.org/emmo/domain/oteio#mappingURL +[spatial]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_spatial +[spatialResolutionMeters]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_spatial_resolution +[temporal]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_temporal +[temporalResolution]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_temporal_resolution +[wasGeneratedBy]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_was_generated_by +[statements]: https://w3id.org/emmo/domain/oteio#statement + + +[accessService]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_access_service +[accessURL]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_access_url +[byteSize]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_size +[checksum]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_checksum +[compressFormat]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_compression_format +[downloadURL]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_download_url +[format]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_format +[mediaType]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_media_type +[packageFormat]: https://www.w3.org/TR/vocab-dcat-3/#Property:distribution_packaging_format +[generator]: https://w3id.org/emmo/domain/oteio#generator +[parser]: https://w3id.org/emmo/domain/oteio#parser + + +[configuration]: https://w3id.org/emmo/domain/oteio#configuration +[generatorType]: https://w3id.org/emmo/domain/oteio#generatorType +[parserType]: https://w3id.org/emmo/domain/oteio#parserType + + + + +[DCAT]: https://www.w3.org/TR/vocab-dcat-3/ +[dcat:Dataset]: https://www.w3.org/TR/vocab-dcat-3/#Class:Dataset +[dcat:Distribution]: https://www.w3.org/TR/vocab-dcat-3/#Class:Distribution +[vCard]: https://www.w3.org/TR/vcard-rdf/ +[IANA]: https://www.iana.org/assignments/media-types/media-types.xhtml diff --git a/docs/dataset/datadoc-prefixes.md b/docs/dataset/datadoc-prefixes.md new file mode 100644 index 00000000..d5d3e538 --- /dev/null +++ b/docs/dataset/datadoc-prefixes.md @@ -0,0 +1,26 @@ +Predefined prefixes +=================== +All prefixes listed on this page are defined in the [default JSON-LD context]. 
+
+* adms: http://www.w3.org/ns/adms#
+* dcat: http://www.w3.org/ns/dcat#
+* dcterms: http://purl.org/dc/terms/
+* dctype: http://purl.org/dc/dcmitype/
+* foaf: http://xmlns.com/foaf/0.1/
+* odrl: http://www.w3.org/ns/odrl/2/
+* owl: http://www.w3.org/2002/07/owl#
+* prov: http://www.w3.org/ns/prov#
+* rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
+* rdfs: http://www.w3.org/2000/01/rdf-schema#
+* schema: http://schema.org/
+* skos: http://www.w3.org/2004/02/skos/core#
+* spdx: http://spdx.org/rdf/terms#
+* vcard: http://www.w3.org/2006/vcard/ns#
+* xsd: http://www.w3.org/2001/XMLSchema#
+
+* emmo: https://w3id.org/emmo#
+* oteio: https://w3id.org/emmo/domain/oteio#
+* chameo: https://w3id.org/emmo/domain/characterisation-methodology/chameo#
+
+
+[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json

diff --git a/docs/dataset/datadoc.md b/docs/dataset/datadoc.md
new file mode 100644
index 00000000..6c343ff1
--- /dev/null
+++ b/docs/dataset/datadoc.md
@@ -0,0 +1,241 @@
+Data documentation
+==================
+
+
+
+Introduction
+------------
+The data documentation is based on small [JSON-LD documents], each documenting a single resource.
+Examples of resources can be a dataset, an instrument, a sample, etc.
+All resources are uniquely identified by their IRI.
+
+The primary focus of the [tripper.dataset] module is to document datasets such that they are consistent with the [DCAT vocabulary], but at the same time easily extended with additional semantic meaning provided by other ontologies.
+It is also easy to add and relate the datasets to other types of resources, like people, instruments and samples.
+
+The [tripper.dataset] module provides a Python API for documenting resources at all four levels of data documentation:
+
+- **Cataloguing**: Storing and accessing *documents* based on their IRI and data properties.
+  (Addressed FAIR aspects: *findability* and *accessibility*).
+- **Structural documentation**: The structure of a dataset. Provided via [DLite] data models.
+  (Addressed FAIR aspects: *interoperability*).
+- **Contextual documentation**: Relations between resources, i.e. *linked data*. Enables contextual search.
+  (Addressed FAIR aspects: *findability* and *reusability*).
+- **Semantic documentation**: Describes what the resource *is* using ontologies. In combination with structural documentation, maps the properties of a data model to ontological concepts.
+  (Addressed FAIR aspects: *findability*, *interoperability* and *reusability*).
+
+The figure below illustrates how a dataset is documented in a triplestore.
+
+![Documentation of a dataset](https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/docs/figs/dataset-Dataset.png)
+
+
+Resource types
+--------------
+The [tripper.dataset] module includes the following set of predefined resource types:
+
+- **dataset**: Individual of [dcat:Dataset] and [emmo:DataSet].
+- **distribution**: Individual of [dcat:Distribution].
+- **accessService**: Individual of [dcat:AccessService].
+- **generator**: Individual of [oteio:Generator].
+- **parser**: Individual of [oteio:Parser].
+- **resource**: Any other documented resource, with no implicit type.
+
+Future releases will support adding custom resource types.
+
+
+Documenting a resource
+----------------------
+In the Python API, the JSON-LD documents describing the resources are internally represented as Python dicts.
+However, the [tripper.dataset] module tries to hide away the complexities of [JSON-LD] behind a simple interface.
+
+
+### Documenting as a Python dict
+The API supports two Python dict representations, one for documenting a single resource and one for documenting multiple resources.
+
+
+#### Single-resource dict
+Below is a simple example of how to document a SEM image dataset as a Python dict:
+
+```python
+>>> dataset = {
+...     "@id": "kb:image1",
+...     "@type": "sem:SEMImage",
+...     "creator": "Sigurd Wenner",
+...     "description": "Back-scattered SEM image of cement, polished with 1 µm diamond compound.",
+...     "distribution": {
+...         "downloadURL": "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/tests/input/77600-23-001_5kV_400x_m001.tif",
+...         "mediaType": "image/tiff"
+...     }
+... }
+
+```
+
+The keywords are defined in the [default JSON-LD context] and documented under [Predefined keywords].
+
+This example uses two namespace prefixes not included in the [predefined prefixes].
+We therefore have to define them explicitly:
+
+```python
+>>> prefixes = {
+...     "sem": "https://w3id.com/emmo/domain/sem/0.1#",
+...     "kb": "http://example.com/kb/"
+... }
+
+```
+
+!!! note "Side note"
+
+    This dict is actually a [JSON-LD] document with an implicit context.
+    You can use [as_jsonld()] to create a valid JSON-LD document from it.
+    In addition to adding a `@context` field, this function also adds some implicit `@type` declarations.
+
+    ```python
+    >>> import json
+    >>> from tripper.dataset import as_jsonld
+    >>> d = as_jsonld(dataset, prefixes=prefixes)
+    >>> print(json.dumps(d, indent=2))
+    {
+      "@context": "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json",
+      "@type": [
+        "http://www.w3.org/ns/dcat#Dataset",
+        "https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a",
+        "https://w3id.com/emmo/domain/sem/0.1#SEMImage"
+      ],
+      "@id": "http://example.com/kb/image1",
+      "creator": "Sigurd Wenner",
+      "description": "Back-scattered SEM image of cement, polished with 1 \u00b5m diamond compound.",
+      "distribution": {
+        "@type": "http://www.w3.org/ns/dcat#Distribution",
+        "downloadURL": "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/tests/input/77600-23-001_5kV_400x_m001.tif",
+        "mediaType": "image/tiff"
+      }
+    }
+
+    ```
+
+You can use [save_dict()] to save this documentation to a triplestore.
+Since the prefixes "sem" and "kb" are not included in the [Predefined prefixes], they have to be provided explicitly.
+
+```python
+>>> from tripper import Triplestore
+>>> from tripper.dataset import save_dict
+>>> ts = Triplestore(backend="rdflib")
+>>> save_dict(ts, dataset, prefixes=prefixes)  # doctest: +ELLIPSIS
+AttrDict(...)
+
+```
+
+You can use `ts.serialize()` to list the content of the triplestore (defaults to turtle):
+
+```python
+>>> print(ts.serialize())
+@prefix dcat: <http://www.w3.org/ns/dcat#> .
+@prefix dcterms: <http://purl.org/dc/terms/> .
+@prefix emmo: <https://w3id.org/emmo#> .
+@prefix kb: <http://example.com/kb/> .
+@prefix sem: <https://w3id.com/emmo/domain/sem/0.1#> .
+
+kb:image1 a dcat:Dataset,
+        sem:SEMImage,
+        emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a ;
+    dcterms:creator "Sigurd Wenner" ;
+    dcterms:description "Back-scattered SEM image of cement, polished with 1 µm diamond compound." ;
+    dcat:distribution [ a dcat:Distribution ;
+            dcat:downloadURL "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/tests/input/77600-23-001_5kV_400x_m001.tif" ;
+            dcat:mediaType "image/tiff" ] .
+
+
+
+```
+
+Note that the image has implicitly been declared to be an individual of the classes `dcat:Dataset` and `emmo:DataSet`.
+This is because the `type` argument of [save_dict()] defaults to "dataset".
+
+
+#### Multi-resource dict
+It is also possible to document multiple resources as a Python dict.
+
+!!! note
+
+    Unlike the single-resource dict representation, the multi-resource dict representation is not valid (possibly incomplete) JSON-LD.
+
+This dict representation accepts the following keywords:
+
+- **@context**: Optional user-defined context to be appended to the documentation of all resources.
+- **prefixes**: A dict mapping namespace prefixes to their corresponding URLs.
+- **datasets**/**distributions**/**accessServices**/**generators**/**parsers**/**resources**: A list of valid [single-resource](#single-resource-dict) dicts of the given [resource type](#resource-types).
+
+See [semdata.yaml] for an example of a [YAML] representation of a multi-resource dict documentation.
+
+
+### Documenting as a YAML file
+The [save_datadoc()] function allows saving a [YAML] file in [multi-resource](#multi-resource-dict) format to a triplestore.
+
+See [semdata.yaml] for an example.
+
+
+### Documenting as table
+The [TableDoc] class can be used to document multiple resources as rows in a table.
+
+The table must have a header row with defined keywords (either [predefined][predefined keywords] or provided with a custom context).
+Nested fields may be specified as dot-separated keywords. For example, the table
+
+| @id | distribution.downloadURL |
+| --- | ------------------------ |
+| :a  | http://example.com/a.txt |
+| :b  | http://example.com/b.txt |
+
+corresponds to the following turtle representation:
+
+```turtle
+:a dcat:distribution [
+    a dcat:Distribution ;
+    dcat:downloadURL "http://example.com/a.txt" ] .
+
+:b dcat:distribution [
+    a dcat:Distribution ;
+    dcat:downloadURL "http://example.com/b.txt" ] .
+```
+
+The example below shows how to save all datasets listed in the CSV file [semdata.csv] to a triplestore.
+
+```python
+>>> from tripper.dataset import TableDoc
+
+>>> td = TableDoc.parse_csv(
+...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv",
+...     delimiter=";",
+...     prefixes={
+...         "sem": "https://w3id.com/emmo/domain/sem/0.1#",
+...         "semdata": "https://he-matchmaker.eu/data/sem/",
+...         "sample": "https://he-matchmaker.eu/sample/",
+...         "mat": "https://he-matchmaker.eu/material/",
+...         "dm": "http://onto-ns.com/meta/characterisation/0.1/SEMImage#",
+...         "parser": "http://sintef.no/dlite/parser#",
+...         "gen": "http://sintef.no/dlite/generator#",
+...     },
+... )
+>>> td.save(ts)

+```
+
+
+[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset
+[DCAT vocabulary]: https://www.w3.org/TR/vocab-dcat-3/
+[DLite]: https://github.com/SINTEF/dlite
+[YAML]: https://yaml.org/
+[JSON-LD documents]: https://json-ld.org/
+[JSON-LD]: https://www.w3.org/TR/json-ld/
+[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
+[predefined prefixes]: datadoc-prefixes.md
+[predefined keywords]: datadoc-keywords.md
+[dcat:Dataset]: https://www.w3.org/TR/vocab-dcat-3/#Class:Dataset
+[dcat:Distribution]: https://www.w3.org/TR/vocab-dcat-3/#Class:Distribution
+[dcat:AccessService]: https://www.w3.org/TR/vocab-dcat-3/#Class:AccessService
+[emmo:DataSet]: https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a
+[oteio:Generator]: https://w3id.org/emmo/domain/oteio/Generator
+[oteio:Parser]: https://w3id.org/emmo/domain/oteio/Parser
+[save_dict()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
+[save_datadoc()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
+[as_jsonld()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
+[semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml
+[semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv
+[TableDoc]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.tabledoc.TableDoc

diff --git a/docs/index.md b/docs/index.md
index 61e9828f..8c791e31 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -38,11 +38,13 @@ New namespaces can be defined with the [`tripper.Namespace`][Namespace] class.

 A triplestore wrapper is created with the [`tripper.Triplestore`][Triplestore] class.

-Advanced features
------------------
-The submodules `mappings` and `convert` provide additional functionality beyond interfacing triplestore backends:
-- **tripper.mappings**: traverse mappings stored in the triplestore and find possible mapping routes.
-- **tripper.convert**: convert between RDF and other data representations.
+Sub-packages
+------------
+Additional functionality beyond interfacing triplestore backends is provided by specialised sub-packages:
+
+* [tripper.dataset]: An API for data documentation.
+* [tripper.mappings]: Traverse mappings stored in the triplestore and find possible mapping routes.
+* [tripper.convert]: Convert between RDF and other data representations.
Available backends @@ -104,6 +106,9 @@ We gratefully acknowledge the following projects for supporting the development [Tutorial]: https://emmc-asbl.github.io/tripper/latest/tutorial/ +[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset +[tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings +[tripper.convert]: https://emmc-asbl.github.io/tripper/latest/api_reference/convert/convert/ [Discovery of custom backends]: https://emmc-asbl.github.io/tripper/latest/backend_discovery/ [Reference manual]: https://emmc-asbl.github.io/tripper/latest/api_reference/triplestore/ [Known issues]: https://emmc-asbl.github.io/tripper/latest/known-issues/ diff --git a/mkdocs.yml b/mkdocs.yml index cf56fbff..117bec6b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,6 +81,10 @@ nav: - Home: index.md - Tutorial: tutorial.md - Backend discovery: backend_discovery.md + - Data documentation: + - Introduction: dataset/datadoc.md + - Predefined prefixes: dataset/datadoc-prefixes.md + - Predefined keywords: dataset/datadoc-keywords.md - ... | api_reference/** - Known issues: known-issues.md - For developers: developers.md diff --git a/pyproject.toml b/pyproject.toml index 0398f0a0..58304083 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,7 +121,7 @@ good-names = [ [tool.pytest.ini_options] minversion = "7.0" -addopts = "-rs --cov=tripper --cov-report=term --doctest-modules --doctest-ignore-import-errors" +addopts = "-rs --cov=tripper --cov-report=term --doctest-modules --doctest-ignore-import-errors --ignore=examples" filterwarnings = [ "ignore:.*imp module.*:DeprecationWarning", "ignore:::tripper.literal:243", # Ignore warning in doctest diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 4d833a9b..0352d4e3 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -15,21 +15,30 @@ def test_get_jsonld_context(): context = get_jsonld_context() assert isinstance(context, dict) - assert "@version" in context - assert len(context) > 20 + assert len(context) > 80 + assert context["@version"] == 1.1 + assert context["status"] == "adms:status" - # Check for consistency between context online and on disk + # Test online context. It should equal context on disk. + # However, since they are updated asynchronously, we do not test for + # equality. online_context = get_jsonld_context(fromfile=False) - assert online_context == context + assert isinstance(online_context, dict) + assert len(online_context) > 80 + assert online_context["@version"] == 1.1 + assert online_context["status"] == "adms:status" # Test context argument context2 = get_jsonld_context(context=CONTEXT_URL) - assert context2 == context + assert context2 == online_context assert "newkey" not in context context3 = get_jsonld_context(context={"newkey": "onto:newkey"}) assert context3["newkey"] == "onto:newkey" + with pytest.raises(TypeError): + get_jsonld_context(context=[None]) + def test_get_prefixes(): """Test get_prefixes().""" diff --git a/tests/input/openfile.txt b/tests/input/openfile.txt new file mode 100644 index 00000000..6946578d --- /dev/null +++ b/tests/input/openfile.txt @@ -0,0 +1 @@ +Example file. diff --git a/tests/input/semdata.yaml b/tests/input/semdata.yaml index 2d1da201..e1d1918d 100644 --- a/tests/input/semdata.yaml +++ b/tests/input/semdata.yaml @@ -81,7 +81,7 @@ generators: # Other entities, like samples, instruments, persons, models etc... 
-other_entries: +resources: - "@id": sample:SEM_cement_batch2/77600-23-001 "@type": chameo:Sample title: Series for SEM images for sample 77600-23-001. diff --git a/tests/test_utils.py b/tests/test_utils.py index fe8e125f..04e8f324 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,6 +5,63 @@ import pytest +def test_AttrDict(): + """Test AttrDict.""" + from tripper.utils import AttrDict + + d = AttrDict(a=1, b=2) + assert d.a == 1 + + with pytest.raises(KeyError): + d.c # pylint: disable=pointless-statement + + d.c = 3 + assert d.c == 3 + + d.get = 4 + assert d["get"] == 4 + assert d.get("get") == 4 # pylint: disable=not-callable + + d2 = AttrDict({"a": "A"}) + assert d2.a == "A" + assert d2 == {"a": "A"} + assert repr(d2) == "AttrDict({'a': 'A'})" + assert "a" in dir(d2) + + +def test_openfile(): + """Test openfile().""" + from paths import indir + + from tripper.utils import openfile + + with openfile(indir / "openfile.txt") as f: + assert f.read().strip() == "Example file." + + with openfile(f"file:{indir}/openfile.txt") as f: + assert f.read().strip() == "Example file." + + with openfile(f"file://{indir}/openfile.txt") as f: + assert f.read().strip() == "Example file." + + with pytest.raises(IOError): + with openfile("xxx://unknown_scheme"): + pass + + +def test_openfile_http(): + """Test openfile().""" + from tripper.utils import openfile + + pytest.importorskip("requests") + + with openfile( + "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/" + "dataset-docs/tests/input/openfile.txt" + ) as f: + assert f.read().strip() == "Example file." + + def infer_IRIs(): """Test infer_IRIs""" from tripper import RDFS @@ -328,27 +385,3 @@ def test_extend_namespace(): EX = Namespace("http://example.com#") with pytest.raises(TypeError): extend_namespace(EX, {"Item": EX + "Item"}) - - -def test_AttrDict(): - """Test AttrDict.""" - from tripper.utils import AttrDict - - d = AttrDict(a=1, b=2) - assert d.a == 1 - - with pytest.raises(KeyError): - d.c # pylint: disable=pointless-statement - - d.c = 3 - assert d.c == 3 - - d.get = 4 - assert d["get"] == 4 - assert d.get("get") == 4 # pylint: disable=not-callable - - d2 = AttrDict({"a": "A"}) - assert d2.a == "A" - assert d2 == {"a": "A"} - assert repr(d2) == "AttrDict({'a': 'A'})" - assert "a" in dir(d2) diff --git a/tripper/context/0.2/context.json b/tripper/context/0.2/context.json index 3f658c0d..409e1117 100644 --- a/tripper/context/0.2/context.json +++ b/tripper/context/0.2/context.json @@ -24,10 +24,7 @@ "status": "adms:status", "versionNotes": "adms:versionNotes", - "distribution": { - "@id": "dcat:distribution", - "@type": "@id" - }, + "contactPoint": "dcat:contactPoint", "hasCurrentVersion": "dcat:hasCurrentVersion", "hasVersion": "dcat:hasVersion", @@ -37,14 +34,20 @@ }, "keyword": "dcat:keyword", "landingPage": "dcat:landingPage", - "qualifiedRelation": "dcat:qualifiedRelation", + "qualifiedRelation": { + "@id": "dcat:qualifiedRelation", + "@type": "@id" + }, "theme": "dcat:theme", "version": "dcat:version", "accessRights": "dcterms:accessRights", "conformsTo": "dcterms:conformsTo", "creator": "dcterms:creator", "description": "dcterms:description", - "hasPart": "dcterms:hasPart", + "hasPart": { + "@id": "dcterms:hasPart", + "@type": "@id" + }, "identifier": "dcterms:identifier", "isReferencedBy": "dcterms:isReferencedBy", "issued": "dcterms:issued", @@ -52,13 +55,29 @@ "license": "dcterms:license", "modified": "dcterms:modified", "publisher": "dcterms:publisher", - "relation": "dcterms:relation", - "replaces": 
"dcterms:replaces", + "relation": { + "@id": "dcterms:relation", + "@type": "@id" + }, + + "replaces": { + "@id": "dcterms:replaces", + "@type": "@id" + }, + "rights": "dcterms:rights", "title": "dcterms:title", "type": "dcterms:type", "hasPolicy": "odrl:hasPolicy", - "qualifiedAttribution": "prov:qualifiedAttribution", + "qualifiedAttribution": { + "@id": "prov:qualifiedAttribution", + "@type": "@id" + }, + + "distribution": { + "@id": "dcat:distribution", + "@type": "@id" + }, "accessService": { "@id": "dcat:accessService", @@ -76,16 +95,19 @@ "temporalResolution": "dcat:temporalResolution", "wasGeneratedBy": "prov:wasGeneratedBy", "format": "dcterms:format", - "checksum": "spdx:checksum", + "checksum": { + "@id": "spdx:checksum", + "@type": "@id" + }, "abstract": "dcterms:abstract", "bibliographicCitation": "dcterms:bibliographicCitation", - "source": "dcterms:source", "deprecated": "owl:deprecated", "comment": "rdfs:comment", "isDefinedBy": "rdfs:isDefinedBy", "label": "rdfs:label", "seeAlso": "rdfs:seeAlso", + "source": "dcterms:source", "hasDatum": { "@id": "emmo:EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e", diff --git a/tripper/convert/convert.py b/tripper/convert/convert.py index 8f420f5a..2efaa1af 100644 --- a/tripper/convert/convert.py +++ b/tripper/convert/convert.py @@ -3,6 +3,7 @@ Example use: +```python >>> from tripper import DCTERMS, Literal, Triplestore >>> from tripper.convert import load_container, save_container @@ -22,6 +23,7 @@ >>> load_container(ts, ":data_indv", ignore_unrecognised=True) {'a': 1, 'b': 2} +``` """ # pylint: disable=invalid-name,redefined-builtin diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 022bcc3e..5052169b 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -82,10 +82,10 @@ "datadoc_label": "datasets", "@type": [DCAT.Dataset, EMMO.DataSet], }, - "entry": { - # General datacatalog entry that is not one of the above + "resource": { + # General data resource # Ex: samples, instruments, models, people, projects, ... - "datadoc_label": "other_entries", # XXX better label? + "datadoc_label": "resources", "@type": OWL.NamedIndividual, }, } @@ -620,7 +620,6 @@ def as_jsonld( dct: dict, type: "Optional[str]" = "dataset", prefixes: "Optional[dict]" = None, - _entryid: "Optional[str]" = None, **kwargs, ) -> dict: """Return an updated copy of dict `dct` as valid JSON-LD. @@ -633,18 +632,19 @@ def as_jsonld( Defaults to "dataset". prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. - _entryid: Id of base entry that is documented. Intended for - internal use only. kwargs: Additional keyword arguments to add to the returned dict. A leading underscore in a key will be translated to a leading "@"-sign. For example, "@id" or "@context" may be provided as "_id" or "_context", respectively. - Returns: An updated copy of `dct` as valid JSON-LD. 
""" # pylint: disable=too-many-branches + + # Id of base entry that is documented + _entryid = kwargs.pop("_entryid", None) + d = AttrDict() if not _entryid: d["@context"] = CONTEXT_URL diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 6dbf8b32..609fdd3d 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -6,7 +6,7 @@ from tripper import Triplestore from tripper.dataset.dataset import addnested, as_jsonld, save_dict -from tripper.utils import AttrDict +from tripper.utils import AttrDict, openfile if TYPE_CHECKING: # pragma: no cover from typing import List, Optional, Sequence, Union @@ -109,7 +109,7 @@ def parse_csv( References: [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters """ - with open(csvfile, mode="rt", encoding=encoding) as f: + with openfile(csvfile, mode="rt", encoding=encoding) as f: reader = csv.reader(f, dialect=dialect, **kwargs) header = next(reader) data = list(reader) diff --git a/tripper/utils.py b/tripper/utils.py index 41525269..4c6cc6d6 100644 --- a/tripper/utils.py +++ b/tripper/utils.py @@ -7,13 +7,15 @@ import random import re import string +import tempfile +from contextlib import contextmanager +from pathlib import Path from typing import TYPE_CHECKING from tripper.literal import Literal from tripper.namespace import XSD, Namespace if TYPE_CHECKING: # pragma: no cover - from pathlib import Path from typing import ( Any, Callable, @@ -65,6 +67,51 @@ def __dir__(self): return dict.__dir__(self) + list(self.keys()) +@contextmanager +def openfile(url: "Union[str, Path]", timeout: float = 3, **kwargs): + """Like open(), but allows opening remote files using http requests. + + Should always be used in a with-statement. + + Arguments: + url: File path or URL to open. + timeout: Timeout for accessing the file in seconds. + kwargs: Additional passed to open(). + + Returns: + A stream object returned by open(). + + """ + url = str(url) + u = url.lower() + tmpfile = False + + if u.startswith("file:"): + fname = url[7:] if u.startswith("file://") else url[5:] + + elif u.startswith("http://") or u.startswith("https://"): + import requests # pylint: disable=import-outside-toplevel + + tmpfile = True + r = requests.get(url, timeout=timeout) + r.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False) as f: + fname = f.name + f.write(r.content) + + elif re.match(r"[a-zA-Z][a-zA-Z0-9+.-]*://", url): + raise IOError(f"unknown scheme: {url.split(':', 1)[0]}") + + else: + fname = url + + try: + yield open(fname, **kwargs) # pylint: disable=unspecified-encoding + finally: + if tmpfile: + Path(fname).unlink() + + def infer_iri(obj): """Return IRI of the individual that stands for Python object `obj`. 
From 39c9c1a3a6f15870a640a1bd10bef60cbfe0051e Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 Jan 2025 12:59:03 +0100 Subject: [PATCH 29/59] Added return annotation to utils.openfile() --- README.md | 4 ++-- docs/index.md | 4 ++-- tripper/convert/convert.py | 1 + tripper/dataset/dataaccess.py | 6 ++++-- tripper/utils.py | 4 +++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8c791e31..b494f1b6 100644 --- a/README.md +++ b/README.md @@ -106,8 +106,8 @@ We gratefully acknowledge the following projects for supporting the development [Tutorial]: https://emmc-asbl.github.io/tripper/latest/tutorial/ -[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset -[tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings +[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/dataset/datadoc/ +[tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings/ [tripper.convert]: https://emmc-asbl.github.io/tripper/latest/api_reference/convert/convert/ [Discovery of custom backends]: https://emmc-asbl.github.io/tripper/latest/backend_discovery/ [Reference manual]: https://emmc-asbl.github.io/tripper/latest/api_reference/triplestore/ diff --git a/docs/index.md b/docs/index.md index 8c791e31..b494f1b6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -106,8 +106,8 @@ We gratefully acknowledge the following projects for supporting the development [Tutorial]: https://emmc-asbl.github.io/tripper/latest/tutorial/ -[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset -[tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings +[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/dataset/datadoc/ +[tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings/ [tripper.convert]: https://emmc-asbl.github.io/tripper/latest/api_reference/convert/convert/ [Discovery of custom backends]: https://emmc-asbl.github.io/tripper/latest/backend_discovery/ [Reference manual]: https://emmc-asbl.github.io/tripper/latest/api_reference/triplestore/ diff --git a/tripper/convert/convert.py b/tripper/convert/convert.py index 2efaa1af..cf949f59 100644 --- a/tripper/convert/convert.py +++ b/tripper/convert/convert.py @@ -24,6 +24,7 @@ {'a': 1, 'b': 2} ``` + """ # pylint: disable=invalid-name,redefined-builtin diff --git a/tripper/dataset/dataaccess.py b/tripper/dataset/dataaccess.py index 3e248e36..dbe0ee25 100644 --- a/tripper/dataset/dataaccess.py +++ b/tripper/dataset/dataaccess.py @@ -3,11 +3,13 @@ from the datasets module. High-level functions for accessing and storing actual data: + - `load()`: Load documented dataset from its source. - `save()`: Save documented dataset to a data resource. -Note: This module may eventually be moved out of tripper into a -separate package. +Note: + This module may eventually be moved out of tripper into a separate + package. """ import secrets # From Python 3.9 we could use random.randbytes(16).hex() diff --git a/tripper/utils.py b/tripper/utils.py index 4c6cc6d6..94850280 100644 --- a/tripper/utils.py +++ b/tripper/utils.py @@ -68,7 +68,9 @@ def __dir__(self): @contextmanager -def openfile(url: "Union[str, Path]", timeout: float = 3, **kwargs): +def openfile( + url: "Union[str, Path]", timeout: float = 3, **kwargs +) -> "Generator": """Like open(), but allows opening remote files using http requests. 
Should always be used in a with-statement. From 4302ddeae24362c418a3cafdbaa771b53764d963 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 Jan 2025 14:23:53 +0100 Subject: [PATCH 30/59] Try to avoid pytest failure during collection phase. --- docs/api_reference/triplestore_extend.md | 3 +++ docs/api_reference/tripper.md | 3 --- tripper/__init__.py | 2 +- tripper/{tripper.py => triplestore_extend.py} | 0 tripper/utils.py | 5 +++++ 5 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 docs/api_reference/triplestore_extend.md delete mode 100644 docs/api_reference/tripper.md rename tripper/{tripper.py => triplestore_extend.py} (100%) diff --git a/docs/api_reference/triplestore_extend.md b/docs/api_reference/triplestore_extend.md new file mode 100644 index 00000000..03c1fbbd --- /dev/null +++ b/docs/api_reference/triplestore_extend.md @@ -0,0 +1,3 @@ +# triplestore_extend + +::: tripper.triplestore_extend diff --git a/docs/api_reference/tripper.md b/docs/api_reference/tripper.md deleted file mode 100644 index 57f90b06..00000000 --- a/docs/api_reference/tripper.md +++ /dev/null @@ -1,3 +0,0 @@ -# tripper - -::: tripper.tripper diff --git a/tripper/__init__.py b/tripper/__init__.py index 0d9197d3..af17dac1 100644 --- a/tripper/__init__.py +++ b/tripper/__init__.py @@ -24,7 +24,7 @@ Namespace, ) from .triplestore import Triplestore, backend_packages -from .tripper import Tripper +from .triplestore_extend import Tripper __version__ = "0.3.4" diff --git a/tripper/tripper.py b/tripper/triplestore_extend.py similarity index 100% rename from tripper/tripper.py rename to tripper/triplestore_extend.py diff --git a/tripper/utils.py b/tripper/utils.py index 94850280..7c2289b1 100644 --- a/tripper/utils.py +++ b/tripper/utils.py @@ -55,6 +55,11 @@ class AttrDict(dict): def __getattr__(self, name): if name in self: return self[name] + if name == "__wrapped__": + # Hack to work around a pytest bug. During its collection + # phase pytest tries to mock namespace objects with an + # attribute `__wrapped__`. + return None raise KeyError(name) def __setattr__(self, name, value): From 8f727c7d74e7dc87250250745655ddfaedf690ba Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 Jan 2025 15:25:23 +0100 Subject: [PATCH 31/59] Remove --ignore=examples from pytest options in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 58304083..0398f0a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,7 +121,7 @@ good-names = [ [tool.pytest.ini_options] minversion = "7.0" -addopts = "-rs --cov=tripper --cov-report=term --doctest-modules --doctest-ignore-import-errors --ignore=examples" +addopts = "-rs --cov=tripper --cov-report=term --doctest-modules --doctest-ignore-import-errors" filterwarnings = [ "ignore:.*imp module.*:DeprecationWarning", "ignore:::tripper.literal:243", # Ignore warning in doctest From 065e8935441594b28b75d56df29d1cadd281d572 Mon Sep 17 00:00:00 2001 From: "Tor S. 
Haugland" Date: Thu, 2 Jan 2025 15:35:38 +0100 Subject: [PATCH 32/59] Fix CI doctest bug --- tripper/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tripper/__init__.py b/tripper/__init__.py index af17dac1..5cbe0da0 100644 --- a/tripper/__init__.py +++ b/tripper/__init__.py @@ -26,6 +26,10 @@ from .triplestore import Triplestore, backend_packages from .triplestore_extend import Tripper +# Import backends here to avoid defining new globals later +# Needed for pytest+doctest to pass +import tripper.backends # pylint: disable=unused-import + __version__ = "0.3.4" # Pre-defined namespaces From dd92304e7ffec8ce2c80def5915a25d2f954b6e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:35:53 +0000 Subject: [PATCH 33/59] [pre-commit.ci] auto fixes from pre-commit hooks For more information, see https://pre-commit.ci --- tripper/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tripper/__init__.py b/tripper/__init__.py index 5cbe0da0..c5561c62 100644 --- a/tripper/__init__.py +++ b/tripper/__init__.py @@ -4,6 +4,10 @@ See the README.md file for a description for how to use this package. """ +# Import backends here to avoid defining new globals later +# Needed for pytest+doctest to pass +import tripper.backends # pylint: disable=unused-import + from .literal import Literal from .namespace import ( DC, @@ -26,10 +30,6 @@ from .triplestore import Triplestore, backend_packages from .triplestore_extend import Tripper -# Import backends here to avoid defining new globals later -# Needed for pytest+doctest to pass -import tripper.backends # pylint: disable=unused-import - __version__ = "0.3.4" # Pre-defined namespaces From 4241295c357016900293e62247d4bf51fd258d3d Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 Jan 2025 15:51:08 +0100 Subject: [PATCH 34/59] Use relative import from __init__.py file --- tripper/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tripper/__init__.py b/tripper/__init__.py index c5561c62..e880db91 100644 --- a/tripper/__init__.py +++ b/tripper/__init__.py @@ -6,8 +6,7 @@ # Import backends here to avoid defining new globals later # Needed for pytest+doctest to pass -import tripper.backends # pylint: disable=unused-import - +from . import backends # pylint: disable=unused-import from .literal import Literal from .namespace import ( DC, From 38e0483d93665dffa349222e15dd61a391a9e431 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 Jan 2025 16:28:59 +0100 Subject: [PATCH 35/59] Updated documentation --- docs/dataset/datadoc.md | 15 ++++++++++++++- tripper/dataset/dataset.py | 12 ++++++++---- tripper/utils.py | 2 +- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/docs/dataset/datadoc.md b/docs/dataset/datadoc.md index 6c343ff1..0afb16f5 100644 --- a/docs/dataset/datadoc.md +++ b/docs/dataset/datadoc.md @@ -124,6 +124,9 @@ AttrDict(...) ``` +The returned `AttrDict` instance is an updated copy of `dataset` (casted to a dict subclass with attribute access). +It correspond to a valid JSON-LD document and is the same as returned by [as_jsonld()]. 
+ You can use `ts.serialize()` to list the content of the triplestore (defaults to turtle): ```python @@ -169,8 +172,17 @@ See [semdata.yaml] for an example of a [YAML] representation of a multi-resource ### Documenting as a YAML file The [save_datadoc()] function allow to save a [YAML] file in [multi-resource](#multi-resource-dict) format to a triplestore. +Saving [semdata.yaml] to a triplestore can e.g. be done with + +```python +>>> from tripper.dataset import save_datadoc +>>> save_datadoc( # doctest: +ELLIPSIS +... ts, +... "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml" +... ) +AttrDict(...) -See [semdata.yaml] for an example. +``` ### Documenting as table @@ -235,6 +247,7 @@ The below example shows how to save all datasets listed in the CSV file [semdata [oteio:Generator]: https://w3id.org/emmo/domain/oteio/Generator [oteio:Parser]: https://w3id.org/emmo/domain/oteio/Parser [save_dict()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict +[as_jsonld()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld [save_datadoc()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc [semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml [semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 5052169b..dd967521 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -40,7 +40,7 @@ from typing import TYPE_CHECKING from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore -from tripper.utils import AttrDict, as_python +from tripper.utils import AttrDict, as_python, openfile if TYPE_CHECKING: # pragma: no cover from typing import Any, Iterable, List, Mapping, Optional, Sequence, Union @@ -541,10 +541,13 @@ def expand_iri(iri: str, prefixes: dict) -> str: def read_datadoc(filename: "Union[str, Path]") -> dict: - """Read YAML data documentation and return it as a dict.""" + """Read YAML data documentation and return it as a dict. + + The filename may also be an URL to a file accessible with HTTP GET. + """ import yaml # type: ignore - with open(filename, "r", encoding="utf-8") as f: + with openfile(filename, mode="rt", encoding="utf-8") as f: d = yaml.safe_load(f) return prepare_datadoc(d) @@ -557,7 +560,8 @@ def save_datadoc( Arguments: ts: Triplestore to save dataset documentation to. file_or_dict: Data documentation dict or name of a YAML file to read - the data documentation from. + the data documentation from. It may also be an URL to a file + accessible with HTTP GET. Returns: Dict-representation of the loaded dataset. diff --git a/tripper/utils.py b/tripper/utils.py index 7c2289b1..76e9cc0f 100644 --- a/tripper/utils.py +++ b/tripper/utils.py @@ -76,7 +76,7 @@ def __dir__(self): def openfile( url: "Union[str, Path]", timeout: float = 3, **kwargs ) -> "Generator": - """Like open(), but allows opening remote files using http requests. + """Like open(), but allows opening remote files using HTTP GET requests. Should always be used in a with-statement. 
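A minimal usage sketch of the new `openfile()` helper (illustrative only; the URL is made up — remote files are fetched with HTTP GET and removed again when the context exits):

```python
from tripper.utils import openfile

# Works the same for local paths, file:// URLs and http(s):// URLs.
with openfile("https://example.com/data.yaml", mode="rt", encoding="utf-8") as f:
    content = f.read()
```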
From 8756727cdd04d2c05a6620e96af944250647d222 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 Jan 2025 20:36:02 +0100 Subject: [PATCH 36/59] Added types (literal/iri) to datadoc-keywords.md and reordered context.json to match the order in datadoc-keywords.md --- docs/dataset/datadoc-keywords.md | 140 +++++++++++++++-------------- tripper/context/0.2/context.json | 150 ++++++++++++++++--------------- 2 files changed, 147 insertions(+), 143 deletions(-) diff --git a/docs/dataset/datadoc-keywords.md b/docs/dataset/datadoc-keywords.md index f8a27852..68a34fc5 100644 --- a/docs/dataset/datadoc-keywords.md +++ b/docs/dataset/datadoc-keywords.md @@ -8,10 +8,10 @@ Special keywords for JSON-LD See the [JSON-LD documentation] for a complete list of "@"-prefixed keywords. Here we only list those that are commonly used for data documentation with Tripper. -- **@context**: URL to or dict with user-defined JSON-LD context. +- **@context** (*IRI*): URL to or dict with user-defined JSON-LD context. Used to extend the keywords listed on this page with domain- or application-specific keywords. -- **@id**: IRI of the documented resource. -- **@type**: IRI of ontological class that the resource is an individual of. +- **@id** (*IRI*): IRI of the documented resource. +- **@type** (*IRI*): IRI of ontological class that the resource is an individual of. General properties on resources used by DCAT @@ -19,94 +19,95 @@ General properties on resources used by DCAT These can also be used on datasets and distributions. See the DCAT documentation for [dcat:Dataset] and [dcat:Distribution] for recommendations. -- **[accessRights]**: Information about who can access the resource or an indication of its security status. -- **[conformsTo]**: An established standard to which the described resource conforms. -- **[contactPoint]**: Relevant contact information for the cataloged resource. Use of [vCard] is recommended. -- **[creator]**: The entity responsible for producing the resource. -- **[description]**: A free-text account of the resource. -- **[hasCurrentVersion]**: This resource has a more specific, versioned resource with equivalent content. -- **[hasPart]**: A related resource that is included either physically or logically in the described resource. -- **[hasPolicy]**: An ODRL conformant policy expressing the rights associated with the resource. -- **[hasVersion]**: This resource has a more specific, versioned resource. -- **[identifier]**: A unique identifier of the resource being described or cataloged. -- **[isReferencedBy]**: A related resource, such as a publication, that references, cites, or otherwise points to the cataloged resource. -- **[issued]**: Date of formal issuance (e.g., publication) of the resource. -- **[keyword]**: A keyword or tag describing the resource. -- **[landingPage]**: A Web page that can be navigated to in a Web browser to gain access to the catalog, a dataset, its distributions and/or additional information. -- **[language]**: A language of the resource. This refers to the natural language used for textual metadata (i.e., titles, descriptions, etc.) of a cataloged resource (i.e., dataset or service) or the textual values of a dataset distribution. -- **[license]**: A legal document under which the resource is made available. -- **[modified]**: Most recent date on which the resource was changed, updated or modified. -- **[publisher]**: The entity responsible for making the resource available. 
-- **[qualifiedAttribution]**: Link to an Agent having some form of responsibility for the resource. -- **[qualifiedRelation]**: Link to a description of a relationship with another resource. -- **[relation]**: A resource with an unspecified relationship to the cataloged resource. -- **[replaces]**: A related resource that is supplanted, displaced, or superseded by the described resource. -- **[rights]**: A statement that concerns all rights not addressed with `license` or `accessRights`, such as copyright statements. -- **[status]**: The status of the resource in the context of a particular workflow process. -- **[theme]**: A main category of the resource. A resource can have multiple themes. -- **[title]**: A name given to the resource. -- **[type]**: The nature or genre of the resource. -- **[version]**: The version indicator (name or identifier) of a resource. -- **[versionNotes]**: A description of changes between this version and the previous version of the resource. +- **[accessRights]** (*Literal*): Information about who can access the resource or an indication of its security status. +- **[conformsTo]** (*Literal*): An established standard to which the described resource conforms. +- **[contactPoint]** (*Literal*): Relevant contact information for the cataloged resource. Use of [vCard] is recommended. +- **[creator]** (*Literal*): The entity responsible for producing the resource. +- **[description]** (*Literal*): A free-text account of the resource. +- **[hasCurrentVersion]** (*Literal*): This resource has a more specific, versioned resource with equivalent content. +- **[hasPart]** (*IRI*): A related resource that is included either physically or logically in the described resource. +- **[hasPolicy]** (*Literal*): An ODRL conformant policy expressing the rights associated with the resource. +- **[hasVersion]** (*Literal*): This resource has a more specific, versioned resource. +- **[identifier]** (*Literal*): A unique identifier of the resource being described or cataloged. +- **[isReferencedBy]** (*Literal*): A related resource, such as a publication, that references, cites, or otherwise points to the cataloged resource. +- **[issued]** (*Literal*): Date of formal issuance (e.g., publication) of the resource. +- **[keyword]** (*Literal*): A keyword or tag describing the resource. +- **[landingPage]** (*Literal*): A Web page that can be navigated to in a Web browser to gain access to the catalog, a dataset, its distributions and/or additional information. +- **[language]** (*Literal*): A language of the resource. This refers to the natural language used for textual metadata (i.e., titles, descriptions, etc.) of a cataloged resource (i.e., dataset or service) or the textual values of a dataset distribution. +- **[license]** (*Literal*): A legal document under which the resource is made available. +- **[modified]** (*Literal*): Most recent date on which the resource was changed, updated or modified. +- **[publisher]** (*Literal*): The entity responsible for making the resource available. +- **[qualifiedAttribution]** (*IRI*): Link to an Agent having some form of responsibility for the resource. +- **[qualifiedRelation]** (*IRI*): Link to a description of a relationship with another resource. +- **[relation]** (*IRI*): A resource with an unspecified relationship to the cataloged resource. +- **[replaces]** (*IRI*): A related resource that is supplanted, displaced, or superseded by the described resource. 
+- **[rights]** (*Literal*): A statement that concerns all rights not addressed with `license` or `accessRights`, such as copyright statements. +- **[status]** (*Literal*): The status of the resource in the context of a particular workflow process. +- **[theme]** (*Literal*): A main category of the resource. A resource can have multiple themes. +- **[title]** (*Literal*): A name given to the resource. +- **[type]** (*Literal*): The nature or genre of the resource. +- **[version]** (*Literal*): The version indicator (name or identifier) of a resource. +- **[versionNotes]** (*Literal*): A description of changes between this version and the previous version of the resource. Other general properties on resources ------------------------------------- -- **[abstract]**: A summary of the resource. -- **[bibliographicCitation]**: A bibliographic reference for the resource. Recommended practice is to include sufficient bibliographic detail to identify the resource as unambiguously as possible. -- **[deprecated]**: The annotation property that indicates that a given entity has been deprecated. It should equal to `"true"^^xsd:boolean`. -- **[isDefinedBy]**: Indicate a resource defining the subject resource. This property may be used to indicate an RDF vocabulary in which a resource is described. -- **[label]**: Provides a human-readable version of a resource's name. -- **[seeAlso]**: Indicates a resource that might provide additional information about the subject resource. -- **[source]**: A related resource from which the described resource is derived. -- **[statements]**: A list of subject-predicate-object triples with additional RDF statements documenting the resource. +- **[abstract]** (*Literal*): A summary of the resource. +- **[bibliographicCitation]** (*Literal*): A bibliographic reference for the resource. Recommended practice is to include sufficient bibliographic detail to identify the resource as unambiguously as possible. +- **[comment]** (*Literal*): A description of the subject resource. +- **[deprecated]** (*Literal*): The annotation property that indicates that a given entity has been deprecated. It should equal to `"true"^^xsd:boolean`. +- **[isDefinedBy]** (*Literal*): Indicate a resource defining the subject resource. This property may be used to indicate an RDF vocabulary in which a resource is described. +- **[label]** (*Literal*): Provides a human-readable version of a resource's name. +- **[seeAlso]** (*Literal*): Indicates a resource that might provide additional information about the subject resource. +- **[source]** (*Literal*): A related resource from which the described resource is derived. +- **[statements]** (*Literal*): A list of subject-predicate-object triples with additional RDF statements documenting the resource. Properties specific for datasets -------------------------------- -- **[datamodel]**: URI of DLite datamodel for the dataset. -- **[datamodelStorage]**: URL to DLite storage plugin where the datamodel is stored. -- **[distribution]**: An available distribution of the dataset. -- **[hasDatum]**: Relates a dataset to its datum parts. `hasDatum` relations are normally specified manually, since they are generated from the DLite data model. -- **[inSeries]**: A dataset series of which the dataset is part. -- **[isInputOf]**: A process that this dataset is the input to. -- **[isOutputOf]**: A process that this dataset is the output of. -- **[mappings]**: A list of subject-predicate-object triples mapping the datamodel to ontological concepts. 
-- **[mappingURL]**: URL to a document defining the mappings of the datamodel. +- **[datamodel]** (*Literal*): URI of DLite datamodel for the dataset. +- **[datamodelStorage]** (*Literal*): URL to DLite storage plugin where the datamodel is stored. +- **[distribution]** (*IRI*): An available distribution of the dataset. +- **[hasDatum]** (*IRI*): Relates a dataset to its datum parts. `hasDatum` relations are normally specified manually, since they are generated from the DLite data model. +- **[inSeries]** (*IRI*): A dataset series of which the dataset is part. +- **[isInputOf]** (*IRI*): A process that this dataset is the input to. +- **[isOutputOf]** (*IRI*): A process that this dataset is the output of. +- **[mappings]** (*Literal*): A list of subject-predicate-object triples mapping the datamodel to ontological concepts. +- **[mappingURL]** (*Literal*): URL to a document defining the mappings of the datamodel. The file format is given by `mappingFormat`. Defaults to turtle. -- **[mappingFormat]**: File format for `mappingURL`. Defaults to turtle. -- **[spatial]**: The geographical area covered by the dataset. -- **[spatialResolutionMeters]**: Minimum spatial separation resolvable in a dataset, measured in meters. -- **[temporal]**: The temporal period that the dataset covers. -- **[temporalResolution]**: Minimum time period resolvable in the dataset. -- **[wasGeneratedBy]**: An activity that generated, or provides the business context for, the creation of the dataset. +- **[mappingFormat]** (*Literal*): File format for `mappingURL`. Defaults to turtle. +- **[spatial]** (*Literal*): The geographical area covered by the dataset. +- **[spatialResolutionMeters]** (*Literal*): Minimum spatial separation resolvable in a dataset, measured in meters. +- **[temporal]** (*Literal*): The temporal period that the dataset covers. +- **[temporalResolution]** (*Literal*): Minimum time period resolvable in the dataset. +- **[wasGeneratedBy]** (*Literal*): An activity that generated, or provides the business context for, the creation of the dataset. Properties specific for distributions ------------------------------------- -- **[accessService]**: A data service that gives access to the distribution of the dataset. -- **[accessURL]**: A URL of the resource that gives access to a distribution of the dataset. E.g., landing page, feed, SPARQL endpoint. -- **[byteSize]**: The size of a distribution in bytes. -- **[checksum]**: The checksum property provides a mechanism that can be used to verify that the contents of a file or package have not changed. -- **[compressFormat]**: The compression format of the distribution in which the data is contained in a compressed form, e.g., to reduce the size of the downloadable file. -- **[downloadURL]**: The URL of the downloadable file in a given format. E.g., CSV file or RDF file. The format is indicated by the distribution's `format` and/or `mediaType`. -- **[format]**: The file format of the distribution. +- **[accessService]** (*IRI*): A data service that gives access to the distribution of the dataset. +- **[accessURL]** (*Literal*): A URL of the resource that gives access to a distribution of the dataset. E.g., landing page, feed, SPARQL endpoint. +- **[byteSize]** (*Literal*): The size of a distribution in bytes. +- **[checksum]** (*IRI*): The checksum property provides a mechanism that can be used to verify that the contents of a file or package have not changed. 
+- **[compressFormat]** (*Literal*): The compression format of the distribution in which the data is contained in a compressed form, e.g., to reduce the size of the downloadable file. +- **[downloadURL]** (*Literal*): The URL of the downloadable file in a given format. E.g., CSV file or RDF file. The format is indicated by the distribution's `format` and/or `mediaType`. +- **[format]** (*Literal*): The file format of the distribution. Use `mediaType` instead if the type of the distribution is defined by [IANA]. -- **[generator]**: A generator that can create the distribution. -- **[mediaType]**: The media type of the distribution as defined by [IANA]. -- **[packageFormat]**: The package format of the distribution in which one or more data files are grouped together, e.g., to enable a set of related files to be downloaded together. -- **[parser]**: A parser that can parse the distribution. +- **[generator]** (*IRI*): A generator that can create the distribution. +- **[mediaType]** (*Literal*): The media type of the distribution as defined by [IANA]. +- **[packageFormat]** (*Literal*): The package format of the distribution in which one or more data files are grouped together, e.g., to enable a set of related files to be downloaded together. +- **[parser]** (*IRI*): A parser that can parse the distribution. Properties for parsers and generators ------------------------------------- -- **[configuration]**: A JSON string with configurations specific to the parser or generator. -- **[generatorType]**: Generator type. Ex: `application/vnd.dlite-generate`. -- **[parserType]**: Parser type. Ex: `application/vnd.dlite-parse`. +- **[configuration]** (*Literal*): A JSON string with configurations specific to the parser or generator. +- **[generatorType]** (*Literal*): Generator type. Ex: `application/vnd.dlite-generate`. +- **[parserType]** (*Literal*): Parser type. Ex: `application/vnd.dlite-parse`. - - -Introduction ------------- -The data documentation is based on small [JSON-LD documents], each documenting a single resource. -Examples of resources can be a dataset, an instrument, a sample, etc. -All resources are uniquely identified by their IRI. - -The primary focus of the [tripper.dataset] module is to document datasets such that they are consistent with the [DCAT vocabulary], but at the same time easily extended additional semantic meaning provided by other ontologies. -It is also easy to add and relate the datasets to other types of documents, like people, instruments and samples. - -The [tripper.dataset] module provides a Python API for documenting resources at all four levels of data documentation, including: - -- **Cataloguing**: Storing and accessing *documents* based on their IRI and data properties. - (Addressed FAIR aspects: *findability* and *accessibility*). -- **Structural documentation**: The structure of a dataset. Provided via [DLite] data models. - (Addressed FAIR aspects: *interoperability*). -- **Contextual documentation**: Relations between resources, i.e. *linked data*. Enables contextual search. - (Addressed FAIR aspects: *findability* and *reusability*). -- **Semantic documentation**: Describe what the resource *is* using ontologies. In combination with structural documentation, maps the properties of a data model to ontological concepts. - (Addressed FAIR aspects: *findability*, *interoperability* and *reusability*). - -The figure below shows illustrates how a dataset is documented in a triplestore. 
- -![Documentation of a dataset](https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/docs/figs/dataset-Dataset.png) - - -Resource types --------------- -The [tripper.dataset] module include the following set of predefined resource types: - -- **dataset**: Individual of [dcat:Dataset] and [emmo:DataSet]. -- **distribution**: Individual of [dcat:Distribution]. -- **accessService**: Individual of [dcat:AccessService]. -- **generator**: Individual of [oteio:Generator]. -- **parser**: Individual of [oteio:Parser]. -- **resource**: Any other documented resource, with no implicit type. - -Future releases will support adding custom resource types. - - Documenting a resource ----------------------- +====================== In the Python API are the JSON-LD documents describing the resources internally represented as Python dicts. However, the [tripper.dataset] module tries to hide away the complexities of [JSON-LD] behind a simple interface. -### Documenting as a Python dict +Documenting as a Python dict +---------------------------- The API supports two Python dict representations, one for documenting a single resource and one for documenting multiple resources. -#### Single-resource dict +### Single-resource dict Below is a simple example of how to document a SEM image dataset as a Python dict: ```python @@ -154,7 +111,7 @@ Note that the image implicitly has been declared to be an individual of the clas This is because the `type` argument of [save_dict()] defaults to "dataset". -#### Multi-resource dict +### Multi-resource dict It is also possible to document multiple resources as a Python dict. !!! note @@ -170,7 +127,8 @@ This dict representation accepts the following keywords: See [semdata.yaml] for an example of a [YAML] representation of a multi-resource dict documentation. -### Documenting as a YAML file +Documenting as a YAML file +-------------------------- The [save_datadoc()] function allow to save a [YAML] file in [multi-resource](#multi-resource-dict) format to a triplestore. Saving [semdata.yaml] to a triplestore can e.g. be done with @@ -185,7 +143,8 @@ AttrDict(...) ``` -### Documenting as table +Documenting as table +-------------------- The [TableDoc] class can be used to document multiple resources as rows in a table. The table must have a header row with defined keywords (either [predefined][predefined keywords] or provided with a custom context). @@ -238,8 +197,8 @@ The below example shows how to save all datasets listed in the CSV file [semdata [JSON-LD documents]: https://json-ld.org/ [JSON-LD]: https://www.w3.org/TR/json-ld/ [default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json -[predefined prefixes]: datadoc-prefixes.md -[predefined keywords]: datadoc-keywords.md +[predefined prefixes]: prefixes.md +[predefined keywords]: keywords.md [dcat:Dataset]: https://www.w3.org/TR/vocab-dcat-3/#Class:Dataset [dcat:Distribution]: https://www.w3.org/TR/vocab-dcat-3/#Class:Distribution [dcat:AccessService]: https://www.w3.org/TR/vocab-dcat-3/#Class:AccessService diff --git a/docs/dataset/introduction.md b/docs/dataset/introduction.md new file mode 100644 index 00000000..0be53abf --- /dev/null +++ b/docs/dataset/introduction.md @@ -0,0 +1,66 @@ +Data documentation +================== + + + +Introduction +------------ +The data documentation is based on small [JSON-LD documents], each documenting a single resource. +Examples of resources can be a dataset, an instrument, a sample, etc. 
+All resources are uniquely identified by their IRI.
+
+The primary focus of the [tripper.dataset] module is to document datasets such that they are consistent with the [DCAT vocabulary], but at the same time easily extended with additional semantic meaning provided by other ontologies.
+It is also easy to add and relate the datasets to other types of documents, like people, instruments and samples.
+
+The [tripper.dataset] module provides a Python API for documenting resources at all four levels of data documentation, including:
+
+- **Cataloguing**: Storing and accessing *documents* based on their IRI and data properties.
+  (Addressed FAIR aspects: *findability* and *accessibility*).
+- **Structural documentation**: The structure of a dataset. Provided via [DLite] data models.
+  (Addressed FAIR aspects: *interoperability*).
+- **Contextual documentation**: Relations between resources, i.e. *linked data*. Enables contextual search.
+  (Addressed FAIR aspects: *findability* and *reusability*).
+- **Semantic documentation**: Describe what the resource *is* using ontologies. In combination with structural documentation, maps the properties of a data model to ontological concepts.
+  (Addressed FAIR aspects: *findability*, *interoperability* and *reusability*).
+
+The figure below illustrates how a dataset is documented in a triplestore.
+
+![Documentation of a dataset](https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/docs/figs/dataset-Dataset.png)
+
+
+Resource types
+--------------
+The [tripper.dataset] module includes the following set of predefined resource types:
+
+- **dataset**: Individual of [dcat:Dataset] and [emmo:DataSet].
+- **distribution**: Individual of [dcat:Distribution].
+- **accessService**: Individual of [dcat:AccessService].
+- **generator**: Individual of [oteio:Generator].
+- **parser**: Individual of [oteio:Parser].
+- **resource**: Any other documented resource, with no implicit type.
+
+Future releases will support adding custom resource types.
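+
+As a small illustration (a sketch only; the `http://example.com/kb#sampleA` IRI is made up), the resource type is selected with the `type` argument of [save_dict()]:
+
+```python
+>>> from tripper import Triplestore
+>>> from tripper.dataset import save_dict
+
+>>> ts = Triplestore("rdflib")
+>>> save_dict(  # doctest: +SKIP
+...     ts,
+...     {"@id": "http://example.com/kb#sampleA", "title": "A sample."},
+...     type="resource",
+... )
+AttrDict(...)
+
+```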
+ + + +[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset +[DCAT vocabulary]: https://www.w3.org/TR/vocab-dcat-3/ +[DLite]: https://github.com/SINTEF/dlite +[YAML]: https://yaml.org/ +[JSON-LD documents]: https://json-ld.org/ +[JSON-LD]: https://www.w3.org/TR/json-ld/ +[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json +[predefined prefixes]: prefixes.md +[predefined keywords]: keywords.md +[dcat:Dataset]: https://www.w3.org/TR/vocab-dcat-3/#Class:Dataset +[dcat:Distribution]: https://www.w3.org/TR/vocab-dcat-3/#Class:Distribution +[dcat:AccessService]: https://www.w3.org/TR/vocab-dcat-3/#Class:AccessService +[emmo:DataSet]: https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a +[oteio:Generator]: https://w3id.org/emmo/domain/oteio/Generator +[oteio:Parser]: https://w3id.org/emmo/domain/oteio/Parser +[save_dict()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict +[as_jsonld()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld +[save_datadoc()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc +[semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml +[semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv +[TableDoc]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.tabledoc.TableDoc diff --git a/docs/dataset/datadoc-keywords.md b/docs/dataset/keywords.md similarity index 100% rename from docs/dataset/datadoc-keywords.md rename to docs/dataset/keywords.md diff --git a/docs/dataset/datadoc-prefixes.md b/docs/dataset/prefixes.md similarity index 100% rename from docs/dataset/datadoc-prefixes.md rename to docs/dataset/prefixes.md diff --git a/docs/index.md b/docs/index.md index b494f1b6..5ad9f7e0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -106,7 +106,7 @@ We gratefully acknowledge the following projects for supporting the development [Tutorial]: https://emmc-asbl.github.io/tripper/latest/tutorial/ -[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/dataset/datadoc/ +[tripper.dataset]: https://emmc-asbl.github.io/tripper/latest/dataset/introduction/ [tripper.mappings]: https://emmc-asbl.github.io/tripper/latest/api_reference/mappings/mappings/ [tripper.convert]: https://emmc-asbl.github.io/tripper/latest/api_reference/convert/convert/ [Discovery of custom backends]: https://emmc-asbl.github.io/tripper/latest/backend_discovery/ diff --git a/mkdocs.yml b/mkdocs.yml index 117bec6b..f666b395 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,9 +82,10 @@ nav: - Tutorial: tutorial.md - Backend discovery: backend_discovery.md - Data documentation: - - Introduction: dataset/datadoc.md - - Predefined prefixes: dataset/datadoc-prefixes.md - - Predefined keywords: dataset/datadoc-keywords.md + - Introduction: dataset/introduction.md + - Documenting a resource: dataset/documenting-a-resource.md + - Predefined prefixes: dataset/prefixes.md + - Predefined keywords: dataset/keywords.md - ... 
| api_reference/**
   - Known issues: known-issues.md
   - For developers: developers.md

From c7709ae5ec683e2e37462c8bbea434008dce808c Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 3 Jan 2025 12:24:59 +0100
Subject: [PATCH 38/59] Added a section about customisation to the
 documentation

---
 docs/dataset/customisation.md          | 100 +++++++++++++++++++++++++
 docs/dataset/documenting-a-resource.md |   7 +-
 docs/dataset/introduction.md           |   7 +-
 docs/dataset/keywords.md               |   9 ++-
 docs/dataset/prefixes.md               |   4 +-
 mkdocs.yml                             |   1 +
 6 files changed, 118 insertions(+), 10 deletions(-)
 create mode 100644 docs/dataset/customisation.md

diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
new file mode 100644
index 00000000..4cfeddb2
--- /dev/null
+++ b/docs/dataset/customisation.md
@@ -0,0 +1,100 @@
+Customisations
+==============
+
+
+User-defined prefixes
+---------------------
+A namespace prefix is a mapping from a *prefix* to a *namespace URL*.
+For example
+
+    owl: http://www.w3.org/2002/07/owl#
+
+Tripper already includes a default list of [predefined prefixes].
+Additional prefixes can be provided in two ways.
+
+### With the `prefixes` argument
+Several functions in the API (like [save_dict()], [as_jsonld()] and [TableDoc.parse_csv()]) take a `prefixes` argument with which additional namespace prefixes can be provided.
+
+This may be handy when from the Python API.
+
+
+### With custom context
+Additional prefixes can also be provided via a custom JSON-LD context as a `"prefix": "namespace URL"` mapping.
+
+See [User-defined keywords] for how this is done.
+
+
+User-defined keywords
+---------------------
+Tripper already includes a long list of [predefined keywords], which are defined in the [default JSON-LD context].
+
+A new custom keyword can be added by providing a mapping in a custom JSON-LD context from the keyword to the IRI of the corresponding concept in an ontology.
+
+Let's assume that you already have a domain ontology with base IRI http://example.com/myonto#, that defines the concepts for the keywords you want to use for the data documentation.
+
+First, you can add the prefix for the base IRI of your domain ontology to a custom JSON-LD context
+
+    "myonto": "http://example.com/myonto#",
+
+How the keywords should be specified in the context depends on whether they correspond to a data property or an object property in the ontology and whether a given datatype is expected.
+
+### Simple literal
+Simple literal keywords correspond to data properties with no specific datatype (just a plain string).
+
+Assume you want to add the keyword `batchNumber` to relate documented samples to the number assigned to the batch they are taken from.
+It corresponds to the data property http://example.com/myonto#batchNumber in your domain ontology.
+By adding the following mapping to your custom JSON-LD context, `batchNumber` becomes available as a keyword for your data documentation:
+
+    "batchNumber": "myonto:batchNumber",
+
+### Literal with specific datatype
+If `batchNumber` must always be an integer, you can specify this by replacing the above mapping with the following:
+
+    "batchNumber": {
+        "@id": "myonto:batchNumber",
+        "@type": "xsd:integer"
+    },
+
+Here "@id" refers to the IRI that `batchNumber` is mapped to, and "@type" gives its datatype. In this case we use `xsd:integer`, which is defined in the W3C `xsd` vocabulary.
+
+### Object property
+Object properties are relations between two individuals in the knowledge base.
+
+If you want to say more about the batches, you may want to store them as individuals in the knowledge base.
+In that case, you may want to add a keyword `fromBatch` which relates your sample to the batch it was taken from.
+In your ontology you may define `fromBatch` as an object property with the IRI http://example.com/myonto#fromBatch.
+
+
+    "fromBatch": {
+        "@id": "myonto:fromBatch",
+        "@type": "@id"
+    },
+
+Here the special value "@id" for the "@type" means that the value of `fromBatch` must be an IRI.
+
+### Providing a custom context
+TODO
+
+
+User-defined resource types
+---------------------------
+TODO
+
+Extending the list of predefined [resource types] is not implemented yet.
+
+Since JSON-LD is not designed for categorisation, new resource types should not be added in a custom JSON-LD context.
+Instead, the list of available resource types should be stored and retrieved from the knowledge base.
+
+
+
+[With custom context]: #with-custom-context
+[User-defined keywords]: #user-defined-keywords
+[resource types]: introduction.md#resource-types
+[predefined prefixes]: ../prefixes
+[predefined keywords]: ../keywords
+[save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
+[as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
+[save_datadoc()]:
+../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
+[TableDoc.parse_csv()]: ../../api_reference/dataset/tabledoc/#tripper.dataset.tabledoc.TableDoc.parse_csv
+[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
diff --git a/docs/dataset/documenting-a-resource.md b/docs/dataset/documenting-a-resource.md
index 495ba3fc..3e838ce8 100644
--- a/docs/dataset/documenting-a-resource.md
+++ b/docs/dataset/documenting-a-resource.md
@@ -205,9 +205,10 @@
 [emmo:DataSet]: https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a
 [oteio:Generator]: https://w3id.org/emmo/domain/oteio/Generator
 [oteio:Parser]: https://w3id.org/emmo/domain/oteio/Parser
-[save_dict()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
-[as_jsonld()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
-[save_datadoc()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
+[save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
+[as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
+[save_datadoc()]:
+../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
 [semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml
 [semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv
 [TableDoc]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.tabledoc.TableDoc
diff --git a/docs/dataset/introduction.md b/docs/dataset/introduction.md
index 0be53abf..b6431bac 100644
--- a/docs/dataset/introduction.md
+++ b/docs/dataset/introduction.md
@@ -58,9 +58,10 @@ Future releases will support adding custom resource types.
[emmo:DataSet]: https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a
 [oteio:Generator]: https://w3id.org/emmo/domain/oteio/Generator
 [oteio:Parser]: https://w3id.org/emmo/domain/oteio/Parser
-[save_dict()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
-[as_jsonld()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
-[save_datadoc()]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
+[save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
+[as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
+[save_datadoc()]:
+../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
 [semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml
 [semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv
 [TableDoc]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.tabledoc.TableDoc
diff --git a/docs/dataset/keywords.md b/docs/dataset/keywords.md
index 68a34fc5..3739cb7e 100644
--- a/docs/dataset/keywords.md
+++ b/docs/dataset/keywords.md
@@ -1,6 +1,7 @@
 Predefined keywords
 ===================
 All keywords listed on this page (except for the special "@"-prefixed keywords) are defined in the [default JSON-LD context].
+See [User-defined keywords] for how to extend this list with additional keywords.


 Special keywords for JSON-LD
@@ -61,7 +62,7 @@ Other general properties on resources
 - **[label]** (*Literal*): Provides a human-readable version of a resource's name.
 - **[seeAlso]** (*Literal*): Indicates a resource that might provide additional information about the subject resource.
 - **[source]** (*Literal*): A related resource from which the described resource is derived.
-- **[statements]** (*Literal*): A list of subject-predicate-object triples with additional RDF statements documenting the resource.
+- **[statements]** (*Literal JSON*): A list of subject-predicate-object triples with additional RDF statements documenting the resource.


 Properties specific for datasets
@@ -74,7 +75,7 @@ Properties specific for datasets
 - **[inSeries]** (*IRI*): A dataset series of which the dataset is part.
 - **[isInputOf]** (*IRI*): A process that this dataset is the input to.
 - **[isOutputOf]** (*IRI*): A process that this dataset is the output of.
-- **[mappings]** (*Literal*): A list of subject-predicate-object triples mapping the datamodel to ontological concepts.
+- **[mappings]** (*Literal JSON*): A list of subject-predicate-object triples mapping the datamodel to ontological concepts.
 - **[mappingURL]** (*Literal*): URL to a document defining the mappings of the datamodel.
   The file format is given by `mappingFormat`. Defaults to turtle.
@@ -105,7 +106,7 @@ Properties for parsers and generators
 -------------------------------------
-- **[configuration]** (*Literal*): A JSON string with configurations specific to the parser or generator.
+- **[configuration]** (*Literal JSON*): A JSON string with configurations specific to the parser or generator.
 - **[generatorType]** (*Literal*): Generator type. Ex: `application/vnd.dlite-generate`.
 - **[parserType]** (*Literal*): Parser type. Ex: `application/vnd.dlite-parse`.
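As an illustration of these three keywords, a parser might be documented like this (a sketch only; the IRI and the configuration values are made up):

```python
>>> from tripper.dataset import save_dict
>>> save_dict(  # doctest: +SKIP
...     ts,
...     {
...         "@id": "http://example.com/parser#myparser",
...         "parserType": "application/vnd.dlite-parse",
...         "configuration": {"driver": "csv"},
...     },
...     type="parser",
... )
```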
@@ -222,3 +223,5 @@
 [dcat:Distribution]: https://www.w3.org/TR/vocab-dcat-3/#Class:Distribution
 [vCard]: https://www.w3.org/TR/vcard-rdf/
 [IANA]: https://www.iana.org/assignments/media-types/media-types.xhtml
+
+[User-defined keywords]: ../customisation/#user-defined-keywords
diff --git a/docs/dataset/prefixes.md b/docs/dataset/prefixes.md
index d5d3e538..5af69c85 100644
--- a/docs/dataset/prefixes.md
+++ b/docs/dataset/prefixes.md
@@ -1,6 +1,7 @@
 Predefined prefixes
 ===================
-All prefixes listed on this page are defined in the [default JSON-LD context].
+All namespace prefixes listed on this page are defined in the [default JSON-LD context].
+See [User-defined prefixes] for how to extend this list with additional namespace prefixes.

 * adms: http://www.w3.org/ns/adms#
 * dcat: http://www.w3.org/ns/dcat#
@@ -24,3 +25,4 @@

 [default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
+[User-defined prefixes]: ../customisation/#user-defined-prefixes
diff --git a/mkdocs.yml b/mkdocs.yml
index f666b395..db90385f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -84,6 +84,7 @@ nav:
   - Data documentation:
     - Introduction: dataset/introduction.md
     - Documenting a resource: dataset/documenting-a-resource.md
+    - Customisation: dataset/customisation.md
    - Predefined prefixes: dataset/prefixes.md
     - Predefined keywords: dataset/keywords.md
   - ... | api_reference/**

From 1a1cbad4806a9b5ec09ce320f7473f2bc1970e6c Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 3 Jan 2025 14:53:19 +0100
Subject: [PATCH 39/59] Update docs/dataset/customisation.md

Co-authored-by: Tor S. Haugland
---
 docs/dataset/customisation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
index 4cfeddb2..ad176916 100644
--- a/docs/dataset/customisation.md
+++ b/docs/dataset/customisation.md
@@ -15,7 +15,7 @@ Additional prefixes can be provided in two ways.
 ### With the `prefixes` argument
 Several functions in the API (like [save_dict()], [as_jsonld()] and [TableDoc.parse_csv()]) take a `prefixes` argument with which additional namespace prefixes can be provided.

-This may be handy when from the Python API.
+This may be handy when used from the Python API.

From 3f30c0072ee890148789e0ebbcddf875f4d84531 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 3 Jan 2025 14:54:20 +0100
Subject: [PATCH 40/59] Update docs/dataset/customisation.md

Co-authored-by: Tor S. Haugland
---
 docs/dataset/customisation.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
index ad176916..f52cf0a6 100644
--- a/docs/dataset/customisation.md
+++ b/docs/dataset/customisation.md
@@ -27,6 +27,7 @@ See [User-defined keywords] for how this is done.
 User-defined keywords
 ---------------------
 Tripper already includes a long list of [predefined keywords], which are defined in the [default JSON-LD context].
+A description of how to define new concepts in a JSON-LD context is given in the [JSON-LD 1.1](https://www.w3.org/TR/json-ld11/) document, and examples can be tested in the [JSON-LD Playground](https://json-ld.org/playground/).

 A new custom keyword can be added by providing a mapping in a custom JSON-LD context from the keyword to the IRI of the corresponding concept in an ontology.
From c69dd23409d18f86c922e32a885864fdb338e67c Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 3 Jan 2025 14:55:49 +0100
Subject: [PATCH 41/59] Documented custom context

---
 docs/dataset/customisation.md          | 11 ++--
 docs/dataset/documenting-a-resource.md |  6 ++-
 tests/dataset/test_dataset.py          | 71 ++++++++++++++++++++++++++
 tests/input/custom_context.yaml        | 40 +++++++++++++++
 tripper/dataset/dataset.py             | 40 +++++++++------
 5 files changed, 148 insertions(+), 20 deletions(-)
 create mode 100644 tests/input/custom_context.yaml

diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
index 4cfeddb2..7f12bf06 100644
--- a/docs/dataset/customisation.md
+++ b/docs/dataset/customisation.md
@@ -72,8 +72,12 @@ In your ontology you may define `fromBatch` as an object property with the IRI http:

 Here the special value "@id" for the "@type" means that the value of `fromBatch` must be an IRI.

-### Providing a custom context
-TODO
+
+Providing a custom context
+--------------------------
+Custom context can be provided for all the interfaces described in the section [Documenting a resource].
+
+In the YAML documentation, Custom context can be provided with the "@context"


 User-defined resource types
@@ -87,9 +91,10 @@ Instead, the list of available resource types should be stored and retrieved fro


+[Documenting a resource]: ../documenting-a-resource
 [With custom context]: #with-custom-context
 [User-defined keywords]: #user-defined-keywords
-[resource types]: introduction.md#resource-types
+[resource types]: ../introduction#resource-types
 [predefined prefixes]: ../prefixes
 [predefined keywords]: ../keywords
diff --git a/docs/dataset/documenting-a-resource.md b/docs/dataset/documenting-a-resource.md
index 3e838ce8..a85342dd 100644
--- a/docs/dataset/documenting-a-resource.md
+++ b/docs/dataset/documenting-a-resource.md
@@ -1,7 +1,9 @@
 Documenting a resource
 ======================
-In the Python API are the JSON-LD documents describing the resources internally represented as Python dicts.
-However, the [tripper.dataset] module tries to hide away the complexities of [JSON-LD] behind a simple interface.
+In the [tripper.dataset] sub-package, the documents describing the resources are internally represented as [JSON-LD] documents stored as Python dicts.
+However, the API tries to hide away the complexities of JSON-LD behind simple interfaces.
+To support different use cases, the sub-package provides several interfaces for data documentation, including Python dicts, YAML files and tables.
+These are further described below.
Documenting as a Python dict diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 0352d4e3..2bd94cb6 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -144,6 +144,50 @@ def test_expand_iri(): assert expand_iri("xxx:type", prefixes) == "xxx:type" +def test_as_jsonld(): + """Test as_jsonld().""" + from tripper import DCAT, EMMO, OWL, Namespace + from tripper.dataset import as_jsonld + from tripper.dataset.dataset import CONTEXT_URL + + with pytest.raises(ValueError): + as_jsonld({}) + + EX = Namespace("http://example.com/ex#") + SER = Namespace("http://example.com/series#") + dct = {"@id": "ex:indv", "a": "val"} + context = {"ex": EX, "a": "ex:a"} + + d = as_jsonld(dct, _context=context) + assert len(d["@context"]) == 2 + assert d["@context"][0] == CONTEXT_URL + assert d["@context"][1] == context + assert d["@id"] == EX.indv + assert len(d["@type"]) == 2 + assert set(d["@type"]) == {DCAT.Dataset, EMMO.DataSet} + assert d.a == "val" + + d2 = as_jsonld(dct, type="resource", _context=context) + assert d2["@context"] == d["@context"] + assert d2["@id"] == d["@id"] + assert d2["@type"] == OWL.NamedIndividual + assert d2.a == "val" + + d3 = as_jsonld( + {"inSeries": "ser:main"}, + prefixes={"ser": SER}, + a="value", + _id="ex:indv2", + _type="ex:Item", + _context=context, + ) + assert d3["@context"] == d["@context"] + assert d3["@id"] == EX.indv2 + assert set(d3["@type"]) == {DCAT.Dataset, EMMO.DataSet, EX.Item} + assert d3.a == "value" + assert d3.inSeries == SER.main + + # if True: def test_datadoc(): """Test save_datadoc() and load_dict()/save_dict().""" @@ -236,6 +280,33 @@ def test_datadoc(): } +def test_custom_context(): + """Test saving YAML file with custom context to triplestore.""" + from dataset_paths import indir # pylint: disable=import-error + + from tripper import Triplestore + from tripper.dataset import save_datadoc + + ts = Triplestore("rdflib") + d = save_datadoc(ts, indir / "custom_context.yaml") + + KB = ts.namespaces["kb"] + assert d.resources[0]["@id"] == KB.sampleA + assert d.resources[0].fromBatch == KB.batch1 + + assert d.resources[1]["@id"] == KB.sampleB + assert d.resources[1].fromBatch == KB.batch1 + + assert d.resources[2]["@id"] == KB.sampleC + assert d.resources[2].fromBatch == KB.batch2 + + assert d.resources[3]["@id"] == KB.batch1 + assert d.resources[3].batchNumber == 1 + + assert d.resources[4]["@id"] == KB.batch2 + assert d.resources[4].batchNumber == 2 + + # if True: def test_pipeline(): """Test creating OTEAPI pipeline.""" diff --git a/tests/input/custom_context.yaml b/tests/input/custom_context.yaml new file mode 100644 index 00000000..f2270f0d --- /dev/null +++ b/tests/input/custom_context.yaml @@ -0,0 +1,40 @@ +# Custom context +"@context": + myonto: http://example.com/myonto# + + batchNumber: + "@id": myonto:batchNumber + "@type": xsd:integer + + fromBatch: + "@id": myonto:fromBatch + "@type": "@id" + + +# Additional prefixes +prefixes: + kb: http://example.com/kb# + + +resources: + # Samples + - "@id": kb:sampleA + "@type": chameo:Sample + fromBatch: kb:batch1 + + - "@id": kb:sampleB + "@type": chameo:Sample + fromBatch: kb:batch1 + + - "@id": kb:sampleC + "@type": chameo:Sample + fromBatch: kb:batch2 + + # Batches + - "@id": kb:batch1 + "@type": myonto:Batch + batchNumber: 1 + + - "@id": kb:batch2 + "@type": myonto:Batch + batchNumber: 2 diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index dd967521..c7fbb16f 100644 --- a/tripper/dataset/dataset.py +++ 
b/tripper/dataset/dataset.py
@@ -39,7 +39,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING

-from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
+from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Namespace, Triplestore
 from tripper.utils import AttrDict, as_python, openfile

 if TYPE_CHECKING:  # pragma: no cover
@@ -392,9 +392,9 @@
         context=context, timeout=timeout, fromfile=fromfile
     )
     prefixes = {
-        k: v
+        k: str(v)
         for k, v in ctx.items()
-        if isinstance(v, str) and v.endswith(("#", "/"))
+        if isinstance(v, (str, Namespace)) and str(v).endswith(("#", "/"))
     }
     return prefixes
@@ -572,7 +572,8 @@
     d = read_datadoc(file_or_dict)

     # Bind prefixes
-    prefixes = get_prefixes()
+    context = d.get("@context")
+    prefixes = get_prefixes(context=context)
     prefixes.update(d.get("prefixes", {}))
     for prefix, ns in prefixes.items():  # type: ignore
         ts.bind(prefix, ns)
@@ -584,7 +585,9 @@
     for spec in dicttypes.values():
         label = spec["datadoc_label"]
         for dct in get(d, label):
-            dct = as_jsonld(dct=dct, type=types[label], prefixes=prefixes)
+            dct = as_jsonld(
+                dct=dct, type=types[label], prefixes=prefixes, _context=context
+            )
             f = io.StringIO(json.dumps(dct))
             with Triplestore(backend="rdflib") as ts2:
                 ts2.parse(f, format="json-ld")
@@ -604,7 +607,8 @@
     d = AttrDict({"@context": CONTEXT_URL})
     d.update(datadoc)

-    prefixes = get_prefixes()
+    context = datadoc.get("@context")
+    prefixes = get_prefixes(context=context)
     if "prefixes" in d:
         d.prefixes.update(prefixes)
     else:
@@ -613,13 +617,13 @@
     for type, spec in dicttypes.items():
         label = spec["datadoc_label"]
         for i, dct in enumerate(get(d, label)):
-            d[label][i] = as_jsonld(dct=dct, type=type, prefixes=d.prefixes)
+            d[label][i] = as_jsonld(
+                dct=dct, type=type, prefixes=d.prefixes, _context=context
+            )

     return d


-# TODO: update this function to correctly handle multiple contexts
-# provided with the `_context` keyword argument.
 def as_jsonld(
     dct: dict,
     type: "Optional[str]" = "dataset",
@@ -629,29 +633,35 @@
     """Return an updated copy of dict `dct` as valid JSON-LD.

     Arguments:
-        dct: Dict with data documentation to represent as JSON-LD.
+        dct: Dict documenting a resource to be represented as JSON-LD.
         type: Type of data to document. Should either be one of the
             pre-defined names: "dataset", "distribution", "accessService",
            "parser" and "generator" or an IRI to a class in an ontology.
            Defaults to "dataset".
         prefixes: Dict with prefixes in addition to those included in the
            JSON-LD context. Should map namespace prefixes to IRIs.
-        kwargs: Additional keyword arguments to add to the returned dict.
-            A leading underscore in a key will be translated to a
-            leading "@"-sign. For example, "@id" or "@context" may be
-            provided as "_id" or "_context", respectively.
+        kwargs: Additional keyword arguments to add to the returned
+            dict. A leading underscore in a key will be translated to
+            a leading "@"-sign. For example, "@id", "@type" or
+            "@context" may be provided as "_id", "_type" or "_context",
+            respectively.

     Returns:
        An updated copy of `dct` as valid JSON-LD.
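+
+    Example (an illustrative sketch only, mirroring `test_as_jsonld()` in the
+    test suite; the `ex` prefix and the keyword `a` are made up):
+
+        >>> as_jsonld(  # doctest: +SKIP
+        ...     {"@id": "ex:indv", "a": "val"},
+        ...     _context={"ex": "http://example.com/ex#", "a": "ex:a"},
+        ... )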
+ """ # pylint: disable=too-many-branches # Id of base entry that is documented _entryid = kwargs.pop("_entryid", None) + context = kwargs.pop("_context", None) + d = AttrDict() if not _entryid: d["@context"] = CONTEXT_URL + if context: + add(d, "@context", context) if type: t = dicttypes[type]["@type"] if type in dicttypes else type @@ -674,7 +684,7 @@ def as_jsonld( if "@type" not in d: warnings.warn(f"Missing '@type' in dict to document: {_entryid}") - all_prefixes = get_prefixes() + all_prefixes = get_prefixes(context=context) if prefixes: all_prefixes.update(prefixes) From 5686804769516e67c314232435b8bb199a8c012c Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 3 Jan 2025 15:29:24 +0100 Subject: [PATCH 42/59] Added example with custom context --- docs/dataset/customisation.md | 111 +++++++++++++++++++++++++++++++- tests/input/custom_context.yaml | 2 + 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md index 31503035..3691e187 100644 --- a/docs/dataset/customisation.md +++ b/docs/dataset/customisation.md @@ -78,7 +78,116 @@ Providing a custom context -------------------------- Custom context can be provided for all the interfaces described in the section [Documenting a resource]. -In the YAML documentation, Custom context can be provided with the "@context" +### Python dict +Both for the single-resource and multi-resource dicts, you can add a `"@context"` key to the dict who's value is +- a string containing a resolvable URL to the custom context, +- a dict with the custom context or +- a list of the aforementioned strings and dicts. + +### YAML file +Since the YAML representation is just a YAML serialisation of a multi-resource dict, custom context can be provided by adding a `"@context"` keyword. + +For example, the following YAML file defines a custom context defining the `myonto` prefix as well as the `batchNumber` and `fromBatch` keywords. +An additional "kb" prefix (used for documented resources) is defined with the `prefixes` keyword. + +```yaml +--- + +# Custom context +"@context": + myonto: http://example.com/myonto# + + batchNumber: + "@id": myonto:batchNumber + "@type": xsd:integer + + fromBatch: + "@id": myonto:fromBatch + "@type": "@id" + + +# Additional prefixes +prefixes: + kb: http://example.com/kb# + + +resources: + # Samples + - "@id": kb:sampleA + "@type": chameo:Sample + fromBatch: kb:batch1 + + - "@id": kb:sampleB + "@type": chameo:Sample + fromBatch: kb:batch1 + + - "@id": kb:sampleC + "@type": chameo:Sample + fromBatch: kb:batch2 + + # Batches + - "@id": kb:batch1 + "@type": myonto:Batch + batchNumber: 1 + + - "@id": kb:batch2 + "@type": myonto:Batch + batchNumber: 2 +``` + +You can save this context to a triplestore with + +```python +>>> from tripper import Triplestore +>>> from tripper.dataset import save_datadoc +>>> +>>> ts = Triplestore("rdflib") +>>> save_datadoc( # doctest: +ELLIPSIS +... ts, +... "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml", +... ) +AttrDict(...) + +``` + +The content of the triplestore should now be + +```python +>>> print(ts.serialize()) +@prefix chameo: . +@prefix kb: . +@prefix myonto: . +@prefix owl: . +@prefix xsd: . + +kb:sampleA a owl:NamedIndividual, + chameo:Sample ; + myonto:fromBatch kb:batch1 . + +kb:sampleB a owl:NamedIndividual, + chameo:Sample ; + myonto:fromBatch kb:batch1 . + +kb:sampleC a owl:NamedIndividual, + chameo:Sample ; + myonto:fromBatch kb:batch2 . 
+
+
+### Table
+TODO
+
+
 User-defined resource types
diff --git a/tests/input/custom_context.yaml b/tests/input/custom_context.yaml
index f2270f0d..5e647afa 100644
--- a/tests/input/custom_context.yaml
+++ b/tests/input/custom_context.yaml
@@ -1,3 +1,5 @@
+---
+
 # Custom context
 "@context":
   myonto: http://example.com/myonto#

From abbef4b78e2276c9ab0e439812d4a5caf8e2a9f6 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 3 Jan 2025 15:34:58 +0100
Subject: [PATCH 43/59] Correct example

---
 docs/dataset/customisation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
index 3691e187..70db2b0a 100644
--- a/docs/dataset/customisation.md
+++ b/docs/dataset/customisation.md
@@ -144,7 +144,7 @@ You can save this context to a triplestore with
 >>> ts = Triplestore("rdflib")
 >>> save_datadoc(  # doctest: +ELLIPSIS
 ...     ts,
-...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml",
+...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/dataset-docs/tests/input/custom_context.yaml",
 ... )
 AttrDict(...)

From 85a51ae24c7531b370a94946b62644fcae94d71c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 3 Jan 2025 22:29:16 +0000
Subject: [PATCH 44/59] [pre-commit.ci] auto fixes from pre-commit hooks

For more information, see https://pre-commit.ci

---
 tripper/dataset/tabledoc.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index c4f14567..6dbf8b32 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -150,4 +150,3 @@ def write_csv(
         writer.writerow(self.header)
         for row in self.data:
             writer.writerow(row)
-

From aa99eecf1b89dd1ea196c9cc6affc4eb1466e3aa Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 3 Jan 2025 23:37:43 +0100
Subject: [PATCH 45/59] Added datadoc tool

---
 docs/api_reference/dataset/datadoc.md |   3 +
 docs/dataset/customisation.md         |   4 +-
 mkdocs.yml                            |   4 +
 pyproject.toml                        |   3 +
 tests/dataset/test_dataset.py         |   2 +-
 tests/dataset/test_tabledoc.py        |  51 ++++++-
 tripper/dataset/datadoc.py            | 184 ++++++++++++++++++++++++++
 tripper/dataset/dataset.py            |  37 +++++-
 tripper/dataset/tabledoc.py           | 118 ++++++++++++++---
 9 files changed, 379 insertions(+), 27 deletions(-)
 create mode 100644 docs/api_reference/dataset/datadoc.md
 create mode 100644 tripper/dataset/datadoc.py

diff --git a/docs/api_reference/dataset/datadoc.md b/docs/api_reference/dataset/datadoc.md
new file mode 100644
index 00000000..b601d56d
--- /dev/null
+++ b/docs/api_reference/dataset/datadoc.md
@@ -0,0 +1,3 @@
+# datadoc
+
+::: tripper.dataset.datadoc
diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
index 70db2b0a..197b7fb9 100644
--- a/docs/dataset/customisation.md
+++ b/docs/dataset/customisation.md
@@ -205,8 +205,8 @@ Instead, the list of available resource types should be stored and retrieved fro
 [With custom context]: #with-custom-context
 [User-defined keywords]: #user-defined-keywords
 [resource types]: ../introduction#resource-types
-[predefined prefixes]: ../prefixes
-[predefined keywords]: ../keywords
+[predefined prefixes]: ../prefixes/
+[predefined keywords]: ../keywords/
 [save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
[as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld [save_datadoc()]: diff --git a/mkdocs.yml b/mkdocs.yml index db90385f..815211d4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -49,6 +49,10 @@ markdown_extensions: - toc: permalink: true +exclude_docs: | + ../dataset/datadoc.py + + plugins: - search: lang: en diff --git a/pyproject.toml b/pyproject.toml index 0398f0a0..1e871728 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,9 @@ Source = "https://github.com/EMMC-ASBL/tripper" Changelog = "https://github.com/EMMC-ASBL/tripper/blob/master/CHANGELOG.md" Package = "https://pypi.org/project/tripper" +[project.scripts] +datadoc = "tripper.dataset.datadoc:main" + [tool.isort] line_length = 79 # PEP8 diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 2bd94cb6..01a90c57 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -263,7 +263,7 @@ def test_datadoc(): # Test searching the triplestore SAMPLE = ts.namespaces["sample"] - datasets = search_iris(ts) + datasets = search_iris(ts, type="dcat:Dataset") named_datasets = { SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"], SEMDATA["SEM_cement_batch2/77600-23-001"], diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 67ce74ed..0df11e53 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -4,8 +4,8 @@ # if True: -def test_as_dicts(): - """Test the as_dicts() method.""" +def test_asdicts(): + """Test the asdicts() method.""" pytest.importorskip("rdflib") @@ -83,9 +83,33 @@ def test_as_dicts(): print(ts.serialize()) +def test_fromdicts(): + """Test the fromdicts() method.""" + from tripper import Namespace + from tripper.dataset import TableDoc + + EX = Namespace("http://example.com/ex#") + dicts = [ + {"@id": EX.data1, "label": "data1"}, + { + "@id": EX.data2, + "distribution": {"downloadURL": "http://example.com/data2"}, + }, + ] + td = TableDoc.fromdicts(dicts) + + assert td.header == ["@id", "label", "distribution.downloadURL"] + assert td.data == [ + [EX.data1, "data1", None], + [EX.data2, None, "http://example.com/data2"], + ] + + # if True: def test_csv(): """Test parsing a csv file.""" + import io + from dataset_paths import indir, outdir # pylint: disable=import-error pytest.importorskip("rdflib") @@ -123,6 +147,29 @@ def test_csv(): # Write the table to a new csv file td.write_csv(outdir / "semdata.csv") + # Write table to string + with io.StringIO() as f: + td.write_csv(f) + s = f.getvalue() + + # Re-read the csv file from the string + with io.StringIO(s) as f: + td2 = TableDoc.parse_csv( + indir / "semdata.csv", + delimiter=";", + prefixes={ + "sem": "https://w3id.com/emmo/domain/sem/0.1#", + "semdata": "https://he-matchmaker.eu/data/sem/", + "sample": "https://he-matchmaker.eu/sample/", + "mat": "https://he-matchmaker.eu/material/", + "dm": "http://onto-ns.com/meta/characterisation/0.1/SEMImage#", + "parser": "http://sintef.no/dlite/parser#", + "gen": "http://sintef.no/dlite/generator#", + }, + ) + assert td2.header == td.header + assert td2.data == td.data + # Print serialised KB ts = Triplestore(backend="rdflib") td.save(ts) diff --git a/tripper/dataset/datadoc.py b/tripper/dataset/datadoc.py new file mode 100644 index 00000000..8b7b0eec --- /dev/null +++ b/tripper/dataset/datadoc.py @@ -0,0 +1,184 @@ +"""A script for data documentation.""" + +import argparse +import io +import json + +from tripper import Triplestore +from tripper.dataset import ( + 
TableDoc,
+    get_jsonld_context,
+    load_dict,
+    save_datadoc,
+    save_dict,
+    search_iris,
+)
+
+
+def subcommand_add(ts, args):
+    """Subcommand for populating the triplestore."""
+    if args.yamlfile:
+        save_datadoc(ts, args.yamlfile)
+
+    if args.table:
+        td = TableDoc.parse_csv(
+            args.table, context=get_jsonld_context(args.context)
+        )
+        td.save(ts)
+
+    if args.serialize:
+        ts.serialize(args.serialize, format=args.sformat)
+
+
+def subcommand_find(ts, args):
+    """Subcommand for finding IRIs in the triplestore."""
+    if args.criteria:
+        kwargs = dict(crit.split("=", 1) for crit in args.criteria)
+    else:
+        kwargs = {}
+    iris = search_iris(ts, type=args.type, **kwargs)
+
+    # Create output
+    if args.format == "iris":
+        s = "\n".join(iris)
+    elif args.format == "json":
+        s = json.dumps([load_dict(ts, iri) for iri in iris], indent=2)
+    elif args.format == "turtle":
+        ts2 = Triplestore("rdflib")
+        for iri in iris:
+            d = load_dict(ts, iri)
+            save_dict(ts2, d)
+        s = ts2.serialize()
+    elif args.format == "csv":
+        dicts = [load_dict(ts, iri) for iri in iris]
+        td = TableDoc.fromdicts(dicts)
+        with io.StringIO() as f:
+            td.write_csv(f)
+            s = f.getvalue()
+    else:
+        raise ValueError(args.format)
+
+    print(s)
+
+
+def main():
+    """Main function."""
+    parser = argparse.ArgumentParser(
+        description=(
+            "Tool for data documentation.\n\n"
+            "It allows populating and searching a triplestore for existing "
+            "documentation."
+        ),
+    )
+
+    subparsers = parser.add_subparsers(required=True, help="Subcommands.")
+
+    # Subcommand: add
+    parser_add = subparsers.add_parser(
+        "add", help="Populate the triplestore with documentation."
+    )
+    parser_add.set_defaults(func=subcommand_add)
+    parser_add.add_argument(
+        "--context",
+        help="Path or URL to custom JSON-LD context. Used with `--table`.",
+    )
+    parser_add.add_argument(
+        "--yamlfile",
+        help="Path or URL to YAML file to add to the triplestore.",
+    )
+    parser_add.add_argument(
+        "--table", help="Path to table to populate the triplestore from."
+    )
+    parser_add.add_argument(
+        "--tformat",
+        help=(
+            "Used with `--table`. Format of the table to load. "
+            "Only csv is currently supported."
+        ),
+    )
+    parser_add.add_argument(
+        "--serialize",
+        metavar="FILENAME",
+        help="File to serialise the populated triplestore to.",
+    )
+    parser_add.add_argument(
+        "--sformat",
+        default="turtle",
+        help='Format to use with `--serialize`. Default is "turtle".',
+    )
+
+    # Subcommand: find
+    parser_find = subparsers.add_parser(
+        "find", help="Find IRIs of resources in the triplestore."
+    )
+    parser_find.set_defaults(func=subcommand_find)
+    parser_find.add_argument(
+        "--type",
+        "-t",
+        help="The type of resources to find.",
+    )
+    parser_find.add_argument(
+        "--criteria",
+        "-c",
+        action="extend",
+        nargs="+",
+        metavar="KEY=VALUE",
+        help="Matching criteria for resources to find.",
+    )
+    parser_find.add_argument(
+        "--format",
+        "-f",
+        default="iris",
+        choices=["iris", "json", "turtle", "csv"],
+        help="Output format to list the matched resources.",
+    )
+
+    # General options
+    parser.add_argument(
+        "--backend",
+        "-b",
+        default="rdflib",
+        help="Triplestore backend to use.",
+    )
+    parser.add_argument(
+        "--base_iri",
+        help="Base IRI of the triplestore (seldom needed).",
+    )
+    parser.add_argument(
+        "--database",
+        "-d",
+        help="Name of database to connect to (for backends supporting it).",
+    )
+    parser.add_argument(
+        "--package",
+        help="Only needed when `backend` is a relative module.",
+    )
+    parser.add_argument(
+        "--parse",
+        "-p",
+        metavar="LOCATION",
+        help="Load triplestore from this location.",
+    )
+    parser.add_argument(
+        "--pformat",
+        "-f",
+        help="Used with `--parse`. Format to use when parsing triplestore.",
+    )
+
+    args = parser.parse_args()
+
+    ts = Triplestore(
+        backend=args.backend,
+        base_iri=args.base_iri,
+        database=args.database,
+        package=args.package,
+    )
+    if args.parse:
+        ts.parse(args.parse, format=args.pformat)
+
+    # Call subcommand handler
+    args.func(ts, args)
+
+
+if __name__ == "__main__":
+    main()
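The console sketch below illustrates how the new `datadoc` entry point (registered in pyproject.toml above) is meant to be used.
The file names are hypothetical, and the global `--parse` option must precede the subcommand, since it is defined on the top-level parser:

```console
# Populate a triplestore from a data documentation YAML file and serialise it
datadoc add --yamlfile tests/input/semdata.yaml --serialize kb.ttl

# Re-load the serialised triplestore and list IRIs of matching datasets
datadoc --parse kb.ttl find --type dataset --criteria 'creator=Sigurd Wenner'
```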
"), + ) + parser_find.add_argument( + "--format", + "-f", + default="iris", + choices=["iris", "json", "turtle", "csv"], + help="Output format to list the matched resources.", + ) + + # General: options + parser.add_argument( + "--backend", + "-b", + default="rdflib", + help="Triplestore backend to use.", + ) + parser.add_argument( + "--base_iri", + help="Base IRI of the triplestore (seldom needed).", + ) + parser.add_argument( + "--database", + "-d", + help="Name of database to connect to (for backends supporting it).", + ) + parser.add_argument( + "--package", + help="Only needed when `backend` is a relative module.", + ) + parser.add_argument( + "--parse", + "-p", + metavar="LOCATION", + help="Load triplestore from this location.", + ) + parser.add_argument( + "--pformat", + "-f", + help="Used with `--parse`. Format to use when parsing triplestore.", + ) + + args = parser.parse_args() + + ts = Triplestore( + backend=args.backend, + base_iri=args.base_iri, + database=args.database, + package=args.package, + ) + if args.parse: + ts.parse(args.parse, format=args.pformat) + + # Call subcommand handler + args.func(ts, args) + + +if __name__ == "__main__": + main() diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index c7fbb16f..cc0a9bda 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -61,6 +61,7 @@ r"^([a-z0-9]*):([a-zA-Z_]([a-zA-Z0-9_/+-]*[a-zA-Z0-9_+-])?)$" ) +# Resource types dicttypes = { "parser": { "datadoc_label": "parsers", @@ -91,6 +92,10 @@ } +class InvalidKeywordError(KeyError): + """Keyword is not defined.""" + + def save_dict( ts: Triplestore, dct: dict, @@ -846,17 +851,19 @@ def get_partial_pipeline( return pipeline -def search_iris(ts: Triplestore, type=DCAT.Dataset, **kwargs): - """Return a list of IRIs for all entries of the given type. +def search_iris(ts: Triplestore, type=None, **kwargs): + """Return a list of IRIs for all matching resources. Additional matching criterias can be specified by `kwargs`. - Arguments: ts: Triplestore to search. type: Search for entries that are individuals of the class with - this IRI. The default is `dcat:Dataset`. + this IRI. kwargs: Match criterias. + Returns: + List of IRIs for matching resources. + Examples: List all dataset IRIs: @@ -880,11 +887,29 @@ def search_iris(ts: Triplestore, type=DCAT.Dataset, **kwargs): crit = [] if type: - crit.append(f" ?iri rdf:type <{type}> .") + if ":" in type: + expanded = ts.expand_iri(type) + crit.append(f" ?iri rdf:type <{expanded}> .") + elif type in dicttypes: + types = dicttypes[type]["@type"] + if isinstance(types, str): + types = [types] + for t in types: + crit.append(f" ?iri rdf:type <{t}> .") + else: + raise ValueError( + "`type` must either be an IRI or the name of one the " + f"resource types. 
Got: {type}" + ) + + else: + crit.append(" ?iri rdf:type ?o .") expanded = {v: k for k, v in get_shortnames().items()} for k, v in kwargs.items(): key = f"@{k[1:]}" if k.startswith("_") else k + if key not in expanded: + raise InvalidKeywordError(key) predicate = expanded[key] if v in expanded: value = f"<{expanded[v]}>" @@ -894,7 +919,7 @@ def search_iris(ts: Triplestore, type=DCAT.Dataset, **kwargs): ) else: value = v - crit.append(f" ?iri <{predicate}> {value} .") + crit.append(f" ?iri <{predicate}> {value} .") criterias = "\n".join(crit) query = f""" PREFIX rdf: <{RDF}> diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 609fdd3d..6624afe8 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -9,7 +9,14 @@ from tripper.utils import AttrDict, openfile if TYPE_CHECKING: # pragma: no cover - from typing import List, Optional, Sequence, Union + from typing import Iterable, List, Optional, Protocol, Sequence, Union + + class Writer(Protocol): + """Prototype for a class with a write() method.""" + + # pylint: disable=too-few-public-methods,missing-function-docstring + + def write(self, data: str) -> None: ... class TableDoc: @@ -43,13 +50,18 @@ def __init__( context: "Optional[Union[dict, list]]" = None, strip: bool = True, ): - self.header = header - self.data = data + self.header = list(header) + self.data = [list(row) for row in data] self.type = type self.prefixes = prefixes self.context = context self.strip = strip + def save(self, ts: Triplestore) -> None: + """Save tabular datadocumentation to triplestore.""" + for d in self.asdicts(): + save_dict(ts, d) + def asdicts(self) -> "List[dict]": """Return the table as a list of dicts.""" kw = {"@context": self.context} if self.context else {} @@ -69,14 +81,74 @@ def asdicts(self) -> "List[dict]": results.append(jsonld) return results - def save(self, ts: Triplestore) -> None: - """Save tabular datadocumentation to triplestore.""" - for d in self.asdicts(): - save_dict(ts, d) + @staticmethod + def fromdicts( + dicts: "Sequence[dict]", + type: "Optional[str]" = "dataset", + prefixes: "Optional[dict]" = None, + context: "Optional[Union[dict, list]]" = None, + strip: bool = True, + ) -> "TableDoc": + """Create new TableDoc instance from a sequence of dicts. + + Arguments: + dicts: Sequence of single-resource dicts. + type: Type of data to save (applies to all rows). Should + either be one of the pre-defined names: "dataset", + "distribution", "accessService", "parser" and "generator" + or an IRI to a class in an ontology. Defaults to + "dataset". + prefixes: Dict with prefixes in addition to those included in + the JSON-LD context. Should map namespace prefixes to IRIs. + context: Dict with user-defined JSON-LD context. + strip: Whether to strip leading and trailing whitespaces from + cells. + + Returns: + New TableDoc instance. + + """ + # Store the header as keys in a dict to keep ordering + header = {} + + def addheader(d, prefix=""): + """Add keys in `d` to header. + + Nested dicts will result in dot-separated keys. + """ + for k, v in d.items(): + if isinstance(v, dict): + addheader(v, k + ".") + else: + header[prefix + k] = True + + # Assign the header + for d in dicts: + addheader(d) + + # Assign table data. 
Nested dicts are accounted for + data = [] + for dct in dicts: + row = [] + for head in header: + d = dct + for key in head.split("."): + d = d.get(key, {}) + row.append(d if d != {} else None) + data.append(row) + + return TableDoc( + header=header.keys(), # type: ignore + data=data, # type: ignore + type=type, + prefixes=prefixes, + context=context, + strip=strip, + ) @staticmethod def parse_csv( - csvfile: "Union[Path, str]", + csvfile: "Union[Iterable[str], Path, str]", type: "Optional[str]" = "dataset", prefixes: "Optional[dict]" = None, context: "Optional[Union[dict, list]]" = None, @@ -88,7 +160,7 @@ def parse_csv( """Parse a csv file using the standard library csv module. Arguments: - csvfile: CSV file to parse. + csvfile: Name of CSV file to parse or an iterable of strings. type: Type of data to save (applies to all rows). Should either be one of the pre-defined names: "dataset", "distribution", "accessService", "parser" and "generator" @@ -106,13 +178,20 @@ def parse_csv( formatting parameters. For more details, see [Dialects and Formatting Parameters]. + Returns: + New TableDoc instance. + References: [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters """ - with openfile(csvfile, mode="rt", encoding=encoding) as f: - reader = csv.reader(f, dialect=dialect, **kwargs) - header = next(reader) - data = list(reader) + if isinstance(csvfile, (str, Path)): + with openfile(csvfile, mode="rt", encoding=encoding) as f: + reader = csv.reader(f, dialect=dialect, **kwargs) + else: + reader = csv.reader(csvfile, dialect=dialect, **kwargs) + + header = next(reader) + data = list(reader) return TableDoc( header=header, @@ -124,7 +203,7 @@ def parse_csv( def write_csv( self, - csvfile: "Union[Path, str]", + csvfile: "Union[Path, str, Writer]", encoding: str = "utf-8", dialect: "Union[csv.Dialect, str]" = "excel", **kwargs, @@ -133,7 +212,7 @@ def write_csv( """Write the table to a csv file using the standard library csv module. Arguments: - csvfile: CSV file to parse. + csvfile: File-like object or name of CSV file to write. encoding: The encoding of the csv file. dialect: A subclass of csv.Dialect, or the name of the dialect, specifying how the `csvfile` is formatted. 
For more details, @@ -145,8 +224,15 @@ def write_csv( References: [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters """ - with open(csvfile, mode="wt", encoding=encoding) as f: + + def write(f): writer = csv.writer(f, dialect=dialect, **kwargs) writer.writerow(self.header) for row in self.data: writer.writerow(row) + + if isinstance(csvfile, (str, Path)): + with open(csvfile, mode="wt", encoding=encoding) as f: + write(f) + else: + write(csvfile) From 8dbe71ac4e3e0073863658b93707389290f9d602 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 00:04:31 +0100 Subject: [PATCH 46/59] Removed duplicated tests --- tests/dataset/test_dataset.py | 8 -------- tripper/dataset/dataset.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 678913b5..01a90c57 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -39,14 +39,6 @@ def test_get_jsonld_context(): with pytest.raises(TypeError): get_jsonld_context(context=[None]) - # Test context argument - context2 = get_jsonld_context(context=CONTEXT_URL) - assert context2 == context - - assert "newkey" not in context - context3 = get_jsonld_context(context={"newkey": "onto:newkey"}) - assert context3["newkey"] == "onto:newkey" - def test_get_prefixes(): """Test get_prefixes().""" diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index cc0a9bda..774b49b9 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -851,7 +851,7 @@ def get_partial_pipeline( return pipeline -def search_iris(ts: Triplestore, type=None, **kwargs): +def search_iris(ts: Triplestore, type=None, **kwargs) -> "List[str]": """Return a list of IRIs for all matching resources. Additional matching criterias can be specified by `kwargs`. From e054557ed7a23605804088b82ecd02011086776f Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 00:07:32 +0100 Subject: [PATCH 47/59] Removed duplicated test --- tests/dataset/test_dataset.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index ca539262..2bd94cb6 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -39,14 +39,6 @@ def test_get_jsonld_context(): with pytest.raises(TypeError): get_jsonld_context(context=[None]) - # Test context argument - context2 = get_jsonld_context(context=CONTEXT_URL) - assert context2 == context - - assert "newkey" not in context - context3 = get_jsonld_context(context={"newkey": "onto:newkey"}) - assert context3["newkey"] == "onto:newkey" - def test_get_prefixes(): """Test get_prefixes().""" From 58974e8cf86d1fd7cae6b6fd495c4226a4a8b0aa Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 00:11:20 +0100 Subject: [PATCH 48/59] Updated URL after branch dataset-docs has been merged to master. --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 04e8f324..5911ba1f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -57,7 +57,7 @@ def test_openfile_http(): with openfile( "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/" - "dataset-docs/tests/input/openfile.txt" + "master/tests/input/openfile.txt" ) as f: assert f.read().strip() == "Example file." 
From 726e9ac548c3aeb7fd12f0a4c3181e4c29fd2d8f Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 00:37:27 +0100 Subject: [PATCH 49/59] Updated tabledoc test --- tests/dataset/test_tabledoc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 0df11e53..1c6aec35 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -155,8 +155,8 @@ def test_csv(): # Re-read the csv file from the string with io.StringIO(s) as f: td2 = TableDoc.parse_csv( - indir / "semdata.csv", - delimiter=";", + f, + delimiter=",", prefixes={ "sem": "https://w3id.com/emmo/domain/sem/0.1#", "semdata": "https://he-matchmaker.eu/data/sem/", From 3d64b34117af6e61bc17f7a5616781c03e31e204 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 00:50:01 +0100 Subject: [PATCH 50/59] Added more tests --- tests/dataset/test_dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 01a90c57..93f57a47 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -263,7 +263,8 @@ def test_datadoc(): # Test searching the triplestore SAMPLE = ts.namespaces["sample"] - datasets = search_iris(ts, type="dcat:Dataset") + datasets = search_iris(ts, type="dataset") + assert search_iris(ts, type="dcat:Dataset") == datasets named_datasets = { SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"], SEMDATA["SEM_cement_batch2/77600-23-001"], @@ -279,6 +280,9 @@ def test_datadoc(): SAMPLE["SEM_cement_batch2/77600-23-001"], } + with pytest.raises(ValueError): + search_iris(ts, type="invalid-type") + def test_custom_context(): """Test saving YAML file with custom context to triplestore.""" From 2773d239cd1beb29a482d285bb254b79eddad92d Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 00:54:12 +0100 Subject: [PATCH 51/59] Skip test_fromdicts() if rdflib isn't available --- tests/dataset/test_tabledoc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index 1c6aec35..4f9dfd93 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -85,6 +85,9 @@ def test_asdicts(): def test_fromdicts(): """Test the fromdicts() method.""" + + pytest.importorskip("rdflib") + from tripper import Namespace from tripper.dataset import TableDoc From 972cca4a6bd0e1390506395186882825af4fe686 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 20:45:07 +0100 Subject: [PATCH 52/59] Documented the datadoc tool --- docs/dataset/customisation.md | 66 ++++-- docs/figs/semdata.png | Bin 0 -> 54829 bytes docs/tools/datadoc.md | 378 +++++++++++++++++++++++++++++++ mkdocs.yml | 5 +- tests/input/semdata-context.json | 18 ++ tests/input/semdata.csv | 2 +- tripper/context/0.2/context.json | 4 + tripper/dataset/dataaccess.py | 5 + tripper/dataset/datadoc.py | 165 ++++++++++---- tripper/dataset/dataset.py | 25 +- tripper/dataset/tabledoc.py | 55 +++-- tripper/utils.py | 4 +- 12 files changed, 637 insertions(+), 90 deletions(-) create mode 100644 docs/figs/semdata.png create mode 100644 docs/tools/datadoc.md create mode 100644 tests/input/semdata-context.json diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md index 197b7fb9..71013c49 100644 --- a/docs/dataset/customisation.md +++ b/docs/dataset/customisation.md @@ -35,7 +35,9 @@ Lets assume that you already have a domain ontology 
with base IRI http://example.com/myonto#.

 First, you can add the prefix for the base IRI of your domain ontology to a custom JSON-LD context

-    "myonto": "http://example.com/myonto#",
+```json
+"myonto": "http://example.com/myonto#",
+```

 How the keywords should be specified in the context depends on whether they correspond to a data property or an object property in the ontology and whether a given datatype is expected.
@@ -46,15 +48,19 @@ Assume you want to add the keyword `batchNumber` to relate documented samples to
 It corresponds to the data property http://example.com/myonto#batchNumber in your domain ontology.
 By adding the following mapping to your custom JSON-LD context, `batchNumber` becomes available as a keyword for your data documentation:

-    "batchNumber": "myonto:batchNumber",
+```json
+"batchNumber": "myonto:batchNumber",
+```

 ### Literal with specific datatype
 If `batchNumber` must always be an integer, you can specify this by replacing the above mapping with the following:

-    "batchNumber": {
-        "@id": "myonto:batchNumber",
-        "@type": "xsd:integer"
-    },
+```json
+"batchNumber": {
+  "@id": "myonto:batchNumber",
+  "@type": "xsd:integer"
+},
+```

 Here "@id" refer to the IRI `batchNumber` is mapped to and "@type" its datatype.
 In this case we use `xsd:integer`, which is defined in the W3C `xsd` vocabulary.
@@ -65,11 +71,12 @@ If you want to say more about the batches, you may want to store them as individ
 In that case, you may want to add a keyword `fromBatch` which relate your sample to the batch it was taken from.
 In your ontology you may define `fromBatch` as a object property with IRI: http://example.com/myonto/fromBatch.

-
-    "fromBatch": {
-        "@id": "myonto:fromBatch",
-        "@type": "@id"
-    },
+```json
+"fromBatch": {
+  "@id": "myonto:fromBatch",
+  "@type": "@id"
+},
+```

 Here the special value "@id" for the "@type" means that the value of `fromBatch` must be an IRI.

@@ -80,10 +87,36 @@ Providing a custom context
 --------------------------
 Custom context can be provided for all the interfaces described in the section [Documenting a resource].

 ### Python dict
 Both for the single-resource and multi-resource dicts, you can add a `"@context"` key to the dict whose value is
+
 - a string containing a resolvable URL to the custom context,
 - a dict with the custom context or
 - a list of the aforementioned strings and dicts.

+For example:
+
+```json
+{
+  "@context": [
+    # URL to a JSON file, typically a domain-specific context
+    "https://json-ld.org/contexts/person.jsonld",
+
+    # Local context
+    {
+      "fromBatch": {
+        "@id": "myonto:fromBatch",
+        "@type": "@id"
+      }
+    }
+  ],
+
+  # Documentation of the resource using keywords defined in the context
+  ...
+}
+```
+
+Note that the [default context] is always included and doesn't need to be specified explicitly.
+
+
 ### YAML file
 Since the YAML representation is just a YAML serialisation of a multi-resource dict, custom context can be provided by adding a `"@context"` keyword.

@@ -144,7 +177,7 @@ You can save this context to a triplestore with
 >>> ts = Triplestore("rdflib")
 >>> save_datadoc(  # doctest: +ELLIPSIS
 ...     ts,
-...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/dataset-docs/tests/input/custom_context.yaml",
+...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/custom_context.yaml",
 ... )
 AttrDict(...)

@@ -186,8 +219,8 @@ kb:batch1 a myonto:Batch,


 ### Table
-TODO
-
+The `__init__()` method of the [TableDoc] class takes a `context` argument with which user-defined context can be provided.
+The value of the `context` argument is the same as for the `@context` key of a [Python dict].
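+
+For instance, a small table reusing the `fromBatch` keyword from the custom context above could be documented as follows (a minimal sketch):
+
+```python
+>>> from tripper import Triplestore
+>>> from tripper.dataset import TableDoc
+>>>
+>>> td = TableDoc(
+...     header=["@id", "@type", "fromBatch"],
+...     data=[
+...         ["kb:sampleA", "chameo:Sample", "kb:batch1"],
+...         ["kb:sampleB", "chameo:Sample", "kb:batch1"],
+...     ],
+...     prefixes={"kb": "http://example.com/kb#"},
+...     context={
+...         "myonto": "http://example.com/myonto#",
+...         "fromBatch": {"@id": "myonto:fromBatch", "@type": "@id"},
+...     },
+... )
+>>> ts = Triplestore("rdflib")
+>>> td.save(ts)  # doctest: +SKIP
+```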


 User-defined resource types
@@ -201,15 +234,18 @@ Instead, the list of available resource types should be stored and retrieved fro
-[Documenting a resource]: ../documenting-a-resource
 [With custom context]: #with-custom-context
 [User-defined keywords]: #user-defined-keywords
+[Python dict]: #python-dict
 [resource types]: ../introduction#resource-types
+[Documenting a resource]: ../documenting-a-resource
 [predefined prefixes]: ../prefixes/
 [predefined keywords]: ../keywords/
+[default context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
 [save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict
 [as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld
 [save_datadoc()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc
+[TableDoc]: ../../api_reference/dataset/tabledoc/#tripper.dataset.tabledoc.TableDoc
 [TableDoc.parse_csv()]: ../../api_reference/dataset/tabledoc/#tripper.dataset.tabledoc.TableDoc.parse_csv
 [default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
diff --git a/docs/figs/semdata.png b/docs/figs/semdata.png
new file mode 100644
index 0000000000000000000000000000000000000000..1645a5f00df7332b0c3c444b7f1ac8abc3baba6b
GIT binary patch
literal 54829
[... binary content of docs/figs/semdata.png (54829 bytes) omitted ...]
z&6*6p`_5=Uo^QP=i9o8y6DuaFaPUHTdEr#T)h8F`)OPMCmq5i-B;BuzVd!2G3WF~t z1^82>0ZRLX*|d*@uDVYP^-f3g6Q`{|?Oy7BHUhkG(+9RYUrSE}QnISag>r}|O$+nv_GtJ0p;wb~x{cnA7Qvf(RlJBi0I zWx0Rz8MLb}Eqy@hV720CX~~V0SAGNKqZA}EDwtgVWLzn3ayuI1=5g{%XZ|Yjx>&zr zi&iLA<$C`%R=gq8ghkJW;dI%)zqP4LMC5}8SDXtfV-$0^CA(A2*=BIhX%LlBRQ9vtptScWWU5B|A@S5ypY$uoE)g&e2R8{?E zl*xF0w8fWm-*`Dll#7XGkXAbJj%e9r_k4Xl)dsf9Htpqun?u-5e9do$z0O1dzzcaY z@$MqxcwTSYLz0P^&bU<0-RY!DAU=~h6VoeGFTY3p<^2LD&Znh?6eUrxW&_gPssT(| zXso-7v3!S52hFSJ=Ado)H=E+OP?V*iG^h&d)qBP#-~3QO-P1o41n$oj6Yy>&G6me} z$0R#Jt;?W++8-;uY=ncRzkW3HNbGal-E8~OVZH-nhL19+h;Sl-q+W`1Hr+P zzlE9%6UHxLG@m--9}d~XQg_oH{m`c&y&** zRq%U&KP6Q&QM!jKuBu4>^AD9l^XQw-Uu;tPXS8#tlnM*6VNOor^c1h~uG%u{`!9w| zdH{qKG_a8U={a7QoVR0bH(v@Q)(~OhOj_LF!t!)C$x~)k@UEl`oeWu8WU<4yq}%E&ZWb#MNMot#v+^Lhp~>UzWh;M%!oC<= zbp|s@+maUp=)(8_`_t;6dO3oppI`*}MMuKfuIr1c2b!n>)f1jh*}KobnD4I5N#t5w z*UiE=565e1YP3{}r275E38dIq(Cd+&!?%rgz^>Kbj z?Iw3dZe3^O+V17m_I(4jmTM6CZZ`hdO@n8iE6lk_4;TqXb)r9-%k@Vg-{^JEFEFyz zwK-89v@y`pe)_y2v1~ScST(nGCzs*E)F2U{S0|;?nE;u|N(v+zkY3n%j2EX1t0+%| zf0$W~yG)yki<}P1Xg#tT6Fse&{k2>k9<-nQmP7PsuVdU}$uux@SQTUNzyLgEC-~v% zxY#U8b2V7%1p&OdjDURutp)tLp)=EQbuOuo+h9GdNbxTpQ=HEgpLdnFiG6j|dYl3*(B43y$5aGS<3gw=T>*Dq?mgO_AO}JoX)U5M9#h2H`Dh$U`^ia2YUA9qZ<2fQq<3?F*70+$nL8P@VGdp-8wRBOjK3Lz} z7)n_E=5F5n)cF$FgOriJ>ce2gZG4g!Tp?}h)*R?^wgb$AaI*rW7b`s@;&*||5@n3v z_i7WQkX2*(;I**OOS4JM!oKo7HZB(Fph_k`AfDRH07S4WFS@}_@TgeR7(t^1HA60o z_qWiw6pqE@;Z4l z-l&gH*IPr8(EWGL1+c`;NIq)(np)wq{Sn5#T6$6`;dTJj)`yU3thl^p+ss|pb8M(M zuRa1;lo*ZBUE^}jcXVTc2*ow#r&o6LQ&?`_ zDy#+(H40n;x4bICNwFV~0H)OM*6wJ5@ zONt2jT{%AcPxePn39SScJTF&9%YwE@!wNE`#<--5?XM5rNsnb(Rkb?4YJ4&)I23m- z8;Gi~a#Do#(}?;yf{~+fQ}wD&Wt|=A#ay|+F#BGmmjWde_Q;pmGOl}C!|f}et2<-) zZ4DxOLZ45@tn9I69d7mg^^s#=gJerf zFjX#p&aQCme_@E-4OWq8>{^+IrfSE?E#+QobFaYT>tC|3;T z!8G;*te6Cy3_QyZE$WPDm+!tJIdQ)ubWNl7YR|QGQm(4Xz1v($Mj#F2VqYDTBVn#D zJ?wzEF|v8N-+$yuQUCIpT12rwZcpDk{Ae{Np(eoBX8=uN^thDmMgRU8`VJK%z{H3_ zy>=2+<jcFCyZpv;0@K6H-Sy42XR za}RG}){Mvb>18_&of`=%c^=b{s)JYG(qjy(YkIy%&hCr_%4d52`QZwo2i==YA!m2z z+94O;98gybWQJG5y5$NRa>e1!b7(KYqHD~)=zf@W8j50MZ(qoUS-~ky6L@O%|1bOF9dD@6#E55HTSGM z*DQsv(8ewi7=BrLWJHbm5uuE_Yu*J$m0I@FlLCxjy(Sc?`J`Rs)+bJa zKdq+@;#43cT8mpRnWp11XAr?%ms*=ruEUNEbDT6>cc+pe3ejM7TfV6s^7;hWK>7+bx{aC`h~;p&x1->bBsxJ z9PvY^cxg=IXth|^3*sWmBf1r4#45?~2RQsDIbY@P*X zBN~CA8WIqIe&|Wscm8k>ui0odMI%)n7Wes9umr4%<0n~zAhL%kQ0qn&KWC?6A;ml} z!Up$!rvAiGJc7FU8KMf!A`1%-zrMVDoX@{s9n`C|(r~(LQQRYXooYo`?THe4AK-fd zogb_lWQmiB8~VXL&#F^23!%J=0Cf127X&cnw>em6eb!tvWYPj19efgoa`#WyvKwx_ zvfLt{u;-OCxbx450& z_TaT((elbYSf^e2;dBc?`JBD4Xr@tSmP4%DRn!blVelq?ozJKBXVw#f)FM!<=VSkR z{IW>OW@Pe;1Xuw#Jg2zbebG0?a>MmHig^Ro$?>x|J5MMI^5Zx*PFY0!d~NOhHS7IN zngVdC{4u`v8-YQ{LkkqCTF#>%z#Yz(sOCNRG4CD?G!VrY$+W{#R^`XGNghCuqvXoZ zkgNPhi8F#+4DuPISWGn}miSa)y)+PstLpdwi8~`GiHU=Au(NYhF8#(B4h91qrXM+u z1iJ##Xn9tG0@NawKZ5EX`LoxuM|(a8<@ngC24*{i5rJCch7UP;Umzhb|E&ljLnP{d zI|5My60VaV{HJ43e4S$0|M{4^PZ;BWK6bs{()!Wwp)tP=0w-?`h5s};f9UF33w&fN zF|Z{0bRK}p;*m&xB2LQ$<~H} zA#Fku2x!IT_@Gy%l%xE1rIgCco7gN)kI$5)O2NoTxvzX+cIB>}5yUbl&Bc;CG5(sJ z7@*pykayi3Fn23vB} z;Nzg61Jn0kzR=T6JO7Cy^mn1;yI*UD%16HXVqc;jl#e{N{Pe&1PHc#Ft3R!80(K~nWFJpQw#kdj756JkCIF20Rw-%~ zIxj_>56HdS%iLDhZCvQzt87#i(z{v8H1pO(oaK0rwgtjem|0A;w$wV@fTGJQ`*j)_ zbK5B?REPUw_moQ`t<*=_xq?*FLkt;R1y6!S(N1~Or-$S%|-Zhp!z;%Uwul>9o!pKYYPk68Qi)X{7hKcI+ zY=xX(CfibEma8%~uNN+}r4kh#`sbLXJsyDdbJ8E`pLxJa_x>qw$J`Ec)2NW|gcQ+x;tsy!{-d>_q*7+%jF4WzJY0RHHo4?Z)nGO}4@UNok)NgWW%^Q|-JVSZ zK3Q{Zi@FP8My~8z3gXmjz6h6G?!C%Il#S+=KM{a?84-F0|IF8UWZ*7yO^4Vv?I~eO zs|C=Au!3d(n9LkQtUOX3*4)MQ@Abuqj|>tT($H?*XZ*|_xC)Kuu zB{3g5aCX?lwQ0;~3gdZHdZx^{QDKp*?94d#$kWipVloX&D!5JWgtu}@WLDlVVF8oD 
zE*AWygR$$DDuQh7S-W}gdeyJL9?Z;X28Q{iZH(iIcShyMc@=x~Y`s)x@0IUgsW1Ey z0WAzW34(VPQfFhuH0xZ5a1WFdLs#33ri7Kt3s<<*<(2k-PMNOHI^P~(?NO-i!lu%B zk&l=fL!FnW33z9SP}iqhQ*|=&QiAO<0IRfi=zb*5UVe24k&L2)>FPrrEi7p$%XvFJFwZ^*zDN&LaR4rGZ z5_{z12+`9Y?xla0`57Li#lBGoZk#0O_L!XnJ65>NNfSSE7u$&I2dyVyMRuTRrB9M8)8|28- zp-#$}X=97L56X*)kN#$5wKZKfFx!S4V?Se(hGRal)$&evh|C{lF{oh*r`9b(Q;xdf zL2;MAL!3Xtt*NcFSdLBvB7*NvWtU%U@h#;}$4SRy*OQWEe-$S()o6#eAd70jzwU6Z zh%7+O?yZhVeLc>dh`*fE)X+&yaR^Mz%uHG-_pK?Gpk!xQtDQKDzCajq|JvMZ-#HZ* zUg}Hl39?#eibT#SEDGNXDs=qe#E2P!U|rIH+XUV<%^x50<}}yMy~QN;AIROLr#;zI z>yk-R1@~#}KveI#E52cvl__f-_TygpFcEx*u*tnHXZ8aMyC=WTk=de|s~ch5b?Ns~QHLGkycOWV$624wo=) zKdDe)JBl!FNT79;`~6wK4xIo|I@ga|Jq^>=0Ab=U z-bBpK=W;Wd&br{cgR$Hhiw+R)wC#TTH^j3BxhD38bl@c2)D49dFMxhiVsg-8;uU{= z(Ri+B^!Q!e!t(0W)VzuWBt~S1LC<4?%&71Cu!(`9*2@%TyD~G&Z@)e;vM5z0dVdIO z_|T9kJ^o^-y&oAnbmM;KsL$;5U`KY)J7J5C zdTNScU?DV2dGy^1%ESI#k3-ms0F3+*8`m|92;~*MkmAw@?yGlB zH(*{TUsaoTW_D$J0M3lvyl0ATdv5zoUJ3K9In6cy>eC9>HCgRw6^+lfH(iDw)R7zk zcHiAACBwM?rvt1(!xOSHDon2gnQT{a zMZitH*6FbD>Y-1yiEC-DghoWRA(*A2kfWibkkSLnojzl z*70a&$JJi5-yR)LkY%KAuQ)%q`xnG=5D2`g??2i(<+_AhtT?O18Hj6SooD3@@aDq- ze}&VjefoHh9&7I~H0zfj^19G&Y=ss*Z1D}#G{inW%y`jorLH6KF_=p6s88Kyt-ZBV ztCM+jVOY(6ODSEPd8t6iXkz~MGPUtaHU71n$KK$qyTZtHjZpeD4>en&$W=^(pD$hq zT07{^X8G^Z7dUey!LO$=IaJeBnOW#^q_*;P6y!$7o;)HgQ>`aX@(S7E1v}$oL_ckB zUVGm%d}}0T*6e3Qh;=r0%e+15C-kgzjrKi0!L|KlJWa#Nb>uL8N^c;%gv@`-#Umso z;q4I0Y*uq>eLM1j*>{?Bzo%C8{!O49zvso$30S9HaN zDWNMdCeYyr4I9AUg<<&;`1aV-RDeYggW|A!a;?Udb`|#lB)cL_yq?bERvkc%7ws%Q z%7L29tF_))vAel&Be@2>X)-s79=H3{Z2oaD*cEH`;scdH{CKw zvW3V>j)=IAPAD@QQjx=l5lbJ+_e zvHOTDr8TNdvcn$`C7oIWd>nzizEEb1np>yan9I8(FVWhQ^BH{H2)bd_L`+Eg^{(9I zGa@AFClp$_#iuo@G_5!i43(vR-L0dVyXP&`*AGps8rnj-tty1IzA^mft~Rm*REDU2 zkBE9ugwN5%Da9DIrE#w%7JUFJf%EG~);C_l9U{p=LP%n7wo3jX)8>yHN!(ZD&Yy8N zn!XlKLTh>3B>eW+{mBB0ds;dA!T4Kt4&4oSzdD}+{K9ZlA*3(+YOWmQ!9DrM%ADG` z*ITU0XNBQXIK_@ZgR6W1doesQjmds(1If7V?Hf`4e$2uAw4zZwqrR%4AbBE0ih}?S zxBjk5)r(Dd_r=m>HUnvl42)>^s|*yV6liVq z8jl*IGnlZ|VXW$~&An8+y=i`D)pVwx1nHrn2%5=^MwxPrZ@(@{^iH~ z6~2-D-DxmtEzazWMxhpVa_=Aua0W5O#Vq~`9VXZLNG~6Z#k3kQ&{Un4nqnFFlN?}e zudCcu<5IiF6apKvEUrNKGf<~RMWq(Oc&5afPiVKzvY`XD+lkZ8l1a?3@$NLahFkX` zbmx-B`>=-&`h`4$<)p=KX}=`&{r1#`zRSbuiB@v)DqzTyx7GEeqyD7;skqFIWj0MP zCG9zWuyX9AW?eScXN-cR`;Ot*;`%Q1P5Gm>2OWh!qq*J;HuYV-f*66QaCcsQKGQL* zgvn;h-UNC^hqTHX5~~_Z*s;bWGRBCy^UZP1z3yCRk15c(euCN~gZD;34vLZwuB~Ry zp{SGFn~8{YVs-qNtMk$K*c!B)^cO(UfPA|h7Hi|J)sLY!7%ti+BN8TytlLBtdLC3PR;PQ#uB_O`D+d&>_FOwt z)5@Xfko@i8k7&LBnDn5auA?TH(kFX`B|Z|9SORSX>Uo$>4?C1HG4a<3gnH@rPG0#; z){iv^EGO9QD~_hB>uiC57oPd5QGUB&vKNOR2i=I`xrD8euQBBDEaBQIUFSGCjwx}Q zczDwG0#mHn5#bT&6nRvk1yu1IED9tZA4-?6=HejnVq}?7H#-=66^hL+7aFFNcCYxo z!@S(2ULddqitl4)O)x$m%=jfXXBguFQRZrRGm4$T+Puz4P(UUC8?%mVZ|qatpu=OA z`Pkf5R`1T?&XD@#_f8-85$~n;*PSRdnxV6kRi~w*p4IXBW`>(D$(#$f%|R@wbY%; z@IEyPIO@_!Q`=@JTElBWx|0}ON6nn=pYEi1w$?XnMnfSUGOb>lp34`t>`L^MO#E;h zu+ZbDgnbqFgVuVS>%OY4Vd_UZ^wXx5Vg)9%O%H8HwVkd1u}U9d;vs8Hoh|O={oMt` zm!?auIPYyDF1@Ec&yin32FW}1@BgF!2v%*ne+oqXnnHyUJgX;Jj$ypZesG?v&rq4h z=fqC3X>}jylRmQRF~(!(X<;Zr@?YZy%~-O=>9xZL7ERi6tZncqIcCCIE_zO!S=pLe z5Nx3jUyZc7am38IcRXO5q3w!Ipru2YUz8`y6zE>PPw?+9>T(^PlHl8ME5i;y;JaA zdd>&?SGKIY>>m3WSJ6sE9Q2Gaaff0e~)*Zffge9_gH2vcVfUlTXDc}BSGn&3K7GPS+1jqvSRR4yY z?r}w&%Wf2`DoluVCG@qIy}lrBc+m!w-5TrT7N@UWQ=Ixf5pDUvmY#_3fn&zXv9h^j z?>AMA$tq&65W^Nf*>H%45P-N~$KJuazskMSZBdJq=n9e1DT_ z6qsbH9wZs!am6$?*_;+H3LLJkkXysC@6GG$eI7u0I#wOtGTcH>r|~Xd`Z4|gg#ygl zs>(LGGGCUC3_7c-*apXU!14gZbphH-hY!n?skUzcV=xs44{Zy`%rRH4a3o`K%>I}m z_x_W&H%C0bN*ip&M)+tb;bQIX&Ug-qcu@`5Pl z3%RBg4i9(Au8zN!>vyGQgACM2t<+S^ig|53x=c#-r&v 
zmmi4uhwX_LNxZ=OO{^5Y^BLV3|I}Y9q&23;8Kc^srtmCuIF#Okd(;KWxJ|kmzb*qZ z$o)}b{$Y^+MFkd2ZmhSlkT(T9qz$8xf8mPHj<702M5mBZwk_B6x~1tE%&QhoV;pM* zT)Wn8CTQ*UG_km_h#{u1DYQIAFVEZp-`B?gt+E?oUwWwnhj(D8cfHa_dGlkp z6e~3!OS}dvdg=~V8nig6h}6&N3U%0y|5^XA*q1+aZXvOaopoq=$t}U?;_!X+!GcG_ z+*eQC=P?^=r3Sh8qSM`%V^(AMY+4<@1#Ul;ya|!1P<;W>XMZjY^FXcrDwy;)BlKg2 z8nqJfF;vmfzFizjpN&6>oM8|^nU;KVYSB3#k`F3Z5~2>HX$uXz>kO#4yN?dmk{?h^4e0M!@ww}Ap6_= z*m!Y_2|~$9s_F*JvQ#L#>+x-Z$lZZ{O0`+>anz@v4*}*sSn&16GUFJMx>&Bkbu@># zI45LoJLO<4b;GA)NlzFs)Q}hf9e5z7Nr-jp86xL{ zfopNWYhxC>QPNlNhQhz2xQlFTuaBIKU_ju9Vb35CLPO*e*WtZ)+A{ick1eJNr!;`# z>DJaCj8GnZN`NbpEP*9ja_6jBXD_Q2FlkJZ5^k2;y%kQTvpr6d8L}gd8J&1?a-x%* zZ6%mXm%s0Cyzii%$VTH<_Ek&$nmclaPwG+aXx5C=Z6&>jB=5*o))|*WQ!s1jaMPL= zdJvv>8c(33527IM-QK^5+k`M9b%z_Q`=v5te>EQK0%npWV>d>RoRud|iZ^K^m(~(` z@}B{Xg)6G(m_(UTmpyigRaUkdU**)Qdf{QEh0E=T6<2T}E#*x)O4b)Jv@0XQ#&n;}riExGQsM{!p%#R0$7(H(X zT_`N!0|fQcb0Gh&o-MZ3YVGMkv54o_l-a^*GSgxEAPZ+4xv@~@beX3Yl=HS$C%1&* z{sv=T(B^Mdlct`a2`TI#P7-GgDaCY>}V^-jjdG3yesFS-q!IOY}6pz`I3T2t;MR4f2QY51Hi)SydecMa9O3 zhNf^JwUPsxc#Ye`-=cSkuI}peXan(Ky(37U?cOthA8;-m@HId3_+5`>K&9I@TBfm7zz6_|?Ob3{uW{5i|>q!e*`%-$^&nAQ@ zjAYS77o5@@4iqWucIK}+-?eLh5^?UuDNjz{@p5?`-pz%X)#h<}H(5*AfJ~uHx#sMM z&AxhUsc{k1*_?usGmYI ztAlw%_QiKKGKhgPqC#K3*15A_@+wQP- zB~?2Nv1pQ?<8VbbnV!8<`uDr3-J09$%73`}e{%b`gY|Gib^CU{N8hvme*(B=5)1l$ zWnFe=72A9yUty9i^CIr*>0@p!JI(+7HCgE_GpVIIomAy{N%6<*qVUqj{7!aQ^Yy=Gvs;btREfFp . + @prefix dcat: . + @prefix dcterms: . + @prefix emmo: . + @prefix oteio: . + @prefix owl: . + @prefix rdfs: . + @prefix skos: . + @prefix xsd: . + + a owl:Class ; + rdfs:subClassOf [ a owl:Restriction ; + owl:onClass ; + owl:onProperty emmo:EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e ; + owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ], + [ a owl:Restriction ; + owl:onClass ; + owl:onProperty emmo:EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e ; + owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ], + [ a owl:Restriction ; + owl:onClass ; + owl:onProperty emmo:EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e ; + owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ], + [ a owl:Restriction ; + owl:onClass ; + owl:onProperty emmo:EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e ; + owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ], + [ a owl:Restriction ; + owl:onClass ; + owl:onProperty emmo:EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e ; + owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ], + emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a ; + skos:prefLabel "SEMImage"@en ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "SEM image with elemental mappings. Represented as a stack of elemental mapping\\nfollowed by the image formed by the back-scattered electrons (BSE).\\nSet `nelements=0` if you only have the back-scattered image.\\n"@en ; + oteio:hasURI "http://onto-ns.com/meta/matchmaker/0.2/SEMImage"^^xsd:anyURI . + + a dcat:Dataset, + , + emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a ; + dcterms:creator "Sigurd Wenner" ; + dcterms:description "Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound." 
+ dcterms:title "SEM image of cement" ;
+ dcat:contactPoint "Sigurd Wenner " ;
+ dcat:distribution [ a dcat:Distribution ;
+ "http://sintef.no/dlite/parser#sem_hitachi" ;
+ dcat:downloadURL "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif" ;
+ dcat:mediaType "image/tiff" ] ;
+ dcat:inSeries <https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001> ;
+ emmo:EMMO_f702bad4_fc77_41f0_a26d_79f6444fd4f3 <https://he-matchmaker.eu/material/concrete1> ;
+ oteio:hasDatamodel "http://onto-ns.com/meta/matchmaker/0.2/SEMImage" ;
+ oteio:hasDatamodelStorage "https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml" .
+
+ <https://he-matchmaker.eu/sample/SEM_cement_batch2/77600-23-001> a dcat:Dataset,
+ emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a,
+ chameo:Sample ;
+ dcterms:title "Series for SEM images for sample 77600-23-001." .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data> a owl:Class ;
+ rdfs:subClassOf [ a owl:Restriction ;
+ owl:hasValue <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data_dimension0> ;
+ owl:onProperty emmo:EMMO_0a9ae0cb_526d_4377_9a11_63d1ce5b3499 ],
+ [ a owl:Restriction ;
+ owl:onProperty emmo:EMMO_e5a34647_a955_40bc_8d81_9b784f0ac527 ;
+ owl:someValuesFrom emmo:EMMO_ac9e518d_b403_4d8b_97e2_06f9d40bac01 ],
+ emmo:EMMO_28fbea28_2204_4613_87ff_6d877b855fcd,
+ emmo:EMMO_50d6236a_7667_4883_8ae1_9bb5d190423a ;
+ skos:prefLabel "Data"@en ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Image data - a stack of images for each channel"@en ;
+ oteio:datasize "4"^^xsd:nonNegativeInteger .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data_dimension0> a emmo:EMMO_b4c97fa0_d82c_406a_bda7_597d6e190654 ;
+ skos:prefLabel "data_dimension0"@en ;
+ emmo:EMMO_23b579e1_8088_45b5_9975_064014026c42 "channels"^^xsd:string ;
+ emmo:EMMO_499e24a5_5072_4c83_8625_fe3f96ae4a8d <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data_dimension1> ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Number of channels."@en .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data_dimension1> a emmo:EMMO_b4c97fa0_d82c_406a_bda7_597d6e190654 ;
+ skos:prefLabel "data_dimension1"@en ;
+ emmo:EMMO_23b579e1_8088_45b5_9975_064014026c42 "height"^^xsd:string ;
+ emmo:EMMO_499e24a5_5072_4c83_8625_fe3f96ae4a8d <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data_dimension2> ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Number of pixels along the image height."@en .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#data_dimension2> a emmo:EMMO_b4c97fa0_d82c_406a_bda7_597d6e190654 ;
+ skos:prefLabel "data_dimension2"@en ;
+ emmo:EMMO_23b579e1_8088_45b5_9975_064014026c42 "width"^^xsd:string ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Number of pixels along the image width."@en .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#labels> a owl:Class ;
+ rdfs:subClassOf [ a owl:Restriction ;
+ owl:hasValue <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#labels_dimension0> ;
+ owl:onProperty emmo:EMMO_0a9ae0cb_526d_4377_9a11_63d1ce5b3499 ],
+ [ a owl:Restriction ;
+ owl:onProperty emmo:EMMO_e5a34647_a955_40bc_8d81_9b784f0ac527 ;
+ owl:someValuesFrom emmo:EMMO_5f334606_f67d_4f0e_acb9_eeb21cb10c66 ],
+ emmo:EMMO_28fbea28_2204_4613_87ff_6d877b855fcd,
+ emmo:EMMO_50d6236a_7667_4883_8ae1_9bb5d190423a ;
+ skos:prefLabel "Labels"@en ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "The label of each channel. For elemental mapping this should be the chemical symbol of the element or BSE for the back-scattered image."@en .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#labels_dimension0> a emmo:EMMO_b4c97fa0_d82c_406a_bda7_597d6e190654 ;
+ skos:prefLabel "labels_dimension0"@en ;
+ emmo:EMMO_23b579e1_8088_45b5_9975_064014026c42 "channels"^^xsd:string ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Number of channels."@en .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#metadata> a owl:Class ;
+ rdfs:subClassOf emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a,
+ emmo:EMMO_50d6236a_7667_4883_8ae1_9bb5d190423a ;
+ skos:prefLabel "Metadata"@en ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Reference to data model for SEM metadata."@en .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#pixelheight> a owl:Class ;
+ rdfs:subClassOf [ a owl:Restriction ;
+ owl:onClass emmo:Metre ;
+ owl:onProperty emmo:EMMO_bed1d005_b04e_4a90_94cf_02bc678a8569 ;
+ owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ],
+ emmo:EMMO_50d6236a_7667_4883_8ae1_9bb5d190423a,
+ emmo:EMMO_52fa9c76_fc42_4eca_a5c1_6095a1c9caab ;
+ skos:prefLabel "Pixelheight"@en ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Height of each pixel."@en ;
+ oteio:datasize "8"^^xsd:nonNegativeInteger .
+
+ <http://onto-ns.com/meta/matchmaker/0.2/SEMImage#pixelwidth> a owl:Class ;
+ rdfs:subClassOf [ a owl:Restriction ;
+ owl:onClass emmo:Metre ;
+ owl:onProperty emmo:EMMO_bed1d005_b04e_4a90_94cf_02bc678a8569 ;
+ owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger ],
+ emmo:EMMO_50d6236a_7667_4883_8ae1_9bb5d190423a,
+ emmo:EMMO_52fa9c76_fc42_4eca_a5c1_6095a1c9caab ;
+ skos:prefLabel "Pixelwidth"@en ;
+ emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Width of each pixel."@en ;
+ oteio:datasize "8"^^xsd:nonNegativeInteger .
+
+ <https://he-matchmaker.eu/data/sem/SEM_cement_batch2> a dcat:Dataset,
+ <https://w3id.com/emmo/domain/sem/0.1#SEMImageSeries>,
+ emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a ;
+ dcterms:creator "Sigurd Wenner" ;
+ dcterms:description "..." ;
+ dcterms:title "Nested series of SEM images of cement batch2" ;
+ dcat:contactPoint "Sigurd Wenner " ;
+ dcat:distribution [ a dcat:Distribution ;
+ dcat:downloadURL "sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2" ;
+ dcat:mediaType "inode/directory" ] .
+
+ <https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001> a dcat:Dataset,
+ <https://w3id.com/emmo/domain/sem/0.1#SEMImageSeries>,
+ emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a ;
+ dcterms:creator "Sigurd Wenner" ;
+ dcterms:description "Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound." ;
+ dcterms:title "Series of SEM image of cement sample 77600" ;
+ dcat:contactPoint "Sigurd Wenner " ;
+ dcat:distribution [ a dcat:Distribution ;
+ dcat:downloadURL "sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001" ;
+ dcat:mediaType "inode/directory" ] ;
+ dcat:inSeries <https://he-matchmaker.eu/data/sem/SEM_cement_batch2> .
+
+ ```
+
+
+### find
+Search the triplestore for documented resources.
+Running `datadoc find --help` will show the following help message:
+
+```
+usage: datadoc find [-h] [--type TYPE]
+ [--criteria KEYWORD=VALUE [KEYWORD=VALUE ...]]
+ [--output FILENAME] [--format {iris,json,turtle,csv}]
+
+options:
+ -h, --help show this help message and exit
+ --type TYPE, -t TYPE Either a resource type (ex: "dataset", "distribution",
+ ...) or the IRI of a class to limit the search to.
+ --criteria KEYWORD=VALUE [KEYWORD=VALUE ...], -c KEYWORD=VALUE [KEYWORD=VALUE ...]
+ One or more additional matching criteria for resources
+ to find. Only resources with the given KEYWORD and
+ VALUE will be matched. The match is exact.
+ --output FILENAME, -o FILENAME
+ Write matching output to the given file. The default
+ is to write to standard output.
+ --format {iris,json,turtle,csv}, -f {iris,json,turtle,csv}
+ Output format to list the matched resources. The
+ default is to infer from the file extension if
+ --output is given. Otherwise it defaults to "iris".
+```
+
+The `--type` and `--criteria` options provide search criteria.
+The `--type` argument can be any of the recognised [resource types] to limit the search to.
+Alternatively, it may be the IRI of a class.
+This limits the search to only resources that are individuals of this class.
+
+The `--output` option allows writing the matching output to a file instead of standard output.
+
+The `--format` option controls how the search result should be presented.
+The following formats are currently available:
+
+- **iris**: Return the IRIs of matching resources.
+- **json**: Return a JSON array with documentation of matching resources.
+- **turtle**: Return a turtle representation of matching resources.
+
+ *Note*: In case the matching resources are datasets with a `datamodel` keyword, the serialised data model will also be included in the turtle output.
+
+- **csv**: Return a CSV table with the matching resources.
+
+!!! example "Examples"
+
+ For all the examples below, we use the `--parse` option to pre-load the triplestore from the `kb.ttl` file that we generated in the previous example.
+
+ **Ex 1**: List IRIs of all datasets:
+
+ ```shell
+ $ datadoc --parse=kb.ttl find --type=dataset
+ https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001
+ https://he-matchmaker.eu/sample/SEM_cement_batch2/77600-23-001
+ https://he-matchmaker.eu/data/sem/SEM_cement_batch2
+ https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001
+ ```
+
+ **Ex 2**: List IRIs of all samples (individuals of `chameo:Sample`):
+
+ ```shell
+ $ datadoc --parse=kb.ttl find --type=chameo:Sample
+ https://he-matchmaker.eu/sample/SEM_cement_batch2/77600-23-001
+ ```
+
+ **Ex 3**: List IRIs of all resources with a given title:
+
+ ```shell
+ $ datadoc --parse=kb.ttl find --criteria title="Series of SEM image of cement sample 77600"
+ https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001
+ ```
+
+ **Ex 4**: List all sample individuals as JSON:
+
+ ```shell
+ $ datadoc --parse=kb.ttl find --type=chameo:Sample --format=json
+ ```
+
+ ```json
+ [
+ {
+ "@id": "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001",
+ "@type": [
+ "http://www.w3.org/ns/dcat#Dataset",
+ "https://w3id.com/emmo/domain/sem/0.1#SEMImageSeries",
+ "https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a"
+ ],
+ "creator": "Sigurd Wenner",
+ "description": "Back-scattered SEM image of cement sample 77600, polished with 1 \u00b5m diamond compound.",
+ "title": "Series of SEM image of cement sample 77600",
+ "contactPoint": "Sigurd Wenner ",
+ "distribution": {
+ "@id": "_:nac2552b949a94ef391080807ca2a02e4b14",
+ "@type": "http://www.w3.org/ns/dcat#Distribution",
+ "downloadURL": "sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001",
+ "mediaType": "inode/directory"
+ },
+ "inSeries": "https://he-matchmaker.eu/data/sem/SEM_cement_batch2"
+ }
+ ]
+ ```
+
+
+
+
+[resource types]: ../../dataset/introduction/#resource-types
diff --git a/mkdocs.yml b/mkdocs.yml
index 815211d4..0212137c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,7 +1,7 @@
 site_name: "Tripper"
 site_description: Documentation for the Tripper package
 site_url: https://EMMC-ASBL.github.io/tripper
-copyright: Copyright © 2022 SINTEF
+copyright: Copyright © 2022-2025 SINTEF

 theme:
 name: material
@@ -39,6 +39,7 @@ markdown_extensions:
 - admonition
 - attr_list
 - pymdownx.highlight
+ - pymdownx.details
 - pymdownx.superfences
 - pymdownx.inlinehilite
 - pymdownx.tabbed:
@@ -85,6 +86,8 @@ nav:
 - Home: index.md
 - Tutorial: tutorial.md
 - Backend discovery: backend_discovery.md
+ - Tools:
+ - datadoc: tools/datadoc.md
 - Data documentation:
 - Introduction: dataset/introduction.md
 - Documenting a resource: dataset/documenting-a-resource.md
diff --git a/tests/input/semdata-context.json b/tests/input/semdata-context.json
new file mode 100644
index 00000000..74112f5f
--- /dev/null
+++ b/tests/input/semdata-context.json
@@ -0,0 +1,18 @@
+{
+ "@context": {
+ "sem": "https://w3id.com/emmo/domain/sem/0.1#",
+ "semdata": "https://he-matchmaker.eu/data/sem/",
+ "sample":
"https://he-matchmaker.eu/sample/", + "mat": "https://he-matchmaker.eu/material/", + "dm": "http://onto-ns.com/meta/characterisation/0.1/SEMImage#", + "parser": "http://sintef.no/dlite/parser#", + "gen": "http://sintef.no/dlite/generator#", + "micro": "https://w3id.com/emmo/domain/microstructure/0.3#" + }, + + "fromSample": { + "@id": "micro:fromSample", + "@type": "@id" + } + +} diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv index 4d7a78c6..3f764831 100644 --- a/tests/input/semdata.csv +++ b/tests/input/semdata.csv @@ -1,4 +1,4 @@ -@id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf +@id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionFor semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1 semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;; semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;; diff --git a/tripper/context/0.2/context.json b/tripper/context/0.2/context.json index b2837617..cf938e97 100644 --- a/tripper/context/0.2/context.json +++ b/tripper/context/0.2/context.json @@ -96,6 +96,10 @@ "@id": "dcat:inSeries", "@type": "@id" }, + "isDescriptionFor": { + "@id": "emmo:EMMO_f702bad4_fc77_41f0_a26d_79f6444fd4f3", + "@type": "@id" + }, "isInputOf": { "@id": "emmo:EMMO_1494c1a9_00e1_40c2_a9cc_9bbf302a1cac", "@type": "@id" diff --git a/tripper/dataset/dataaccess.py b/tripper/dataset/dataaccess.py index dbe0ee25..78a3c923 100644 --- a/tripper/dataset/dataaccess.py +++ b/tripper/dataset/dataaccess.py @@ -248,5 +248,10 @@ def load( # pylint: disable=no-member except (dlite.DLiteProtocolError, dlite.DLiteIOError): pass + except Exception as exc: + raise IOError( + f"cannot access dataset '{iri}' using scheme={scheme}, " + f"location={location} and optins={p.query}" + ) from exc raise IOError(f"Cannot access dataset: {iri}") diff --git a/tripper/dataset/datadoc.py b/tripper/dataset/datadoc.py index 8b7b0eec..a241ccf8 100644 --- a/tripper/dataset/datadoc.py +++ b/tripper/dataset/datadoc.py @@ -1,8 +1,12 @@ """A script for data documentation.""" +# pylint: disable=too-many-branches + import argparse import io import json +import os +from pathlib import Path from tripper import Triplestore from tripper.dataset import ( @@ -17,19 +21,27 @@ def subcommand_add(ts, args): """Subcommand for populating the triplestore""" - if args.yamlfile: - save_datadoc(ts, 
args.yamlfile) + infile = Path(args.input) + extension = args.input_format if args.input_format else infile.suffix + fmt = extension.lower().lstrip(".") - if args.table: + if fmt in ("yml", "yaml"): + save_datadoc(ts, infile) + elif fmt in ("csv",): + kw = {} + if args.csv_options: + for token in args.csv_options: + option, value = token.split("=", 1) + kw[option] = value td = TableDoc.parse_csv( - args.table, context=get_jsonld_context(args.context) + infile, context=get_jsonld_context(args.context), **kw ) td.save(ts) + else: + raise ValueError(f"Unknown input format: {fmt}") - save_datadoc(ts, args.yamlfile) - - if args.serialize: - ts.serialize(args.serialize, format=args.sformat) + if args.dump: + ts.serialize(args.dump, format=args.format) def subcommand_find(ts, args): @@ -40,30 +52,42 @@ def subcommand_find(ts, args): kwargs = {} iris = search_iris(ts, type=args.type, **kwargs) + # Infer format + if args.format: + fmt = args.format.lower() + elif args.output: + fmt = Path(args.output).suffix.lower().lstrip(".") + else: + fmt = "iris" + # Create output - if args.format == "iris": + if fmt in ("iris", "txt"): s = "\n".join(iris) - elif args.format == "json": + elif fmt == "json": s = json.dumps([load_dict(ts, iri) for iri in iris], indent=2) - elif args.format == "turtle": + elif fmt in ("turtle", "ttl"): ts2 = Triplestore("rdflib") for iri in iris: d = load_dict(ts, iri) save_dict(ts2, d) s = ts2.serialize() - elif args.format == "csv": + elif fmt == "csv": dicts = [load_dict(ts, iri) for iri in iris] td = TableDoc.fromdicts(dicts) with io.StringIO() as f: td.write_csv(f) s = f.getvalue() else: - raise ValueError(args.format) + raise ValueError(f"Unknown format: {fmt}") - print(s) + if args.output: + with open(args.output, "wt", encoding="utf-8") as f: + f.write(s + os.linesep) + else: + print(s) -def main(): +def main(argv=None): """Main function.""" parser = argparse.ArgumentParser( description=( @@ -73,64 +97,101 @@ def main(): ), ) - subparsers = parser.add_subparsers(required=True, help="subcommand help") + subparsers = parser.add_subparsers(required=True, help="Subcommands:") # Subcommand: add - parser_add = subparsers.add_parser("add", help="add help") - parser_add.set_defaults(func=subcommand_add) - parser_add.add_argument( - "--context", - help="Path or URL to custom JSON-LD context. Used with `--table`.", + parser_add = subparsers.add_parser( + "add", + help="Populate the triplestore with data documentation.", ) + parser_add.set_defaults(func=subcommand_add) parser_add.add_argument( - "--yamlfile", - help="Path or URL to YAML file to add to the triplestore.", + "input", + help=( + "Path or URL to the input the triplestore should be populated " + "from." + ), ) parser_add.add_argument( - "--table", help="Path to table to populate the triplestore from." + "--input-format", + "-i", + choices=["yaml", "csv"], + help=( + "Input format. By default it is inferred from the file " + "extension of the `input` argument." + ), ) parser_add.add_argument( - "--tformat", + "--csv-options", + action="extend", + nargs="+", + metavar="OPTION=VALUE", help=( - "Used with `--table`. Format of the table to load. " - "Only csv is currently supported." + "Options describing the CSV dialect for --input-format=csv. " + "Common options are 'dialect', 'delimiter' and 'quotechar'." 
),
 )
 parser_add.add_argument(
- "--serialize",
+ "--context",
+ help="Path or URL to custom JSON-LD context for the input.",
+ )
+ parser_add.add_argument(
+ "--dump",
+ "-d",
 metavar="FILENAME",
- help="File to serialise the populated triplestore to.",
+ help="File to dump the populated triplestore to.",
 )
 parser_add.add_argument(
- "--sformat",
+ "--format",
+ "-f",
 default="turtle",
- help='Format to use with `--serialize`. Default is "turtle".',
+ help='Format to use with `--dump`. Default is "turtle".',
 )

 # Subcommand: find
 parser_find = subparsers.add_parser(
- "find", help="Find IRIs of resources in the triplestore."
+ "find", help="Find documented resources in the triplestore."
 )
 parser_find.set_defaults(func=subcommand_find)
 parser_find.add_argument(
 "--type",
 "-t",
- help="The type of resources to find.",
+ help=(
+ 'Either a resource type (ex: "dataset", "distribution", ...) '
+ "or the IRI of a class to limit the search to."
+ ),
 )
 parser_find.add_argument(
 "--criteria",
 "-c",
 action="extend",
 nargs="+",
- metavar="KEY=VALUE",
- help=("Matching criteria for resources to find. "),
+ metavar="KEYWORD=VALUE",
+ help=(
+ "One or more additional matching criteria for resources to find. "
+ "Only resources with the given KEYWORD and VALUE will be matched. "
+ "The match is exact."
+ ),
+ )
+ parser_find.add_argument(
+ "--output",
+ "-o",
+ metavar="FILENAME",
+ help=(
+ "Write matching output to the given file. The default is to "
+ "write to standard output."
+ ),
 )
 parser_find.add_argument(
 "--format",
 "-f",
 default="iris",
 choices=["iris", "json", "turtle", "csv"],
- help="Output format to list the matched resources.",
+ help=(
+ "Output format to list the matched resources. The default is "
+ "to infer from the file extension if --output is given. "
+ 'Otherwise it defaults to "iris".'
+ ),
 )

 # General: options
@@ -138,11 +199,16 @@ def main():
 "--backend",
 "-b",
 default="rdflib",
- help="Triplestore backend to use.",
+ help=(
+ 'Triplestore backend to use. Defaults to "rdflib" - an '
+ "in-memory rdflib triplestore, that can be pre-loaded with "
+ "--parse."
+ ),
 )
 parser.add_argument(
- "--base_iri",
- help="Base IRI of the triplestore (seldom needed).",
+ "--base-iri",
+ "-B",
+ help="Base IRI of the triplestore.",
 )
 parser.add_argument(
 "--database",
@@ -160,12 +226,20 @@
 help="Load triplestore from this location.",
 )
 parser.add_argument(
- "--pformat",
- "-f",
+ "--parse-format",
+ "-F",
 help="Used with `--parse`. 
Format to use when parsing triplestore.", ) + parser.add_argument( + "--prefixes", + "-P", + action="extend", + nargs="+", + metavar="PREFIX=URL", + help="Namespace prefixes to add to bind to the triplestore.", + ) - args = parser.parse_args() + args = parser.parse_args(argv) ts = Triplestore( backend=args.backend, @@ -174,7 +248,12 @@ def main(): package=args.package, ) if args.parse: - ts.parse(args.parse, format=args.pformat) + ts.parse(args.parse, format=args.parse_format) + + if args.prefixes: + for token in args.prefixes: + prefix, ns = token.split("=", 1) + ts.bind(prefix, ns) # Call subcommand handler args.func(ts, args) diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index 774b49b9..7fd8b1c4 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -174,7 +174,8 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None: # Save statements and mappings statements = get_values(dct, "statements") statements.extend(get_values(dct, "mappings")) - ts.add_triples(statements) + if statements: + ts.add_triples(statements) # Save data models datamodels = get_values(dct, "datamodel") @@ -325,8 +326,8 @@ def get_values( values.extend(val) elif val: values.append(val) - for v in data.values(): - if isinstance(v, (dict, list)): + for k, v in data.items(): + if k != "@context" and isinstance(v, (dict, list)): values.extend(get_values(v, key)) elif isinstance(data, list): for ele in data: @@ -370,8 +371,9 @@ def get_jsonld_context( if context: for token in context: if isinstance(token, str): - r = requests.get(token, allow_redirects=True, timeout=timeout) - ctx.update(json.loads(r.content)["@context"]) + with openfile(token, timeout=timeout, mode="rt") as f: + content = f.read() + ctx.update(json.loads(content)["@context"]) elif isinstance(token, dict): ctx.update(token) else: @@ -723,10 +725,10 @@ def as_jsonld( v[i] = expand_iri(e, all_prefixes) elif isinstance(e, dict) and k in nested: v[i] = as_jsonld( - e, k, _entryid=_entryid, prefixes=prefixes + e, k, _entryid=_entryid, prefixes=all_prefixes ) elif isinstance(v, dict) and k in nested: - d[k] = as_jsonld(v, k, _entryid=_entryid, prefixes=prefixes) + d[k] = as_jsonld(v, k, _entryid=_entryid, prefixes=all_prefixes) return d @@ -857,8 +859,8 @@ def search_iris(ts: Triplestore, type=None, **kwargs) -> "List[str]": Arguments: ts: Triplestore to search. - type: Search for entries that are individuals of the class with - this IRI. + type: Either a [resource type] (ex: "dataset", "distribution", ...) + or the IRI of a class to limit the search to. kwargs: Match criterias. Returns: @@ -883,6 +885,9 @@ def search_iris(ts: Triplestore, type=None, **kwargs) -> "List[str]": search_iris( ts, contactPoint="John Doe", fromSample=SAMPLE.batch2/sample3 ) + + SeeAlso: + [resource type]: https://emmc-asbl.github.io/tripper/latest/dataset/introduction/#resource-types """ crit = [] @@ -923,7 +928,7 @@ def search_iris(ts: Triplestore, type=None, **kwargs) -> "List[str]": criterias = "\n".join(crit) query = f""" PREFIX rdf: <{RDF}> - SELECT ?iri + SELECT DISTINCT ?iri WHERE {{ {criterias} }} diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 6624afe8..4f4d676f 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -34,7 +34,11 @@ class TableDoc: "dataset". prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. - context: Dict with user-defined JSON-LD context. 
+ context: Additional user-defined context that should be
+ returned on top of the default context. It may be a
+ string with a URL to the user-defined context, a dict
+ with the user-defined context or a sequence of strings and
+ dicts.
 strip: Whether to strip leading and trailing whitespaces from
 cells.
 """
@@ -47,7 +51,7 @@ def __init__(
 data: "Sequence[Sequence[str]]",
 type: "Optional[str]" = "dataset",
 prefixes: "Optional[dict]" = None,
- context: "Optional[Union[dict, list]]" = None,
+ context: "Optional[Union[str, dict, list]]" = None,
 strip: bool = True,
 ):
 self.header = list(header)
@@ -64,7 +68,7 @@ def save(self, ts: Triplestore) -> None:

 def asdicts(self) -> "List[dict]":
 """Return the table as a list of dicts."""
- kw = {"@context": self.context} if self.context else {}
+ kw = {"_context": self.context} if self.context else {}

 results = []
 for row in self.data:
@@ -86,7 +90,7 @@ def fromdicts(
 dicts: "Sequence[dict]",
 type: "Optional[str]" = "dataset",
 prefixes: "Optional[dict]" = None,
- context: "Optional[Union[dict, list]]" = None,
+ context: "Optional[Union[str, dict, list]]" = None,
 strip: bool = True,
 ) -> "TableDoc":
 """Create new TableDoc instance from a sequence of dicts.

 Arguments:
 dicts: Sequence of single-resource dicts.
 type: Type of data to save (applies to all rows). Should
 either be one of the pre-defined names: "dataset",
- "distribution", "accessService", "parser" and "generator"
- or an IRI to a class in an ontology. Defaults to
- "dataset".
- prefixes: Dict with prefixes in addition to those included in
- the JSON-LD context. Should map namespace prefixes to IRIs.
- context: Dict with user-defined JSON-LD context.
- strip: Whether to strip leading and trailing whitespaces from
- cells.
+ "distribution", "accessService", "parser" and
+ "generator" or an IRI to a class in an ontology.
+ Defaults to "dataset".
+ prefixes: Dict with prefixes in addition to those included
+ in the JSON-LD context. Should map namespace prefixes
+ to IRIs.
+ context: Additional user-defined context that should be
+ returned on top of the default context. It may be a
+ string with a URL to the user-defined context, a dict
+ with the user-defined context or a sequence of strings
+ and dicts.
+ strip: Whether to strip leading and trailing whitespaces
+ from cells.

 Returns:
 New TableDoc instance.
@@ -153,7 +162,7 @@ def parse_csv(
 prefixes: "Optional[dict]" = None,
 context: "Optional[Union[dict, list]]" = None,
 encoding: str = "utf-8",
- dialect: "Union[csv.Dialect, str]" = "excel",
+ dialect: "Optional[Union[csv.Dialect, str]]" = None,
 **kwargs,
 ) -> "TableDoc":
 # pylint: disable=line-too-long
@@ -170,7 +179,7 @@
 JSON-LD context. Should map namespace prefixes to IRIs.
 context: Dict with user-defined JSON-LD context.
 encoding: The encoding of the csv file. Note that Excel may
- encode as "ISO-8859" (commonly used in 1990th).
+ encode as "ISO-8859" (which was commonly used in 1990th).
 dialect: A subclass of csv.Dialect, or the name of the
 dialect, specifying how the `csvfile` is formatted. For
 more details, see [Dialects and Formatting Parameters].
@@ -184,14 +193,22 @@
 References:
 [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
 """
+
+ def read(f, dialect):
+ """Return the header and data rows read from file-like object `f`."""
+ if dialect is None and not kwargs:
+ dialect = csv.Sniffer().sniff(f.read(1024), delimiters=",;\t ")
+ f.seek(0)
+ reader = csv.reader(f, dialect=dialect, **kwargs)
+ header = next(reader)
+ data = list(reader)
+ return header, data
+
 if isinstance(csvfile, (str, Path)):
 with openfile(csvfile, mode="rt", encoding=encoding) as f:
- reader = csv.reader(f, dialect=dialect, **kwargs)
+ header, data = read(f, dialect)
 else:
- reader = csv.reader(csvfile, dialect=dialect, **kwargs)
-
- header = next(reader)
- data = list(reader)
+ header, data = read(csvfile, dialect)

 return TableDoc(
 header=header,
diff --git a/tripper/utils.py b/tripper/utils.py
index 76e9cc0f..e44650b1 100644
--- a/tripper/utils.py
+++ b/tripper/utils.py
@@ -113,8 +113,10 @@ def openfile(
 fname = url

 try:
- yield open(fname, **kwargs) # pylint: disable=unspecified-encoding
+ f = open(fname, **kwargs) # pylint: disable=unspecified-encoding
+ yield f
 finally:
+ f.close()
 if tmpfile:
 Path(fname).unlink()

From 9851fa871f87d910e5194a03728aaf63d96e850e Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sat, 4 Jan 2025 21:01:50 +0100
Subject: [PATCH 53/59] Added documentation of isDescriptionFor

---
 docs/dataset/keywords.md | 2 ++
 tripper/dataset/tabledoc.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/dataset/keywords.md b/docs/dataset/keywords.md
index 3739cb7e..8978af1e 100644
--- a/docs/dataset/keywords.md
+++ b/docs/dataset/keywords.md
@@ -73,6 +73,7 @@ Properties specific for datasets
 - **[distribution]** (*IRI*): An available distribution of the dataset.
 - **[hasDatum]** (*IRI*): Relates a dataset to its datum parts. `hasDatum` relations are normally specified manually, since they are generated from the DLite data model.
 - **[inSeries]** (*IRI*): A dataset series of which the dataset is part.
+- **[isDescriptionFor]** (*IRI*): An object (e.g. a material) that this dataset describes.
 - **[isInputOf]** (*IRI*): A process that this dataset is the input to.
 - **[isOutputOf]** (*IRI*): A process that this dataset is the output of.
 - **[mappings]** (*Literal JSON*): A list of subject-predicate-object triples mapping the datamodel to ontological concepts.
@@ -173,6 +174,7 @@ Properties for parsers and generators
 [distribution]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_distribution
 [hasDatum]: https://w3id.org/emmo#EMMO_b19aacfc_5f73_4c33_9456_469c1e89a53e
 [inSeries]: https://www.w3.org/TR/vocab-dcat-3/#Property:dataset_in_series
+[isDescriptionFor]: https://w3id.org/emmo#EMMO_f702bad4_fc77_41f0_a26d_79f6444fd4f3
 [isInputOf]: https://w3id.org/emmo#EMMO_1494c1a9_00e1_40c2_a9cc_9bbf302a1cac
 [isOutputOf]: https://w3id.org/emmo#EMMO_2bb50428_568d_46e8_b8bf_59a4c5656461
 [mappings]: https://w3id.org/emmo/domain/oteio#mapping
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 4f4d676f..532cbaa7 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -179,7 +179,7 @@ def parse_csv(
 JSON-LD context. Should map namespace prefixes to IRIs.
 context: Dict with user-defined JSON-LD context.
 encoding: The encoding of the csv file. Note that Excel may
- encode as "ISO-8859" (which was commonly used in 1990th).
+ encode as "ISO-8859" (which was commonly used in the 1990s).
dialect: A subclass of csv.Dialect, or the name of the dialect, specifying how the `csvfile` is formatted. For more details, see [Dialects and Formatting Parameters]. From 59c7f0966254b094554960c17fc512e53180cc51 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 4 Jan 2025 21:37:06 +0100 Subject: [PATCH 54/59] Updated mkdocs and fixed link warnings --- docs/dataset/customisation.md | 19 +++++++++---------- docs/dataset/documenting-a-resource.md | 8 ++++---- docs/dataset/keywords.md | 8 +++----- docs/dataset/prefixes.md | 2 +- docs/tools/datadoc.md | 4 ++-- mkdocs.yml | 12 ++++++++++++ pyproject.toml | 14 +++++++------- 7 files changed, 38 insertions(+), 29 deletions(-) diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md index 71013c49..52f28660 100644 --- a/docs/dataset/customisation.md +++ b/docs/dataset/customisation.md @@ -237,15 +237,14 @@ Instead, the list of available resource types should be stored and retrieved fro [With custom context]: #with-custom-context [User-defined keywords]: #user-defined-keywords [Python dict]: #python-dict -[resource types]: ../introduction#resource-types -[Documenting a resource]: ../documenting-a-resource -[predefined prefixes]: ../prefixes/ -[predefined keywords]: ../keywords/ +[resource types]: introduction.md#resource-types +[Documenting a resource]: documenting-a-resource.md +[predefined prefixes]: prefixes.md +[predefined keywords]: keywords.md [default context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json -[save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict -[as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld -[save_datadoc()]: -../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc -[TableDoc]: ../../api_reference/dataset/tabledoc/#tripper.dataset.tabledoc.TableDoc -[TableDoc.parse_csv()]: ../../api_reference/dataset/tabledoc/#tripper.dataset.tabledoc.TableDoc.parse_csv +[save_dict()]: ../api_reference/dataset/dataset.md#tripper.dataset.dataset.save_dict +[as_jsonld()]: ../api_reference/dataset/dataset.md#tripper.dataset.dataset.as_jsonld +[save_datadoc()]: ../api_reference/dataset/dataset.md#tripper.dataset.dataset.save_datadoc +[TableDoc]: ../api_reference/dataset/tabledoc.md/#tripper.dataset.tabledoc.TableDoc +[TableDoc.parse_csv()]: ../api_reference/dataset/tabledoc.md/#tripper.dataset.tabledoc.TableDoc.parse_csv [default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json diff --git a/docs/dataset/documenting-a-resource.md b/docs/dataset/documenting-a-resource.md index a85342dd..ad8b6c34 100644 --- a/docs/dataset/documenting-a-resource.md +++ b/docs/dataset/documenting-a-resource.md @@ -124,7 +124,7 @@ This dict representation accepts the following keywords: - **@context**: Optional user-defined context to be appended to the documentation of all resources. - **prefixes**: A dict mapping namespace prefixes to their corresponding URLs. -- **datasets**/**distributions**/**accessServices**/**generators**/**parsers**/**resources**: A list of valid [single-resource](#single-resource-dict) dict of the given [resource type](#resource-types). +- **datasets**/**distributions**/**accessServices**/**generators**/**parsers**/**resources**: A list of valid [single-resource](#single-resource-dict) dict of the given [resource type](introduction.md#resource-types). 
See [semdata.yaml] for an example of a [YAML] representation of a multi-resource dict documentation. @@ -207,10 +207,10 @@ The below example shows how to save all datasets listed in the CSV file [semdata [emmo:DataSet]: https://w3id.org/emmo#EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a [oteio:Generator]: https://w3id.org/emmo/domain/oteio/Generator [oteio:Parser]: https://w3id.org/emmo/domain/oteio/Parser -[save_dict()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_dict -[as_jsonld()]: ../../api_reference/dataset/dataset/#tripper.dataset.dataset.as_jsonld +[save_dict()]: ../api_reference/dataset/dataset.md/#tripper.dataset.dataset.save_dict +[as_jsonld()]: ../api_reference/dataset/dataset.md/#tripper.dataset.dataset.as_jsonld [save_datadoc()]: -../../api_reference/dataset/dataset/#tripper.dataset.dataset.save_datadoc +../api_reference/dataset/dataset.md/#tripper.dataset.dataset.save_datadoc [semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml [semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv [TableDoc]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.tabledoc.TableDoc diff --git a/docs/dataset/keywords.md b/docs/dataset/keywords.md index 8978af1e..de9f8b6c 100644 --- a/docs/dataset/keywords.md +++ b/docs/dataset/keywords.md @@ -125,9 +125,6 @@ Properties for parsers and generators - **[prefixes]**: --> -[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json -[JSON-LD documentation]: https://www.w3.org/TR/json-ld/#syntax-tokens-and-keywords - [accessRights]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_access_rights [conformsTo]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_conforms_to [contactPoint]: https://www.w3.org/TR/vocab-dcat-3/#Property:resource_contact_point @@ -219,11 +216,12 @@ Properties for parsers and generators [prefixes]: --> - [DCAT]: https://www.w3.org/TR/vocab-dcat-3/ [dcat:Dataset]: https://www.w3.org/TR/vocab-dcat-3/#Class:Dataset [dcat:Distribution]: https://www.w3.org/TR/vocab-dcat-3/#Class:Distribution [vCard]: https://www.w3.org/TR/vcard-rdf/ [IANA]: https://www.iana.org/assignments/media-types/media-types.xhtml +[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json +[JSON-LD documentation]: https://www.w3.org/TR/json-ld/#syntax-tokens-and-keywords -[User-defined keywords]: ../customisation/#user-defined-keywords +[User-defined keywords]: customisation.md/#user-defined-keywords diff --git a/docs/dataset/prefixes.md b/docs/dataset/prefixes.md index 5af69c85..2caaf8d4 100644 --- a/docs/dataset/prefixes.md +++ b/docs/dataset/prefixes.md @@ -25,4 +25,4 @@ See [User-defined prefixes] for how to extend this list with additional namespac [default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json -[User-defined prefixes]: ../customisation/#user-defined-prefixes +[User-defined prefixes]: customisation.md/#user-defined-prefixes diff --git a/docs/tools/datadoc.md b/docs/tools/datadoc.md index 9bd2fe5a..3785a9fc 100644 --- a/docs/tools/datadoc.md +++ b/docs/tools/datadoc.md @@ -94,7 +94,7 @@ This is useful if you are working with an in-memory triplestore. 
The `tests/input/` folder in the source code contain the `semdata.csv` CSV file documenting four datasets, a SEM image, two nested dataset series and the sample the image was acquired from.

- [![semdata.csv](../../figs/semdata.png)](../../figs/semdata.png)
+ [![semdata.csv](/figs/semdata.png)](/figs/semdata.png)

 Running the following command from the root folder of the source code will populate an in-memory rdflib store with the data documented in the `semdata.csv` file.
@@ -375,4 +375,4 @@



-[resource types]: ../../dataset/introduction/#resource-types
+[resource types]: ../dataset/introduction.md/#resource-types
diff --git a/mkdocs.yml b/mkdocs.yml
index 0212137c..e1ebbbd2 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -102,3 +102,15 @@ nav:

 watch:
 - tripper
+
+#validation:
+# omitted_files: warn
+# absolute_links: warn # Or 'relative_to_docs' - new in MkDocs 1.6
+# unrecognized_links: warn
+# anchors: warn # New in MkDocs 1.6
+
+validation:
+ links:
+ absolute_links: relative_to_docs
+ anchors: warn
+ unrecognized_links: warn
diff --git a/pyproject.toml b/pyproject.toml
index 1e871728..349636e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,13 +63,13 @@ testing = [
 ]
 docs = [
 "tripper[testing]",
- "mike==2.1.2",
- "mkdocs==1.5.3",
- "mkdocs-autorefs==0.4.1",
- "mkdocs-awesome-pages-plugin==2.9.2",
- "mkdocs-material==9.5.17",
- "mkdocstrings==0.22.0",
- "mkdocstrings-python-legacy==0.2.3",
+ "mike==2.1.3",
+ "mkdocs==1.6.1",
+ "mkdocs-autorefs==1.2.0",
+ "mkdocs-awesome-pages-plugin==2.10.1",
+ "mkdocs-material==9.5.49",
+ "mkdocstrings==0.27.0",
+ "mkdocstrings-python-legacy==0.2.4",
 ]
 dev = [
 "tripper[pre-commit,docs]",

From 462742f2782588fe2dbd9988c431e8902dae2f41 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sat, 4 Jan 2025 21:39:33 +0100
Subject: [PATCH 55/59] Minor documentation update

---
 docs/tools/datadoc.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tools/datadoc.md b/docs/tools/datadoc.md
index 3785a9fc..03ba1af2 100644
--- a/docs/tools/datadoc.md
+++ b/docs/tools/datadoc.md
@@ -92,7 +92,7 @@ This is useful if you are working with an in-memory triplestore.

 !!! example

- The `tests/input/` folder in the source code contain the `semdata.csv` CSV file documenting four datasets, a SEM image, two nested dataset series and the sample the image was acquired from.
+ The `tests/input/` folder in the source code contains the `semdata.csv` CSV file documenting four datasets, a SEM image, two nested dataset series and the sample the image was acquired from as shown in the figure below (click on it to see it in full size).

From 2ec83716ba96518acb31d95450b6e8d654c058be Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sat, 4 Jan 2025 21:45:30 +0100
Subject: [PATCH 56/59] Fix failing test

---
 tests/dataset/test_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 93f57a47..5fe34ba8 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -29,7 +29,7 @@ def test_get_jsonld_context():
 assert online_context["status"] == "adms:status"

 # Test context argument
- context2 = get_jsonld_context(context=CONTEXT_URL)
+ context2 = get_jsonld_context(context=CONTEXT_URL, fromfile=False)
[![semdata.csv](/figs/semdata.png)](/figs/semdata.png)

From 2ec83716ba96518acb31d95450b6e8d654c058be Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sat, 4 Jan 2025 21:45:30 +0100
Subject: [PATCH 56/59] Fix failing test

---
 tests/dataset/test_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 93f57a47..5fe34ba8 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -29,7 +29,7 @@ def test_get_jsonld_context():
     assert online_context["status"] == "adms:status"

     # Test context argument
-    context2 = get_jsonld_context(context=CONTEXT_URL)
+    context2 = get_jsonld_context(context=CONTEXT_URL, fromfile=False)
     assert context2 == online_context
     assert "newkey" not in context

From 883fe419edfec3cd0db6e174b1c3555b25319622 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sat, 4 Jan 2025 21:45:30 +0100
Subject: [PATCH 57/59] Fix failing test

---
 tests/dataset/test_dataset.py | 2 +-
 tripper/dataset/datadoc.py    | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 93f57a47..5fe34ba8 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -29,7 +29,7 @@ def test_get_jsonld_context():
     assert online_context["status"] == "adms:status"

     # Test context argument
-    context2 = get_jsonld_context(context=CONTEXT_URL)
+    context2 = get_jsonld_context(context=CONTEXT_URL, fromfile=False)
     assert context2 == online_context
     assert "newkey" not in context

diff --git a/tripper/dataset/datadoc.py b/tripper/dataset/datadoc.py
index a241ccf8..89a98eb6 100644
--- a/tripper/dataset/datadoc.py
+++ b/tripper/dataset/datadoc.py
@@ -87,6 +87,11 @@ def subcommand_find(ts, args):
         print(s)


+def subcommand_load(ts, args):
+    """Subcommand for loading data from a documented storage."""
+    print(ts, args)
+
+
 def main(argv=None):
     """Main function."""
     parser = argparse.ArgumentParser(
@@ -236,7 +241,7 @@ def main(argv=None):
         action="extend",
         nargs="+",
         metavar="PREFIX=URL",
-        help="Namespace prefixes to add to bind to the triplestore.",
+        help="Namespace prefixes to bind to the triplestore.",
     )

     args = parser.parse_args(argv)

From 1c6ee69999de3d04e7a7eee866e642740e11a2d6 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 6 Jan 2025 00:25:43 +0100
Subject: [PATCH 58/59] Added load sub-command to datadoc

---
 docs/dataset/documenting-a-resource.md |  5 +-
 docs/dataset/keywords.md               |  7 +--
 docs/tools/datadoc.md                  | 65 ++++++++++++++++++++++----
 tests/dataset/test_tabledoc.py         |  3 +-
 tests/input/semdata.csv                | 10 ++--
 tripper/dataset/datadoc.py             | 30 +++++++++++-
 tripper/dataset/dataset.py             |  6 ++-
 7 files changed, 99 insertions(+), 27 deletions(-)

diff --git a/docs/dataset/documenting-a-resource.md b/docs/dataset/documenting-a-resource.md
index ad8b6c34..c6b04a54 100644
--- a/docs/dataset/documenting-a-resource.md
+++ b/docs/dataset/documenting-a-resource.md
@@ -175,8 +175,7 @@ The below example shows how to save all datasets listed in the CSV file [semdata

 >>> from tripper.dataset import TableDoc
 >>> td = TableDoc.parse_csv(
-...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv",
-...     delimiter=";",
+...     "https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.csv",
 ...     prefixes={
 ...         "sem": "https://w3id.com/emmo/domain/sem/0.1#",
 ...         "semdata": "https://he-matchmaker.eu/data/sem/",
@@ -212,5 +211,5 @@

 [save_datadoc()]: ../api_reference/dataset/dataset.md/#tripper.dataset.dataset.save_datadoc
 [semdata.yaml]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.yaml
-[semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/tabledoc-csv/tests/input/semdata.csv
+[semdata.csv]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tests/input/semdata.csv
 [TableDoc]: https://emmc-asbl.github.io/tripper/latest/api_reference/dataset/dataset/#tripper.dataset.tabledoc.TableDoc
diff --git a/docs/dataset/keywords.md b/docs/dataset/keywords.md
index de9f8b6c..c8a6c366 100644
--- a/docs/dataset/keywords.md
+++ b/docs/dataset/keywords.md
@@ -11,8 +11,8 @@ Here we only list those that are commonly used for data documentation with Tripper

 - **@context** (*IRI*): URL to or dict with user-defined JSON-LD context.
   Used to extend the keywords listed on this page with domain- or application-specific keywords.
-- **@id** (*IRI*): IRI of the documented resource.
-- **@type** (*IRI*): IRI of ontological class that the resource is an individual of.
+- **@id** (*IRI*): IRI identifying the documented resource.
+- **@type** (*IRI*): IRI of the ontological class that defines what the resource *is*.

 General properties on resources used by DCAT
@@ -56,10 +56,11 @@ Other general properties on resources

 - **[abstract]** (*Literal*): A summary of the resource.
 - **[bibliographicCitation]** (*Literal*): A bibliographic reference for the resource. Recommended practice is to include sufficient bibliographic detail to identify the resource as unambiguously as possible.
-- **[comment]** (*Literal*): A description of the subject resource.
+- **[comment]** (*Literal*): A description of the subject resource. Use `description` instead.
 - **[deprecated]** (*Literal*): The annotation property that indicates that a given entity has been deprecated. It should equal to `"true"^^xsd:boolean`.
 - **[isDefinedBy]** (*Literal*): Indicate a resource defining the subject resource. This property may be used to indicate an RDF vocabulary in which a resource is described.
 - **[label]** (*Literal*): Provides a human-readable version of a resource's name.
+- **[scopeNote]** (*Literal*): A note that helps to clarify the meaning and/or the use of a concept.
 - **[seeAlso]** (*Literal*): Indicates a resource that might provide additional information about the subject resource.
 - **[source]** (*Literal*): A related resource from which the described resource is derived.
 - **[statements]** (*Literal JSON*): A list of subject-predicate-object triples with additional RDF statements documenting the resource.
diff --git a/docs/tools/datadoc.md b/docs/tools/datadoc.md
index 03ba1af2..950881a6 100644
--- a/docs/tools/datadoc.md
+++ b/docs/tools/datadoc.md
@@ -10,15 +10,16 @@
 usage: datadoc [-h] [--backend BACKEND] [--base-iri BASE_IRI]
                [--database DATABASE] [--package PACKAGE] [--parse LOCATION]
                [--parse-format PARSE_FORMAT]
                [--prefixes PREFIX=URL [PREFIX=URL ...]]
-               {add,find} ...
+               {add,find,load} ...

 Tool for data documentation.
 It allows populating and searching a triplestore for existing documentation.

 positional arguments:
-  {add,find}            Subcommands:
+  {add,find,load}       Subcommands:
     add                 Populate the triplestore with data documentation.
     find                Find documented resources in the triplestore.
+    load                Load documented dataset from a storage.
options:
   -h, --help            show this help message and exit
@@ -41,7 +42,7 @@ options:
                         Namespace prefixes to add to bind to the triplestore.
 ```

-Currently, `datadoc` has two sub-commands, `add` and `find` for populating and searching the triplestore, respectively.
+Currently, `datadoc` has three sub-commands, `add`, `find` and `load`, for populating the triplestore, searching the triplestore and accessing a dataset documented in the triplestore, respectively.

 ### General options
@@ -51,7 +52,7 @@
 They are typically used with the default "rdflib" in-memory backend.

-Subcomands
+Subcommands
 ----------

 ### add
@@ -99,14 +100,12 @@ This is useful if you are working with an in-memory triplestore.

 !!! example

     Running the following command from the root folder of the source code will populate an in-memory rdflib store with the data documented in the `semdata.csv` file.

     ```shell
-    datadoc add tests/input/semdata.csv --csv-options delimiter=';' --context tests/input/semdata-context.json --dump kb.ttl
+    datadoc add tests/input/semdata.csv --context tests/input/semdata-context.json --dump kb.ttl
     ```

-    The `--csv-options` tells that the csv file is delimited by semi-colon, which is the default delimiter used by excel in Scandinavia.
-
     The `--context` option provides a user-defined context defining prefixes and keywords used by the input.
-    Finally the `--dump` option dumps the in-memory triplestore to the file `kb.ttl`.
+    The `--dump` option dumps the in-memory triplestore to the file `kb.ttl`.

     If you open the file, you will notice that it in addition to the four datasets listed in the input, also include the `SEMImage` class and its properties, providing structural documentation of the `SEMImage` individuals.

     ??? abstract "Generated turtle file"
@@ -157,7 +156,7 @@
             dcat:contactPoint "Sigurd Wenner " ;
             dcat:distribution [ a dcat:Distribution ;
                      "http://sintef.no/dlite/parser#sem_hitachi" ;
-                    dcat:downloadURL "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif" ;
+                    dcat:downloadURL "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/tests/input/77600-23-001_5kV_400x_m001.tif" ;
                 dcat:mediaType "image/tiff" ] ;
             dcat:inSeries  ;
             emmo:EMMO_f702bad4_fc77_41f0_a26d_79f6444fd4f3  ;
@@ -297,7 +296,7 @@ options:
 ```

 The `--type` and `--criteria` options provide search criteria.
-The `--type` argument an be any of the recognised [resource types] to limit the search to.
+The `--type` option can be any of the recognised [resource types] to limit the search to.
 Alternatively, it may be the IRI of a class.
 This limits the search to only resources that are individuals of this class.
@@ -372,6 +371,52 @@ The following formats are currently available:
     ]
     ```

+    **Ex 5**: Show the documentation of a resource with a given IRI as JSON:
+
+    ```shell
+    $ datadoc --parse=kb.ttl find --criteria @id=https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001 --format=json
+    ```
+
+    This will show the same output as in Ex 4.
+
+
+### load
+Loads a documented dataset from a storage.
+Running `datadoc load --help` will show the following help message:
+
+```
+usage: datadoc load [-h] [--output FILENAME] iri
+
+positional arguments:
+  iri                   IRI of dataset to load.
+
+options:
+  -h, --help            show this help message and exit
+  --output FILENAME, -o FILENAME
+                        Write the dataset to the given file. The default is to
+                        write to standard output.
+```
+
+!!! note
+    The `load` subcommand is specific to datasets, since it uses the DCAT documentation of how to fetch the dataset.
+
+The positional `iri` argument is the IRI of the documented dataset to load.
+
+The `--output` option allows you to write the dataset to a local file.
+
+
+!!! example
+
+    Save the dataset `https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001` documented in `kb.ttl` to file:
+
+    ```shell
+    $ datadoc -p kb.ttl load https://he-matchmaker.eu/data/sem/SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001 -o cement.tif
+    ```
+
+    This should create the file `cement.tif` containing the image data.
+
+
diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 4f9dfd93..56d66512 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -123,7 +123,6 @@ def test_csv():
     # Read csv file
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
-        delimiter=";",
         prefixes={
             "sem": "https://w3id.com/emmo/domain/sem/0.1#",
             "semdata": "https://he-matchmaker.eu/data/sem/",
@@ -143,7 +142,7 @@ def test_csv():
         "77600-23-001/77600-23-001_5kV_400x_m001"
     )
     assert img.distribution.downloadURL == (
-        "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/"
+        "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/"
         "tests/input/77600-23-001_5kV_400x_m001.tif"
     )
diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
index 3f764831..9dca3036 100644
--- a/tests/input/semdata.csv
+++ b/tests/input/semdata.csv
@@ -1,5 +1,5 @@
-@id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionFor
-semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
-semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
-semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
-sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
+@id,@type,title,description,creator,contactPoint,inSeries,datamodel,datamodelStorage,distribution.downloadURL,distribution.mediaType,distribution.parser,fromSample,isDescriptionFor
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001,sem:SEMImage,SEM image of cement,"Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.",Sigurd Wenner,Sigurd Wenner ,semdata:SEM_cement_batch2/77600-23-001,http://onto-ns.com/meta/matchmaker/0.2/SEMImage,https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml,https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/tests/input/77600-23-001_5kV_400x_m001.tif,image/tiff,parser:sem_hitachi,sample:SEM_cement_batch2/77600-23-001,mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001,sem:SEMImageSeries,Series of SEM image of cement sample 77600,"Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.",Sigurd Wenner,Sigurd Wenner ,semdata:SEM_cement_batch2, ,,sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001,inode/directory,,,
+semdata:SEM_cement_batch2,sem:SEMImageSeries,Nested series of SEM images of cement batch2,...,Sigurd Wenner,Sigurd Wenner , ,,,sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2,inode/directory,,,
+sample:SEM_cement_batch2/77600-23-001,chameo:Sample,Series for SEM images for sample 77600-23-001., ,,,,,,,,,,
diff --git a/tripper/dataset/datadoc.py b/tripper/dataset/datadoc.py
index 89a98eb6..bb666f4e 100644
--- a/tripper/dataset/datadoc.py
+++ b/tripper/dataset/datadoc.py
@@ -12,6 +12,7 @@
 from tripper.dataset import (
     TableDoc,
     get_jsonld_context,
+    load,
     load_dict,
     save_datadoc,
     save_dict,
@@ -88,8 +89,14 @@ def subcommand_find(ts, args):


 def subcommand_load(ts, args):
-    """Subcommand for loading data from a documented storage."""
-    print(ts, args)
+    """Subcommand for loading a documented dataset from a storage."""
+    data = load(ts, args.iri)
+
+    if args.output:
+        with open(args.output, "wb") as f:
+            f.write(data)
+    else:
+        print(data)  # NB: prints the bytes repr; use --output for binary data


 def main(argv=None):
@@ -199,6 +206,25 @@ def main(argv=None):
         ),
     )

+    # Subcommand: load
+    parser_load = subparsers.add_parser(
+        "load", help="Load documented dataset from a storage."
+    )
+    parser_load.set_defaults(func=subcommand_load)
+    parser_load.add_argument(
+        "iri",
+        help="IRI of dataset to load.",
+    )
+    parser_load.add_argument(
+        "--output",
+        "-o",
+        metavar="FILENAME",
+        help=(
+            "Write the dataset to the given file. The default is to write "
+            "to standard output."
+        ),
+    )
+
     # General: options
     parser.add_argument(
         "--backend",
diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 7fd8b1c4..9f680ffc 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -889,8 +889,10 @@ def search_iris(ts: Triplestore, type=None, **kwargs) -> "List[str]":
     SeeAlso:
     [resource type]: https://emmc-asbl.github.io/tripper/latest/dataset/introduction/#resource-types
     """
-    crit = []
+    # Special handling of @id
+    id = kwargs.pop("@id") if "@id" in kwargs else kwargs.pop("_id", None)

+    crit = []
     if type:
         if ":" in type:
             expanded = ts.expand_iri(type)
@@ -933,4 +935,4 @@
     {criterias}
     }}
     """
-    return [r[0] for r in ts.query(query)]  # type: ignore
+    return [r[0] for r in ts.query(query) if not id or r[0] == id]  # type: ignore

From 54523e21a74865c4fd1c9b1a283e7085b24b13ef Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 6 Jan 2025 14:14:36 +0100
Subject: [PATCH 59/59] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 docs/dataset/customisation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/dataset/customisation.md b/docs/dataset/customisation.md
index 52f28660..19279615 100644
--- a/docs/dataset/customisation.md
+++ b/docs/dataset/customisation.md
@@ -94,7 +94,7 @@ Both for the single-resource and multi-resource dicts, you can add a `"@context"

 For example

-```json
+```python
 {
     "@context": [
         # URL to a JSON file, typically a domain-specific context