diff --git a/ontopy/excelparser.py b/ontopy/excelparser.py index 71775f48f..13bdcbd76 100755 --- a/ontopy/excelparser.py +++ b/ontopy/excelparser.py @@ -21,6 +21,7 @@ from ontopy import get_ontology from ontopy.utils import EMMOntoPyException, NoSuchLabelError from ontopy.utils import ReadCatalogError, read_catalog +from ontopy.ontology import LabelDefinitionError from ontopy.manchester import evaluate import owlready2 # pylint: disable=C0411 @@ -276,7 +277,12 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran if not parents: parents = [owlready2.Thing] - concept = onto.new_entity(name, parents) + try: + concept = onto.new_entity(name, parents) + except LabelDefinitionError: + concepts_with_errors["wrongly_defined"].append(name) + continue + added_rows.add(index) # Add elucidation try: diff --git a/ontopy/ontology.py b/ontopy/ontology.py index a7f35262d..f6c499024 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -186,20 +186,28 @@ def __init__(self, *args, **kwargs): doc="Whether to include imported ontologies in dir() listing.", ) + # Other settings + _colon_in_label = False + colon_in_label = property( + fget=lambda self: self._colon_in_label, + fset=lambda self, v: setattr(self, "_colon_in_label", bool(v)), + doc="Whether to accept colon in name-part of IRI. 
" + "If true, the name cannot be prefixed.", + ) + def __dir__(self): - set_dir = set(super().__dir__()) + dirset = set(super().__dir__()) lst = list(self.get_entities(imported=self._dir_imported)) if self._dir_preflabel: - set_dir.update( + dirset.update( _.prefLabel.first() for _ in lst if hasattr(_, "prefLabel") ) if self._dir_label: - set_dir.update(_.label.first() for _ in lst if hasattr(_, "label")) + dirset.update(_.label.first() for _ in lst if hasattr(_, "label")) if self._dir_name: - set_dir.update(_.name for _ in lst if hasattr(_, "name")) - - set_dir.difference_update({None}) # get rid of possible None - return sorted(set_dir) + dirset.update(_.name for _ in lst if hasattr(_, "name")) + dirset.difference_update({None}) # get rid of possible None + return sorted(dirset) def __getitem__(self, name): item = super().__getitem__(name) @@ -257,7 +265,12 @@ def get_unabbreviated_triples( ) def get_by_label( - self, label: str, label_annotations: str = None, prefix: str = None + self, + label: str, + label_annotations: str = None, + prefix: str = None, + imported: bool = True, + colon_in_label: bool = None, ): """Returns entity with label annotation `label`. @@ -272,26 +285,26 @@ def get_by_label( the base iri of an ontology (with trailing slash (/) or hash (#) stripped off). The search for a matching label will be limited to this namespace. + imported: Whether to also look for `label` in imported ontologies. + colon_in_label: Whether to accept colon (:) in a label or name-part + of IRI. Defaults to the `colon_in_label` property of `self`. + Setting this true cannot be combined with `prefix`. If several entities have the same label, only the one which is found first is returned.Use get_by_label_all() to get all matches. - A NoSuchLabelError is raised if `label` cannot be found. + Note, if different prefixes are provided in the label and via + the `prefix` argument a warning will be issued and the + `prefix` argument will take precedence. 
- Note - ---- - The current implementation also supports "*" as a wildcard - matching any number of characters. This may change in the future. + A NoSuchLabelError is raised if `label` cannot be found. """ - # pylint: disable=too-many-arguments,too-many-branches + # pylint: disable=too-many-arguments,too-many-branches,invalid-name if not isinstance(label, str): raise TypeError( f"Invalid label definition, must be a string: {label!r}" ) - if " " in label: - raise ValueError( - f"Invalid label definition, {label!r} contains spaces." - ) + if self._label_annotations is None: for iri in DEFAULT_LABEL_ANNOTATIONS: try: @@ -299,23 +312,25 @@ def get_by_label( except ValueError: pass - splitlabel = label.split(":", 1) - if len(splitlabel) > 2: - raise ValueError( - f"Invalid label definition, {label!r}" - " contains more than one ':' ." - "The string before ':' indicates the prefix. " - "The string after ':' indicates the label." - ) - if len(splitlabel) == 2: - label = splitlabel[1] - if prefix and prefix != splitlabel[0]: - warnings.warn( - f"Prefix given both as argument ({prefix}) " - f"and in label ({splitlabel[0]}). " - "Prefix given in label takes presendence " + if colon_in_label is None: + colon_in_label = self._colon_in_label + if colon_in_label: + if prefix: + raise ValueError( + "`prefix` cannot be combined with `colon_in_label`" ) - prefix = splitlabel[0] + else: + splitlabel = label.split(":", 1) + if len(splitlabel) == 2 and not splitlabel[1].startswith("//"): + label = splitlabel[1] + if prefix and prefix != splitlabel[0]: + warnings.warn( + f"Prefix given both as argument ({prefix}) " + f"and in label ({splitlabel[0]}). 
" + "Prefix given in argument takes presendence " + ) + if not prefix: + prefix = splitlabel[0] if prefix: entitylist = self.get_by_label_all( @@ -327,36 +342,56 @@ def get_by_label( return entitylist[0] raise NoSuchLabelError( - f"No label annotations matches {label!r} with prefix " + f"No label annotations matches {label!r} with prefix " f"{prefix!r}" ) - # if label in self._namespaces: - # return self._namespaces[label] - if label_annotations is None: - annotations = (a.name for a in self.label_annotations) - else: - annotations = ( - a.name if hasattr(a, "storid") else a for a in label_annotations - ) - for key in annotations: - entity = self.search_one(**{key: label}) - if entity: - return entity + # Label is a full IRI + entity = self.world[label] + if entity: + return entity + + # First entity with matching label annotation + annotation_ids = ( + (self._abbreviate(ann, False) for ann in label_annotations) + if label_annotations + else (ann.storid for ann in self.label_annotations) + ) + get_triples = ( + self.world._get_data_triples_spod_spod + if imported + else self._get_data_triples_spod_spod + ) + for annotation_id in annotation_ids: + for s, _, _, _ in get_triples(None, annotation_id, label, None): + return self.world[self._unabbreviate(s)] + # Special labels if self._special_labels and label in self._special_labels: return self._special_labels[label] + # Check if label is a name under base_iri entity = self.world[self.base_iri + label] if entity: return entity - raise NoSuchLabelError(f"No label annotations matches {label!r}") + # Check if label is a name in any namespace + for namespace in self._namespaces.keys(): + entity = self.world[namespace + label] + if entity: + return entity + + raise NoSuchLabelError(f"No label annotations matches '{label}'") def get_by_label_all(self, label, label_annotations=None, prefix=None): """Like get_by_label(), but returns a list with all matching labels. Returns an empty list if no matches could be found. 
+ + Note + ---- + The current implementation also supports "*" as a wildcard + matching any number of characters. This may change in the future. """ if not isinstance(label, str): raise TypeError( @@ -1582,7 +1617,7 @@ def new_entity( Throws exception if name consists of more than one word. """ - if len(name.split(" ")) > 1: + if " " in name: raise LabelDefinitionError( f"Error in label name definition '{name}': " f"Label consists of more than one word." @@ -1684,7 +1719,7 @@ def _get_unabbreviated_triples( _unabbreviate(self, p, blank=blank), _unabbreviate(self, o, blank=blank), ) - for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=""): + for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=None): yield ( _unabbreviate(self, s, blank=blank), _unabbreviate(self, p, blank=blank), diff --git a/tests/test_dir.py b/tests/test_dir.py new file mode 100644 index 000000000..867fa7758 --- /dev/null +++ b/tests/test_dir.py @@ -0,0 +1,28 @@ +from pathlib import Path + +from ontopy import get_ontology + + +thisdir = Path(__file__).resolve().parent + +onto = get_ontology( + thisdir / "test_excelparser/imported_onto/ontology.ttl" +).load() +onto.dir_imported = False +onto.dir_preflabel = False +onto.dir_label = False +onto.dir_name = False +assert "TestClass2" not in dir(onto) + +onto.dir_imported = True +onto.dir_preflabel = True +assert onto._dir_imported +assert onto.TestClass2 +assert "TestClass2" in dir(onto) +assert "testclass" not in dir(onto) +assert "testclass2" not in dir(onto) + +onto.dir_name = True +assert "TestClass2" in dir(onto) +assert "testclass" in dir(onto) +assert "testclass2" in dir(onto) diff --git a/tests/test_get_by_label.py b/tests/test_get_by_label.py new file mode 100644 index 000000000..5bbf1375c --- /dev/null +++ b/tests/test_get_by_label.py @@ -0,0 +1,24 @@ +import pytest + +from ontopy import get_ontology +from ontopy.ontology import NoSuchLabelError + + +# Loading emmo-inferred where everything is sqashed into one ontology +emmo = 
get_ontology().load() +assert emmo[emmo.Atom.name] == emmo.Atom +assert emmo[emmo.Atom.iri] == emmo.Atom + +# Load an ontology with imported sub-ontologies +onto = get_ontology( + "https://raw.githubusercontent.com/BIG-MAP/BattINFO/master/battinfo.ttl" +).load() +assert onto.Electrolyte.prefLabel.first() == "Electrolyte" + + +# Check colon_in_label argument +onto.Atom.altLabel.append("Element:X") +with pytest.raises(NoSuchLabelError): + onto.get_by_label("Element:X") + +assert onto.get_by_label("Element:X", colon_in_label=True) == onto.Atom