Skip to content

Commit

Permalink
expand simple dict processing (#1860)
Browse files Browse the repository at this point in the history
* Extends jsonschemagen's interpretation of SimpleDicts beyond tuples.

This is necessary for parsing the SimpleDict form of annotations in schemas
(SimpleDict is the canonical way to serialize annotations)

See linkml/generators/jsonschemagen.py

* owlgen: fixed handling of has_member and all_member

* Added tests for SimpleDict inlining, and for has_member/all_member

* Adding docs for mapping-commons/sssom#225

* formatted

* regenerating snapshot to account for relaxing conditions under which something is a SimpleDict

* error message now changes with relaxed SimpleDict

* regenerate-snapshots
  • Loading branch information
cmungall authored Feb 5, 2024
1 parent 453a4cb commit 729ae82
Show file tree
Hide file tree
Showing 14 changed files with 1,357 additions and 182 deletions.
32 changes: 32 additions & 0 deletions docs/schemas/inlining.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,38 @@ data:

```

## Inlining as simple dictionaries

If a collection of objects is inlined as a dict, and the objects have a "primary" value, then
a more compact simple key-value pair inlining can be used.

One example of this is [prefixes](https://w3id.org/linkml/prefixes) in the LinkML metamodel. This is an
inlined collection of [Prefix](https://w3id.org/linkml/Prefix) classes which are essentially tuples of
a key (the prefix itself, e.g. `dcterms`) and a value (the expansion, e.g. `http://purl.org/dc/terms/`).

These can be serialized in the standard compact form like this:

```yaml
prefixes:
  dcterms:
    prefix_reference: http://purl.org/dc/terms/
  ...
```

However, the canonical encoding is the more compact SimpleDict form:

```yaml
prefixes:
  dcterms: http://purl.org/dc/terms/
```

The procedure for determining whether a SimpleDict serialization can be used
on a collection of classes is as follows:

1. There must be a key or identifier slot (this is already required for any slot inlined as a dict)
2. One of the following must hold for the remaining (non-key) slots:
    - There is exactly one non-key slot (this forms the "primary" value)
    - There are multiple non-key slots, but exactly one of them is `required` (that slot is used as the primary value)

## Inlining with non-JSON serializations

Expand Down
25 changes: 25 additions & 0 deletions linkml/generators/jsonschemagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,9 +579,34 @@ def _get_range_associated_slots(
]
non_id_required_slots = [s for s in non_id_slots if s.required]

# Some lists of objects can be serialized as SimpleDicts.
# A SimpleDict is serialized as simple key-value pairs where the value is atomic.
# The key must be declared as a key, and the value must satisfy one of the following conditions:
# 1. The value slot is the only other slot in the object other than the key
# 2. The value slot is explicitly annotated as a simple_dict_value
# 3. The value slot is the only non-key that is required
# See also: https://github.com/linkml/linkml/issues/1250
range_simple_dict_value_slot = None
if len(non_id_slots) == 1:
range_simple_dict_value_slot = non_id_slots[0]
elif len(non_id_slots) > 1:
candidate_non_id_slots = []
for non_id_slot in non_id_slots:
if isinstance(non_id_slot.annotations, dict):
is_simple_dict_value = non_id_slot.annotations.get("simple_dict_value", False)
else:
is_simple_dict_value = getattr(non_id_slot.annotations, "simple_dict_value", False)
if is_simple_dict_value:
candidate_non_id_slots.append(non_id_slot)
if len(candidate_non_id_slots) == 1:
range_simple_dict_value_slot = candidate_non_id_slots[0]
else:
candidate_non_id_slots = []
for non_id_slot in non_id_slots:
if non_id_slot.required:
candidate_non_id_slots.append(non_id_slot)
if len(candidate_non_id_slots) == 1:
range_simple_dict_value_slot = candidate_non_id_slots[0]

return range_class_id_slot, range_simple_dict_value_slot, non_id_required_slots

Expand Down
17 changes: 5 additions & 12 deletions linkml/generators/owlgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ def transform_class_expression(
elif range in sv.all_enums():
x = self._enum_uri(range)
elif range in sv.all_classes():
x = self._enum_uri(range)
x = self._class_uri(range)
else:
raise ValueError(f"Unknown range {range}")
# x = self._class_uri(range)
Expand Down Expand Up @@ -531,6 +531,9 @@ def transform_class_expression(
graph.add((max_card_expr, RDF.type, OWL.Restriction))
graph.add((max_card_expr, OWL.maxCardinality, Literal(1)))
graph.add((max_card_expr, OWL.onProperty, slot_uri))
if slot.has_member:
has_member_expr = self.transform_class_slot_expression(cls, slot.has_member, slot)
owl_exprs.append(self._some_values_from(slot_uri, has_member_expr))
return self._intersection_of(owl_exprs)

def slot_node_owltypes(self, slot: Union[SlotDefinition, AnonymousSlotExpression]) -> Set[URIRef]:
Expand Down Expand Up @@ -572,18 +575,8 @@ def transform_class_slot_expression(

owl_exprs = []

if slot.has_member:
owl_exprs.append(
self._union_of(
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in slot.has_member]
)
)
if slot.all_members:
owl_exprs.append(
self._union_of(
[self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in slot.all_members]
)
)
owl_exprs.append(self.transform_class_slot_expression(cls, slot.all_members, main_slot, owl_types))

if slot.any_of:
owl_exprs.append(
Expand Down
25 changes: 25 additions & 0 deletions tests/test_compliance/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ def _make_schema(
stream.write(f"# {test.__name__}\n\n")
stream.write(f"{desc}\n\n")
stream.write("## Elements Tested\n\n")
if not core_elements:
raise AssertionError(f"No core elements defined for for {schema_name}")
for el in core_elements:
stream.write(f"* [{el}](https://w3id.org/linkml/{el})\n")
# Write README for this schema combo
Expand Down Expand Up @@ -543,6 +545,8 @@ def validated_schema(test: Callable, local_name: str, framework: str, **kwargs)
"""
test_name = test.__name__
if test_name not in feature_dict:
if not test.__doc__:
raise AssertionError(f"Test {test_name} has no docstring")
feature_dict[test_name] = Feature(
name=test_name,
description=test.__doc__,
Expand Down Expand Up @@ -858,6 +862,26 @@ def _convert_data_to_rdf(schema: dict, instance: dict, target_class: str, ttl_pa

@lru_cache
def robot_is_on_path():
    """
    Report whether a ``robot`` executable can be located on the PATH.

    When robot cannot be found, OWL checks will be skipped. The lookup is
    memoized, so the PATH is only searched on the first call.

    Making robot visible when running from PyCharm: first find the
    interpreter of the virtualenv and the robot executable:

    .. code-block:: bash

        poetry run which python
        which robot

    Then link robot into the virtualenv's ``bin`` directory, substituting
    your own robot path and virtualenv path:

    .. code-block:: bash

        ln -s ~/repos/robot/bin/robot ~/Library/Caches/pypoetry/virtualenvs/linkml-lavaHNw6-py3.9/bin/robot

    :return: True if ``shutil.which`` resolves ``robot``, otherwise False
    """
    robot_executable = shutil.which("robot")
    return robot_executable is not None


Expand Down Expand Up @@ -895,6 +919,7 @@ def robot_check_coherency(data_path: str, ontology_path: str, output_path: str =
if output_path:
cmd.extend(["-o", output_path])
try:
# print(f"Running robot: {' '.join(cmd)}")
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
if result.stderr:
logging.warning(result.stderr)
Expand Down
82 changes: 75 additions & 7 deletions tests/test_compliance/test_boolean_slot_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,13 +368,10 @@ def test_slot_none_of(framework, data_name, value, is_valid):
],
)
@pytest.mark.parametrize("framework", CORE_FRAMEWORKS)
@pytest.mark.skip("requires metamodel changes")
def test_cardinality_in_exactly_one_of(framework, data_name, instance, is_valid):
"""
Tests intersection of cardinality and exactly_one_of.

TODO: unskip this test when metamodel allows attributes or slot usage on anon expressions.

:param framework:
:param data_name:
:param instance:
Expand All @@ -383,16 +380,20 @@ def test_cardinality_in_exactly_one_of(framework, data_name, instance, is_valid)
"""
classes = {
CLASS_C: {
"attributes": {
SLOT_S1: {},
SLOT_S2: {},
},
"exactly_one_of": [
{
"attributes": {
"slot_conditions": {
SLOT_S1: {
"required": True,
},
},
},
{
"attributes": {
"slot_conditions": {
SLOT_S2: {
"required": True,
},
Expand All @@ -408,8 +409,9 @@ def test_cardinality_in_exactly_one_of(framework, data_name, instance, is_valid)
classes=classes,
core_elements=["exactly_one_of", "minimum_value", "maximum_value"],
)
expected_behavior = ValidationBehavior.IMPLEMENTS
if framework != JSON_SCHEMA:
expected_behavior = ValidationBehavior.INCOMPLETE
if framework == JSON_SCHEMA:
# TODO: this should be possible in json schema
expected_behavior = ValidationBehavior.INCOMPLETE
check_data(
schema,
Expand Down Expand Up @@ -1811,3 +1813,69 @@ def test_value_presence_in_rules(framework, multivalued, data_name, instance, is
expected_behavior=expected_behavior,
description=f"validity {is_valid} check for value {instance}",
)


@pytest.mark.parametrize(
    "name,quantification,expression,instance,is_valid",
    [
        ("all_members_min_10", "all_members", {"range": "integer", "minimum_value": 10}, [10, 11, 12], True),
        ("all_members_min_10", "all_members", {"range": "integer", "minimum_value": 10}, [9, 10], False),
        ("all_members_min_10", "all_members", {"range": "integer", "minimum_value": 10}, [9], False),
        ("all_members_min_10", "all_members", {"range": "integer", "minimum_value": 10}, [10], True),
        ("all_members_min_10", "all_members", {"range": "integer", "minimum_value": 10}, [], True),
        ("has_member_min_10", "has_member", {"range": "integer", "minimum_value": 10}, [10, 11, 12], True),
        ("has_member_min_10", "has_member", {"range": "integer", "minimum_value": 10}, [9, 10], True),
        ("has_member_min_10", "has_member", {"range": "integer", "minimum_value": 10}, [8, 9], False),
        ("has_member_min_10", "has_member", {"range": "integer", "minimum_value": 10}, [9], False),
        ("has_member_min_10", "has_member", {"range": "integer", "minimum_value": 10}, [10], True),
        ("has_member_min_10", "has_member", {"range": "integer", "minimum_value": 10}, [], False),
    ],
)
@pytest.mark.parametrize("framework", CORE_FRAMEWORKS)
def test_membership(framework, name, quantification, expression, instance, is_valid):
    """
    Tests behavior of membership.

    :param framework:
    :param name:
    :param quantification:
    :param expression:
    :param instance:
    :param is_valid:
    :return:
    """
    # NOTE: the docstring above is passed on to validated_schema via the test
    # function object, so its text is deliberately left untouched.

    # A multivalued integer slot carrying the quantified member expression
    # (either ``all_members`` or ``has_member``) under test.
    slot_spec = {"range": "integer", "multivalued": True}
    slot_spec[quantification] = expression
    schema = validated_schema(
        test_membership,
        name,
        framework,
        classes={CLASS_C: {"attributes": {SLOT_S1: slot_spec}}},
        core_elements=[quantification],
    )

    # Rejection of invalid data is only expected where the quantifier is enforced:
    # - frameworks other than JSON Schema and OWL are treated as incomplete
    # - OWL is open world, existential checks succeed without closure axioms
    rejection_not_guaranteed = framework not in (JSON_SCHEMA, OWL) or (
        framework == OWL and quantification == "has_member"
    )
    if not is_valid and rejection_not_guaranteed:
        expected_behavior = ValidationBehavior.INCOMPLETE
    else:
        expected_behavior = ValidationBehavior.IMPLEMENTS

    # The data name is derived from the member values, e.g. [9, 10] -> "9_10".
    data_name = "_".join(map(str, instance))
    check_data(
        schema,
        data_name,
        framework,
        {SLOT_S1: instance},
        is_valid,
        target_class=CLASS_C,
        expected_behavior=expected_behavior,
        description=f"validity {is_valid} check for value {instance}",
    )
4 changes: 2 additions & 2 deletions tests/test_compliance/test_core_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ def test_non_standard_names(framework, class_name, safe_class_name, slot_name, s
},
}
name = ensafeify(f"ClassNameEQ_{class_name}__SlotNameEQ_{slot_name}__TypeNameEQ_{type_name}")
schema = validated_schema(test_cardinality, name, framework, classes=classes, types=types, core_elements=[])
schema = validated_schema(test_cardinality, name, framework, classes=classes, types=types, core_elements=["name"])
expected_behavior = ValidationBehavior.IMPLEMENTS
instance = {
safe_slot_name: "x",
Expand Down Expand Up @@ -777,7 +777,7 @@ def test_non_standard_num_names(framework, enum_name, pv_name):
framework,
classes=classes,
enums=enums,
core_elements=[],
core_elements=["name"],
)
expected_behavior = ValidationBehavior.IMPLEMENTS
instance = {
Expand Down
Loading

0 comments on commit 729ae82

Please sign in to comment.