Skip to content

Commit

Permalink
Pick the deepest error among the most relevant ones in each separate …
Browse files Browse the repository at this point in the history
…subschema

Improves `best_match` in the presence of `anyOf` / `oneOf`. Calculate the most relevant error in each separate subschema and choose the deepest one.

In particular for `anyOf` / `oneOf` keywords with the only subschema, the best error is resolved as if the subschema was not enclosed by these keywords.

Revert main code changes in commit b20234e preserving the tests.
  • Loading branch information
ilia1243 committed Sep 15, 2024
1 parent 18cd162 commit 5a9f2ac
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 9 deletions.
16 changes: 13 additions & 3 deletions jsonschema/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,6 @@ def relevance(error):
validator = error.validator
return ( # prefer errors which are ...
-len(error.path), # 'deeper' and thereby more specific
error.path, # earlier (for sibling errors)
validator not in weak, # for a non-low-priority keyword
validator in strong, # for a high priority keyword
not error._matches_type(), # at least match the instance's type
Expand Down Expand Up @@ -427,7 +426,8 @@ def best_match(errors, key=relevance):
since they indicate "more" is wrong with the instance.
If the resulting match is either :kw:`oneOf` or :kw:`anyOf`, the
*opposite* assumption is made -- i.e. the deepest error is picked,
*opposite* assumption is made -- i.e. the deepest error is picked
among the most relevant errors in each separate subschema,
since these keywords only need to match once, and any other errors
may not be relevant.
Expand Down Expand Up @@ -464,9 +464,19 @@ def best_match(errors, key=relevance):
best = max(itertools.chain([best], errors), key=key)

while best.context:
# Calculate the most relevant error in each separate subschema
best_in_subschemas = []
for error in best.context:
index = error.schema_path[0]
if index == len(best_in_subschemas):
best_in_subschemas.append(error)
else:
prev = best_in_subschemas[index]
best_in_subschemas[index] = max(prev, error, key=key)

# Calculate the minimum via nsmallest, because we don't recurse if
# all nested errors have the same relevance (i.e. if min == max == all)
smallest = heapq.nsmallest(2, best.context, key=key)
smallest = heapq.nsmallest(2, best_in_subschemas, key=key)
if len(smallest) == 2 and key(smallest[0]) == key(smallest[1]): # noqa: PLR2004
return best
best = smallest[0]
Expand Down
2 changes: 1 addition & 1 deletion jsonschema/tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_RefResolver(self):
self.assertEqual(w.filename, __file__)

with self.assertWarnsRegex(DeprecationWarning, message) as w:
from jsonschema.validators import RefResolver # noqa: F401, F811
from jsonschema.validators import RefResolver # noqa: F401
self.assertEqual(w.filename, __file__)

def test_RefResolutionError(self):
Expand Down
198 changes: 194 additions & 4 deletions jsonschema/tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,53 @@ def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self):
best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
self.assertEqual(best.validator_value, "array")

def test_anyOf_traversal_for_single_shallower_errors_better_match(self):
"""
Traverse the context of an anyOf error with the only subschema,
and select the most relevant error.
"""

schema = {
"anyOf": [
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_anyOf_traversal_least_relevant_among_most_relevant_errors(self):
"""
Traverse the context of an anyOf error, and select
the *least* relevant error among the most relevant errors
in each separate subschema.
I.e. since only one of the schemas must match, we look for the most
specific one, and choose the most relevant error produced by it.
"""

schema = {
"anyOf": [
{"type": "string"},
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_no_anyOf_traversal_for_equally_relevant_errors(self):
"""
We don't traverse into an anyOf (as above) if all of its context errors
Expand All @@ -86,6 +133,53 @@ def test_no_anyOf_traversal_for_equally_relevant_errors(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_no_anyOf_traversal_for_two_properties_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"anyOf": [
{"properties": {"foo": {"type": "string"}}},
{"properties": {"bar": {"type": "string"}}},
],
}
best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_no_anyOf_traversal_for_two_items_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"anyOf": [
{
"type": "array",
"items": {
"$ref": "#/$defs/int_array",
},
},
{
"$ref": "#/$defs/int_array",
},
],
"$defs": {
"int_array": {
"type": "array",
"items": {
"type": "integer",
},
},
},
}
best = self.best_match_of(instance=["not an int", 0], schema=schema)
self.assertEqual(best.validator, "anyOf")
best = self.best_match_of(instance=[0, "not an int"], schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_anyOf_traversal_for_single_equally_relevant_error(self):
"""
We *do* traverse anyOf with a single nested error, even though it is
Expand All @@ -100,7 +194,7 @@ def test_anyOf_traversal_for_single_equally_relevant_error(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "type")

def test_anyOf_traversal_for_single_sibling_errors(self):
def test_anyOf_traversal_for_single_sibling_errors_choose_first(self):
"""
We *do* traverse anyOf with a single subschema that fails multiple
times (e.g. on multiple items).
Expand All @@ -111,8 +205,9 @@ def test_anyOf_traversal_for_single_sibling_errors(self):
{"items": {"const": 37}},
],
}
best = self.best_match_of(instance=[12, 12], schema=schema)
best = self.best_match_of(instance=[12, 13], schema=schema)
self.assertEqual(best.validator, "const")
self.assertEqual(best.instance, 12)

def test_anyOf_traversal_for_non_type_matching_sibling_errors(self):
"""
Expand Down Expand Up @@ -152,6 +247,53 @@ def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self):
best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
self.assertEqual(best.validator_value, "array")

def test_oneOf_traversal_for_single_shallower_errors_better_match(self):
"""
Traverse the context of an oneOf error with the only subschema,
and select the most relevant error.
"""

schema = {
"oneOf": [
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_oneOf_traversal_least_relevant_among_most_relevant_errors(self):
"""
Traverse the context of an oneOf error, and select
the *least* relevant error among the most relevant errors
in each separate subschema.
I.e. since only one of the schemas must match, we look for the most
specific one, and choose the most relevant error produced by it.
"""

schema = {
"oneOf": [
{"type": "string"},
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_no_oneOf_traversal_for_equally_relevant_errors(self):
"""
We don't traverse into an oneOf (as above) if all of its context errors
Expand All @@ -168,6 +310,53 @@ def test_no_oneOf_traversal_for_equally_relevant_errors(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_no_oneOf_traversal_for_two_properties_sibling_errors(self):
"""
We don't traverse into an oneOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"oneOf": [
{"properties": {"foo": {"type": "string"}}},
{"properties": {"bar": {"type": "string"}}},
],
}
best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_no_oneOf_traversal_for_two_items_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"oneOf": [
{
"type": "array",
"items": {
"$ref": "#/$defs/int_array",
},
},
{
"$ref": "#/$defs/int_array",
},
],
"$defs": {
"int_array": {
"type": "array",
"items": {
"type": "integer",
},
},
},
}
best = self.best_match_of(instance=["not an int", 0], schema=schema)
self.assertEqual(best.validator, "oneOf")
best = self.best_match_of(instance=[0, "not an int"], schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_oneOf_traversal_for_single_equally_relevant_error(self):
"""
We *do* traverse oneOf with a single nested error, even though it is
Expand All @@ -182,7 +371,7 @@ def test_oneOf_traversal_for_single_equally_relevant_error(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "type")

def test_oneOf_traversal_for_single_sibling_errors(self):
def test_oneOf_traversal_for_single_sibling_errors_choose_first(self):
"""
We *do* traverse oneOf with a single subschema that fails multiple
times (e.g. on multiple items).
Expand All @@ -193,8 +382,9 @@ def test_oneOf_traversal_for_single_sibling_errors(self):
{"items": {"const": 37}},
],
}
best = self.best_match_of(instance=[12, 12], schema=schema)
best = self.best_match_of(instance=[12, 13], schema=schema)
self.assertEqual(best.validator, "const")
self.assertEqual(best.instance, 12)

def test_oneOf_traversal_for_non_type_matching_sibling_errors(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion jsonschema/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ def is_valid(self, instance, _schema=None):
DeprecationWarning,
stacklevel=2,
)
self = self.evolve(schema=_schema)
self = self.evolve(schema=_schema) # noqa: PLW0642

error = next(self.iter_errors(instance), None)
return error is None
Expand Down

0 comments on commit 5a9f2ac

Please sign in to comment.