Pick the deepest error among the most relevant ones in each separate …

…subschema Improves `best_match` in the presence of `anyOf` / `oneOf`. Calculate the most relevant error in each separate subschema and choose the deepest one. In particular for `anyOf` / `oneOf` keywords with the only subschema, the best error is resolved as if the subschema was not enclosed by these keywords. Revert main code changes in commit b20234e preserving the tests.
python-jsonschema · Sep 15, 2024 · 5a9f2ac · 5a9f2ac
1 parent 18cd162
commit 5a9f2ac
Show file tree

Hide file tree

Showing 4 changed files with 209 additions and 9 deletions.
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py
@@ -397,7 +397,6 @@ def relevance(error):
         validator = error.validator
         return (                        # prefer errors which are ...
             -len(error.path),           # 'deeper' and thereby more specific
-            error.path,                 # earlier (for sibling errors)
             validator not in weak,      # for a non-low-priority keyword
             validator in strong,        # for a high priority keyword
             not error._matches_type(),  # at least match the instance's type
@@ -427,7 +426,8 @@ def best_match(errors, key=relevance):
     since they indicate "more" is wrong with the instance.
 
     If the resulting match is either :kw:`oneOf` or :kw:`anyOf`, the
-    *opposite* assumption is made -- i.e. the deepest error is picked,
+    *opposite* assumption is made -- i.e. the deepest error is picked
+    among the most relevant errors in each separate subschema,
     since these keywords only need to match once, and any other errors
     may not be relevant.
 
@@ -464,9 +464,19 @@ def best_match(errors, key=relevance):
     best = max(itertools.chain([best], errors), key=key)
 
     while best.context:
+        # Calculate the most relevant error in each separate subschema
+        best_in_subschemas = []
+        for error in best.context:
+            index = error.schema_path[0]
+            if index == len(best_in_subschemas):
+                best_in_subschemas.append(error)
+            else:
+                prev = best_in_subschemas[index]
+                best_in_subschemas[index] = max(prev, error, key=key)
+
         # Calculate the minimum via nsmallest, because we don't recurse if
         # all nested errors have the same relevance (i.e. if min == max == all)
-        smallest = heapq.nsmallest(2, best.context, key=key)
+        smallest = heapq.nsmallest(2, best_in_subschemas, key=key)
         if len(smallest) == 2 and key(smallest[0]) == key(smallest[1]):  # noqa: PLR2004
             return best
         best = smallest[0]

diff --git a/jsonschema/tests/test_deprecations.py b/jsonschema/tests/test_deprecations.py
@@ -183,7 +183,7 @@ def test_RefResolver(self):
         self.assertEqual(w.filename, __file__)
 
         with self.assertWarnsRegex(DeprecationWarning, message) as w:
-            from jsonschema.validators import RefResolver  # noqa: F401, F811
+            from jsonschema.validators import RefResolver  # noqa: F401
         self.assertEqual(w.filename, __file__)
 
     def test_RefResolutionError(self):

diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py
@@ -70,6 +70,53 @@ def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self):
         best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
         self.assertEqual(best.validator_value, "array")
 
+    def test_anyOf_traversal_for_single_shallower_errors_better_match(self):
+        """
+        Traverse the context of an anyOf error with the only subschema,
+        and select the most relevant error.
+        """
+
+        schema = {
+            "anyOf": [
+                {
+                    "properties": {
+                        "foo": {
+                            "minProperties": 2,
+                            "properties": {"bar": {"type": "object"}},
+                        },
+                    },
+                },
+            ],
+        }
+        best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
+        self.assertEqual(best.validator, "minProperties")
+
+    def test_anyOf_traversal_least_relevant_among_most_relevant_errors(self):
+        """
+        Traverse the context of an anyOf error, and select
+        the *least* relevant error among the most relevant errors
+        in each separate subschema.
+
+        I.e. since only one of the schemas must match, we look for the most
+        specific one, and choose the most relevant error produced by it.
+        """
+
+        schema = {
+            "anyOf": [
+                {"type": "string"},
+                {
+                    "properties": {
+                        "foo": {
+                            "minProperties": 2,
+                            "properties": {"bar": {"type": "object"}},
+                        },
+                    },
+                },
+            ],
+        }
+        best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
+        self.assertEqual(best.validator, "minProperties")
+
     def test_no_anyOf_traversal_for_equally_relevant_errors(self):
         """
         We don't traverse into an anyOf (as above) if all of its context errors
@@ -86,6 +133,53 @@ def test_no_anyOf_traversal_for_equally_relevant_errors(self):
         best = self.best_match_of(instance=[], schema=schema)
         self.assertEqual(best.validator, "anyOf")
 
+    def test_no_anyOf_traversal_for_two_properties_sibling_errors(self):
+        """
+        We don't traverse into an anyOf if all of its context errors
+        seem to be equally "wrong" against the instance.
+        """
+
+        schema = {
+            "anyOf": [
+                {"properties": {"foo": {"type": "string"}}},
+                {"properties": {"bar": {"type": "string"}}},
+            ],
+        }
+        best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
+        self.assertEqual(best.validator, "anyOf")
+
+    def test_no_anyOf_traversal_for_two_items_sibling_errors(self):
+        """
+        We don't traverse into an anyOf if all of its context errors
+        seem to be equally "wrong" against the instance.
+        """
+
+        schema = {
+            "anyOf": [
+                {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/$defs/int_array",
+                    },
+                },
+                {
+                    "$ref": "#/$defs/int_array",
+                },
+            ],
+            "$defs": {
+                "int_array": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer",
+                    },
+                },
+            },
+        }
+        best = self.best_match_of(instance=["not an int", 0], schema=schema)
+        self.assertEqual(best.validator, "anyOf")
+        best = self.best_match_of(instance=[0, "not an int"], schema=schema)
+        self.assertEqual(best.validator, "anyOf")
+
     def test_anyOf_traversal_for_single_equally_relevant_error(self):
         """
         We *do* traverse anyOf with a single nested error, even though it is
@@ -100,7 +194,7 @@ def test_anyOf_traversal_for_single_equally_relevant_error(self):
         best = self.best_match_of(instance=[], schema=schema)
         self.assertEqual(best.validator, "type")
 
-    def test_anyOf_traversal_for_single_sibling_errors(self):
+    def test_anyOf_traversal_for_single_sibling_errors_choose_first(self):
         """
         We *do* traverse anyOf with a single subschema that fails multiple
         times (e.g. on multiple items).
@@ -111,8 +205,9 @@ def test_anyOf_traversal_for_single_sibling_errors(self):
                 {"items": {"const": 37}},
             ],
         }
-        best = self.best_match_of(instance=[12, 12], schema=schema)
+        best = self.best_match_of(instance=[12, 13], schema=schema)
         self.assertEqual(best.validator, "const")
+        self.assertEqual(best.instance, 12)
 
     def test_anyOf_traversal_for_non_type_matching_sibling_errors(self):
         """
@@ -152,6 +247,53 @@ def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self):
         best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
         self.assertEqual(best.validator_value, "array")
 
+    def test_oneOf_traversal_for_single_shallower_errors_better_match(self):
+        """
+        Traverse the context of an oneOf error with the only subschema,
+        and select the most relevant error.
+        """
+
+        schema = {
+            "oneOf": [
+                {
+                    "properties": {
+                        "foo": {
+                            "minProperties": 2,
+                            "properties": {"bar": {"type": "object"}},
+                        },
+                    },
+                },
+            ],
+        }
+        best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
+        self.assertEqual(best.validator, "minProperties")
+
+    def test_oneOf_traversal_least_relevant_among_most_relevant_errors(self):
+        """
+        Traverse the context of an oneOf error, and select
+        the *least* relevant error among the most relevant errors
+        in each separate subschema.
+
+        I.e. since only one of the schemas must match, we look for the most
+        specific one, and choose the most relevant error produced by it.
+        """
+
+        schema = {
+            "oneOf": [
+                {"type": "string"},
+                {
+                    "properties": {
+                        "foo": {
+                            "minProperties": 2,
+                            "properties": {"bar": {"type": "object"}},
+                        },
+                    },
+                },
+            ],
+        }
+        best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
+        self.assertEqual(best.validator, "minProperties")
+
     def test_no_oneOf_traversal_for_equally_relevant_errors(self):
         """
         We don't traverse into an oneOf (as above) if all of its context errors
@@ -168,6 +310,53 @@ def test_no_oneOf_traversal_for_equally_relevant_errors(self):
         best = self.best_match_of(instance=[], schema=schema)
         self.assertEqual(best.validator, "oneOf")
 
+    def test_no_oneOf_traversal_for_two_properties_sibling_errors(self):
+        """
+        We don't traverse into an oneOf if all of its context errors
+        seem to be equally "wrong" against the instance.
+        """
+
+        schema = {
+            "oneOf": [
+                {"properties": {"foo": {"type": "string"}}},
+                {"properties": {"bar": {"type": "string"}}},
+            ],
+        }
+        best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
+        self.assertEqual(best.validator, "oneOf")
+
+    def test_no_oneOf_traversal_for_two_items_sibling_errors(self):
+        """
+        We don't traverse into an anyOf if all of its context errors
+        seem to be equally "wrong" against the instance.
+        """
+
+        schema = {
+            "oneOf": [
+                {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/$defs/int_array",
+                    },
+                },
+                {
+                    "$ref": "#/$defs/int_array",
+                },
+            ],
+            "$defs": {
+                "int_array": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer",
+                    },
+                },
+            },
+        }
+        best = self.best_match_of(instance=["not an int", 0], schema=schema)
+        self.assertEqual(best.validator, "oneOf")
+        best = self.best_match_of(instance=[0, "not an int"], schema=schema)
+        self.assertEqual(best.validator, "oneOf")
+
     def test_oneOf_traversal_for_single_equally_relevant_error(self):
         """
         We *do* traverse oneOf with a single nested error, even though it is
@@ -182,7 +371,7 @@ def test_oneOf_traversal_for_single_equally_relevant_error(self):
         best = self.best_match_of(instance=[], schema=schema)
         self.assertEqual(best.validator, "type")
 
-    def test_oneOf_traversal_for_single_sibling_errors(self):
+    def test_oneOf_traversal_for_single_sibling_errors_choose_first(self):
         """
         We *do* traverse oneOf with a single subschema that fails multiple
         times (e.g. on multiple items).
@@ -193,8 +382,9 @@ def test_oneOf_traversal_for_single_sibling_errors(self):
                 {"items": {"const": 37}},
             ],
         }
-        best = self.best_match_of(instance=[12, 12], schema=schema)
+        best = self.best_match_of(instance=[12, 13], schema=schema)
         self.assertEqual(best.validator, "const")
+        self.assertEqual(best.instance, 12)
 
     def test_oneOf_traversal_for_non_type_matching_sibling_errors(self):
         """

diff --git a/jsonschema/validators.py b/jsonschema/validators.py
@@ -495,7 +495,7 @@ def is_valid(self, instance, _schema=None):
                     DeprecationWarning,
                     stacklevel=2,
                 )
-                self = self.evolve(schema=_schema)
+                self = self.evolve(schema=_schema)  # noqa: PLW0642
 
             error = next(self.iter_errors(instance), None)
             return error is None