Merge branch 'best-match'

Julian · Julian · commit bab6e745c2ca · 2013-10-28T08:12:03.000-04:00
* best-match:
  And docs for the arguments.
  And add by_relevance docs.
  Remove __eq__, since it causes hashability issues on Py3 that I don't want to deal with at the moment.
  Update best_match docs.
  Different strategy that's a lot more robust.
  Use ._contents in create_from
  Initial stab at best_match.
  Sort errors based on their paths.
diff --git a/docs/errors.rst b/docs/errors.rst
@@ -2,7 +2,7 @@
 Handling Validation Errors
 ==========================
 
-.. currentmodule:: jsonschema
+.. currentmodule:: jsonschema.exceptions
 
 When an invalid instance is encountered, a :exc:`ValidationError` will be
 raised or returned, depending on which method or function is used.
@@ -194,7 +194,7 @@ If you want to programmatically be able to query which properties or validators
 failed when validating a given instance, you probably will want to do so using
 :class:`ErrorTree` objects.
 
-.. autoclass:: ErrorTree
+.. autoclass:: jsonschema.validators.ErrorTree
     :members:
     :special-members:
     :exclude-members: __dict__,__weakref__
@@ -301,3 +301,87 @@ To summarize, each tree contains child trees that can be accessed by indexing
 the tree to get the corresponding child tree for a given index into the
 instance. Each tree and child has a :attr:`~ErrorTree.errors` attribute, a
 dict, that maps the failed validator to the corresponding validation error.
+
+
+best_match and by_relevance
+---------------------------
+
+The :func:`best_match` function is a simple but useful function for attempting
+to guess the most relevant error in a given bunch.
+
+.. autofunction:: best_match
+
+    Try to find an error that appears to be the best match among given errors.
+
+    In general, errors that are higher up in the instance (i.e. for which
+    :attr:`ValidationError.path` is shorter) are considered better matches,
+    since they indicate "more" is wrong with the instance.
+
+.. doctest::
+
+        >>> from jsonschema import Draft4Validator
+        >>> from jsonschema.exceptions import best_match
+
+        >>> schema = {
+        ...     "type": "array",
+        ...     "minItems": 3,
+        ... }
+        >>> print(best_match(Draft4Validator(schema).iter_errors(11)).message)
+        11 is not of type 'array'
+
+    If the resulting match is either :validator:`oneOf` or :validator:`anyOf`,
+    the *opposite* assumption is made -- i.e. the deepest error is picked,
+    since these validators only need to match once, and any other errors may
+    not be relevant.
+
+    :argument iterable errors: the errors to select from. Do not provide a
+        mixture of errors from different validation attempts (i.e. from
+        different instances or schemas), since it won't produce sensical
+        output.
+    :argument callable key: the key to use when sorting errors. See
+        :func:`by_relevance` for more details (the default is to sort with the
+        defaults of that function).
+    :returns: the best matching error, or ``None`` if the iterable was empty
+
+    .. note::
+
+        This function is a heuristic. Its return value may change for a given
+        set of inputs from version to version if better heuristics are added.
+
+
+.. autofunction:: by_relevance
+
+    Create a key function that can be used to sort errors by relevance.
+
+    If you want to sort a bunch of errors entirely, you can use this function
+    to do so. Using the return value of this function as a key to e.g.
+    :func:`sorted` or :func:`max` will cause more relevant errors to be
+    considered greater than less relevant ones.
+
+.. doctest::
+
+    >>> schema = {
+    ...     "properties": {
+    ...         "name": {"type": "string"},
+    ...         "phones": {
+    ...             "properties": {
+    ...                 "home": {"type": "string"}
+    ...             },
+    ...         },
+    ...     },
+    ... }
+    >>> instance = {"name": 123, "phones": {"home": [123]}}
+    >>> errors = Draft4Validator(schema).iter_errors(instance)
+    >>> [
+    ...     e.path[-1]
+    ...     for e in sorted(errors, key=exceptions.by_relevance())
+    ... ]
+    ['home', 'name']
+
+    :argument set weak: a collection of validators to consider to be "weak". If
+        there are two errors at the same level of the instance and one is in
+        the set of weak validators, the other error will take priority. By
+        default, :validator:`anyOf` and :validator:`oneOf` are considered weak
+        validators and will be superceded by other same-level validation
+        errors.
+    :argument set strong a collection of validators to consider to be "strong".
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py
@@ -1,11 +1,15 @@
 import collections
+import itertools
 import pprint
 import textwrap
 
 from jsonschema import _utils
 from jsonschema.compat import PY3, iteritems
 
 
+WEAK_MATCHES = frozenset(["anyOf", "oneOf"])
+STRONG_MATCHES = frozenset()
+
 _unset = _utils.Unset()
 
 
@@ -24,25 +28,6 @@ def __init__(
         self.instance = instance
         self.schema = schema
 
-    @classmethod
-    def create_from(cls, other):
-        return cls(
-            message=other.message,
-            cause=other.cause,
-            context=other.context,
-            path=other.path,
-            schema_path=other.schema_path,
-            validator=other.validator,
-            validator_value=other.validator_value,
-            instance=other.instance,
-            schema=other.schema,
-        )
-
-    def _set(self, **kwargs):
-        for k, v in iteritems(kwargs):
-            if getattr(self, k) is _unset:
-                setattr(self, k, v)
-
     def __repr__(self):
         return "<%s: %r>" % (self.__class__.__name__, self.message)
 
@@ -79,6 +64,23 @@ def __unicode__(self):
     if PY3:
         __str__ = __unicode__
 
+    @classmethod
+    def create_from(cls, other):
+        return cls(**other._contents())
+
+    def _set(self, **kwargs):
+        for k, v in iteritems(kwargs):
+            if getattr(self, k) is _unset:
+                setattr(self, k, v)
+
+    def _contents(self):
+        return dict(
+            (attr, getattr(self, attr)) for attr in (
+                "message", "cause", "context", "path", "schema_path",
+                "validator", "validator_value", "instance", "schema"
+            )
+        )
+
 
 class ValidationError(_Error):
     pass
@@ -132,3 +134,22 @@ def __unicode__(self):
 
     if PY3:
         __str__ = __unicode__
+
+
+def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES):
+    def relevance(error):
+        validator = error.validator
+        return -len(error.path), validator not in weak, validator in strong
+    return relevance
+
+
+def best_match(errors, key=by_relevance()):
+    errors = iter(errors)
+    best = next(errors, None)
+    if best is None:
+        return
+    best = max(itertools.chain([best], errors), key=key)
+
+    while best.context:
+        best = min(best.context, key=key)
+    return best
diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py