Skip to content

Commit

Permalink
Merge pull request #523 from seperman/dev
Browse files Browse the repository at this point in the history
8.3.0 Better summary for repr
  • Loading branch information
seperman authored Mar 6, 2025
2 parents ed25202 + 75c0cd9 commit 65714eb
Show file tree
Hide file tree
Showing 26 changed files with 15,288 additions and 156 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,5 @@ temp*

# env file
.env

pyrightconfig.json
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# DeepDiff Change log

- v8-3-0
- Fixed some static typing issues
- Added the summarize module for better repr of nested values

- v8-2-0
- Small optimizations so we don't load functions that are not needed
- Updated the minimum version of Orderly-set
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ Tested on Python 3.8+ and PyPy3.

Please check the [ChangeLog](CHANGELOG.md) file for the detailed information.

DeepDiff 8-3-0

- Fixed some static typing issues
- Added the summarize module for better repr of nested values

DeepDiff 8-2-0

- Small optimizations so we don't load functions that are not needed
Expand Down
6 changes: 6 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def nested_a_result():
return json.load(the_file)


@pytest.fixture(scope='function')
def compounds():
    """Load and return the 'compounds.json' fixture as parsed JSON.

    Function-scoped: the file is re-read for every test that requests it.
    """
    fixture_path = os.path.join(FIXTURES_DIR, 'compounds.json')
    with open(fixture_path) as fixture_file:
        return json.load(fixture_file)


@pytest.fixture(scope='class')
def nested_a_affected_paths():
return {
Expand Down
10 changes: 5 additions & 5 deletions deepdiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
logging.basicConfig(format='%(asctime)s %(levelname)8s %(message)s')


from .diff import DeepDiff
from .search import DeepSearch, grep
from .deephash import DeepHash
from .delta import Delta
from .path import extract, parse_path
from .diff import DeepDiff as DeepDiff
from .search import DeepSearch as DeepSearch, grep as grep
from .deephash import DeepHash as DeepHash
from .delta import Delta as Delta
from .path import extract as extract, parse_path as parse_path
5 changes: 1 addition & 4 deletions deepdiff/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,7 @@ def diff(
sys.stdout.buffer.write(delta.dumps())
else:
try:
if orjson:
print(diff.to_json(option=orjson.OPT_INDENT_2))
else:
print(diff.to_json(indent=2))
print(diff.to_json(indent=2))
except Exception:
pprint(diff, indent=2)

Expand Down
35 changes: 19 additions & 16 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
convert_item_or_items_into_set_else_none, get_doc,
convert_item_or_items_into_compiled_regexes_else_none,
get_id, type_is_subclass_of_type_group, type_in_type_group,
number_to_string, datetime_normalize, KEY_TO_VAL_STR, short_repr,
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel)

from deepdiff.summarize import summarize
from deepdiff.base import Base

try:
Expand Down Expand Up @@ -105,8 +107,8 @@ def prepare_string_for_hashing(
break
except UnicodeDecodeError as er:
err = er
if not encoded:
obj_decoded = obj.decode('utf-8', errors='ignore')
if not encoded and err is not None:
obj_decoded = obj.decode('utf-8', errors='ignore') # type: ignore
start = max(err.start - 20, 0)
start_prefix = ''
if start > 0:
Expand Down Expand Up @@ -315,9 +317,10 @@ def __repr__(self):
"""
Hide the counts since it will be confusing to see them when they are hidden everywhere else.
"""
return short_repr(self._get_objects_to_hashes_dict(extract_index=0), max_length=500)
return summarize(self._get_objects_to_hashes_dict(extract_index=0), max_length=500)

__str__ = __repr__
def __str__(self):
    """Full, untruncated string of the objects-to-hashes mapping.

    Unlike __repr__ (which summarizes), this shows the complete dict.
    """
    obj_to_hash = self._get_objects_to_hashes_dict(extract_index=0)
    return "{}".format(obj_to_hash)

def __bool__(self):
    """Truthy once any hashes have been computed."""
    # `not not x` is equivalent to bool(x) for any object.
    return not not self.hashes
Expand Down Expand Up @@ -376,7 +379,7 @@ def _skip_this(self, obj, parent):
skip = False
break
elif self.exclude_regex_paths and any(
[exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]):
[exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): # type: ignore
skip = True
elif self.exclude_types_tuple and isinstance(obj, self.exclude_types_tuple):
skip = True
Expand Down Expand Up @@ -537,7 +540,7 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
elif isinstance(obj, datetime.date):
result = self._prep_date(obj)

elif isinstance(obj, numbers):
elif isinstance(obj, numbers): # type: ignore
result = self._prep_number(obj)

elif isinstance(obj, MutableMapping):
Expand All @@ -546,17 +549,17 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
elif isinstance(obj, tuple):
result, counts = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids)

elif (pandas and isinstance(obj, pandas.DataFrame)):
def gen():
yield ('dtype', obj.dtypes)
yield ('index', obj.index)
yield from obj.items() # which contains (column name, series tuples)
elif (pandas and isinstance(obj, pandas.DataFrame)): # type: ignore
def gen(): # type: ignore
yield ('dtype', obj.dtypes) # type: ignore
yield ('index', obj.index) # type: ignore
yield from obj.items() # type: ignore # which contains (column name, series tuples)
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
elif (polars and isinstance(obj, polars.DataFrame)):
elif (polars and isinstance(obj, polars.DataFrame)): # type: ignore
def gen():
yield from obj.columns
yield from list(obj.schema.items())
yield from obj.rows()
yield from obj.columns # type: ignore
yield from list(obj.schema.items()) # type: ignore
yield from obj.rows() # type: ignore
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)

elif isinstance(obj, Iterable):
Expand Down
52 changes: 26 additions & 26 deletions deepdiff/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from deepdiff import DeepDiff
from deepdiff.serialization import pickle_load, pickle_dump
from deepdiff.helper import (
strings, short_repr, numbers,
strings, numbers,
np_ndarray, np_array_factory, numpy_dtypes, get_doc,
not_found, numpy_dtype_string_to_type, dict_,
Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
Expand All @@ -20,7 +20,7 @@
GET, GETATTR, parse_path, stringify_path,
)
from deepdiff.anyset import AnySet

from deepdiff.summarize import summarize

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -165,13 +165,13 @@ def _deserializer(obj, safe_to_import=None):
self.reset()

def __repr__(self):
return "<Delta: {}>".format(short_repr(self.diff, max_length=100))
return "<Delta: {}>".format(summarize(self.diff, max_length=100))

def reset(self):
    # Discard any paths queued for type conversion by a previous
    # delta application, restoring a clean post-processing state.
    self.post_process_paths_to_convert = dict_()

def __add__(self, other):
if isinstance(other, numbers) and self._numpy_paths:
if isinstance(other, numbers) and self._numpy_paths: # type: ignore
raise DeltaNumpyOperatorOverrideError(DELTA_NUMPY_OPERATOR_OVERRIDE_MSG)
if self.mutate:
self.root = other
Expand Down Expand Up @@ -240,7 +240,7 @@ def _get_elem_and_compare_to_old_value(
if action == GET:
current_old_value = obj[elem]
elif action == GETATTR:
current_old_value = getattr(obj, elem)
current_old_value = getattr(obj, elem) # type: ignore
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_GET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, TypeError) as e:
Expand All @@ -261,7 +261,7 @@ def _get_elem_and_compare_to_old_value(
else:
obj[elem] = _forced_old_value
elif action == GETATTR:
setattr(obj, elem, _forced_old_value)
setattr(obj, elem, _forced_old_value) # type: ignore
return _forced_old_value
current_old_value = not_found
if isinstance(path_for_err_reporting, (list, tuple)):
Expand Down Expand Up @@ -289,7 +289,7 @@ def _simple_set_elem_value(self, obj, path_for_err_reporting, elem=None, value=N
else:
self._raise_or_log(ELEM_NOT_FOUND_TO_ADD_MSG.format(elem, path_for_err_reporting))
elif action == GETATTR:
setattr(obj, elem, value)
setattr(obj, elem, value) # type: ignore
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_SIMPLE_SET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, TypeError) as e:
Expand Down Expand Up @@ -457,8 +457,8 @@ def _do_item_added(self, items, sort=True, insert=False):
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198

# Insert is only true for iterables, make sure it is a valid index.
if(insert and elem < len(obj)):
obj.insert(elem, None)
if(insert and elem < len(obj)): # type: ignore
obj.insert(elem, None) # type: ignore

self._set_new_value(parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action, new_value)
Expand All @@ -482,7 +482,7 @@ def _do_post_process(self):
def _do_pre_process(self):
if self._numpy_paths and ('iterable_item_added' in self.diff or 'iterable_item_removed' in self.diff):
preprocess_paths = dict_()
for path, type_ in self._numpy_paths.items():
for path, type_ in self._numpy_paths.items(): # type: ignore
preprocess_paths[path] = {'old_type': np_ndarray, 'new_type': list}
try:
type_ = numpy_dtype_string_to_type(type_)
Expand All @@ -507,7 +507,7 @@ def _get_elements_and_details(self, path):
parent_to_obj_elem, parent_to_obj_action = elements[-2]
obj = self._get_elem_and_compare_to_old_value(
obj=parent, path_for_err_reporting=path, expected_old_value=None,
elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element)
elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element) # type: ignore
else:
# parent = self
# obj = self.root
Expand All @@ -516,7 +516,7 @@ def _get_elements_and_details(self, path):
parent = parent_to_obj_elem = parent_to_obj_action = None
obj = self
# obj = self.get_nested_obj(obj=self, elements=elements[:-1])
elem, action = elements[-1]
elem, action = elements[-1] # type: ignore
except Exception as e:
self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path, e))
return None
Expand Down Expand Up @@ -550,7 +550,7 @@ def _do_values_or_type_changed(self, changes, is_type_change=False, verify_chang
else:
new_value = new_type(current_old_value)
except Exception as e:
self._raise_or_log(TYPE_CHANGE_FAIL_MSG.format(obj[elem], value.get('new_type', 'unknown'), e))
self._raise_or_log(TYPE_CHANGE_FAIL_MSG.format(obj[elem], value.get('new_type', 'unknown'), e)) # type: ignore
continue
else:
new_value = value['new_value']
Expand Down Expand Up @@ -582,7 +582,7 @@ def _do_item_removed(self, items):
current_old_value = not_found
try:
if action == GET:
current_old_value = obj[elem]
current_old_value = obj[elem] # type: ignore
elif action == GETATTR:
current_old_value = getattr(obj, elem)
look_for_expected_old_value = current_old_value != expected_old_value
Expand Down Expand Up @@ -644,15 +644,15 @@ def _do_iterable_opcodes(self):
transformed.extend(opcode.new_values)
elif opcode.tag == 'equal':
# Items are the same in both lists, so we add them to the result
transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index])
transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index]) # type: ignore
if is_obj_tuple:
obj = tuple(obj)
obj = tuple(obj) # type: ignore
# Making sure that the object is re-instated inside the parent especially if it was immutable
# and we had to turn it into a mutable one. In such cases the object has a new id.
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=obj, action=parent_to_obj_action)
else:
obj[:] = transformed
obj[:] = transformed # type: ignore



Expand Down Expand Up @@ -745,7 +745,7 @@ def _do_ignore_order(self):
fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
paths = SetOrdered(fixed_indexes.keys()) | SetOrdered(remove_indexes.keys())
for path in paths:
for path in paths: # type: ignore
# In the case of ignore_order reports, we are pointing to the container object.
# Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
elem_and_details = self._get_elements_and_details("{}[0]".format(path))
Expand Down Expand Up @@ -1021,7 +1021,7 @@ def _from_flat_dicts(flat_dict_list):
result['_iterable_opcodes'][path_str] = []
result['_iterable_opcodes'][path_str].append(
Opcode(
tag=FLAT_DATA_ACTION_TO_OPCODE_TAG[action],
tag=FLAT_DATA_ACTION_TO_OPCODE_TAG[action], # type: ignore
t1_from_index=flat_dict.get('t1_from_index'),
t1_to_index=flat_dict.get('t1_to_index'),
t2_from_index=flat_dict.get('t2_from_index'),
Expand Down Expand Up @@ -1091,7 +1091,7 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True)
"""
return [
i._asdict() for i in self.to_flat_rows(include_action_in_path=False, report_type_changes=True)
]
] # type: ignore

def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -> List[FlatDeltaRow]:
"""
Expand Down Expand Up @@ -1141,13 +1141,13 @@ def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -
for index, value in index_to_value.items():
path2 = path.copy()
if include_action_in_path:
path2.append((index, 'GET'))
path2.append((index, 'GET')) # type: ignore
else:
path2.append(index)
if report_type_changes:
row = FlatDeltaRow(path=path2, value=value, action=new_action, type=type(value))
row = FlatDeltaRow(path=path2, value=value, action=new_action, type=type(value)) # type: ignore
else:
row = FlatDeltaRow(path=path2, value=value, action=new_action)
row = FlatDeltaRow(path=path2, value=value, action=new_action) # type: ignore
result.append(row)
elif action in {'set_item_added', 'set_item_removed'}:
for path, values in info.items():
Expand All @@ -1167,15 +1167,15 @@ def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -
value = value[new_key]
elif isinstance(value, (list, tuple)) and len(value) == 1:
value = value[0]
path.append(0)
path.append(0) # type: ignore
action = 'iterable_item_added'
elif isinstance(value, set) and len(value) == 1:
value = value.pop()
action = 'set_item_added'
if report_type_changes:
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value))
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value)) # type: ignore
else:
row = FlatDeltaRow(path=path, value=value, action=action)
row = FlatDeltaRow(path=path, value=value, action=action) # type: ignore
result.append(row)
elif action in {
'dictionary_item_removed', 'iterable_item_added',
Expand Down
6 changes: 3 additions & 3 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from inspect import getmembers
from itertools import zip_longest
from functools import lru_cache
from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent,
from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent,
IndexedHash, unprocessed, add_to_frozen_set, basic_types,
convert_item_or_items_into_set_else_none, get_type,
convert_item_or_items_into_compiled_regexes_else_none,
Expand Down Expand Up @@ -131,7 +131,7 @@ def __init__(self,
encodings: Optional[List[str]]=None,
exclude_obj_callback: Optional[Callable]=None,
exclude_obj_callback_strict: Optional[Callable]=None,
exclude_paths: Union[str, List[str]]=None,
exclude_paths: Union[str, List[str], None]=None,
exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None,
exclude_types: Optional[List[Any]]=None,
get_deep_distance: bool=False,
Expand All @@ -151,7 +151,7 @@ def __init__(self,
ignore_type_subclasses: bool=False,
include_obj_callback: Optional[Callable]=None,
include_obj_callback_strict: Optional[Callable]=None,
include_paths: Union[str, List[str]]=None,
include_paths: Union[str, List[str], None]=None,
iterable_compare_func: Optional[Callable]=None,
log_frequency_in_sec: int=0,
math_epsilon: Optional[float]=None,
Expand Down
Loading

0 comments on commit 65714eb

Please sign in to comment.