Skip to content

Commit

Permalink
Merge pull request #523 from seperman/dev
Browse files Browse the repository at this point in the history
8.3.0 Better summary for repr
  • Loading branch information
seperman authored Mar 6, 2025
2 parents ed25202 + 75c0cd9 commit 65714eb
Show file tree
Hide file tree
Showing 26 changed files with 15,288 additions and 156 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,5 @@ temp*

# env file
.env

pyrightconfig.json
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# DeepDiff Change log

- v8-3-0
- Fixed some static typing issues
- Added the summarize module for better repr of nested values

- v8-2-0
- Small optimizations so we don't load functions that are not needed
- Updated the minimum version of Orderly-set
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ Tested on Python 3.8+ and PyPy3.

Please check the [ChangeLog](CHANGELOG.md) file for the detailed information.

DeepDiff 8-3-0

- Fixed some static typing issues
- Added the summarize module for better repr of nested values

DeepDiff 8-2-0

- Small optimizations so we don't load functions that are not needed
Expand Down
6 changes: 6 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def nested_a_result():
return json.load(the_file)


@pytest.fixture(scope='function')
def compounds():
    """Load and return the 'compounds.json' fixture as parsed JSON.

    Function-scoped: the file is re-read for every test that requests it.
    """
    fixture_path = os.path.join(FIXTURES_DIR, 'compounds.json')
    with open(fixture_path) as fixture_file:
        return json.load(fixture_file)


@pytest.fixture(scope='class')
def nested_a_affected_paths():
return {
Expand Down
10 changes: 5 additions & 5 deletions deepdiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
logging.basicConfig(format='%(asctime)s %(levelname)8s %(message)s')


from .diff import DeepDiff
from .search import DeepSearch, grep
from .deephash import DeepHash
from .delta import Delta
from .path import extract, parse_path
from .diff import DeepDiff as DeepDiff
from .search import DeepSearch as DeepSearch, grep as grep
from .deephash import DeepHash as DeepHash
from .delta import Delta as Delta
from .path import extract as extract, parse_path as parse_path
5 changes: 1 addition & 4 deletions deepdiff/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,7 @@ def diff(
sys.stdout.buffer.write(delta.dumps())
else:
try:
if orjson:
print(diff.to_json(option=orjson.OPT_INDENT_2))
else:
print(diff.to_json(indent=2))
print(diff.to_json(indent=2))
except Exception:
pprint(diff, indent=2)

Expand Down
35 changes: 19 additions & 16 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
convert_item_or_items_into_set_else_none, get_doc,
convert_item_or_items_into_compiled_regexes_else_none,
get_id, type_is_subclass_of_type_group, type_in_type_group,
number_to_string, datetime_normalize, KEY_TO_VAL_STR, short_repr,
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel)

from deepdiff.summarize import summarize
from deepdiff.base import Base

try:
Expand Down Expand Up @@ -105,8 +107,8 @@ def prepare_string_for_hashing(
break
except UnicodeDecodeError as er:
err = er
if not encoded:
obj_decoded = obj.decode('utf-8', errors='ignore')
if not encoded and err is not None:
obj_decoded = obj.decode('utf-8', errors='ignore') # type: ignore
start = max(err.start - 20, 0)
start_prefix = ''
if start > 0:
Expand Down Expand Up @@ -315,9 +317,10 @@ def __repr__(self):
"""
Hide the counts since it will be confusing to see them when they are hidden everywhere else.
"""
return short_repr(self._get_objects_to_hashes_dict(extract_index=0), max_length=500)
return summarize(self._get_objects_to_hashes_dict(extract_index=0), max_length=500)

__str__ = __repr__
def __str__(self):
    """Full, untruncated string of the objects-to-hashes mapping.

    Unlike __repr__ (which summarizes), this shows the complete dict.
    """
    obj_to_hash = self._get_objects_to_hashes_dict(extract_index=0)
    return "{}".format(obj_to_hash)

def __bool__(self):
    """Truthy once any hashes have been computed."""
    # `not not x` is equivalent to bool(x) for any object.
    return not not self.hashes
Expand Down Expand Up @@ -376,7 +379,7 @@ def _skip_this(self, obj, parent):
skip = False
break
elif self.exclude_regex_paths and any(
[exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]):
[exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): # type: ignore
skip = True
elif self.exclude_types_tuple and isinstance(obj, self.exclude_types_tuple):
skip = True
Expand Down Expand Up @@ -537,7 +540,7 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
elif isinstance(obj, datetime.date):
result = self._prep_date(obj)

elif isinstance(obj, numbers):
elif isinstance(obj, numbers): # type: ignore
result = self._prep_number(obj)

elif isinstance(obj, MutableMapping):
Expand All @@ -546,17 +549,17 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
elif isinstance(obj, tuple):
result, counts = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids)

elif (pandas and isinstance(obj, pandas.DataFrame)):
def gen():
yield ('dtype', obj.dtypes)
yield ('index', obj.index)
yield from obj.items() # which contains (column name, series tuples)
elif (pandas and isinstance(obj, pandas.DataFrame)): # type: ignore
def gen(): # type: ignore
yield ('dtype', obj.dtypes) # type: ignore
yield ('index', obj.index) # type: ignore
yield from obj.items() # type: ignore # which contains (column name, series tuples)
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
elif (polars and isinstance(obj, polars.DataFrame)):
elif (polars and isinstance(obj, polars.DataFrame)): # type: ignore
def gen():
yield from obj.columns
yield from list(obj.schema.items())
yield from obj.rows()
yield from obj.columns # type: ignore
yield from list(obj.schema.items()) # type: ignore
yield from obj.rows() # type: ignore
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)

elif isinstance(obj, Iterable):
Expand Down
52 changes: 26 additions & 26 deletions deepdiff/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from deepdiff import DeepDiff
from deepdiff.serialization import pickle_load, pickle_dump
from deepdiff.helper import (
strings, short_repr, numbers,
strings, numbers,
np_ndarray, np_array_factory, numpy_dtypes, get_doc,
not_found, numpy_dtype_string_to_type, dict_,
Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
Expand All @@ -20,7 +20,7 @@
GET, GETATTR, parse_path, stringify_path,
)
from deepdiff.anyset import AnySet

from deepdiff.summarize import summarize

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -165,13 +165,13 @@ def _deserializer(obj, safe_to_import=None):
self.reset()

def __repr__(self):
return "<Delta: {}>".format(short_repr(self.diff, max_length=100))
return "<Delta: {}>".format(summarize(self.diff, max_length=100))

def reset(self):
    # Discard any paths queued for type conversion by a previous
    # delta application, restoring a clean post-processing state.
    self.post_process_paths_to_convert = dict_()

def __add__(self, other):
if isinstance(other, numbers) and self._numpy_paths:
if isinstance(other, numbers) and self._numpy_paths: # type: ignore
raise DeltaNumpyOperatorOverrideError(DELTA_NUMPY_OPERATOR_OVERRIDE_MSG)
if self.mutate:
self.root = other
Expand Down Expand Up @@ -240,7 +240,7 @@ def _get_elem_and_compare_to_old_value(
if action == GET:
current_old_value = obj[elem]
elif action == GETATTR:
current_old_value = getattr(obj, elem)
current_old_value = getattr(obj, elem) # type: ignore
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_GET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, TypeError) as e:
Expand All @@ -261,7 +261,7 @@ def _get_elem_and_compare_to_old_value(
else:
obj[elem] = _forced_old_value
elif action == GETATTR:
setattr(obj, elem, _forced_old_value)
setattr(obj, elem, _forced_old_value) # type: ignore
return _forced_old_value
current_old_value = not_found
if isinstance(path_for_err_reporting, (list, tuple)):
Expand Down Expand Up @@ -289,7 +289,7 @@ def _simple_set_elem_value(self, obj, path_for_err_reporting, elem=None, value=N
else:
self._raise_or_log(ELEM_NOT_FOUND_TO_ADD_MSG.format(elem, path_for_err_reporting))
elif action == GETATTR:
setattr(obj, elem, value)
setattr(obj, elem, value) # type: ignore
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_SIMPLE_SET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, TypeError) as e:
Expand Down Expand Up @@ -457,8 +457,8 @@ def _do_item_added(self, items, sort=True, insert=False):
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198

# Insert is only true for iterables, make sure it is a valid index.
if(insert and elem < len(obj)):
obj.insert(elem, None)
if(insert and elem < len(obj)): # type: ignore
obj.insert(elem, None) # type: ignore

self._set_new_value(parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action, new_value)
Expand All @@ -482,7 +482,7 @@ def _do_post_process(self):
def _do_pre_process(self):
if self._numpy_paths and ('iterable_item_added' in self.diff or 'iterable_item_removed' in self.diff):
preprocess_paths = dict_()
for path, type_ in self._numpy_paths.items():
for path, type_ in self._numpy_paths.items(): # type: ignore
preprocess_paths[path] = {'old_type': np_ndarray, 'new_type': list}
try:
type_ = numpy_dtype_string_to_type(type_)
Expand All @@ -507,7 +507,7 @@ def _get_elements_and_details(self, path):
parent_to_obj_elem, parent_to_obj_action = elements[-2]
obj = self._get_elem_and_compare_to_old_value(
obj=parent, path_for_err_reporting=path, expected_old_value=None,
elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element)
elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element) # type: ignore
else:
# parent = self
# obj = self.root
Expand All @@ -516,7 +516,7 @@ def _get_elements_and_details(self, path):
parent = parent_to_obj_elem = parent_to_obj_action = None
obj = self
# obj = self.get_nested_obj(obj=self, elements=elements[:-1])
elem, action = elements[-1]
elem, action = elements[-1] # type: ignore
except Exception as e:
self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path, e))
return None
Expand Down Expand Up @@ -550,7 +550,7 @@ def _do_values_or_type_changed(self, changes, is_type_change=False, verify_chang
else:
new_value = new_type(current_old_value)
except Exception as e:
self._raise_or_log(TYPE_CHANGE_FAIL_MSG.format(obj[elem], value.get('new_type', 'unknown'), e))
self._raise_or_log(TYPE_CHANGE_FAIL_MSG.format(obj[elem], value.get('new_type', 'unknown'), e)) # type: ignore
continue
else:
new_value = value['new_value']
Expand Down Expand Up @@ -582,7 +582,7 @@ def _do_item_removed(self, items):
current_old_value = not_found
try:
if action == GET:
current_old_value = obj[elem]
current_old_value = obj[elem] # type: ignore
elif action == GETATTR:
current_old_value = getattr(obj, elem)
look_for_expected_old_value = current_old_value != expected_old_value
Expand Down Expand Up @@ -644,15 +644,15 @@ def _do_iterable_opcodes(self):
transformed.extend(opcode.new_values)
elif opcode.tag == 'equal':
# Items are the same in both lists, so we add them to the result
transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index])
transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index]) # type: ignore
if is_obj_tuple:
obj = tuple(obj)
obj = tuple(obj) # type: ignore
# Making sure that the object is re-instated inside the parent especially if it was immutable
# and we had to turn it into a mutable one. In such cases the object has a new id.
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=obj, action=parent_to_obj_action)
else:
obj[:] = transformed
obj[:] = transformed # type: ignore



Expand Down Expand Up @@ -745,7 +745,7 @@ def _do_ignore_order(self):
fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
paths = SetOrdered(fixed_indexes.keys()) | SetOrdered(remove_indexes.keys())
for path in paths:
for path in paths: # type: ignore
# In the case of ignore_order reports, we are pointing to the container object.
# Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
elem_and_details = self._get_elements_and_details("{}[0]".format(path))
Expand Down Expand Up @@ -1021,7 +1021,7 @@ def _from_flat_dicts(flat_dict_list):
result['_iterable_opcodes'][path_str] = []
result['_iterable_opcodes'][path_str].append(
Opcode(
tag=FLAT_DATA_ACTION_TO_OPCODE_TAG[action],
tag=FLAT_DATA_ACTION_TO_OPCODE_TAG[action], # type: ignore
t1_from_index=flat_dict.get('t1_from_index'),
t1_to_index=flat_dict.get('t1_to_index'),
t2_from_index=flat_dict.get('t2_from_index'),
Expand Down Expand Up @@ -1091,7 +1091,7 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True)
"""
return [
i._asdict() for i in self.to_flat_rows(include_action_in_path=False, report_type_changes=True)
]
] # type: ignore

def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -> List[FlatDeltaRow]:
"""
Expand Down Expand Up @@ -1141,13 +1141,13 @@ def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -
for index, value in index_to_value.items():
path2 = path.copy()
if include_action_in_path:
path2.append((index, 'GET'))
path2.append((index, 'GET')) # type: ignore
else:
path2.append(index)
if report_type_changes:
row = FlatDeltaRow(path=path2, value=value, action=new_action, type=type(value))
row = FlatDeltaRow(path=path2, value=value, action=new_action, type=type(value)) # type: ignore
else:
row = FlatDeltaRow(path=path2, value=value, action=new_action)
row = FlatDeltaRow(path=path2, value=value, action=new_action) # type: ignore
result.append(row)
elif action in {'set_item_added', 'set_item_removed'}:
for path, values in info.items():
Expand All @@ -1167,15 +1167,15 @@ def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -
value = value[new_key]
elif isinstance(value, (list, tuple)) and len(value) == 1:
value = value[0]
path.append(0)
path.append(0) # type: ignore
action = 'iterable_item_added'
elif isinstance(value, set) and len(value) == 1:
value = value.pop()
action = 'set_item_added'
if report_type_changes:
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value))
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value)) # type: ignore
else:
row = FlatDeltaRow(path=path, value=value, action=action)
row = FlatDeltaRow(path=path, value=value, action=action) # type: ignore
result.append(row)
elif action in {
'dictionary_item_removed', 'iterable_item_added',
Expand Down
6 changes: 3 additions & 3 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from inspect import getmembers
from itertools import zip_longest
from functools import lru_cache
from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent,
from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent,
IndexedHash, unprocessed, add_to_frozen_set, basic_types,
convert_item_or_items_into_set_else_none, get_type,
convert_item_or_items_into_compiled_regexes_else_none,
Expand Down Expand Up @@ -131,7 +131,7 @@ def __init__(self,
encodings: Optional[List[str]]=None,
exclude_obj_callback: Optional[Callable]=None,
exclude_obj_callback_strict: Optional[Callable]=None,
exclude_paths: Union[str, List[str]]=None,
exclude_paths: Union[str, List[str], None]=None,
exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None,
exclude_types: Optional[List[Any]]=None,
get_deep_distance: bool=False,
Expand All @@ -151,7 +151,7 @@ def __init__(self,
ignore_type_subclasses: bool=False,
include_obj_callback: Optional[Callable]=None,
include_obj_callback_strict: Optional[Callable]=None,
include_paths: Union[str, List[str]]=None,
include_paths: Union[str, List[str], None]=None,
iterable_compare_func: Optional[Callable]=None,
log_frequency_in_sec: int=0,
math_epsilon: Optional[float]=None,
Expand Down
Loading

0 comments on commit 65714eb

Please sign in to comment.