Skip to content

Commit

Permalink
make iris.common.metadata._hexdigest public (SciTools#4020)
Browse files Browse the repository at this point in the history
  • Loading branch information
bjlittle authored Feb 22, 2021
1 parent 55be054 commit b8ae910
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 31 deletions.
7 changes: 7 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ This document explains the changes made to Iris for this release
``iris.plot.plot(z_cube)`` will produce a z-vs-phenomenon plot, where before
it would have produced a phenomenon-vs-z plot. (:pull:`3906`)

#. `@bjlittle`_ introduced :func:`iris.common.metadata.hexdigest` to the
public API. Previously it was a private function introduced in ``v3.0.0``.
Given any object, :func:`~iris.common.metadata.hexdigest` returns a string
representation of the 64-bit non-cryptographic hash of the object using the
extremely fast `xxhash`_ hashing algorithm. (:pull:`4020`)


🐛 Bugs Fixed
=============
Expand Down Expand Up @@ -150,3 +156,4 @@ This document explains the changes made to Iris for this release
.. _PyPI: https://pypi.org/project/scitools-iris/
.. _Python 3.8: https://www.python.org/downloads/release/python-380/
.. _README.md: https://github.com/SciTools/iris#-----
.. _xxhash: http://cyan4973.github.io/xxHash/
4 changes: 2 additions & 2 deletions lib/iris/_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import re

import iris.util
from iris.common.metadata import _hexdigest as quickhash
from iris.common.metadata import hexdigest


class DimensionHeader:
Expand Down Expand Up @@ -101,7 +101,7 @@ def _summary_coord_extra(self, cube, coord):
# ..except setdefault fails if values are numpy arrays.
if key not in attributes:
attributes[key] = value
elif quickhash(attributes[key]) != quickhash(value):
elif hexdigest(attributes[key]) != hexdigest(value):
# NOTE: fast and array-safe comparison, as used in
# :mod:`iris.common.metadata`.
vary.add(key)
Expand Down
67 changes: 40 additions & 27 deletions lib/iris/common/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"CoordMetadata",
"CubeMetadata",
"DimCoordMetadata",
"hexdigest",
"metadata_manager_factory",
]

Expand All @@ -48,34 +49,46 @@
logger = get_logger(__name__, fmt="[%(cls)s.%(funcName)s]")


def _hexdigest(value):
def hexdigest(item):
"""
Return a hexadecimal string hash representation of the provided value.
Calculate a hexadecimal string hash representation of the provided item.
Calculates a 64-bit non-cryptographic hash of the provided value,
and returns the hexdigest string representation of the calculated hash.
Calculates a 64-bit non-cryptographic hash of the provided item, using
the extremely fast ``xxhash`` hashing algorithm, and returns the hexdigest
string representation of the hash.
This provides a means to compare large and/or complex objects through
simple string hexdigest comparison.
Args:
* item (object):
The item whose hexdigest is to be calculated.
Returns:
The hexadecimal string representation of the item's 64-bit hash.
"""
# Special case: deal with numpy arrays.
if ma.isMaskedArray(value):
if ma.isMaskedArray(item):
parts = (
value.shape,
xxh64_hexdigest(value.data),
xxh64_hexdigest(value.mask),
item.shape,
xxh64_hexdigest(item.data),
xxh64_hexdigest(item.mask),
)
value = str(parts)
elif isinstance(value, np.ndarray):
parts = (value.shape, xxh64_hexdigest(value))
value = str(parts)
item = str(parts)
elif isinstance(item, np.ndarray):
parts = (item.shape, xxh64_hexdigest(item))
item = str(parts)

try:
# Calculate single-shot hash to avoid allocating state on the heap
result = xxh64_hexdigest(value)
result = xxh64_hexdigest(item)
except TypeError:
# xxhash expects a bytes-like object, so try hashing the
# string representation of the provided value instead, but
# string representation of the provided item instead, but
# also fold in the object type...
parts = (type(value), value)
parts = (type(item), item)
result = xxh64_hexdigest(str(parts))

return result
Expand Down Expand Up @@ -338,8 +351,8 @@ def _combine_lenient_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Intersection of common items.
common = sleft & sright
# Items in sleft different from sright.
Expand Down Expand Up @@ -367,8 +380,8 @@ def _combine_strict_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Intersection of common items.
common = sleft & sright
# Now bring the result together.
Expand Down Expand Up @@ -426,8 +439,8 @@ def _compare_lenient_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Items in sleft different from sright.
dsleft = dict(sleft - sright)
# Items in sright different from sleft.
Expand All @@ -443,8 +456,8 @@ def _compare_strict_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}

return sleft == sright

Expand Down Expand Up @@ -512,8 +525,8 @@ def _difference_lenient_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Items in sleft different from sright.
dsleft = dict(sleft - sright)
# Items in sright different from sleft.
Expand All @@ -540,8 +553,8 @@ def _difference_strict_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Items in sleft different from sright.
dsleft = dict(sleft - sright)
# Items in sright different from sleft.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Unit tests for the :func:`iris.common.metadata._hexdigest`.
Unit tests for the :func:`iris.common.metadata.hexdigest`.
"""

Expand All @@ -18,7 +18,7 @@
import numpy as np
from xxhash import xxh64, xxh64_hexdigest

from iris.common.metadata import _hexdigest as hexdigest
from iris.common.metadata import hexdigest


class TestBytesLikeObject(tests.IrisTest):
Expand Down

0 comments on commit b8ae910

Please sign in to comment.