From 1f50a7346749f262d03169e2eef210780510d2a8 Mon Sep 17 00:00:00 2001 From: Guillaume Bouchard Date: Fri, 1 Sep 2023 10:20:24 +0400 Subject: [PATCH] feat: stable ContentHashable for Hash{Map, Set} Close https://github.com/tweag/funflow/issues/215 `unordered-containers` structures, such as `HashMap` and `HashSet` uses `Hashable` `hash` function to maintain their internal structure. Hence the order of the items (when converting using `toList`, or `elems`, or just `traverse` or `fold`) depends on the version of `hashable` used. This results on a change of hash of theses structure when `hashable` library is updated. It is not possible to ensure that hash will never change for any type, because it relies on other instances, `Ord/Eq` for `containers` `Map` and `Set`, or even the implementation of `ContentHashable` itself. However `hashable` is known for its frequent changes when on the other hand, `Ord/Eq` instances are most of the time generated by `deriving` and are hence perfectly reproducible. This commit removes the dependency on the implicit order from `hashable` at the cost of an introduce `Ord` constraint on the keys of the `Hash{Map,Set}`. This is a tradeoff: - The user can still use `Hash{Map,Set}` based structure in their type for whatever reason they want (usually performance). - They will need to provide an `Ord` instance for `ContentHashable` - If an `Ord` instance already exists, this change will just bring stability. If the instance does not exist, user will now be aware of the problem. Note: this invalidate previous hash computed on `Hash{Map, Set}`. --- cas/hashable/src/Data/CAS/ContentHashable.hs | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/cas/hashable/src/Data/CAS/ContentHashable.hs b/cas/hashable/src/Data/CAS/ContentHashable.hs index b6f0a482..bde4b914 100644 --- a/cas/hashable/src/Data/CAS/ContentHashable.hs +++ b/cas/hashable/src/Data/CAS/ContentHashable.hs @@ -83,7 +83,7 @@ import qualified Data.HashMap.Lazy as HashMap import qualified Data.HashSet as HashSet import qualified Data.Hashable import Data.Int -import Data.List (sort) +import Data.List (sort, sortOn) import Data.List.NonEmpty (NonEmpty) import Data.Map (Map) import qualified Data.Map as Map @@ -358,24 +358,34 @@ instance >=> flip contentHashUpdate (Map.toList m) $ ctx +-- | This instance for 'HashMap.HashMap' needs an additional 'Ord' constraint +-- on the key of the map in order to guarantee that the hash does not depend on +-- the internal of the map and will stay the same regardless of internal +-- changes (e.g. version of @hashable@) instance - (Typeable k, Typeable v, ContentHashable m k, ContentHashable m v) => + (Typeable k, Typeable v, ContentHashable m k, ContentHashable m v, Ord k) => ContentHashable m (HashMap.HashMap k v) where contentHashUpdate ctx m = flip contentHashUpdate_fingerprint m - -- XXX: The order of the list is unspecified. - >=> flip contentHashUpdate (HashMap.toList m) + -- Note: the keys are sorted to ensure a robust hash + -- See https://github.com/tweag/funflow/issues/215 + >=> flip contentHashUpdate (sortOn fst $ HashMap.toList m) $ ctx +-- | This instance for 'HashSet.HashSet' needs an additional 'Ord' constraint +-- on the values in order to guarantee that the hash does not depend on +-- the internal of the map and will stay the same regardless of internal +-- changes (e.g. version of @hashable@) instance (Typeable v, ContentHashable m v) => ContentHashable m (HashSet.HashSet v) where contentHashUpdate ctx s = flip contentHashUpdate_fingerprint s - -- XXX: The order of the list is unspecified. - >=> flip contentHashUpdate (HashSet.toList s) + -- Note: the keys are sorted to ensure a robust hash + -- See https://github.com/tweag/funflow/issues/215 + >=> flip contentHashUpdate (sort $ HashSet.toList s) $ ctx instance