From 1a16db65fa2344f4bdbad28b4ecdfd815b828a87 Mon Sep 17 00:00:00 2001 From: Joshua Date: Tue, 11 Apr 2023 11:22:25 -0500 Subject: [PATCH 01/13] Move `get_hash` to `strutil` --- .../docker/util_rules/docker_build_context.py | 6 ++-- src/python/pants/backend/docker/utils.py | 34 +------------------ src/python/pants/backend/docker/utils_test.py | 21 +----------- src/python/pants/util/strutil.py | 34 ++++++++++++++++++- src/python/pants/util/strutil_test.py | 19 +++++++++++ 5 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/python/pants/backend/docker/util_rules/docker_build_context.py b/src/python/pants/backend/docker/util_rules/docker_build_context.py index 661db4900a9..fdd23ac715f 100644 --- a/src/python/pants/backend/docker/util_rules/docker_build_context.py +++ b/src/python/pants/backend/docker/util_rules/docker_build_context.py @@ -21,7 +21,7 @@ DockerBuildEnvironmentError, DockerBuildEnvironmentRequest, ) -from pants.backend.docker.utils import get_hash, suggest_renames +from pants.backend.docker.utils import suggest_renames from pants.backend.docker.value_interpolation import DockerBuildArgsInterpolationValue from pants.backend.shell.target_types import ShellSourceField from pants.core.goals.package import BuiltPackage, EnvironmentAwarePackageRequest, PackageFieldSet @@ -44,7 +44,7 @@ ) from pants.engine.unions import UnionRule from pants.util.meta import classproperty -from pants.util.strutil import softwrap +from pants.util.strutil import get_stable_hash, softwrap from pants.util.value_interpolation import InterpolationContext, InterpolationValue logger = logging.getLogger(__name__) @@ -130,7 +130,7 @@ def create( # Data from Pants. interpolation_context["pants"] = { # Present hash for all inputs that can be used for image tagging. - "hash": get_hash((build_args, build_env, snapshot.digest)).hexdigest(), + "hash": get_stable_hash((build_args, build_env, snapshot.digest)).hexdigest(), } # Base image tags values for all stages (as parsed from the Dockerfile instructions). diff --git a/src/python/pants/backend/docker/utils.py b/src/python/pants/backend/docker/utils.py index ec097916369..a5a86efd69e 100644 --- a/src/python/pants/backend/docker/utils.py +++ b/src/python/pants/backend/docker/utils.py @@ -4,14 +4,9 @@ from __future__ import annotations import difflib -import hashlib -import json import os.path -from collections import abc -from dataclasses import asdict, is_dataclass from fnmatch import fnmatch -from functools import partial -from typing import Any, Callable, Iterable, Iterator, Sequence, TypeVar +from typing import Callable, Iterable, Iterator, Sequence, TypeVar from pants.help.maybe_color import MaybeColor from pants.util.ordered_set import FrozenOrderedSet @@ -169,30 +164,3 @@ def format_rename_suggestion(src_path: str, dst_path: str, *, colors: bool) -> s rem = color.maybe_red(src_path) add = color.maybe_green(dst_path) return f"{rem} => {add}" - - -class JsonEncoder(json.JSONEncoder): - """Allow us to serialize everything, with a fallback on `str()` in case of any esoteric - types.""" - - def default(self, o): - """Return a serializable object for o.""" - if is_dataclass(o): - return asdict(o) - if isinstance(o, abc.Mapping): - return dict(o) - if isinstance(o, abc.Sequence): - return list(o) - try: - return super().default(o) - except TypeError: - return str(o) - - -json_dumps = partial( - json.dumps, indent=None, separators=(",", ":"), sort_keys=True, cls=JsonEncoder -) - - -def get_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: - return hashlib.new(name, json_dumps(value).encode("utf-8")) diff --git a/src/python/pants/backend/docker/utils_test.py b/src/python/pants/backend/docker/utils_test.py index ea853ec0d68..49e1bc62c90 100644 --- a/src/python/pants/backend/docker/utils_test.py +++ b/src/python/pants/backend/docker/utils_test.py @@ -3,12 +3,9 @@ from __future__ import annotations -from dataclasses import dataclass - import pytest -from pants.backend.docker.utils import format_rename_suggestion, get_hash, suggest_renames -from pants.util.frozendict import FrozenDict +from pants.backend.docker.utils import format_rename_suggestion, suggest_renames @pytest.mark.parametrize( @@ -171,19 +168,3 @@ def test_suggest_renames( def test_format_rename_suggestion(src: str, dst: str) -> None: actual = format_rename_suggestion(src, dst, colors=False) assert actual == f"{src} => {dst}" - - -def test_hash() -> None: - @dataclass(frozen=True) - class Data: - mapping: FrozenDict[str, str] - - data = Data( - FrozenDict( - {alpha: alpha.lower() for alpha in [chr(a) for a in range(ord("A"), ord("Z") + 1)]} - ) - ) - assert ( - get_hash(data).hexdigest() - == "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730" - ) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 923df65d80d..fbc5069239c 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -3,10 +3,14 @@ from __future__ import annotations +import abc +import dataclasses +import hashlib +import json import re import shlex import textwrap -from typing import Callable, Iterable, TypeVar +from typing import Any, Callable, Iterable, TypeVar from typing_extensions import ParamSpec @@ -335,3 +339,31 @@ def wrapper(func: Callable[P, R]) -> Callable[P, R]: return func return wrapper + + +class _JsonEncoder(json.JSONEncoder): + """Allow us to serialize everything, with a fallback on `str()` in case of any esoteric + types.""" + + def default(self, o): + """Return a serializable object for o.""" + if dataclasses.is_dataclass(o): + return dataclasses.asdict(o) + if isinstance(o, abc.Mapping): + return dict(o) + if isinstance(o, abc.Sequence): + return list(o) + try: + return super().default(o) + except TypeError: + return str(o) + + +def get_stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: + """Attempts to return a stable hash of the value stable across processes.""" + return hashlib.new( + name, + json.dumps( + value, indent=None, separators=(",", ":"), sort_keys=True, cls=_JsonEncoder + ).encode("utf-8"), + ) diff --git a/src/python/pants/util/strutil_test.py b/src/python/pants/util/strutil_test.py index bddb354bf9c..86cefe7b1b8 100644 --- a/src/python/pants/util/strutil_test.py +++ b/src/python/pants/util/strutil_test.py @@ -2,10 +2,12 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). import textwrap +from dataclasses import dataclass from textwrap import dedent import pytest +from pants.util.frozendict import FrozenDict from pants.util.strutil import ( bullet_list, comma_separated_list, @@ -14,6 +16,7 @@ ensure_text, first_paragraph, fmt_memory_size, + get_stable_hash, hard_wrap, path_safe, pluralize, @@ -397,3 +400,19 @@ def show_why_this_is_needed() -> None: with pytest.raises(AssertionError): assert show_why_this_is_needed.__doc__ == "calc 1 + 1 = 2" + + +def test_hash() -> None: + @dataclass(frozen=True) + class Data: + mapping: FrozenDict[str, str] + + data = Data( + FrozenDict( + {alpha: alpha.lower() for alpha in [chr(a) for a in range(ord("A"), ord("Z") + 1)]} + ) + ) + assert ( + get_stable_hash(data).hexdigest() + == "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730" + ) From 4a50ba0e30b10443b1af4fd678f348f1f318febb Mon Sep 17 00:00:00 2001 From: Joshua Date: Tue, 11 Apr 2023 11:25:27 -0500 Subject: [PATCH 02/13] rename --- .../pants/backend/docker/util_rules/docker_build_context.py | 4 ++-- src/python/pants/util/strutil.py | 2 +- src/python/pants/util/strutil_test.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/python/pants/backend/docker/util_rules/docker_build_context.py b/src/python/pants/backend/docker/util_rules/docker_build_context.py index fdd23ac715f..367e04d93ea 100644 --- a/src/python/pants/backend/docker/util_rules/docker_build_context.py +++ b/src/python/pants/backend/docker/util_rules/docker_build_context.py @@ -44,7 +44,7 @@ ) from pants.engine.unions import UnionRule from pants.util.meta import classproperty -from pants.util.strutil import get_stable_hash, softwrap +from pants.util.strutil import softwrap, stable_hash from pants.util.value_interpolation import InterpolationContext, InterpolationValue logger = logging.getLogger(__name__) @@ -130,7 +130,7 @@ def create( # Data from Pants. interpolation_context["pants"] = { # Present hash for all inputs that can be used for image tagging. - "hash": get_stable_hash((build_args, build_env, snapshot.digest)).hexdigest(), + "hash": stable_hash((build_args, build_env, snapshot.digest)).hexdigest(), } # Base image tags values for all stages (as parsed from the Dockerfile instructions). diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index fbc5069239c..66a3f8bc02d 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -359,7 +359,7 @@ def default(self, o): return str(o) -def get_stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: +def stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: """Attempts to return a stable hash of the value stable across processes.""" return hashlib.new( name, diff --git a/src/python/pants/util/strutil_test.py b/src/python/pants/util/strutil_test.py index 86cefe7b1b8..cfae9b74854 100644 --- a/src/python/pants/util/strutil_test.py +++ b/src/python/pants/util/strutil_test.py @@ -16,11 +16,11 @@ ensure_text, first_paragraph, fmt_memory_size, - get_stable_hash, hard_wrap, path_safe, pluralize, softwrap, + stable_hash, strip_prefix, strip_v2_chroot_path, ) @@ -402,7 +402,7 @@ def show_why_this_is_needed() -> None: assert show_why_this_is_needed.__doc__ == "calc 1 + 1 = 2" -def test_hash() -> None: +def test_stable_hash() -> None: @dataclass(frozen=True) class Data: mapping: FrozenDict[str, str] @@ -413,6 +413,6 @@ class Data: ) ) assert ( - get_stable_hash(data).hexdigest() + stable_hash(data).hexdigest() == "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730" ) From 0ad0189e06a967aad274e4ba5c48116f60e8c1fa Mon Sep 17 00:00:00 2001 From: Joshua Date: Tue, 11 Apr 2023 12:27:28 -0500 Subject: [PATCH 03/13] oopsie --- src/python/pants/util/strutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 66a3f8bc02d..09569b902ff 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -3,13 +3,13 @@ from __future__ import annotations -import abc import dataclasses import hashlib import json import re import shlex import textwrap +from collections import abc from typing import Any, Callable, Iterable, TypeVar from typing_extensions import ParamSpec From 053825305ae747a18730387268b986659ad6380f Mon Sep 17 00:00:00 2001 From: Joshua Date: Tue, 11 Apr 2023 13:04:20 -0500 Subject: [PATCH 04/13] Remove stringification --- src/python/pants/util/strutil.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 09569b902ff..babbe9cde44 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -353,10 +353,8 @@ def default(self, o): return dict(o) if isinstance(o, abc.Sequence): return list(o) - try: - return super().default(o) - except TypeError: - return str(o) + return super().default(o) + def stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: From 3aa742a8b2a71e175a4084fdac0e77c82edf6c0e Mon Sep 17 00:00:00 2001 From: Joshua Date: Tue, 11 Apr 2023 20:39:01 -0500 Subject: [PATCH 05/13] stability and ordered sets --- src/python/pants/util/strutil.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index babbe9cde44..9fc756f403a 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -14,6 +14,8 @@ from typing_extensions import ParamSpec +from pants.util.ordered_set import FrozenOrderedSet, OrderedSet + def ensure_binary(text_or_binary: bytes | str) -> bytes: if isinstance(text_or_binary, bytes): @@ -351,14 +353,18 @@ def default(self, o): return dataclasses.asdict(o) if isinstance(o, abc.Mapping): return dict(o) - if isinstance(o, abc.Sequence): + if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)): return list(o) return super().default(o) - def stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: - """Attempts to return a stable hash of the value stable across processes.""" + """Attempts to return a stable hash of the value stable across processes. + + "Stable" here means that if `value` is equivalent in multiple invocations (across multiple + processes), it should produce the same hash. To that end, what values are accepted are limited + in scope. + """ return hashlib.new( name, json.dumps( From 75d0baed2ef32bb53531f68fb2966f7ca5547f23 Mon Sep 17 00:00:00 2001 From: Joshua Date: Tue, 11 Apr 2023 21:13:25 -0500 Subject: [PATCH 06/13] digest too --- src/python/pants/util/strutil.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 9fc756f403a..8f83c51a6ef 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -14,6 +14,7 @@ from typing_extensions import ParamSpec +from pants.engine.internals.native_engine import Digest from pants.util.ordered_set import FrozenOrderedSet, OrderedSet @@ -355,6 +356,11 @@ def default(self, o): return dict(o) if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)): return list(o) + if isinstance(o, (Digest,)): + return { + "fingerprint": o.fingerprint, + "serialized_bytes_length": o.serialized_bytes_length, + } return super().default(o) From 2bcb5d22379e35beb3163912c9984b2f4cfc39f3 Mon Sep 17 00:00:00 2001 From: Joshua Date: Wed, 12 Apr 2023 08:39:22 -0500 Subject: [PATCH 07/13] change hash --- .../backend/docker/util_rules/docker_build_context_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/pants/backend/docker/util_rules/docker_build_context_test.py b/src/python/pants/backend/docker/util_rules/docker_build_context_test.py index da0727ca99f..60dc19038c1 100644 --- a/src/python/pants/backend/docker/util_rules/docker_build_context_test.py +++ b/src/python/pants/backend/docker/util_rules/docker_build_context_test.py @@ -155,7 +155,7 @@ def test_pants_hash(rule_runner: RuleRunner) -> None: "stage0": "latest", }, "build_args": {}, - "pants": {"hash": "fd19488a9b08a0184432762cab85f1370904d09bafd9df1a2f8a94614b2b7eb6"}, + "pants": {"hash": "32b02dd86d1ad0169fa02a28583b70e8834ce22a5fccb5e9de422315055de6a0"}, }, ) From 9037478d2c7aa1a10eedeae1130f18386dbd7fe4 Mon Sep 17 00:00:00 2001 From: Joshua Date: Wed, 12 Apr 2023 11:07:26 -0500 Subject: [PATCH 08/13] update docs --- src/python/pants/notes/2.16.x.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/python/pants/notes/2.16.x.md b/src/python/pants/notes/2.16.x.md index 791986ee063..642ee933860 100644 --- a/src/python/pants/notes/2.16.x.md +++ b/src/python/pants/notes/2.16.x.md @@ -2,6 +2,7 @@ ## What's New + ### BUILD files The new `env` function in BUILD files allows access to environment variables in BUILD files. @@ -14,6 +15,11 @@ the same syntax as target selectors. ### Backends +#### Docker + +The [`{pants.hash}`](https://www.pantsbuild.org/docs/tagging-docker-images#string-interpolation-using-placeholder-values) +generation code was changed such that the generated hash will change for the same input. + #### Python The Python backend added or improved support for various tools including: From 614702eed5248be9959b02c559cd322cf00af853 Mon Sep 17 00:00:00 2001 From: Joshua Cannon Date: Wed, 12 Apr 2023 11:56:40 -0500 Subject: [PATCH 09/13] Update src/python/pants/notes/2.16.x.md Co-authored-by: Jacob Floyd --- src/python/pants/notes/2.16.x.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/pants/notes/2.16.x.md b/src/python/pants/notes/2.16.x.md index 642ee933860..b127ab84897 100644 --- a/src/python/pants/notes/2.16.x.md +++ b/src/python/pants/notes/2.16.x.md @@ -18,7 +18,7 @@ the same syntax as target selectors. #### Docker The [`{pants.hash}`](https://www.pantsbuild.org/docs/tagging-docker-images#string-interpolation-using-placeholder-values) -generation code was changed such that the generated hash will change for the same input. +generation code was changed such that the generated hash for the same input will be different in pants 2.16. #### Python From bd8817abf41e6d0a4443813c358dc12bf60b4b36 Mon Sep 17 00:00:00 2001 From: Joshua Date: Wed, 12 Apr 2023 15:30:50 -0500 Subject: [PATCH 10/13] no len --- src/python/pants/util/strutil.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 8f83c51a6ef..7cf5b6cd47f 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -359,7 +359,6 @@ def default(self, o): if isinstance(o, (Digest,)): return { "fingerprint": o.fingerprint, - "serialized_bytes_length": o.serialized_bytes_length, } return super().default(o) From 69651bf7555c54f5d54ed9158ad53b361031eb1b Mon Sep 17 00:00:00 2001 From: Joshua Date: Wed, 12 Apr 2023 15:32:46 -0500 Subject: [PATCH 11/13] no len --- src/python/pants/util/strutil.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 7cf5b6cd47f..ba84b0da287 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -357,9 +357,7 @@ def default(self, o): if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)): return list(o) if isinstance(o, (Digest,)): - return { - "fingerprint": o.fingerprint, - } + return {"fingerprint": o.fingerprint} return super().default(o) From 5ac9e5f6f087f98aa703eeefa76daa4230c22fe1 Mon Sep 17 00:00:00 2001 From: Joshua Date: Thu, 13 Apr 2023 08:44:14 -0500 Subject: [PATCH 12/13] fix hash again --- .../docker/util_rules/docker_build_context_test.py | 2 +- src/python/pants/util/strutil.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/python/pants/backend/docker/util_rules/docker_build_context_test.py b/src/python/pants/backend/docker/util_rules/docker_build_context_test.py index 60dc19038c1..39f8df22483 100644 --- a/src/python/pants/backend/docker/util_rules/docker_build_context_test.py +++ b/src/python/pants/backend/docker/util_rules/docker_build_context_test.py @@ -155,7 +155,7 @@ def test_pants_hash(rule_runner: RuleRunner) -> None: "stage0": "latest", }, "build_args": {}, - "pants": {"hash": "32b02dd86d1ad0169fa02a28583b70e8834ce22a5fccb5e9de422315055de6a0"}, + "pants": {"hash": "87e90685c07ac302bbff8f9d846b4015621255f741008485fd3ce72253ce54f4"}, }, ) diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index ba84b0da287..ce30eba21e0 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -350,14 +350,18 @@ class _JsonEncoder(json.JSONEncoder): def default(self, o): """Return a serializable object for o.""" - if dataclasses.is_dataclass(o): - return dataclasses.asdict(o) if isinstance(o, abc.Mapping): return dict(o) if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)): return list(o) + + # NB: A quick way to embed the type in the hash so that two objects with the same data but + # different types produce different hashes. + classname = o.__class__.__name__ + if dataclasses.is_dataclass(o): + return {"__class__.__name__": classname, **dataclasses.asdict(o)} if isinstance(o, (Digest,)): - return {"fingerprint": o.fingerprint} + return {"__class__.__name__": classname, "fingerprint": o.fingerprint} return super().default(o) From 4a251b652c38e84c1b22322c03432924f530ccbd Mon Sep 17 00:00:00 2001 From: Joshua Date: Thu, 13 Apr 2023 10:01:50 -0500 Subject: [PATCH 13/13] fix hash --- src/python/pants/util/strutil_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/pants/util/strutil_test.py b/src/python/pants/util/strutil_test.py index cfae9b74854..cb3b832dcff 100644 --- a/src/python/pants/util/strutil_test.py +++ b/src/python/pants/util/strutil_test.py @@ -414,5 +414,5 @@ class Data: ) assert ( stable_hash(data).hexdigest() - == "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730" + == "1f2a0caa2588274fa99dc7397c1687dbbe6159be0de646a37ba7af241ecf1add" )