diff --git a/src/python/pants/backend/docker/util_rules/docker_build_context.py b/src/python/pants/backend/docker/util_rules/docker_build_context.py index 661db4900a9..367e04d93ea 100644 --- a/src/python/pants/backend/docker/util_rules/docker_build_context.py +++ b/src/python/pants/backend/docker/util_rules/docker_build_context.py @@ -21,7 +21,7 @@ DockerBuildEnvironmentError, DockerBuildEnvironmentRequest, ) -from pants.backend.docker.utils import get_hash, suggest_renames +from pants.backend.docker.utils import suggest_renames from pants.backend.docker.value_interpolation import DockerBuildArgsInterpolationValue from pants.backend.shell.target_types import ShellSourceField from pants.core.goals.package import BuiltPackage, EnvironmentAwarePackageRequest, PackageFieldSet @@ -44,7 +44,7 @@ ) from pants.engine.unions import UnionRule from pants.util.meta import classproperty -from pants.util.strutil import softwrap +from pants.util.strutil import softwrap, stable_hash from pants.util.value_interpolation import InterpolationContext, InterpolationValue logger = logging.getLogger(__name__) @@ -130,7 +130,7 @@ def create( # Data from Pants. interpolation_context["pants"] = { # Present hash for all inputs that can be used for image tagging. - "hash": get_hash((build_args, build_env, snapshot.digest)).hexdigest(), + "hash": stable_hash((build_args, build_env, snapshot.digest)).hexdigest(), } # Base image tags values for all stages (as parsed from the Dockerfile instructions). 
diff --git a/src/python/pants/backend/docker/util_rules/docker_build_context_test.py b/src/python/pants/backend/docker/util_rules/docker_build_context_test.py index da0727ca99f..39f8df22483 100644 --- a/src/python/pants/backend/docker/util_rules/docker_build_context_test.py +++ b/src/python/pants/backend/docker/util_rules/docker_build_context_test.py @@ -155,7 +155,7 @@ def test_pants_hash(rule_runner: RuleRunner) -> None: "stage0": "latest", }, "build_args": {}, - "pants": {"hash": "fd19488a9b08a0184432762cab85f1370904d09bafd9df1a2f8a94614b2b7eb6"}, + "pants": {"hash": "87e90685c07ac302bbff8f9d846b4015621255f741008485fd3ce72253ce54f4"}, }, ) diff --git a/src/python/pants/backend/docker/utils.py b/src/python/pants/backend/docker/utils.py index ec097916369..a5a86efd69e 100644 --- a/src/python/pants/backend/docker/utils.py +++ b/src/python/pants/backend/docker/utils.py @@ -4,14 +4,9 @@ from __future__ import annotations import difflib -import hashlib -import json import os.path -from collections import abc -from dataclasses import asdict, is_dataclass from fnmatch import fnmatch -from functools import partial -from typing import Any, Callable, Iterable, Iterator, Sequence, TypeVar +from typing import Callable, Iterable, Iterator, Sequence, TypeVar from pants.help.maybe_color import MaybeColor from pants.util.ordered_set import FrozenOrderedSet @@ -169,30 +164,3 @@ def format_rename_suggestion(src_path: str, dst_path: str, *, colors: bool) -> s rem = color.maybe_red(src_path) add = color.maybe_green(dst_path) return f"{rem} => {add}" - - -class JsonEncoder(json.JSONEncoder): - """Allow us to serialize everything, with a fallback on `str()` in case of any esoteric - types.""" - - def default(self, o): - """Return a serializable object for o.""" - if is_dataclass(o): - return asdict(o) - if isinstance(o, abc.Mapping): - return dict(o) - if isinstance(o, abc.Sequence): - return list(o) - try: - return super().default(o) - except TypeError: - return str(o) - - -json_dumps 
= partial( - json.dumps, indent=None, separators=(",", ":"), sort_keys=True, cls=JsonEncoder -) - - -def get_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash: - return hashlib.new(name, json_dumps(value).encode("utf-8")) diff --git a/src/python/pants/backend/docker/utils_test.py b/src/python/pants/backend/docker/utils_test.py index ea853ec0d68..49e1bc62c90 100644 --- a/src/python/pants/backend/docker/utils_test.py +++ b/src/python/pants/backend/docker/utils_test.py @@ -3,12 +3,9 @@ from __future__ import annotations -from dataclasses import dataclass - import pytest -from pants.backend.docker.utils import format_rename_suggestion, get_hash, suggest_renames -from pants.util.frozendict import FrozenDict +from pants.backend.docker.utils import format_rename_suggestion, suggest_renames @pytest.mark.parametrize( @@ -171,19 +168,3 @@ def test_suggest_renames( def test_format_rename_suggestion(src: str, dst: str) -> None: actual = format_rename_suggestion(src, dst, colors=False) assert actual == f"{src} => {dst}" - - -def test_hash() -> None: - @dataclass(frozen=True) - class Data: - mapping: FrozenDict[str, str] - - data = Data( - FrozenDict( - {alpha: alpha.lower() for alpha in [chr(a) for a in range(ord("A"), ord("Z") + 1)]} - ) - ) - assert ( - get_hash(data).hexdigest() - == "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730" - ) diff --git a/src/python/pants/notes/2.16.x.md b/src/python/pants/notes/2.16.x.md index 791986ee063..b127ab84897 100644 --- a/src/python/pants/notes/2.16.x.md +++ b/src/python/pants/notes/2.16.x.md @@ -2,6 +2,7 @@ ## What's New + ### BUILD files The new `env` function in BUILD files allows access to environment variables in BUILD files. @@ -14,6 +15,11 @@ the same syntax as target selectors. 
class _JsonEncoder(json.JSONEncoder):
    """JSON encoder backing `stable_hash`.

    Extends the stock encoder with a small, explicit set of additional types. Anything else is
    rejected with the `TypeError` raised by `json.JSONEncoder.default`: there is no `str()`
    fallback, because the default string form of an arbitrary object is not guaranteed to be the
    same from one process to the next.
    """

    def default(self, o: Any) -> Any:
        """Return a JSON-serializable stand-in for `o`.

        The encoder only calls this for objects it cannot serialize natively.

        Raises `TypeError` (via the base class) if `o` is not one of the supported types.
        """
        if isinstance(o, abc.Mapping):
            return dict(o)
        if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)):
            return list(o)

        # NB: A quick way to embed the type in the hash so that two objects with the same data but
        # different types produce different hashes.
        classname = o.__class__.__name__
        # `is_dataclass` is also true for dataclass *types*, which `asdict` refuses. Only handle
        # instances here, so a class object gets the standard "not JSON serializable" error below.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            # NOTE: `asdict` recurses into nested dataclasses and turns them into plain dicts
            # before the encoder sees them, so only this outermost class name is embedded.
            return {"__class__.__name__": classname, **dataclasses.asdict(o)}
        if isinstance(o, Digest):
            return {"__class__.__name__": classname, "fingerprint": o.fingerprint}
        return super().default(o)


def stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash:
    """Return a hash of `value` that is stable across processes.

    "Stable" here means that if `value` is equivalent in multiple invocations (across multiple
    processes), it produces the same hash. To that end, the accepted values are limited in scope:
    whatever `json` serializes natively, plus the types handled by `_JsonEncoder`.

    The value is serialized to compact JSON with sorted keys (so mapping order does not affect
    the result), encoded as UTF-8 and fed to the requested `hashlib` algorithm.

    `name` is the algorithm name, as accepted by `hashlib.new`. The returned hash object exposes
    the usual `digest()`/`hexdigest()` methods.

    Raises `TypeError` if `value` contains an object of an unsupported type.
    """
    serialized = json.dumps(
        value, indent=None, separators=(",", ":"), sort_keys=True, cls=_JsonEncoder
    )
    return hashlib.new(name, serialized.encode("utf-8"))