Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefactor: Move get_hash from docker backend to strutil #18715

Merged
merged 14 commits into from
Apr 13, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
DockerBuildEnvironmentError,
DockerBuildEnvironmentRequest,
)
from pants.backend.docker.utils import get_hash, suggest_renames
from pants.backend.docker.utils import suggest_renames
from pants.backend.docker.value_interpolation import DockerBuildArgsInterpolationValue
from pants.backend.shell.target_types import ShellSourceField
from pants.core.goals.package import BuiltPackage, EnvironmentAwarePackageRequest, PackageFieldSet
Expand All @@ -44,7 +44,7 @@
)
from pants.engine.unions import UnionRule
from pants.util.meta import classproperty
from pants.util.strutil import softwrap
from pants.util.strutil import softwrap, stable_hash
from pants.util.value_interpolation import InterpolationContext, InterpolationValue

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -130,7 +130,7 @@ def create(
# Data from Pants.
interpolation_context["pants"] = {
# Present hash for all inputs that can be used for image tagging.
"hash": get_hash((build_args, build_env, snapshot.digest)).hexdigest(),
"hash": stable_hash((build_args, build_env, snapshot.digest)).hexdigest(),
}

# Base image tags values for all stages (as parsed from the Dockerfile instructions).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def test_pants_hash(rule_runner: RuleRunner) -> None:
"stage0": "latest",
},
"build_args": {},
"pants": {"hash": "fd19488a9b08a0184432762cab85f1370904d09bafd9df1a2f8a94614b2b7eb6"},
"pants": {"hash": "32b02dd86d1ad0169fa02a28583b70e8834ce22a5fccb5e9de422315055de6a0"},
thejcannon marked this conversation as resolved.
Show resolved Hide resolved
},
)

Expand Down
34 changes: 1 addition & 33 deletions src/python/pants/backend/docker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,9 @@
from __future__ import annotations

import difflib
import hashlib
import json
import os.path
from collections import abc
from dataclasses import asdict, is_dataclass
from fnmatch import fnmatch
from functools import partial
from typing import Any, Callable, Iterable, Iterator, Sequence, TypeVar
from typing import Callable, Iterable, Iterator, Sequence, TypeVar

from pants.help.maybe_color import MaybeColor
from pants.util.ordered_set import FrozenOrderedSet
Expand Down Expand Up @@ -169,30 +164,3 @@ def format_rename_suggestion(src_path: str, dst_path: str, *, colors: bool) -> s
rem = color.maybe_red(src_path)
add = color.maybe_green(dst_path)
return f"{rem} => {add}"


class JsonEncoder(json.JSONEncoder):
    """JSON encoder that serializes everything, falling back on `str()` for esoteric types.

    Dataclass instances, mappings and sequences are converted to plain `dict`/`list` values;
    anything else the stock encoder rejects is serialized as its `str()` representation.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for `o`."""
        # `is_dataclass()` is also true for dataclass *types*, which `asdict()` rejects with a
        # `TypeError`. Only convert instances so that classes reach the `str()` fallback below.
        if is_dataclass(o) and not isinstance(o, type):
            return asdict(o)
        if isinstance(o, abc.Mapping):
            return dict(o)
        if isinstance(o, abc.Sequence):
            return list(o)
        try:
            return super().default(o)
        except TypeError:
            # The base encoder does not know this type; use its string form instead.
            return str(o)


# Compact, key-sorted JSON: no indentation or padding, and dict key order does not affect output.
json_dumps = partial(
    json.dumps, cls=JsonEncoder, sort_keys=True, separators=(",", ":"), indent=None
)


def get_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash:
    """Return a `hashlib` hash object, using algorithm `name`, over the JSON form of `value`."""
    serialized = json_dumps(value)
    return hashlib.new(name, serialized.encode("utf-8"))
21 changes: 1 addition & 20 deletions src/python/pants/backend/docker/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@

from __future__ import annotations

from dataclasses import dataclass

import pytest

from pants.backend.docker.utils import format_rename_suggestion, get_hash, suggest_renames
from pants.util.frozendict import FrozenDict
from pants.backend.docker.utils import format_rename_suggestion, suggest_renames


@pytest.mark.parametrize(
Expand Down Expand Up @@ -171,19 +168,3 @@ def test_suggest_renames(
def test_format_rename_suggestion(src: str, dst: str) -> None:
actual = format_rename_suggestion(src, dst, colors=False)
assert actual == f"{src} => {dst}"


def test_hash() -> None:
    """Pin the digest `get_hash` produces for a dataclass wrapping an A-Z to a-z `FrozenDict`."""

    @dataclass(frozen=True)
    class Data:
        mapping: FrozenDict[str, str]

    upper_to_lower = {
        chr(code): chr(code).lower() for code in range(ord("A"), ord("Z") + 1)
    }
    expected = "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730"
    assert get_hash(Data(FrozenDict(upper_to_lower))).hexdigest() == expected
6 changes: 6 additions & 0 deletions src/python/pants/notes/2.16.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## What's New


### BUILD files

The new `env` function in BUILD files allows access to environment variables in BUILD files.
Expand All @@ -14,6 +15,11 @@ the same syntax as target selectors.

### Backends

#### Docker

The [`{pants.hash}`](https://www.pantsbuild.org/docs/tagging-docker-images#string-interpolation-using-placeholder-values)
generation code was changed, so the hash generated for a given input now differs from the hash generated by earlier Pants versions.
thejcannon marked this conversation as resolved.
Show resolved Hide resolved

#### Python

The Python backend added or improved support for various tools including:
Expand Down
44 changes: 43 additions & 1 deletion src/python/pants/util/strutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@

from __future__ import annotations

import dataclasses
import hashlib
import json
import re
import shlex
import textwrap
from typing import Callable, Iterable, TypeVar
from collections import abc
from typing import Any, Callable, Iterable, TypeVar

from typing_extensions import ParamSpec

from pants.engine.internals.native_engine import Digest
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet


def ensure_binary(text_or_binary: bytes | str) -> bytes:
if isinstance(text_or_binary, bytes):
Expand Down Expand Up @@ -335,3 +342,38 @@ def wrapper(func: Callable[P, R]) -> Callable[P, R]:
return func

return wrapper


class _JsonEncoder(json.JSONEncoder):
    """JSON encoder used by `stable_hash` to serialize a limited set of non-JSON types.

    Dataclass instances, mappings, sequences, ordered sets and `Digest`s are converted to plain
    `dict`/`list` values. There is no `str()` fallback: any other type is handed to the base
    encoder, which raises `TypeError`.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for `o`, or defer to the base encoder."""
        # `is_dataclass()` is also true for dataclass *types*, which `asdict()` rejects; only
        # convert instances so that classes get the base encoder's "not JSON serializable" error.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        if isinstance(o, abc.Mapping):
            return dict(o)
        if isinstance(o, (abc.Sequence, OrderedSet, FrozenOrderedSet)):
            return list(o)
        if isinstance(o, Digest):
            return {
                "fingerprint": o.fingerprint,
                "serialized_bytes_length": o.serialized_bytes_length,
            }
        return super().default(o)
kaos marked this conversation as resolved.
Show resolved Hide resolved


def stable_hash(value: Any, *, name: str = "sha256") -> hashlib._Hash:
    """Return a hash of `value` that is intended to be stable across processes.

    "Stable" here means that if `value` is equivalent in multiple invocations (across multiple
    processes), it should produce the same hash. To that end, the set of accepted values is
    limited in scope.
    """
    # Compact, key-sorted JSON, so that dict key order does not influence the hash.
    serialized = json.dumps(
        value, cls=_JsonEncoder, sort_keys=True, separators=(",", ":"), indent=None
    )
    return hashlib.new(name, serialized.encode("utf-8"))
19 changes: 19 additions & 0 deletions src/python/pants/util/strutil_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import textwrap
from dataclasses import dataclass
from textwrap import dedent

import pytest

from pants.util.frozendict import FrozenDict
from pants.util.strutil import (
bullet_list,
comma_separated_list,
Expand All @@ -18,6 +20,7 @@
path_safe,
pluralize,
softwrap,
stable_hash,
strip_prefix,
strip_v2_chroot_path,
)
Expand Down Expand Up @@ -397,3 +400,19 @@ def show_why_this_is_needed() -> None:

with pytest.raises(AssertionError):
assert show_why_this_is_needed.__doc__ == "calc 1 + 1 = 2"


def test_stable_hash() -> None:
    """Pin the digest `stable_hash` produces for a dataclass wrapping an A-Z to a-z mapping."""

    @dataclass(frozen=True)
    class Data:
        mapping: FrozenDict[str, str]

    upper_to_lower = {
        chr(code): chr(code).lower() for code in range(ord("A"), ord("Z") + 1)
    }
    expected = "e4da3c55de6ce98ddcbd5b854ff01f5c8b47fdcb2e10ddd5176505e39a332730"
    assert stable_hash(Data(FrozenDict(upper_to_lower))).hexdigest() == expected