Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update flatten to deduplicate files #131

Merged
merged 3 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions distroless/private/flatten.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ if [[ "$output" != "-" ]]; then
fi
done


# There is not a lot happening here, but there is still too much implicit knowledge.
#
# When we run bsdtar, we ask it to prompt for every entry, in the same order as the mtree we created above.
Expand All @@ -29,21 +28,26 @@ if [[ "$output" != "-" ]]; then
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L240
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L216
#
# And finally we iterate over all the entries generating 31 bytes of interleaved 'Y' or 'N' data based on if
# we came across the entry before, for directories the first occurrence is kept, and for files copies are
# preserved.
# To match the extraction behavior of tar itself, we want to preserve only the final occurrence of each file
thesayyn marked this conversation as resolved.
Show resolved Hide resolved
# and directory in the archive. To do this, we iterate over all the entries twice. The first pass computes the
# number of occurrences of each path, and the second pass determines whether each entry is the final (or only)
# occurrence of that path.

$bsdtar --confirmation "$@" > $output 2< <(awk '{
if (substr($0,0,1) == "#") {
next;
}
count[$1]++;
files[NR] = $1
}
END {
ORS=""
keep="n"
if (count[$1] == 1 || $1 !~ "/$") {
keep="y"
for (i=1; i<=NR; i++) {
seen[files[i]]++
keep="n"
if (count[files[i]] == seen[files[i]]) {
keep="y"
}
for (j=0; j<31; j++) print keep
fflush()
}
for (i=0;i<31;i++) print keep
fflush()
}' "$mtree")
rm "$mtree"
else
Expand Down
46 changes: 44 additions & 2 deletions distroless/tests/asserts.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,58 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("@bazel_skylib//rules:write_file.bzl", "write_file")

# buildifier: disable=function-docstring
def assert_tar_mtree(name, actual, expected):
    """
    Check that the mtree dump of a tarball is identical to an expected mtree.

    Declares three targets: a genrule that pipes the tarball through bsdtar to
    produce its mtree, a write_file holding the expected text, and a diff_test
    (named `name`) that compares the two files.

    Args:
        name: name of the resulting diff_test target
        actual: label of the tarball to inspect
        expected: expected mtree contents
    """
    mtree_target = "_{}_mtree".format(name)
    expected_target = "_{}_expected".format(name)

    # The tarball is streamed on stdin and re-written by bsdtar in mtree format.
    dump_mtree_cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -cf $@ --format=mtree --options '!nlink' @-".format(actual)

    native.genrule(
        name = mtree_target,
        srcs = [actual],
        outs = ["_{}.mtree".format(name)],
        toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"],
        cmd = dump_mtree_cmd,
    )

    # Materialize the expected text as a file so it can be diffed.
    write_file(
        name = expected_target,
        out = "_{}.expected".format(name),
        newline = "unix",
        content = [expected],
    )

    diff_test(
        name = name,
        timeout = "short",
        file1 = mtree_target,
        file2 = expected_target,
    )


def assert_tar_listing(name, actual, expected):
"""
Assert that the listed contents of a tarball match an expected value. This is useful when checking for duplicated paths.

Args:
name: name of this assertion
actual: label for a tarball
expected: expected listing
"""
actual_listing = "_{}_listing".format(name)
expected_listing = "_{}_expected".format(name)

native.genrule(
name = actual_listing,
srcs = [actual],
outs = ["_{}.listing".format(name)],
cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -cf $@ --format=mtree --options '!nlink' @-".format(actual),
cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -tf - > $@".format(actual),
toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"],
)

Expand Down
4 changes: 2 additions & 2 deletions examples/cacerts/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
load("//distroless:defs.bzl", "cacerts")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

cacerts(
name = "cacerts",
package = "@example-bullseye-ca-certificates//:data.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_cacerts",
actual = "cacerts",
expected = """\
Expand Down
22 changes: 18 additions & 4 deletions examples/flatten/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:tar.bzl", "tar")
load("//distroless:defs.bzl", "flatten", "home", "passwd")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_listing", "assert_tar_mtree")

passwd(
name = "passwd",
Expand Down Expand Up @@ -47,7 +47,7 @@ flatten(
],
)

assert_tar_listing(
assert_tar_mtree(
name = "test_flatten",
actual = "flatten",
expected = """\
Expand Down Expand Up @@ -94,8 +94,8 @@ flatten(
],
)

assert_tar_listing(
name = "test_flatten_dedup",
assert_tar_mtree(
name = "test_flatten_dedup_mtree",
actual = "flatten_dedup",
expected = """\
#mtree
Expand All @@ -107,3 +107,17 @@ assert_tar_listing(
./examples/flatten/dir/sub/content.txt time=1672560000.0 mode=755 gid=0 uid=0 type=file size=0
""",
)

assert_tar_listing(
name = "test_flatten_dedup_listing",
actual = "flatten_dedup",
expected = """\
examples/
examples/flatten/
examples/flatten/dir/
examples/flatten/dir/changelog
examples/flatten/dir/sub/
examples/flatten/dir/sub/content.txt
""",
)

4 changes: 2 additions & 2 deletions examples/group/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("//distroless:defs.bzl", "group")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

group(
name = "group",
Expand Down Expand Up @@ -32,7 +32,7 @@ diff_test(
file2 = "group.expected.txt",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_group",
actual = "group",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/home/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("//distroless:defs.bzl", "home")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

home(
name = "home",
Expand All @@ -17,7 +17,7 @@ home(
],
)

assert_tar_listing(
assert_tar_mtree(
name = "test_home",
actual = "home",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/java_keystore/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("//distroless:defs.bzl", "java_keystore")
load("//distroless/tests:asserts.bzl", "assert_jks_listing", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_jks_listing", "assert_tar_mtree")

java_keystore(
name = "java_keystore",
Expand All @@ -23,7 +23,7 @@ assert_jks_listing(
expected = "expected.jks.output",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_java_keystore",
actual = "java_keystore",
expected = """\
Expand Down
6 changes: 3 additions & 3 deletions examples/locale/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
load("//distroless:defs.bzl", "locale")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

locale(
name = "bullseye",
charset = "C.UTF-8",
package = "@example-bullseye-libc-bin//:data.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_bullseye",
actual = "bullseye",
expected = """\
Expand Down Expand Up @@ -41,7 +41,7 @@ locale(
package = "@example-bookworm-libc-bin//:data.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_bookworm",
actual = "bookworm",
expected = """\
Expand Down
6 changes: 3 additions & 3 deletions examples/os_release/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("//distroless:defs.bzl", "os_release")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

os_release(
name = "os_release",
Expand All @@ -16,7 +16,7 @@ diff_test(
file2 = "content.expected.txt",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_os_release",
actual = "os_release",
expected = """\
Expand All @@ -37,7 +37,7 @@ os_release(
path = "/etc/os-release",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_os_release_alternative_path",
actual = "os_release_alternative_path",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/passwd/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("//distroless:defs.bzl", "passwd")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

passwd(
name = "passwd",
Expand All @@ -22,7 +22,7 @@ diff_test(
file2 = "passwd.expected.txt",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_passwd",
actual = "passwd",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/statusd/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# buildifier: disable=bzl-visibility
load("//apt:defs.bzl", "dpkg_statusd")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

dpkg_statusd(
name = "statusd",
package_name = "ca-certificates",
control = "@example-bullseye-ca-certificates//:control.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_statusd",
actual = "statusd",
expected = """\
Expand Down
Loading