From 327581422cac411bfca47d3e094419b0887be299 Mon Sep 17 00:00:00 2001 From: Matt Westrik Date: Mon, 9 Dec 2024 14:06:42 -0800 Subject: [PATCH] feat: update flatten to deduplicate files As currently implemented, this option does not deduplicate files, which means the duplicate file path errors can still occur in some cases. This change updates `flatten.sh` to deduplicate files as well, with the final file in the archive being preserved for each path. It also renames `assert_tar_listing` to `assert_tar_mtree` and creates a new `assert_tar_listing` rule that lists archive contents instead of exporting an mtree. --- distroless/private/flatten.sh | 28 ++++++++++-------- distroless/tests/asserts.bzl | 46 ++++++++++++++++++++++++++++-- examples/cacerts/BUILD.bazel | 4 +-- examples/flatten/BUILD.bazel | 25 +++++++++++++--- examples/group/BUILD.bazel | 4 +-- examples/home/BUILD.bazel | 4 +-- examples/java_keystore/BUILD.bazel | 4 +-- examples/locale/BUILD.bazel | 6 ++-- examples/os_release/BUILD.bazel | 6 ++-- examples/passwd/BUILD.bazel | 4 +-- examples/statusd/BUILD.bazel | 4 +-- 11 files changed, 99 insertions(+), 36 deletions(-) diff --git a/distroless/private/flatten.sh b/distroless/private/flatten.sh index 49ba33c..1989bba 100755 --- a/distroless/private/flatten.sh +++ b/distroless/private/flatten.sh @@ -17,7 +17,6 @@ if [[ "$output" != "-" ]]; then fi done - # There not a lot happening here but there is still too many implicit knowledge. # # When we run bsdtar, we ask for it to prompt every entry, in the same order we created above, the mtree. @@ -29,21 +28,26 @@ if [[ "$output" != "-" ]]; then # See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L240 # See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L216 # - # And finally we iterate over all the entries generating 31 bytes of interleaved 'Y' or 'N' date based on if - # we came across the entry before, for directories the first occurrence is kept, and for files copies are - # preserved. + # To match the extraction behavior of tar itself, we want to preserve only the final occurrence of each file + # and directory in the archive. To do this, we iterate over all the entries twice. The first pass computes the + # number of occurrences of each path, and the second pass determines whether each entry is the final (or only) + # occurrence of that path. + $bsdtar --confirmation "$@" > $output 2< <(awk '{ - if (substr($0,0,1) == "#") { - next; - } count[$1]++; + files[NR] = $1 + } + END { ORS="" - keep="n" - if (count[$1] == 1 || $1 !~ "/$") { - keep="y" + for (i=1; i<=NR; i++) { + seen[files[i]]++ + keep="n" + if (count[files[i]] == seen[files[i]]) { + keep="y" + } + for (j=0; j<31; j++) print keep + fflush() } - for (i=0;i<31;i++) print keep - fflush() }' "$mtree") rm "$mtree" else diff --git a/distroless/tests/asserts.bzl b/distroless/tests/asserts.bzl index 873c99a..f3271ac 100644 --- a/distroless/tests/asserts.bzl +++ b/distroless/tests/asserts.bzl @@ -3,8 +3,50 @@ load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test") load("@bazel_skylib//rules:write_file.bzl", "write_file") -# buildifier: disable=function-docstring +def assert_tar_mtree(name, actual, expected): + """ + Assert that an mtree representation of a tarball matches an expected value. + + Args: + name: name of this assertion + actual: label for a tarball + expected: expected mtree + """ + actual_mtree = "_{}_mtree".format(name) + expected_mtree = "_{}_expected".format(name) + + native.genrule( + name = actual_mtree, + srcs = [actual], + outs = ["_{}.mtree".format(name)], + cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -cf $@ --format=mtree --options '!nlink' @-".format(actual), + toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"], + ) + + write_file( + name = expected_mtree, + out = "_{}.expected".format(name), + content = [expected], + newline = "unix", + ) + + diff_test( + name = name, + file1 = actual_mtree, + file2 = expected_mtree, + timeout = "short", + ) + + def assert_tar_listing(name, actual, expected): + """ + Assert that the listed contents of a tarball match an expected value. This is useful when checking for duplicated paths. + + Args: + name: name of this assertion + actual: label for a tarball + expected: expected listing + """ actual_listing = "_{}_listing".format(name) expected_listing = "_{}_expected".format(name) @@ -12,7 +54,7 @@ def assert_tar_listing(name, actual, expected): name = actual_listing, srcs = [actual], outs = ["_{}.listing".format(name)], - cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -cf $@ --format=mtree --options '!nlink' @-".format(actual), + cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -tf - > $@".format(actual), toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"], ) diff --git a/examples/cacerts/BUILD.bazel b/examples/cacerts/BUILD.bazel index d629b33..2eeadea 100644 --- a/examples/cacerts/BUILD.bazel +++ b/examples/cacerts/BUILD.bazel @@ -1,12 +1,12 @@ load("//distroless:defs.bzl", "cacerts") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") cacerts( name = "cacerts", package = "@example-bullseye-ca-certificates//:data.tar.xz", ) -assert_tar_listing( +assert_tar_mtree( name = "test_cacerts", actual = "cacerts", expected = """\ diff --git a/examples/flatten/BUILD.bazel b/examples/flatten/BUILD.bazel index 476f992..387f3df 100644 --- a/examples/flatten/BUILD.bazel +++ b/examples/flatten/BUILD.bazel @@ -1,6 +1,6 @@ load("@aspect_bazel_lib//lib:tar.bzl", "tar") load("//distroless:defs.bzl", "flatten", "home", "passwd") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_listing", "assert_tar_mtree") passwd( name = "passwd", @@ -47,7 +47,7 @@ flatten( ], ) -assert_tar_listing( +assert_tar_mtree( name = "test_flatten", actual = "flatten", expected = """\ @@ -94,8 +94,8 @@ flatten( ], ) -assert_tar_listing( - name = "test_flatten_dedup", +assert_tar_mtree( + name = "test_flatten_dedup_mtree", actual = "flatten_dedup", expected = """\ #mtree @@ -107,3 +107,20 @@ assert_tar_listing( ./examples/flatten/dir/sub/content.txt time=1672560000.0 mode=755 gid=0 uid=0 type=file size=0 """, ) + +assert_tar_listing( + name = "test_flatten_dedup_listing", + actual = "flatten_dedup", + expected = """\ +examples/ +examples/flatten/ +examples/flatten/dir/ +examples/flatten/dir/changelog +examples/flatten/dir/sub/ +examples/flatten/dir/sub/content.txt +examples/flatten/dir/changelog +examples/flatten/dir/changelog +examples/flatten/dir/sub/content.txt +""", +) + diff --git a/examples/group/BUILD.bazel b/examples/group/BUILD.bazel index c0f8050..b569926 100644 --- a/examples/group/BUILD.bazel +++ b/examples/group/BUILD.bazel @@ -1,6 +1,6 @@ load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test") load("//distroless:defs.bzl", "group") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") group( name = "group", @@ -32,7 +32,7 @@ diff_test( file2 = "group.expected.txt", ) -assert_tar_listing( +assert_tar_mtree( name = "test_group", actual = "group", expected = """\ diff --git a/examples/home/BUILD.bazel b/examples/home/BUILD.bazel index 331331e..354d72f 100644 --- a/examples/home/BUILD.bazel +++ b/examples/home/BUILD.bazel @@ -1,5 +1,5 @@ load("//distroless:defs.bzl", "home") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") home( name = "home", @@ -17,7 +17,7 @@ home( ], ) -assert_tar_listing( +assert_tar_mtree( name = "test_home", actual = "home", expected = """\ diff --git a/examples/java_keystore/BUILD.bazel b/examples/java_keystore/BUILD.bazel index dc33f71..97ce1b2 100644 --- a/examples/java_keystore/BUILD.bazel +++ b/examples/java_keystore/BUILD.bazel @@ -1,5 +1,5 @@ load("//distroless:defs.bzl", "java_keystore") -load("//distroless/tests:asserts.bzl", "assert_jks_listing", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_jks_listing", "assert_tar_mtree") java_keystore( name = "java_keystore", @@ -23,7 +23,7 @@ assert_jks_listing( expected = "expected.jks.output", ) -assert_tar_listing( +assert_tar_mtree( name = "test_java_keystore", actual = "java_keystore", expected = """\ diff --git a/examples/locale/BUILD.bazel b/examples/locale/BUILD.bazel index f631505..5c41fca 100644 --- a/examples/locale/BUILD.bazel +++ b/examples/locale/BUILD.bazel @@ -1,5 +1,5 @@ load("//distroless:defs.bzl", "locale") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") locale( name = "bullseye", @@ -7,7 +7,7 @@ locale( package = "@example-bullseye-libc-bin//:data.tar.xz", ) -assert_tar_listing( +assert_tar_mtree( name = "test_bullseye", actual = "bullseye", expected = """\ @@ -41,7 +41,7 @@ locale( package = "@example-bookworm-libc-bin//:data.tar.xz", ) -assert_tar_listing( +assert_tar_mtree( name = "test_bookworm", actual = "bookworm", expected = """\ diff --git a/examples/os_release/BUILD.bazel b/examples/os_release/BUILD.bazel index 727ba82..f86793d 100644 --- a/examples/os_release/BUILD.bazel +++ b/examples/os_release/BUILD.bazel @@ -1,6 +1,6 @@ load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test") load("//distroless:defs.bzl", "os_release") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") os_release( name = "os_release", @@ -16,7 +16,7 @@ diff_test( file2 = "content.expected.txt", ) -assert_tar_listing( +assert_tar_mtree( name = "test_os_release", actual = "os_release", expected = """\ @@ -37,7 +37,7 @@ os_release( path = "/etc/os-release", ) -assert_tar_listing( +assert_tar_mtree( name = "test_os_release_alternative_path", actual = "os_release_alternative_path", expected = """\ diff --git a/examples/passwd/BUILD.bazel b/examples/passwd/BUILD.bazel index 3907a20..a1bdc37 100644 --- a/examples/passwd/BUILD.bazel +++ b/examples/passwd/BUILD.bazel @@ -1,6 +1,6 @@ load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test") load("//distroless:defs.bzl", "passwd") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") passwd( name = "passwd", @@ -22,7 +22,7 @@ diff_test( file2 = "passwd.expected.txt", ) -assert_tar_listing( +assert_tar_mtree( name = "test_passwd", actual = "passwd", expected = """\ diff --git a/examples/statusd/BUILD.bazel b/examples/statusd/BUILD.bazel index aa0dc01..146f0b0 100644 --- a/examples/statusd/BUILD.bazel +++ b/examples/statusd/BUILD.bazel @@ -1,6 +1,6 @@ # buildifier: disable=bzl-visibility load("//apt:defs.bzl", "dpkg_statusd") -load("//distroless/tests:asserts.bzl", "assert_tar_listing") +load("//distroless/tests:asserts.bzl", "assert_tar_mtree") dpkg_statusd( name = "statusd", @@ -8,7 +8,7 @@ dpkg_statusd( control = "@example-bullseye-ca-certificates//:control.tar.xz", ) -assert_tar_listing( +assert_tar_mtree( name = "test_statusd", actual = "statusd", expected = """\