From 6eac63d63881d9e5559f6a5925f70a71e8268c23 Mon Sep 17 00:00:00 2001 From: jonmeow Date: Wed, 30 Oct 2024 14:14:38 -0700 Subject: [PATCH 1/4] Switch tar verification to a manifest comparison. --- toolchain/install/BUILD | 14 +++--- toolchain/install/pkg_helpers.bzl | 12 ++--- toolchain/install/toolchain_tar_test.py | 65 ++++++++++++++----------- 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/toolchain/install/BUILD b/toolchain/install/BUILD index f91ef404dc8ea..d8857b11e847d 100644 --- a/toolchain/install/BUILD +++ b/toolchain/install/BUILD @@ -4,6 +4,7 @@ load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") load("//bazel/cc_toolchains:defs.bzl", "cc_env") +load("//bazel/manifest:defs.bzl", "manifest") load("install_filegroups.bzl", "install_filegroup", "install_symlink", "install_target", "make_install_filegroups") load("pkg_helpers.bzl", "pkg_naming_variables", "pkg_tar_and_test") load("run_tool.bzl", "run_tool") @@ -145,6 +146,11 @@ make_install_filegroups( prefix = "prefix_root", ) +manifest( + name = "install_data_manifest.txt", + srcs = [":install_data"], +) + pkg_naming_variables( name = "packaging_variables", ) @@ -157,18 +163,12 @@ pkg_naming_variables( # `carbon_toolchain_tar_gz_rule`. pkg_tar_and_test( srcs = [":pkg_data"], + install_data_manifest = ":install_data_manifest.txt", name_base = "carbon_toolchain", package_dir = "carbon_toolchain-$(version)", package_file_name_base = "carbon_toolchain-$(version)", package_variables = ":packaging_variables", stamp = -1, # Allow `--stamp` builds to produce file timestamps. - test_data = [ - ":install_data", - ], - # TODO: This is used to make sure that tar files are in install_data (one - # direction). Replace with a check that the files in install_data and tar - # match (bidirectional). - test_install_marker = "prefix_root/lib/carbon/carbon_install.txt", ) # Support `bazel run` on specific binaries. diff --git a/toolchain/install/pkg_helpers.bzl b/toolchain/install/pkg_helpers.bzl index 7e2743d8b7de1..06ced2e494c56 100644 --- a/toolchain/install/pkg_helpers.bzl +++ b/toolchain/install/pkg_helpers.bzl @@ -25,7 +25,7 @@ pkg_naming_variables = rule( attrs = VERSION_ATTRS, ) -def pkg_tar_and_test(name_base, package_file_name_base, test_data, test_install_marker, **kwargs): +def pkg_tar_and_test(name_base, package_file_name_base, install_data_manifest, **kwargs): """Create a `pkg_tar` and a test for both `.tar` and `.tar.gz` extensions. Args: @@ -35,10 +35,8 @@ def pkg_tar_and_test(name_base, package_file_name_base, test_data, test_install_ package_file_name_base: The base of the `package_file_name` attribute to `pkg_tar`. The file extensions will be appended after a `.`. - test_data: - The test data to verify the tar file against. - test_install_marker: - The install marker within the test data to locate the installation. + install_data_manifest: + The install data manifest file to compare with. **kwargs: Passed to `pkg_tar` for all the rest of its attributes. """ @@ -58,10 +56,10 @@ def pkg_tar_and_test(name_base, package_file_name_base, test_data, test_install_ name = name_base + "_" + target_ext + "_test", size = "small", srcs = ["toolchain_tar_test.py"], - data = [":" + tar_target, test_install_marker] + test_data, + data = [":" + tar_target, install_data_manifest], args = [ "$(location :{})".format(tar_target), - "$(location {})".format(test_install_marker), + "$(location {})".format(install_data_manifest), ], main = "toolchain_tar_test.py", # The compressed tar is slow, exclude building and testing that. diff --git a/toolchain/install/toolchain_tar_test.py b/toolchain/install/toolchain_tar_test.py index 19daad876af36..fd78dba5d283a 100644 --- a/toolchain/install/toolchain_tar_test.py +++ b/toolchain/install/toolchain_tar_test.py @@ -9,11 +9,21 @@ """ import argparse -import sys from pathlib import Path +import re import tarfile +def expect_empty_set(filename: str, file_set: set[str]) -> bool: + """Prints and returns false when the set has entries.""" + if file_set: + print(f"error: files only in `{filename}`:") + for f in file_set: + print(f" - ${f}") + return False + return True + + def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -22,42 +32,41 @@ def main() -> None: help="The tar file to test.", ) parser.add_argument( - "install_marker", + "install_data_manifest", type=Path, - help="The path of the install marker in a prefix root to test against.", + help="The install data manifest file.", ) args = parser.parse_args() - # Locate the prefix root from the install marker. - if not args.install_marker.exists(): - sys.exit("ERROR: No install marker: " + args.install_marker) - prefix_root_path = args.install_marker.parents[2] + with open(args.install_data_manifest) as manifest: + # Remove everything up to and including `prefix_root`. + install_files = set( + [ + re.sub("^.*/prefix_root/", "", entry.strip()) + for entry in manifest.readlines() + ] + ) + assert len(install_files), f"`{args.install_data_manifest}` is empty." # First check that every file and directory in the tar file exists in our # prefix root, and build a set of those paths. - installed_paths = set() with tarfile.open(args.tar_file) as tar: - for tarinfo in tar: - relative_path = Path(*Path(tarinfo.name).parts[1:]) - installed_paths.add(relative_path) - if not prefix_root_path.joinpath(relative_path).exists(): - sys.exit( - "ERROR: File `{0}` is not in prefix root: `{1}`".format( - tarinfo.name, prefix_root_path - ) - ) + # Remove the first path component. + tar_files = set( + [ + str(Path(*Path(tarinfo.name).parts[1:])) + for tarinfo in tar + if not tarinfo.isdir() + ] + ) + assert len(tar_files), f"`{args.tar_file}` is empty." - # If we found an empty tar file, it's always an error. - if len(installed_paths) == 0: - sys.exit("ERROR: Tar file `{0}` was empty.".format(args.tar_file)) - - # Now check that every file and directory in the prefix root is in that set. - for prefix_path in prefix_root_path.glob("**/*"): - relative_path = prefix_path.relative_to(prefix_root_path) - if relative_path not in installed_paths: - sys.exit( - "ERROR: File `{0}` is not in tar file.".format(relative_path) - ) + tar_okay = expect_empty_set(args.tar_file, tar_files - install_files) + install_okay = expect_empty_set( + args.install_data_manifest, install_files - tar_files + ) + if not (tar_okay and install_okay): + exit("error: tar and install data did not match.") if __name__ == "__main__": From 2b315c640604845f9fab4b2219a4a4830b8f23d4 Mon Sep 17 00:00:00 2001 From: jonmeow Date: Wed, 30 Oct 2024 14:20:40 -0700 Subject: [PATCH 2/4] comments --- toolchain/install/toolchain_tar_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toolchain/install/toolchain_tar_test.py b/toolchain/install/toolchain_tar_test.py index fd78dba5d283a..47f7cd170fe2d 100644 --- a/toolchain/install/toolchain_tar_test.py +++ b/toolchain/install/toolchain_tar_test.py @@ -38,6 +38,7 @@ def main() -> None: ) args = parser.parse_args() + # Gather install data files. with open(args.install_data_manifest) as manifest: # Remove everything up to and including `prefix_root`. install_files = set( @@ -48,8 +49,7 @@ def main() -> None: ) assert len(install_files), f"`{args.install_data_manifest}` is empty." - # First check that every file and directory in the tar file exists in our - # prefix root, and build a set of those paths. + # Gather tar files. with tarfile.open(args.tar_file) as tar: # Remove the first path component. tar_files = set( From 4c0dafedd4cd3d175cf862f4467d378ef0213cbc Mon Sep 17 00:00:00 2001 From: jonmeow Date: Wed, 30 Oct 2024 14:21:08 -0700 Subject: [PATCH 3/4] comment --- toolchain/install/toolchain_tar_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/toolchain/install/toolchain_tar_test.py b/toolchain/install/toolchain_tar_test.py index 47f7cd170fe2d..b6fc63c353f54 100644 --- a/toolchain/install/toolchain_tar_test.py +++ b/toolchain/install/toolchain_tar_test.py @@ -61,6 +61,7 @@ def main() -> None: ) assert len(tar_files), f"`{args.tar_file}` is empty." + # Verify file sets match. tar_okay = expect_empty_set(args.tar_file, tar_files - install_files) install_okay = expect_empty_set( args.install_data_manifest, install_files - tar_files From 8c0cade0375c0a4d563fd0c2ed11c3a7f7fb18c0 Mon Sep 17 00:00:00 2001 From: jonmeow Date: Wed, 30 Oct 2024 15:36:43 -0700 Subject: [PATCH 4/4] Switch to unittest --- toolchain/install/pkg_helpers.bzl | 8 +-- toolchain/install/toolchain_tar_test.py | 92 +++++++++---------------- 2 files changed, 38 insertions(+), 62 deletions(-) diff --git a/toolchain/install/pkg_helpers.bzl b/toolchain/install/pkg_helpers.bzl index 06ced2e494c56..8e1c0badc3f0f 100644 --- a/toolchain/install/pkg_helpers.bzl +++ b/toolchain/install/pkg_helpers.bzl @@ -57,10 +57,10 @@ def pkg_tar_and_test(name_base, package_file_name_base, install_data_manifest, * size = "small", srcs = ["toolchain_tar_test.py"], data = [":" + tar_target, install_data_manifest], - args = [ - "$(location :{})".format(tar_target), - "$(location {})".format(install_data_manifest), - ], + env = { + "INSTALL_DATA_MANIFEST": "$(location {})".format(install_data_manifest), + "TAR_FILE": "$(location :{})".format(tar_target), + }, main = "toolchain_tar_test.py", # The compressed tar is slow, exclude building and testing that. tags = ["manual"] if file_ext == "tar.gz" else [], diff --git a/toolchain/install/toolchain_tar_test.py b/toolchain/install/toolchain_tar_test.py index b6fc63c353f54..99d867d0364a6 100644 --- a/toolchain/install/toolchain_tar_test.py +++ b/toolchain/install/toolchain_tar_test.py @@ -8,67 +8,43 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """ -import argparse from pathlib import Path +import os import re import tarfile - - -def expect_empty_set(filename: str, file_set: set[str]) -> bool: - """Prints and returns false when the set has entries.""" - if file_set: - print(f"error: files only in `{filename}`:") - for f in file_set: - print(f" - ${f}") - return False - return True - - -def main() -> None: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "tar_file", - type=Path, - help="The tar file to test.", - ) - parser.add_argument( - "install_data_manifest", - type=Path, - help="The install data manifest file.", - ) - args = parser.parse_args() - - # Gather install data files. - with open(args.install_data_manifest) as manifest: - # Remove everything up to and including `prefix_root`. - install_files = set( - [ - re.sub("^.*/prefix_root/", "", entry.strip()) - for entry in manifest.readlines() - ] - ) - assert len(install_files), f"`{args.install_data_manifest}` is empty." - - # Gather tar files. - with tarfile.open(args.tar_file) as tar: - # Remove the first path component. - tar_files = set( - [ - str(Path(*Path(tarinfo.name).parts[1:])) - for tarinfo in tar - if not tarinfo.isdir() - ] - ) - assert len(tar_files), f"`{args.tar_file}` is empty." - - # Verify file sets match. - tar_okay = expect_empty_set(args.tar_file, tar_files - install_files) - install_okay = expect_empty_set( - args.install_data_manifest, install_files - tar_files - ) - if not (tar_okay and install_okay): - exit("error: tar and install data did not match.") +import unittest + + +class ToolchainTarTest(unittest.TestCase): + def test_tar(self) -> None: + install_data_manifest = Path(os.environ["INSTALL_DATA_MANIFEST"]) + tar_file = Path(os.environ["TAR_FILE"]) + + # Gather install data files. + with open(install_data_manifest) as manifest: + # Remove everything up to and including `prefix_root`. + install_files = set( + [ + re.sub("^.*/prefix_root/", "", entry.strip()) + for entry in manifest.readlines() + ] + ) + self.assertTrue(install_files, f"`{install_data_manifest}` is empty.") + + # Gather tar files. + with tarfile.open(tar_file) as tar: + # Remove the first path component. + tar_files = set( + [ + str(Path(*Path(tarinfo.name).parts[1:])) + for tarinfo in tar + if not tarinfo.isdir() + ] + ) + self.assertTrue(tar_files, f"`{tar_file}` is empty.") + + self.assertSetEqual(install_files, tar_files) if __name__ == "__main__": - main() + unittest.main()