WIP: Bintar deps validation script
cvicentiu committed Feb 14, 2025
1 parent e84cb1b commit ed1387f
Showing 11 changed files with 4,795 additions and 0 deletions.
627 changes: 627 additions & 0 deletions runtime_config/bintar_deps/deps_10.11.yaml

412 changes: 412 additions & 0 deletions runtime_config/bintar_deps/deps_10.5.yaml

602 changes: 602 additions & 0 deletions runtime_config/bintar_deps/deps_10.6.yaml

628 changes: 628 additions & 0 deletions runtime_config/bintar_deps/deps_11.4.yaml

651 changes: 651 additions & 0 deletions runtime_config/bintar_deps/deps_11.6.yaml

651 changes: 651 additions & 0 deletions runtime_config/bintar_deps/deps_11.7.yaml

654 changes: 654 additions & 0 deletions runtime_config/bintar_deps/deps_11.8.yaml

Empty file added scripts/bintars/__init__.py
111 changes: 111 additions & 0 deletions scripts/bintars/common.py
@@ -0,0 +1,111 @@
import logging
import tarfile
import subprocess
import shutil
import sys
import re
import os
from typing import Tuple

from pathlib import Path


def setup_logging(level: int):
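    """Configure the root logger to print color-coded level prefixes."""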
# ANSI escape codes for colors
RESET = "\033[0m"
GREEN = "\033[32m"
RED = "\033[31m"
    YELLOW = "\033[33m"

    # Custom log formatter to include colors
class ColoredFormatter(logging.Formatter):
def format(self, record):
if record.levelno == logging.INFO:
color = GREEN
elif record.levelno == logging.ERROR:
color = RED
elif record.levelno == logging.WARNING:
color = YELLOW
else:
color = RESET

# Apply color to the message
record.msg = f"{color}{record.levelname}{RESET}: {record.msg}"
return super().format(record)

# Basic logging configuration
logging.basicConfig(
level=level,
format="%(message)s", # No logger name or timestamp
handlers=[
logging.StreamHandler()
]
)

# Apply the custom formatter
logging.getLogger().handlers[0].setFormatter(ColoredFormatter("%(message)s"))


# Helper functions
def run_command(command):
"""Run a shell command and return the output."""
try:
result = subprocess.run(command, shell=True, check=True, text=True,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return result.stdout.strip()
except subprocess.CalledProcessError as e:
logging.error(f"Error running command '{command}': {e} {e.stderr.strip()}")
return None


def _unpack_archive(tarball_path: Path, dst_path: Path):
logging.info(f"Extracting archive {tarball_path}")
with tarfile.open(str(tarball_path), 'r:*') as tar:
tar.extractall(path=str(dst_path), filter='fully_trusted')


def _parse_archive_path(archive_path: Path) -> Tuple[str, str]:
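    """Split a tarball name into its base name and major.minor version.

    For example, 'mariadb-11.6.2-linux-systemd-x86_64.tar.gz' yields
    ('mariadb-11.6.2-linux-systemd-x86_64', '11.6').
    """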
archive_name = archive_path.name

# Removes the last extension (e.g., .gz)
base_name = Path(archive_name).stem
# Check and remove the .tar extension
if base_name.endswith(".tar"):
base_name = Path(base_name).stem

# Let's extract the product version from the archive:
    match = re.search(r'([1-9][0-9]+\.[0-9]+\.[0-9]+)', base_name)
if not match:
logging.error(f'Archive name {archive_name} must contain product version')
sys.exit(1)

# Only interested in major and minor version numbers, not point.
version = match.group(0).split('.')
major_minor = f'{version[0]}.{version[1]}'

logging.info(f'Product version (major.minor) {major_minor}')

return base_name, major_minor


def prepare_test_directory(archive_path: Path, tests_path: Path) -> Tuple[Path, str]:
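    """Extract the archive under tests_path and return the path to its
    unpacked contents together with the major.minor product version."""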

base_name, major_minor = _parse_archive_path(archive_path)
    # The archive contains a folder with the same name as the archive.
    # We are interested in the contents within that folder, as that's
    # where the files are.
files_path = tests_path / base_name

# Cleanup any previous run.
shutil.rmtree(files_path, ignore_errors=True)

# Create the test directory.
tests_path.mkdir(parents=True, exist_ok=True)

_unpack_archive(archive_path, tests_path)

# Sanity check that the archive has maintained its format.
assert files_path.is_dir()

return files_path, major_minor
256 changes: 256 additions & 0 deletions scripts/bintars/deps_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
import argparse
import os
import re
import sys
from concurrent.futures import ProcessPoolExecutor
from typing import Iterable, Tuple, Generator
import logging

import magic
import yaml

from common import run_command, setup_logging, prepare_test_directory
from pathlib import Path


def check_file_is_elf_binary_callback(file_path: str) -> str | None:
global mime
try:
file_type = mime.from_file(file_path)
if "elf" in file_type.lower(): # Identify ELF files
return file_path
except Exception as e:
logging.error(f"Error checking file {file_path}: {e}")
return None


def start_worker():
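    """Per-process initializer: give each worker its own libmagic handle,
    since libmagic state should not be shared across processes."""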
global mime
mime = magic.Magic()


def get_file_paths(path: str) -> Generator[str, None, None]:
# Generator to feed file paths to processes.
for root, _, files in os.walk(path):
for file in files:
yield os.path.join(root, file)


def get_executables(path: str):
"""
Recursively searches for ELF executable files and libraries in the given
path using a multiprocess approach (to speed up).
Args:
path (str): Root directory to search.
Returns:
list: List of paths to ELF executables and libraries.
"""
executables = []

# Use ProcessPoolExecutor to process files in parallel
# This offers a 10x speed up compared to single threaded.
with ProcessPoolExecutor(initializer=start_worker,
max_workers=os.cpu_count()) as executor:
results = executor.map(check_file_is_elf_binary_callback,
get_file_paths(path))

# Collect non-None results
executables = [result for result in results if result]

return executables


def get_file_dependencies_callback(file: str) -> Tuple[str, set[str] | None]:
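    """Return (file, set of NEEDED shared libraries) parsed from the
    output of `readelf -d`, or (file, None) when readelf fails."""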
result = set()
output = run_command(f'readelf -d {file}')
if output is None:
logging.error(f"Failed to check libraries for {file}.")
        return file, None

pattern = "Shared library: \\[(\\S*)\\]"
regex_shared_library = re.compile(pattern)

for line in output.splitlines():
# Here is an example line we match:
# 0x0000000000000001 (NEEDED) Shared library: [libsystemd.so.0]

match = regex_shared_library.search(line)
if not match:
continue
library = match.group(1)
result.add(library)

return file, result


def get_dependencies_for_files(files: Iterable[str]) -> dict[str, set[str]]:
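    """Map each file to its set of shared-library dependencies, using a
    process pool to run readelf in parallel."""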
with ProcessPoolExecutor(initializer=start_worker,
max_workers=os.cpu_count()) as executor:
results = executor.map(get_file_dependencies_callback, files)

deps = {}
for full_file_path, file_deps in results:
# TODO(cvicentiu) Perhaps this should be marked as a failure.
# Unable to read file dependencies, skip the file.
        if file_deps is None:
continue

deps[full_file_path] = file_deps

return deps


def remove_base_path_from_files(dependencies: dict[str, set[str]],
                                base_path: str) -> dict[str, set[str]]:
"""
For all keys in dependencies, remove the base_path prefix.
"./tests/mariadb-11.6.2-linux-systemd-x86_64/lib/libgalera_smm.so"
becomes
"lib/libgalera_smm.so"
"""
result = {}
for full_file_name, deps in dependencies.items():
# If this assert fails, there is a bug in the testing script.
assert full_file_name.startswith(base_path)
file_name = full_file_name[len(base_path)+1:]
result[file_name] = deps
return result


def dependencies_to_canonical_repr(
dependencies: dict[str, set[str]],
version: str,
base_path: Path
) -> dict[str, dict[str, list[str]]]:
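    """Build the YAML-friendly structure stored in the deps files.

    Illustrative shape (entries are examples, not a real recording):

        version: '11.6'
        files:
            lib/libgalera_smm.so:
                - libsystemd.so.0
    """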

dependencies = remove_base_path_from_files(dependencies, base_path.as_posix())
result = {
'version': version,
'files': {},
}

for file, deps in dependencies.items():
        result['files'][file] = sorted(deps)

return result


def get_standard_dependencies(path: str):
with open(path, 'r') as spec_file:
return yaml.safe_load(spec_file)


def get_executable_files_dependencies(path: str):
files = get_executables(path)
return get_dependencies_for_files(files)


def compare_versions(archive_deps, standard_deps,
allow_cross_version: bool):
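    """Return True (an error) when the archive and deps-file versions
    differ, unless cross-version runs are allowed."""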
a_version = archive_deps['version']
s_version = standard_deps['version']

if a_version != s_version:
if allow_cross_version:
            logging.warning(f'version mismatch {a_version} {s_version}')
else:
logging.error(f'version mismatch {a_version} {s_version}')
return True
return False


def compare_dependencies(archive_deps, standard_deps):
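    """Compare the archive's file list and per-file dependencies against
    the recorded standard; log each difference and return True on any
    mismatch."""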
error = False
files = archive_deps['files']
control = standard_deps['files']

files_set = set(files.keys())
control_set = set(control.keys())

files_extra = files_set.difference(control_set)
files_missing = control_set.difference(files_set)
common = files_set.intersection(control_set)

if files_extra:
logging.error(f'We have extra files! {files_extra}')
error = True

if files_missing:
logging.error(f'We have missing files from the archive! {files_missing}')
error = True

for file in common:
deps_extra = set(files[file]).difference(control[file])
deps_missing = set(control[file]).difference(files[file])

if deps_extra:
logging.error(f'We have extra deps for {file}! {deps_extra}')
error = True
if deps_missing:
logging.error(f'We have missing deps for {file}! {deps_missing}')
error = True

return error


def main(archive_path: Path,
tests_path: Path,
deps_file: Path,
record: bool,
allow_cross_version: bool):
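    """Extract the archive, collect its ELF dependencies, then either
    record them to deps_file (--record) or validate them against it."""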
    error = False  # Track any errors so we can exit with the right status.

files_path, major_minor = prepare_test_directory(archive_path, tests_path)

logging.info("Fetching archive dependencies")
dependencies = get_executable_files_dependencies(files_path)

canonical_deps = dependencies_to_canonical_repr(dependencies,
version=major_minor,
base_path=files_path)

if record:
logging.info(f"Recording new result to {deps_file}")
with open(deps_file, 'w') as f:
yaml.dump(canonical_deps, f)
return

# Validate dependencies.
standard = get_standard_dependencies(deps_file)

error |= compare_versions(canonical_deps, standard, allow_cross_version)
error |= compare_dependencies(canonical_deps, standard)

if error:
logging.error("Some tests failed")
sys.exit(1)

logging.info("All OK")


if __name__ == "__main__":
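    # Example invocation (hypothetical paths):
    #   python deps_test.py mariadb-11.6.2-linux-systemd-x86_64.tar.gz \
    #       runtime_config/bintar_deps/deps_11.6.yaml
    # Add --record to (re)generate the deps file instead of validating.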
parser = argparse.ArgumentParser(
prog='bintar_deps.py',
description='Checks/Records bintar files and dependencies')
parser.add_argument('archive',
help='Path to the binary tarball archive')
parser.add_argument('deps_file',
help='Path to YAML file with a list of dependencies')
parser.add_argument('--record', action='store_true',
help='Use the bintar archive to generate a deps file')
parser.add_argument('--test_directory', type=str, default='./tests/',
help='Where to extract the archive and run tests.')
parser.add_argument('--allow_cross_version', action='store_true',
help='Tests pass even if there is a '
'version mismatch between the archive and '
'the deps_file version')
args = parser.parse_args()

setup_logging(logging.INFO)
main(archive_path=Path(args.archive),
tests_path=Path(args.test_directory),
deps_file=Path(args.deps_file),
record=args.record,
allow_cross_version=args.allow_cross_version)