diff --git a/icechunk-python/benchmarks/conftest.py b/icechunk-python/benchmarks/conftest.py index 08225030..f8b02bec 100644 --- a/icechunk-python/benchmarks/conftest.py +++ b/icechunk-python/benchmarks/conftest.py @@ -38,6 +38,13 @@ def synth_dataset(request) -> Store: ds.storage_config = ds.storage_config.with_overwrite( **TEST_BUCKETS[where] ).with_extra(prefix=extra_prefix, force_idempotent=True) + if ds.setupfn is None: + # these datasets aren't automatically set up + # so skip if the data haven't been written yet. + try: + ds.store() + except ValueError as e: + pytest.skip(reason=str(e)) return ds diff --git a/icechunk-python/benchmarks/helpers.py b/icechunk-python/benchmarks/helpers.py index 7065e88c..410b7c07 100644 --- a/icechunk-python/benchmarks/helpers.py +++ b/icechunk-python/benchmarks/helpers.py @@ -58,6 +58,12 @@ def get_commit(ref: str) -> str: ).stdout.strip()[:8] +def get_full_commit(ref: str) -> str: + return subprocess.run( + ["git", "rev-parse", ref], capture_output=True, text=True, check=True + ).stdout.strip() + + def rdms() -> str: import random import string diff --git a/icechunk-python/benchmarks/most_recent.sh b/icechunk-python/benchmarks/most_recent.sh new file mode 100644 index 00000000..bfb30fbc --- /dev/null +++ b/icechunk-python/benchmarks/most_recent.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env sh + +echo $(ls -t ./.benchmarks/**/* | head -n 1) +pytest-benchmark compare --group=group,func,param --sort=fullname --columns=median --name=normal `ls -t ./.benchmarks/**/* | head -n 1` diff --git a/icechunk-python/benchmarks/runner.py b/icechunk-python/benchmarks/runner.py index 88949527..6ef50fc7 100644 --- a/icechunk-python/benchmarks/runner.py +++ b/icechunk-python/benchmarks/runner.py @@ -18,11 +18,10 @@ assert_cwd_is_icechunk_python, get_coiled_kwargs, get_commit, + get_full_commit, setup_logger, ) -import icechunk as ic - logger = setup_logger() PIP_OPTIONS = "--disable-pip-version-check -q" @@ -55,20 +54,21 @@ class Runner: def __init__(self, *, ref: str, where: str) -> None: self.ref = ref self.commit = get_commit(ref) + self.full_commit = get_full_commit(ref) self.where = where @property def pip_github_url(self) -> str: # optional extras cannot be specified here, "not guaranteed to work" # https://pip.pypa.io/en/stable/topics/vcs-support/#url-fragments - return f"git+https://github.com/earth-mover/icechunk.git@{self.commit}#subdirectory=icechunk-python" + return f"git+https://github.com/earth-mover/icechunk.git@{self.full_commit}#subdirectory=icechunk-python" @property def prefix(self) -> str: - try: - return f"v{ic.spec_version():02d}" - except AttributeError: - return f"{self.ref}_{self.commit}" + # try: + # return f"v{ic.spec_version():02d}" + # except AttributeError: + return f"{self.ref}_{self.commit}" @property def ref_commit(self) -> str: @@ -84,12 +84,11 @@ def execute(cmd: str) -> None: def initialize(self) -> None: """Builds virtual envs etc.""" - raise NotImplementedError + self.sync_benchmarks_folder() def setup(self, *, force: bool): """Creates datasets for read benchmarks.""" logger.info(f"setup_benchmarks for {self.ref} / {self.commit}") - self.sync_benchmarks_folder() cmd = ( f"pytest {PYTEST_OPTIONS} -nauto " f"-m setup_benchmarks --force-setup={force} " @@ -104,23 +103,21 @@ def run(self, *, pytest_extra: str = "") -> None: """Actually runs the benchmarks.""" logger.info(f"running benchmarks for {self.ref} / {self.commit}") - self.sync_benchmarks_folder() - # shorten the name so `pytest-benchmark compare` is readable clean_ref = self.ref.removeprefix("icechunk-v0.1.0-alph") assert self.bench_store_dir is not None # Note: .benchmarks is the default location for pytest-benchmark cmd = ( - f"pytest {PYTEST_OPTIONS} " + f"pytest {pytest_extra} " f"--benchmark-storage={self.bench_store_dir}/.benchmarks " f"--benchmark-save={clean_ref}_{self.commit}_{self.where} " f"--where={self.where} " f"--icechunk-prefix=benchmarks/{self.prefix}/ " - f"{pytest_extra} " + f"{PYTEST_OPTIONS} " "benchmarks/" ) - logger.info(cmd) + print(cmd) self.execute(cmd, check=False) @@ -149,13 +146,34 @@ def initialize(self) -> None: deps = get_benchmark_deps(f"{CURRENTDIR}/pyproject.toml") subprocess.run(["mkdir", "-p", self.pycwd], check=False) subprocess.run(["python3", "-m", "venv", ".venv"], cwd=self.pycwd, check=True) + self.sync_benchmarks_folder() cmd = f"pip install {PIP_OPTIONS} {self.pip_github_url} {deps}" self.execute(cmd, check=True) + super().initialize() + + def run(self, *, pytest_extra: str = "") -> None: + super().run(pytest_extra=pytest_extra) class CoiledRunner(Runner): bench_store_dir = "." + def get_coiled_run_args(self) -> tuple[str]: + ckwargs = self.get_coiled_kwargs() + return ( + "coiled", + "run", + "--interactive", + "--name", + f"icebench-{self.commit}", # cluster name + "--keepalive", + "10m", + f"--workspace={ckwargs['workspace']}", # cloud + f"--vm-type={ckwargs['vm_type']}", + f"--software={ckwargs['software']}", + f"--region={ckwargs['region']}", + ) + def get_coiled_kwargs(self): COILED_SOFTWARE = { "icechunk-v0.1.0-alpha.1": "icechunk-alpha-release", @@ -185,34 +203,26 @@ def initialize(self) -> None: }, pip=[self.pip_github_url, "coiled", *deps], ) + super().initialize() def execute(self, cmd, **kwargs) -> None: - ckwargs = self.get_coiled_kwargs() - ls = [f for f in os.listdir(CURRENTDIR) if f not in [".benchmarks", "benchmarks"]] - toignore = " ".join(ls) + subprocess.run([*self.get_coiled_run_args(), cmd], **kwargs) + + def sync_benchmarks_folder(self) -> None: subprocess.run( [ - "coiled", - "run", - "--interactive", - "--name", - f"icebench-{self.commit}", # cluster name - "--sync", - f"--sync-ignore={toignore!r}", - "--keepalive", - "10m", - f"--workspace={ckwargs['workspace']}", # cloud - f"--vm-type={ckwargs['vm_type']}", - f"--software={ckwargs['software']}", - f"--region={ckwargs['region']}", - cmd, + *self.get_coiled_run_args(), + "--file", + "benchmarks/", + "ls -alh ./.benchmarks/", ], - **kwargs, + check=True, ) - def sync_benchmarks_folder(self) -> None: - # uses command-line --sync option - pass + def run(self, *, pytest_extra: str = "") -> None: + super().run(pytest_extra=pytest_extra) + # This prints to screen but we could upload to a bucket in here. + self.execute("sh benchmarks/most_recent.sh") def init_for_ref(runner: Runner): @@ -223,7 +233,7 @@ def init_for_ref(runner: Runner): parser = argparse.ArgumentParser() parser.add_argument("refs", help="refs to run benchmarks for", nargs="+") parser.add_argument("--pytest", help="passed to pytest", default="") - parser.add_argument("--where", help="where to run? [local]", default="local") + parser.add_argument("--where", help="where to run? [local|s3|gcs]", default="local") parser.add_argument( "--skip-setup", help="skip setup step, useful for benchmarks that don't need data",