Skip to content

Commit

Permalink
Remote benchmark tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Feb 17, 2025
1 parent 9d2eb92 commit 9f64654
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 36 deletions.
7 changes: 7 additions & 0 deletions icechunk-python/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ def synth_dataset(request) -> Store:
ds.storage_config = ds.storage_config.with_overwrite(
**TEST_BUCKETS[where]
).with_extra(prefix=extra_prefix, force_idempotent=True)
if ds.setupfn is None:
# these datasets aren't automatically set up
# so skip if the data haven't been written yet.
try:
ds.store()
except ValueError as e:
pytest.skip(reason=str(e))
return ds


Expand Down
6 changes: 6 additions & 0 deletions icechunk-python/benchmarks/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ def get_commit(ref: str) -> str:
).stdout.strip()[:8]


def get_full_commit(ref: str) -> str:
return subprocess.run(
["git", "rev-parse", ref], capture_output=True, text=True, check=True
).stdout.strip()


def rdms() -> str:
import random
import string
Expand Down
4 changes: 4 additions & 0 deletions icechunk-python/benchmarks/most_recent.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env sh

echo $(ls -t ./.benchmarks/**/* | head -n 1)
pytest-benchmark compare --group=group,func,param --sort=fullname --columns=median --name=normal `ls -t ./.benchmarks/**/* | head -n 1`
82 changes: 46 additions & 36 deletions icechunk-python/benchmarks/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
assert_cwd_is_icechunk_python,
get_coiled_kwargs,
get_commit,
get_full_commit,
setup_logger,
)

import icechunk as ic

logger = setup_logger()

PIP_OPTIONS = "--disable-pip-version-check -q"
Expand Down Expand Up @@ -55,20 +54,21 @@ class Runner:
def __init__(self, *, ref: str, where: str) -> None:
self.ref = ref
self.commit = get_commit(ref)
self.full_commit = get_full_commit(ref)
self.where = where

@property
def pip_github_url(self) -> str:
# optional extras cannot be specified here, "not guaranteed to work"
# https://pip.pypa.io/en/stable/topics/vcs-support/#url-fragments
return f"git+https://github.com/earth-mover/icechunk.git@{self.commit}#subdirectory=icechunk-python"
return f"git+https://github.com/earth-mover/icechunk.git@{self.full_commit}#subdirectory=icechunk-python"

@property
def prefix(self) -> str:
try:
return f"v{ic.spec_version():02d}"
except AttributeError:
return f"{self.ref}_{self.commit}"
# try:
# return f"v{ic.spec_version():02d}"
# except AttributeError:
return f"{self.ref}_{self.commit}"

@property
def ref_commit(self) -> str:
Expand All @@ -84,12 +84,11 @@ def execute(cmd: str) -> None:

def initialize(self) -> None:
"""Builds virtual envs etc."""
raise NotImplementedError
self.sync_benchmarks_folder()

def setup(self, *, force: bool):
"""Creates datasets for read benchmarks."""
logger.info(f"setup_benchmarks for {self.ref} / {self.commit}")
self.sync_benchmarks_folder()
cmd = (
f"pytest {PYTEST_OPTIONS} -nauto "
f"-m setup_benchmarks --force-setup={force} "
Expand All @@ -104,23 +103,21 @@ def run(self, *, pytest_extra: str = "") -> None:
"""Actually runs the benchmarks."""
logger.info(f"running benchmarks for {self.ref} / {self.commit}")

self.sync_benchmarks_folder()

# shorten the name so `pytest-benchmark compare` is readable
clean_ref = self.ref.removeprefix("icechunk-v0.1.0-alph")

assert self.bench_store_dir is not None
# Note: .benchmarks is the default location for pytest-benchmark
cmd = (
f"pytest {PYTEST_OPTIONS} "
f"pytest {pytest_extra} "
f"--benchmark-storage={self.bench_store_dir}/.benchmarks "
f"--benchmark-save={clean_ref}_{self.commit}_{self.where} "
f"--where={self.where} "
f"--icechunk-prefix=benchmarks/{self.prefix}/ "
f"{pytest_extra} "
f"{PYTEST_OPTIONS} "
"benchmarks/"
)
logger.info(cmd)
print(cmd)

self.execute(cmd, check=False)

Expand Down Expand Up @@ -149,13 +146,34 @@ def initialize(self) -> None:
deps = get_benchmark_deps(f"{CURRENTDIR}/pyproject.toml")
subprocess.run(["mkdir", "-p", self.pycwd], check=False)
subprocess.run(["python3", "-m", "venv", ".venv"], cwd=self.pycwd, check=True)
self.sync_benchmarks_folder()
cmd = f"pip install {PIP_OPTIONS} {self.pip_github_url} {deps}"
self.execute(cmd, check=True)
super().initialize()

def run(self, *, pytest_extra: str = "") -> None:
super().run(pytest_extra=pytest_extra)


class CoiledRunner(Runner):
bench_store_dir = "."

def get_coiled_run_args(self) -> tuple[str]:
ckwargs = self.get_coiled_kwargs()
return (
"coiled",
"run",
"--interactive",
"--name",
f"icebench-{self.commit}", # cluster name
"--keepalive",
"10m",
f"--workspace={ckwargs['workspace']}", # cloud
f"--vm-type={ckwargs['vm_type']}",
f"--software={ckwargs['software']}",
f"--region={ckwargs['region']}",
)

def get_coiled_kwargs(self):
COILED_SOFTWARE = {
"icechunk-v0.1.0-alpha.1": "icechunk-alpha-release",
Expand Down Expand Up @@ -185,34 +203,26 @@ def initialize(self) -> None:
},
pip=[self.pip_github_url, "coiled", *deps],
)
super().initialize()

def execute(self, cmd, **kwargs) -> None:
ckwargs = self.get_coiled_kwargs()
ls = [f for f in os.listdir(CURRENTDIR) if f not in [".benchmarks", "benchmarks"]]
toignore = " ".join(ls)
subprocess.run([*self.get_coiled_run_args(), cmd], **kwargs)

def sync_benchmarks_folder(self) -> None:
subprocess.run(
[
"coiled",
"run",
"--interactive",
"--name",
f"icebench-{self.commit}", # cluster name
"--sync",
f"--sync-ignore={toignore!r}",
"--keepalive",
"10m",
f"--workspace={ckwargs['workspace']}", # cloud
f"--vm-type={ckwargs['vm_type']}",
f"--software={ckwargs['software']}",
f"--region={ckwargs['region']}",
cmd,
*self.get_coiled_run_args(),
"--file",
"benchmarks/",
"ls -alh ./.benchmarks/",
],
**kwargs,
check=True,
)

def sync_benchmarks_folder(self) -> None:
# uses command-line --sync option
pass
def run(self, *, pytest_extra: str = "") -> None:
super().run(pytest_extra=pytest_extra)
# This prints to screen but we could upload to a bucket in here.
self.execute("sh benchmarks/most_recent.sh")


def init_for_ref(runner: Runner):
Expand All @@ -223,7 +233,7 @@ def init_for_ref(runner: Runner):
parser = argparse.ArgumentParser()
parser.add_argument("refs", help="refs to run benchmarks for", nargs="+")
parser.add_argument("--pytest", help="passed to pytest", default="")
parser.add_argument("--where", help="where to run? [local]", default="local")
parser.add_argument("--where", help="where to run? [local|s3|gcs]", default="local")
parser.add_argument(
"--skip-setup",
help="skip setup step, useful for benchmarks that don't need data",
Expand Down

0 comments on commit 9f64654

Please sign in to comment.