Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for memory-mapped files #604

Merged
merged 18 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions bench/aggregate_multiple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from pathlib import Path
import re
import os
import subprocess
from typing import Union

import numpy as np
import argparse


def run_command(command: str, return_output: bool = False) -> Union[tuple[list[float], str], list[float]]:
    """Run a benchmark executable from the build tree and extract every number it prints.

    The executable is looked up in ``<repo>/build/bench`` (with an extra
    ``Release`` sub-folder when ``os.name == "nt"``), where ``<repo>`` is the
    parent of the directory that contains this script.

    Args:
        command: Executable name followed by its arguments, as one string
            (e.g. ``"sframe_bench 1000 insert 1000 io_mmap"``).
        return_output: When true, also return the decoded stdout of the run.

    Returns:
        The list of all numbers found in stdout (in order of appearance), or a
        ``(numbers, stdout)`` tuple when ``return_output`` is true.

    Raises:
        AssertionError: If the process exits with a non-zero status or its
            stdout does not contain any number.
    """
    bench_folder = Path(__file__).parent.parent / "build" / "bench"
    if os.name == "nt":
        bench_folder /= "Release"

    # `command` holds the program name and its arguments in a single string,
    # so the shell is relied upon to split it.
    res = subprocess.run(f"{bench_folder}/{command}", shell=True, capture_output=True)
    out = res.stdout.decode("utf-8")

    # Raise explicitly instead of using `assert`: assert statements are removed
    # under `python -O`, which would let a failed benchmark go unnoticed.
    if res.returncode != 0:
        raise AssertionError(f"\nstdout: {out}\nstderr: {res.stderr.decode('utf-8')}")

    numbers = [float(token) for token in re.findall(r"-?\d+(?:\.\d+)?", out)]
    if not numbers:
        raise AssertionError(f"Could not find any numbers in the output:\n{out}")

    if return_output:
        return numbers, out
    return numbers


class ReplaceNumbers:
    """Replacement callback for ``re.sub`` that emits stored numbers one by one.

    Each call consumes the next entry of ``numbers`` and returns its text
    form: whole values are printed as integers, all others with three decimal
    places.
    """

    def __init__(self, numbers: np.ndarray):
        self.numbers = numbers
        # Position of the next value to hand out.
        self.index = 0

    def __call__(self, match: re.Match) -> str:
        """Return the formatted next number for a ``NUMBER`` placeholder match."""
        assert match.group() == "NUMBER"

        position = self.index
        self.index = position + 1
        value = self.numbers[position]

        return str(int(value)) if value.is_integer() else f"{value:.3f}"


if __name__ == "__main__":
    # Script entry point: run the benchmark several times, take the
    # element-wise minimum of every number printed, and print the first run's
    # output with its numbers replaced by those minima.
    parser = argparse.ArgumentParser(
        description=(
            "Repeat multiple runs of a benchmark and aggregate all numbers in the output (the outputs from all runs must "
            "have the same format)."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--command",
        required=True,
        type=str,
        help="The command to execute (e.g. sframe_bench 1000 insert 1000 io_mmap).",
    )
    parser.add_argument(
        "--warmups",
        default=3,
        required=False,
        type=int,
        help="Number of warmup runs.",
    )
    parser.add_argument(
        "--runs",
        default=30,
        required=False,
        type=int,
        help="Number of runs to perform and aggregate the output from.",
    )
    args = parser.parse_args()

    # Without at least one run there is nothing to aggregate; reject this up
    # front instead of failing later inside the NumPy reduction.
    if args.runs < 1:
        parser.error("--runs must be at least 1")

    for i in range(args.warmups):
        print(f"Warmup {i+1}/{args.warmups}", end="\r")
        run_command(args.command)
    print()

    # Collect numbers from outputs of multiple runs
    example_output = None
    numbers = []
    for i in range(args.runs):
        print(f"Run {i+1}/{args.runs}", end="\r")

        if example_output is None:
            # Keep the text of the first run as the template for the report.
            values, example_output = run_command(args.command, return_output=True)
        else:
            values = run_command(args.command)

        # Element-wise aggregation only makes sense when every run prints the
        # same amount of numbers.
        if numbers and len(values) != len(numbers[0]):
            raise ValueError(
                f"Run {i+1} produced {len(values)} numbers but the first run produced {len(numbers[0])}; "
                "the outputs from all runs must have the same format."
            )
        numbers.append(values)
    print()

    assert example_output is not None, "No output was collected."

    # Element-wise minimum over all runs (axis 0 indexes the runs).
    numbers = np.asarray(numbers)
    numbers = np.min(numbers, axis=0)

    # Turn every number of the template into a placeholder, then fill the
    # placeholders with the aggregated values in order.
    example_output = re.sub(r"-?\d+(?:\.\d+)?", "NUMBER", example_output)
    aggregated_output = re.sub("NUMBER", ReplaceNumbers(numbers), example_output)
    print(aggregated_output)
28 changes: 25 additions & 3 deletions bench/sframe_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

int nchunks = NCHUNKS;
int iterations = 5;
int io_type = BLOSC2_IO_FILESYSTEM;



Expand Down Expand Up @@ -249,7 +250,18 @@ void test_create_sframe_frame(char* operation) {
blosc2_remove_urlpath(storage.urlpath);
schunk_sframe = blosc2_schunk_new(&storage);

blosc2_stdio_mmap mmap_file = BLOSC2_STDIO_MMAP_DEFAULTS;
mmap_file.mode = "w+";
blosc2_io io_mmap = {.id = BLOSC2_IO_FILESYSTEM_MMAP, .name = "filesystem_mmap", .params = &mmap_file};

blosc2_storage storage2 = {.contiguous=true, .urlpath="test_cframe.b2frame", .cparams=&cparams, .dparams=&dparams};
if (io_type == BLOSC2_IO_FILESYSTEM) {
storage2.io = (blosc2_io*)&BLOSC2_IO_DEFAULTS;
}
else if (io_type == BLOSC2_IO_FILESYSTEM_MMAP) {
storage2.io = &io_mmap;
}

blosc2_remove_urlpath(storage2.urlpath);
schunk_cframe = blosc2_schunk_new(&storage2);

Expand Down Expand Up @@ -350,8 +362,8 @@ if (operation != NULL) {
int main(int argc, char* argv[]) {
char* operation = NULL;

if (argc >= 5) {
printf("Usage: ./sframe_bench [nchunks] [insert | update | reorder] [num operations]\n");
if (argc >= 6) {
printf("Usage: ./sframe_bench [nchunks] [insert | update | reorder] [num operations] [io_file | io_mmap]\n");
exit(1);
}
else if (argc >= 2) {
Expand All @@ -360,9 +372,19 @@ int main(int argc, char* argv[]) {
if (argc >= 3) {
operation = argv[2];
}
if (argc == 4) {
if (argc >= 4) {
iterations = (int)strtol(argv[3], NULL, 10);
}
if (argc == 5) {
if (strcmp(argv[4], "io_file") == 0) {
io_type = BLOSC2_IO_FILESYSTEM;
} else if (strcmp(argv[4], "io_mmap") == 0) {
io_type = BLOSC2_IO_FILESYSTEM_MMAP;
} else {
printf("Invalid io type. Use io_file or io_mmap\n");
exit(1);
}
}

test_create_sframe_frame(operation);

Expand Down
Loading