Skip to content

Commit

Permalink
migration: ubench fixes (#46)
Browse files Browse the repository at this point in the history
* migration: ubench fixes

* migration: more fixes
  • Loading branch information
csegarragonz authored May 23, 2024
1 parent 11e13d9 commit 4f9a03f
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 40 deletions.
4 changes: 2 additions & 2 deletions tasks/migration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ applications to benefit from dynamic changes in the compute environment.
First, provision the cluster:

```bash
(faasm-exp-pase) inv cluster.provision --vm Standard_D8_v5 --nodes 3 --name ${CLUSTER_NAME}
(faasm-exp-base) inv cluster.credentials --name ${CLUSTER_NAME}
inv cluster.provision --vm Standard_D8_v5 --nodes 3 --name ${CLUSTER_NAME}
inv cluster.credentials --name ${CLUSTER_NAME}
```

Second, deploy the cluster
Expand Down
29 changes: 15 additions & 14 deletions tasks/migration/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from glob import glob
from invoke import task
from math import ceil
from matplotlib.pyplot import savefig, subplots
from matplotlib.pyplot import subplots
from os import makedirs
from os.path import basename, join
from random import sample
Expand All @@ -24,6 +24,7 @@
get_lammps_data_file,
get_lammps_migration_params,
)
from tasks.util.plot import save_plot
from time import sleep


Expand Down Expand Up @@ -54,14 +55,14 @@ def calculate_cross_vm_links(part):


@task()
def run(ctx, workload="network", nprocs=None):
def run(ctx, workload="very-network", nprocs=None):
"""
Experiment to measure the benefits of migration in isolation
"""
# Work out the number of processes to run with
num_procs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
num_procs = [2, 3, 4, 5, 6, 7, 8] # , 9, 10, 11, 12, 13, 14, 15, 16]
num_cpus_per_vm = 8
num_vms = 16
num_vms = 8 # 16
if nprocs is not None:
num_procs = [int(nprocs)]

Expand Down Expand Up @@ -160,20 +161,17 @@ def do_write_csv_line(csv_name, part, xvm_links, actual_time):


@task
def plot(ctx):
def plot(ctx, workload="very-network"):
plots_dir = join(PLOTS_ROOT, "migration")
makedirs(plots_dir, exist_ok=True)
out_file = join(
plots_dir, "migration_oracle_{}.pdf".format(LAMMPS_SIM_WORKLOAD)
)

results_dir = join(PROJ_ROOT, "results", "migration")
result_dict = {}

for csv in glob(
join(
results_dir,
"migration_oracle_{}_*.csv".format(LAMMPS_SIM_WORKLOAD),
"migration_oracle_{}_*.csv".format(workload),
)
):
num_procs = csv.split("_")[-1].split(".")[0]
Expand All @@ -193,13 +191,14 @@ def plot(ctx):
float(line.split(",")[-1])
)

print(result_dict)
num_plots = len(result_dict)
num_cols = 4
num_rows = ceil(num_plots / num_cols)
fig, axes = subplots(nrows=num_rows, ncols=num_cols)
fig.suptitle(
"Correlation between execution time (Y) and x-VM links (X)\n(wload: compute)"
"Correlation between execution time (Y) and x-VM links (X)\n(wload: {})".format(
workload
)
)

def do_plot(ax, results, num_procs):
Expand All @@ -213,6 +212,8 @@ def do_plot(ax, results, num_procs):
axes[int(i / 4)][int(i % 4)], result_dict[num_procs], num_procs
)

fig.tight_layout()
savefig(out_file, format="pdf") # , bbox_inches="tight")
print("Plot saved to: {}".format(out_file))
save_plot(
fig,
join(PLOTS_ROOT, "migration"),
"migration_oracle_{}".format(workload),
)
22 changes: 17 additions & 5 deletions tasks/migration/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,16 @@
from pandas import read_csv
from tasks.util.env import PROJ_ROOT
from tasks.util.migration import MIGRATION_PLOTS_DIR
from tasks.util.plot import save_plot
from tasks.util.plot import UBENCH_PLOT_COLORS, save_plot


ALL_WORKLOADS = ["compute", "network"]
ALL_WORKLOADS = [
"all-to-all",
"compute",
"network",
"og-network",
"very-network",
]


def _read_results():
Expand Down Expand Up @@ -43,8 +49,11 @@ def plot(ctx):
"""
migration_results = _read_results()

do_plot("compute", migration_results)
do_plot("network", migration_results)
do_plot("all-to-all", migration_results)
# do_plot("compute", migration_results)
# do_plot("network", migration_results)
do_plot("very-network", migration_results)
# do_plot("og-network", migration_results)


def do_plot(workload, migration_results):
Expand All @@ -63,11 +72,14 @@ def do_plot(workload, migration_results):
)
)

color_idx = list(migration_results.keys()).index(workload)

ax.bar(
xticks,
ys,
width,
label=workload,
color=UBENCH_PLOT_COLORS[color_idx],
edgecolor="black",
)

Expand All @@ -86,7 +98,7 @@ def do_plot(workload, migration_results):
1.5,
"{:.1f}".format(ys[0]),
rotation="vertical",
fontsize=6,
fontsize=8,
bbox={
"boxstyle": "Square, pad=0.2",
"edgecolor": "black",
Expand Down
52 changes: 33 additions & 19 deletions tasks/migration/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from os import makedirs
from os.path import basename, join
from tasks.migration.util import generate_host_list
from tasks.util.env import RESULTS_DIR
from tasks.util.env import (
MPI_MIGRATE_FAASM_FUNC,
MPI_MIGRATE_FAASM_USER,
RESULTS_DIR,
)
from tasks.util.faasm import (
get_faasm_exec_time_from_json,
post_async_msg_and_get_result_json,
Expand Down Expand Up @@ -55,16 +59,22 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
check_array = [int(check_in)]

for workload in w:
if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
if (
workload != "all-to-all"
and workload not in LAMMPS_SIM_WORKLOAD_CONFIGS
):
print(
"Unrecognised workload config ({}) must be one in: {}".format(
workload, LAMMPS_SIM_WORKLOAD.keys()
)
)
workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
data_file = basename(
get_lammps_data_file(workload_config["data_file"])["data"][0]
)
raise RuntimeError("Unrecognised workload: {}".format(workload))

if workload != "all-to-all":
workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
data_file = basename(
get_lammps_data_file(workload_config["data_file"])["data"][0]
)

csv_name = "migration_{}.csv".format(workload)
_init_csv_file(csv_name)
Expand All @@ -75,37 +85,41 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):

# Print progress
print(
"Running migration micro-benchmark (wload:"
"Running migration micro-benchmark (wload: "
+ "{} - check-at: {} - repeat: {}/{})".format(
workload, check, run_num + 1, repeats
)
)

"""
TODO: do we want to keep the all-to-all baseline?
if workload == "all-to-all":
num_loops = 100000
user = MPI_MIGRATE_FAASM_USER
func = MPI_MIGRATE_FAASM_FUNC
cmdline = "{} {}".format(
check if check != 0 else 5, num_loops
)
"""
input_data = None
else:
user = LAMMPS_FAASM_USER
func = LAMMPS_FAASM_MIGRATION_NET_FUNC
cmdline = "-in faasm://lammps-data/{}".format(data_file)
input_data = get_lammps_migration_params(
check_every=check if check != 0 else 5,
num_loops=5,
num_net_loops=workload_config["num_net_loops"],
chunk_size=workload_config["chunk_size"],
)

# Run LAMMPS
cmdline = "-in faasm://lammps-data/{}".format(data_file)
msg = {
"user": LAMMPS_FAASM_USER,
"function": LAMMPS_FAASM_MIGRATION_NET_FUNC,
"user": user,
"function": func,
"cmdline": cmdline,
"mpi_world_size": int(num_cores_per_vm),
"input_data": get_lammps_migration_params(
num_loops=5,
num_net_loops=workload_config["num_net_loops"],
chunk_size=workload_config["chunk_size"],
),
}

if input_data is not None:
msg["input_data"] = input_data

if check == 0:
# Setting a check fraction of 0 means we don't
# under-schedule. We use it as a baseline
Expand Down

0 comments on commit 4f9a03f

Please sign in to comment.