migration: ubench fixes (#46)

* migration: ubench fixes * migration: more fixes
faasm · May 23, 2024 · 4f9a03f · 4f9a03f
1 parent 11e13d9
commit 4f9a03f
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 40 deletions.
diff --git a/tasks/migration/README.md b/tasks/migration/README.md
@@ -6,8 +6,8 @@ applications to benefit from dynamic changes in the compute environment.
 First, provision the cluster:
 
 ```bash
-(faasm-exp-pase) inv cluster.provision --vm Standard_D8_v5 --nodes 3 --name ${CLUSTER_NAME}
-(faasm-exp-base) inv cluster.credentials --name ${CLUSTER_NAME}
+inv cluster.provision --vm Standard_D8_v5 --nodes 3 --name ${CLUSTER_NAME}
+inv cluster.credentials --name ${CLUSTER_NAME}
 ```
 
 Second, deploy the cluster

diff --git a/tasks/migration/oracle.py b/tasks/migration/oracle.py
@@ -2,7 +2,7 @@
 from glob import glob
 from invoke import task
 from math import ceil
-from matplotlib.pyplot import savefig, subplots
+from matplotlib.pyplot import subplots
 from os import makedirs
 from os.path import basename, join
 from random import sample
@@ -24,6 +24,7 @@
     get_lammps_data_file,
     get_lammps_migration_params,
 )
+from tasks.util.plot import save_plot
 from time import sleep
 
 
@@ -54,14 +55,14 @@ def calculate_cross_vm_links(part):
 
 
 @task()
-def run(ctx, workload="network", nprocs=None):
+def run(ctx, workload="very-network", nprocs=None):
     """
     Experiment to measure the benefits of migration in isolation
     """
     # Work out the number of processes to run with
-    num_procs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+    num_procs = [2, 3, 4, 5, 6, 7, 8]  # , 9, 10, 11, 12, 13, 14, 15, 16]
     num_cpus_per_vm = 8
-    num_vms = 16
+    num_vms = 8  # 16
     if nprocs is not None:
         num_procs = [int(nprocs)]
 
@@ -160,20 +161,17 @@ def do_write_csv_line(csv_name, part, xvm_links, actual_time):
 
 
 @task
-def plot(ctx):
+def plot(ctx, workload="very-network"):
     plots_dir = join(PLOTS_ROOT, "migration")
     makedirs(plots_dir, exist_ok=True)
-    out_file = join(
-        plots_dir, "migration_oracle_{}.pdf".format(LAMMPS_SIM_WORKLOAD)
-    )
 
     results_dir = join(PROJ_ROOT, "results", "migration")
     result_dict = {}
 
     for csv in glob(
         join(
             results_dir,
-            "migration_oracle_{}_*.csv".format(LAMMPS_SIM_WORKLOAD),
+            "migration_oracle_{}_*.csv".format(workload),
         )
     ):
         num_procs = csv.split("_")[-1].split(".")[0]
@@ -193,13 +191,14 @@ def plot(ctx):
                     float(line.split(",")[-1])
                 )
 
-    print(result_dict)
     num_plots = len(result_dict)
     num_cols = 4
     num_rows = ceil(num_plots / num_cols)
     fig, axes = subplots(nrows=num_rows, ncols=num_cols)
     fig.suptitle(
-        "Correlation between execution time (Y) and x-VM links (X)\n(wload: compute)"
+        "Correlation between execution time (Y) and x-VM links (X)\n(wload: {})".format(
+            workload
+        )
     )
 
     def do_plot(ax, results, num_procs):
@@ -213,6 +212,8 @@ def do_plot(ax, results, num_procs):
             axes[int(i / 4)][int(i % 4)], result_dict[num_procs], num_procs
         )
 
-    fig.tight_layout()
-    savefig(out_file, format="pdf")  # , bbox_inches="tight")
-    print("Plot saved to: {}".format(out_file))
+    save_plot(
+        fig,
+        join(PLOTS_ROOT, "migration"),
+        "migration_oracle_{}".format(workload),
+    )
diff --git a/tasks/migration/plot.py b/tasks/migration/plot.py
@@ -6,10 +6,16 @@
 from pandas import read_csv
 from tasks.util.env import PROJ_ROOT
 from tasks.util.migration import MIGRATION_PLOTS_DIR
-from tasks.util.plot import save_plot
+from tasks.util.plot import UBENCH_PLOT_COLORS, save_plot
 
 
-ALL_WORKLOADS = ["compute", "network"]
+ALL_WORKLOADS = [
+    "all-to-all",
+    "compute",
+    "network",
+    "og-network",
+    "very-network",
+]
 
 
 def _read_results():
@@ -43,8 +49,11 @@ def plot(ctx):
     """
     migration_results = _read_results()
 
-    do_plot("compute", migration_results)
-    do_plot("network", migration_results)
+    do_plot("all-to-all", migration_results)
+    # do_plot("compute", migration_results)
+    # do_plot("network", migration_results)
+    do_plot("very-network", migration_results)
+    # do_plot("og-network", migration_results)
 
 
 def do_plot(workload, migration_results):
@@ -63,11 +72,14 @@ def do_plot(workload, migration_results):
             )
         )
 
+    color_idx = list(migration_results.keys()).index(workload)
+
     ax.bar(
         xticks,
         ys,
         width,
         label=workload,
+        color=UBENCH_PLOT_COLORS[color_idx],
         edgecolor="black",
     )
 
@@ -86,7 +98,7 @@ def do_plot(workload, migration_results):
             1.5,
             "{:.1f}".format(ys[0]),
             rotation="vertical",
-            fontsize=6,
+            fontsize=8,
             bbox={
                 "boxstyle": "Square, pad=0.2",
                 "edgecolor": "black",

diff --git a/tasks/migration/run.py b/tasks/migration/run.py
@@ -4,7 +4,11 @@
 from os import makedirs
 from os.path import basename, join
 from tasks.migration.util import generate_host_list
-from tasks.util.env import RESULTS_DIR
+from tasks.util.env import (
+    MPI_MIGRATE_FAASM_FUNC,
+    MPI_MIGRATE_FAASM_USER,
+    RESULTS_DIR,
+)
 from tasks.util.faasm import (
     get_faasm_exec_time_from_json,
     post_async_msg_and_get_result_json,
@@ -55,16 +59,22 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
         check_array = [int(check_in)]
 
     for workload in w:
-        if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
+        if (
+            workload != "all-to-all"
+            and workload not in LAMMPS_SIM_WORKLOAD_CONFIGS
+        ):
             print(
                 "Unrecognised workload config ({}) must be one in: {}".format(
                     workload, LAMMPS_SIM_WORKLOAD.keys()
                 )
             )
-        workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
-        data_file = basename(
-            get_lammps_data_file(workload_config["data_file"])["data"][0]
-        )
+            raise RuntimeError("Unrecognised workload: {}".format(workload))
+
+        if workload != "all-to-all":
+            workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
+            data_file = basename(
+                get_lammps_data_file(workload_config["data_file"])["data"][0]
+            )
 
         csv_name = "migration_{}.csv".format(workload)
         _init_csv_file(csv_name)
@@ -75,37 +85,41 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
 
                 # Print progress
                 print(
-                    "Running migration micro-benchmark (wload:"
+                    "Running migration micro-benchmark (wload: "
                     + "{} - check-at: {} - repeat: {}/{})".format(
                         workload, check, run_num + 1, repeats
                     )
                 )
 
-                """
-                TODO: do we want to keep the all-to-all baseline?
                 if workload == "all-to-all":
                     num_loops = 100000
                     user = MPI_MIGRATE_FAASM_USER
                     func = MPI_MIGRATE_FAASM_FUNC
                     cmdline = "{} {}".format(
                         check if check != 0 else 5, num_loops
                     )
-                """
+                    input_data = None
+                else:
+                    user = LAMMPS_FAASM_USER
+                    func = LAMMPS_FAASM_MIGRATION_NET_FUNC
+                    cmdline = "-in faasm://lammps-data/{}".format(data_file)
+                    input_data = get_lammps_migration_params(
+                        check_every=check if check != 0 else 5,
+                        num_loops=5,
+                        num_net_loops=workload_config["num_net_loops"],
+                        chunk_size=workload_config["chunk_size"],
+                    )
 
-                # Run LAMMPS
-                cmdline = "-in faasm://lammps-data/{}".format(data_file)
                 msg = {
-                    "user": LAMMPS_FAASM_USER,
-                    "function": LAMMPS_FAASM_MIGRATION_NET_FUNC,
+                    "user": user,
+                    "function": func,
                     "cmdline": cmdline,
                     "mpi_world_size": int(num_cores_per_vm),
-                    "input_data": get_lammps_migration_params(
-                        num_loops=5,
-                        num_net_loops=workload_config["num_net_loops"],
-                        chunk_size=workload_config["chunk_size"],
-                    ),
                 }
 
+                if input_data is not None:
+                    msg["input_data"] = input_data
+
                 if check == 0:
                     # Setting a check fraction of 0 means we don't
                     # under-schedule. We use it as a baseline