diff --git a/mergetb/average_churn_data.py b/mergetb/average_churn_data.py index 8623cba..b3ca522 100644 --- a/mergetb/average_churn_data.py +++ b/mergetb/average_churn_data.py @@ -5,102 +5,31 @@ from average_data import metrics_to_extract, get_data, calculate_average_data import extract_raw_data -from extract_raw_churn_data import metrics_interval if __name__ == "__main__": - if len(sys.argv) < 3: + if len(sys.argv) < 3: print("Usage: python average_data.py ") sys.exit(1) directory = sys.argv[1] duration = float(sys.argv[2]) data = get_data(directory) - + average_data = {} - print(f"variables: {data.keys()}") - - for variable_name, variable_data in data.items(): - # for variable_name, variable_data in ["duplicates"]: - # for variable_name, variable_data in ["duplicates"]: - # variable_name = "retry" - # variable_data = data[variable_name] - - average_data[variable_name] = {} - print(f"runs: {variable_data.keys()}") - for run, runs in variable_data.items(): - # run = "4" - average_data[variable_name][run] = {} - print("Getting data for: ", variable_name, run) - - for time_index, time_data in variable_data[run].items(): - # for time_index, time_data in runs.items(): - if time_index != "0": - prev_step = variable_data[run][str(int(time_index) - metrics_interval)] - # print(f"time_index: {time_index}, prev_step: {prev_step}") - # print(f"prev_step data: {prev_step['loopix_incoming_messages'], prev_step['loopix_number_of_proxy_requests']}") - # print(f"time_data data: {time_data['loopix_incoming_messages'], time_data['loopix_number_of_proxy_requests']}") - - for metric in extract_raw_data.metrics_to_extract: - if metric in ["loopix_bandwidth_bytes", "loopix_incoming_messages", "loopix_number_of_proxy_requests"]: - # print(f"{time_index}--------------------------------") - prev_step_data = np.array(prev_step[metric]) - time_data_data = np.array(time_data[metric]) - # print(f"prev_step_data: {prev_step_data}") - # print(f"time_data_data: {time_data_data}") - # 
print(f"data[{metric}]:") - if len(prev_step_data) < len(time_data_data): - data[metric] = time_data_data[:len(prev_step_data)] - prev_step_data - elif len(prev_step_data) > len(time_data_data): - data[metric] = time_data_data - prev_step_data[:len(time_data_data)] - else: - data[metric] = time_data_data - prev_step_data - # print(f"{data[metric]}") - - elif metric == "loopix_start_time_seconds": - data[metric] = time_data[metric] - else: - # print("--------------------------------") - prev_step_sum = np.array(prev_step[metric]["sum"]) - time_data_sum = np.array(time_data[metric]["sum"]) - # print(f"prev_step_sum: {prev_step_sum}") - # print(f"time_data_sum: {time_data_sum}") - - if len(prev_step_sum) < len(time_data_sum): - data[metric]["sum"] = time_data_sum[:len(prev_step_sum)] - prev_step_sum - elif len(prev_step_sum) > len(time_data_sum): - data[metric]["sum"] = time_data_sum - prev_step_sum[:len(time_data_sum)] - else: - data[metric]["sum"] = time_data_sum - prev_step_sum - - prev_step_count = np.array(prev_step[metric]["count"]) - time_data_count = np.array(time_data[metric]["count"]) - # print(f"prev_step_count: {prev_step_count}") - # print(f"time_data_count: {time_data_count}") - - if len(prev_step_count) < len(time_data_count): - data[metric]["count"] = time_data_count[:len(prev_step_count)] - prev_step_count - elif len(prev_step_count) > len(time_data_count): - data[metric]["count"] = time_data_count - prev_step_count[:len(time_data_count)] - else: - data[metric]["count"] = time_data_count - prev_step_count - - # print(f"data[{metric}]: {data[metric]}") - - - else: - data = time_data - print(f"time_index: {time_index}, data: {data['loopix_incoming_messages'], data['loopix_number_of_proxy_requests']}") + for variable, runs in data.items(): + average_data[variable] = {} + for try_index, kill_runs in runs.items(): + average_data[variable][try_index] = {} + for kill, runs in kill_runs.items(): + print(runs.keys()) + print(f"try_index: {try_index}") + 
print(f"run: {kill}") - - average_data[variable_name][run][time_index] = {} - print(f"data: {data}") - # data = time_data - average_data[variable_name][run][time_index] = calculate_average_data(data, metrics_interval) + average_data[variable][try_index][kill] = calculate_average_data(runs, duration) with open(os.path.join(directory, 'average_data.json'), 'w') as f: json.dump(average_data, f, indent=2) - diff --git a/mergetb/average_data.py b/mergetb/average_data.py index f906204..6091b6f 100644 --- a/mergetb/average_data.py +++ b/mergetb/average_data.py @@ -50,7 +50,7 @@ def calculate_average_data(data, duration): return results if __name__ == "__main__": - if len(sys.argv) < 1: + if len(sys.argv) < 3: print("Usage: python average_data.py ") sys.exit(1) diff --git a/mergetb/extract_raw_churn_data.py b/mergetb/extract_raw_churn_data.py index 9c3820e..18b074e 100644 --- a/mergetb/extract_raw_churn_data.py +++ b/mergetb/extract_raw_churn_data.py @@ -10,7 +10,7 @@ def get_metrics_data(data_dir, path_length, n_clients, results, variable, index) create_results_dict(results, metrics_to_extract) directory = os.path.join(data_dir, variable) - metrics_file = os.path.join(directory, f"metrics_4.txt") + metrics_file = os.path.join(directory, f"metrics.txt") print(f"Getting metrics data from {metrics_file}") if os.path.exists(metrics_file): @@ -96,9 +96,7 @@ def main(): node_dir = os.path.join(run_dir, f"{i}_{j}") print(node_dir) - - metrics_file_name = f"metrics_{4}.txt" - + nodes = os.listdir(node_dir) for enum_node, node in enumerate(nodes): diff --git a/mergetb/plot_data_churn.py b/mergetb/plot_data_churn.py index 7dc691a..79914f0 100644 --- a/mergetb/plot_data_churn.py +++ b/mergetb/plot_data_churn.py @@ -3,8 +3,12 @@ import json import sys import os -from plot_data import get_data, plot_latency_components, plot_reliability, plot_incoming_messages, plot_latency, plot_bandwidth, plot_latency_and_bandwidth, plot_reliability_latency +from plot_data import get_data, 
save_plot_directory, plot_latency_components, plot_reliability, plot_incoming_messages, plot_latency, plot_bandwidth, plot_latency_and_bandwidth, plot_reliability_latency +from matplotlib.ticker import FuncFormatter +def log_format(y, _): + return f"{y:.0f}" + x_axis_name = {"lambda_loop": "Loop and Drop Messages per Second", "lambda_payload": "Payload Messages per Second from the Client"} if __name__ == "__main__": @@ -18,16 +22,53 @@ duration = int(sys.argv[3]) data = get_data(directory) - for variable, run in data.items(): - print(f"Plotting {variable}") - for run_index, run_data in run.items(): - plot_dir = os.path.join(directory, "plots", variable, run_index) - print(f"Plotting {run_index} with plot_dir {plot_dir}") - plot_latency_components(plot_dir, path_length, variable, run_data) - plot_reliability(plot_dir, variable, run_data) - plot_incoming_messages(plot_dir, variable, run_data) - plot_latency(plot_dir, variable, run_data) - plot_bandwidth(plot_dir, duration, variable, run_data) - plot_latency_and_bandwidth(plot_dir, variable, run_data) - # plot_reliability_incoming_latency(directory, variable, run) - plot_reliability_latency(plot_dir, variable, run_data) + retry_levels = data["retry"].keys() + points_in_time = list(data["retry"]["0"].keys()) + points_in_time_labels = [ + "All Nodes Live", "1 Node Down", "2 Nodes Down", "3 Nodes Down" + ] + + reliability_data = { + retry: [ + data["retry"][retry][str(pt)]["loopix_reliability"] + for pt in points_in_time + ] + for retry in retry_levels + } + + plt.figure(figsize=(12, 8)) + + bar_width = 0.2 + x_indexes = range(len(points_in_time_labels)) + offset = 0 + + + plt.yscale('log') + plt.gca().yaxis.set_major_formatter(FuncFormatter(log_format)) + + for retry, reliabilities in reliability_data.items(): + reliability = [r*100 for r in reliabilities] + plt.bar( + [x + offset for x in x_indexes], + reliability, + width=bar_width, + label=f"{retry} Retry", + ) + offset += bar_width + + # Customizing the plot + 
plt.title("Reliability vs Node Status", fontsize=16) + plt.xlabel("Node Status", fontsize=14) + plt.ylabel("Reliability", fontsize=14) + plt.xticks([x + bar_width * (len(retry_levels) - 1) / 2 for x in x_indexes], points_in_time_labels, fontsize=12) + plt.legend(fontsize=12) + plt.grid(axis='y', linestyle='--', alpha=0.7) + + # Save and close the plot + save_plot_directory(directory) + plt.savefig(os.path.join(directory, "reliability_churn_bar_chart.png")) + plt.clf() + plt.close() + + + diff --git a/mergetb/run_churn_data_processing.bash b/mergetb/run_churn_data_processing.bash index 68abac3..71753b4 100644 --- a/mergetb/run_churn_data_processing.bash +++ b/mergetb/run_churn_data_processing.bash @@ -16,6 +16,6 @@ source ../venv/bin/activate python3 extract_raw_churn_data.py "$DATA_DIR" "$PATH_LENGTH" "$N_CLIENTS" -python3 average_churn_data.py "$DATA_DIR" "$DURATION" +# python3 average_churn_data.py "$DATA_DIR" "$DURATION" -python3 plot_data_churn.py "$DATA_DIR" "$PATH_LENGTH" "$DURATION" \ No newline at end of file +# python3 plot_data_churn.py "$DATA_DIR" "$PATH_LENGTH" "$DURATION" \ No newline at end of file