diff --git a/apps/bench1.cpp b/apps/bench1.cpp index 2a9b45a..0f42858 100644 --- a/apps/bench1.cpp +++ b/apps/bench1.cpp @@ -197,7 +197,7 @@ int main(int argc, char *argv[]) { if (myrank == 0) { double send_BW = total_size_bytes / timer[0] / 1024 / 1024; -#if false +#if true printf("%i, %i, %i, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, conf.num_threads, @@ -208,6 +208,7 @@ int main(int argc, char *argv[]) { send_BW); #endif } else { +#if false double recv_BW = total_size_bytes / timer[1] / 1024 / 1024; printf("%i, %i, %i, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, @@ -215,6 +216,7 @@ int main(int argc, char *argv[]) { conf.num_partitions, ((double)patition_size_bytes) / 1024, ((double)total_size_bytes) / 1024, timer[1] /*rank1*/, recv_BW); +#endif } MPI_Barrier(MPI_COMM_WORLD); diff --git a/apps/bench2.cpp b/apps/bench2.cpp index aa931e3..62065b7 100644 --- a/apps/bench2.cpp +++ b/apps/bench2.cpp @@ -11,17 +11,21 @@ //@HEADER */ +/* + Similar to bench1 but delays n-1 tasks by an equal amount of time +*/ + #include "mpi.h" #include #include #include #include -#define DEFAULT_ITERS 5 +#define DEFAULT_ITERS 10 #define DATA_TYPE MPI_DOUBLE #define USE_PARRIVED -#define ALL_TASKS_SLEEP +//#define ALL_TASKS_SLEEP_SAME_AMMOUNT #define DEFAULT_RECV_SEND_PARTITION_RATIO 1 double timer[3] = {0.0, 0.0, 0.0}; @@ -64,15 +68,15 @@ void send_task(partix_task_args_t *args) { send_task_args_t *task_args = (send_task_args_t *)args->user_task_args; // First partition completion is delayed by sleep_time_ms -#ifdef ALL_TASKS_SLEEP - size_t sleep_time_ms = global_conf->overlap_duration; - usleep(sleep_time_ms * 1000); +size_t sleep_time_ms = 0; +#ifdef ALL_TASKS_SLEEP_SAME_AMMOUNT + sleep_time_ms = global_conf->overlap_duration; #else if (task_args->partition_id == 0) { - size_t sleep_time_ms = global_conf->overlap_duration; - usleep(sleep_time_ms * 1000); + sleep_time_ms = global_conf->overlap_duration; } #endif + usleep(sleep_time_ms * 1000); MPI_Pready(task_args->partition_id, 
*task_args->request); } @@ -202,6 +206,11 @@ int main(int argc, char *argv[]) { } } + // Measure perceived BW, that is communication as if it were in the critical + // path, by subtracting overlap + timer[0] -= iterations * (float)global_conf->overlap_duration / 1000; + timer[1] -= iterations * (float)global_conf->overlap_duration / 1000; + timer[0] /= iterations; timer[1] /= iterations; @@ -211,22 +220,22 @@ int main(int argc, char *argv[]) { if (myrank == 0) { double send_BW = total_size_bytes / timer[0] / 1024 / 1024; #if true - printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n", + printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, conf.num_threads, conf.num_partitions, - (float)global_conf->overlap_duration, + (float)global_conf->overlap_duration / 1000.0, ((double)patition_size_bytes) / 1024, ((double)total_size_bytes) / 1024, timer[0] /*rank0*/, send_BW); #endif } else { #if false double recv_BW = total_size_bytes / timer[1] / 1024 / 1024; - printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n", + printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, conf.num_threads, conf.num_partitions, - (float)global_conf->overlap_duration, + (float)global_conf->overlap_duration / 1000.0, ((double)patition_size_bytes) / 1024, ((double)total_size_bytes) / 1024, timer[1] /*rank1*/, diff --git a/apps/bench3.cpp b/apps/bench3.cpp index 1927337..ffa1e9b 100644 --- a/apps/bench3.cpp +++ b/apps/bench3.cpp @@ -11,6 +11,10 @@ //@HEADER */ +/* + Similar to bench2 but delays n-1 tasks by a random amount of time +*/ + #include "mpi.h" #include #include @@ -207,6 +211,11 @@ int main(int argc, char *argv[]) { } } + // Measure perceived BW, that is communication as if it were in the critical + // path, by subtracting overlap + timer[0] -= iterations * (float)global_conf->overlap_duration / 1000; + timer[1] -= iterations * (float)global_conf->overlap_duration / 1000; + timer[0] /= iterations; timer[1] /= iterations; @@ -216,21 +225,21 @@ int main(int argc, char 
*argv[]) { if (myrank == 0) { double send_BW = total_size_bytes / timer[0] / 1024 / 1024; #if true - printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, + printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, conf.num_threads, conf.num_partitions, - (float)global_conf->overlap_duration, + (float)global_conf->overlap_duration / 1000.0, ((double)patition_size_bytes) / 1024, ((double)total_size_bytes) / 1024, timer[0] /*rank0*/, send_BW); #endif } else { #if false double recv_BW = total_size_bytes / timer[1] / 1024 / 1024; - printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n", + printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks, conf.num_threads, conf.num_partitions, - (float)global_conf->overlap_duration, + (float)global_conf->overlap_duration / 1000.0, ((double)patition_size_bytes) / 1024, ((double)total_size_bytes) / 1024, timer[1] /*rank1*/, diff --git a/scripts/run_over_overlap.sh b/scripts/run_over_overlap.sh index e3bf232..aa056ec 100644 --- a/scripts/run_over_overlap.sh +++ b/scripts/run_over_overlap.sh @@ -23,7 +23,7 @@ export QTHREAD_STACK_SIZE=8192 export OMP_PROC_BIND=true export OMP_PLACES=cores -overlap_default=0 #msec +overlap_default=1 #msec FLAGS="--bind-to core --rank-by core" PRELOAD="-x LD_PRELOAD=/home/projects/x86-64/gcc/10.2.0/lib64/libstdc++.so.6" @@ -46,7 +46,7 @@ for threads in {1..9..1}; do $FLAGS $PRELOAD -x OMP_PLACES=cores -x OMP_NUM_THREADS=$num_threads \ -x QTHREAD_STACK_SIZE=8196 -x OMP_PROC_BIND=true \ $binary $num_tasks $num_threads $num_part $num_partlen $overlp - if [[ $overlp -eq 0 ]] + if [[ $overlp -eq 1 ]] then overlp=10 fi diff --git a/src/partix.h b/src/partix.h index 9614ed3..5192808 100644 --- a/src/partix.h +++ b/src/partix.h @@ -35,7 +35,7 @@ #define OVERLAP_IN_MSEC_DEFAULT 100 /* Used add task duration divergence as a % of OVERLAP_IN_MSEC_DEFAULT */ -#define NOISE_IN_PERCENTAGE_OF_OVERLAP 30 +#define NOISE_IN_PERCENTAGE_OF_OVERLAP 0 #include