From 1aafa5c767e9edeb223d9cf1875186529a24d3d4 Mon Sep 17 00:00:00 2001 From: Jayesh Krishna Date: Wed, 20 Jan 2016 21:19:59 +0000 Subject: [PATCH] Updating timing events and associated logic in pio Performance experiments have identified the utility of some additional timers in the PIO library. --- pio/box_rearrange.F90.in | 42 ++++++++++++++++++++++++++++++++++++++++ pio/piodarray.F90.in | 14 ++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/pio/box_rearrange.F90.in b/pio/box_rearrange.F90.in index 79ad6303bbea..b7e045a43457 100644 --- a/pio/box_rearrange.F90.in +++ b/pio/box_rearrange.F90.in @@ -49,6 +49,9 @@ module box_rearrange #else use pio_support, only : piodie, Debug, DebugIO, CheckMPIReturn, pio_fc_gather_offset +#endif +#ifdef TIMING + use perf_mod, only : t_startf, t_stopf !_EXTERNAL #endif use alloc_mod, only : alloc_check, dealloc_check use pio_spmd_utils, only : pio_swapm @@ -282,15 +285,27 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, & if (pio_option == COLLECTIVE) then +#ifdef TIMING + call t_startf("a2a_box_rear_comp2io_{TYPE}") +#endif call MPI_ALLTOALLW(src, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) +#ifdef TIMING + call t_stopf("a2a_box_rear_comp2io_{TYPE}") +#endif call CheckMPIReturn('box_rearrange', ierror) else +#ifdef TIMING + call t_startf("swapm_box_rear_comp2io_{TYPE}") +#endif call pio_swapm( nprocs, myrank, & src, ndof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & dest, niodof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) +#ifdef TIMING + call t_stopf("swapm_box_rear_comp2io_{TYPE}") +#endif endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) @@ -307,6 +322,9 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, & endif #endif +#ifdef TIMING + call t_startf("p2p_box_rear_comp2io_{TYPE}") +#endif ! ! send data from comp procs ! @@ -362,6 +380,10 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, & call dealloc_check(sreq, 'send requests') +#ifdef TIMING + call t_stopf("p2p_box_rear_comp2io_{TYPE}") +#endif + #if DEBUG_BARRIER call MPI_BARRIER(IOsystem%union_comm,ierror) call CheckMPIReturn(subName,ierror) @@ -564,15 +586,29 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf, endif if (pio_option == COLLECTIVE) then + +#ifdef TIMING + call t_startf("a2a_box_rear_io2comp_{TYPE}") +#endif call MPI_ALLTOALLW(iobuf, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, ierror ) +#ifdef TIMING + call t_stopf("a2a_box_rear_io2comp_{TYPE}") +#endif call CheckMPIReturn(subName, ierror) else + +#ifdef TIMING + call t_startf("swapm_box_rear_io2comp_{TYPE}") +#endif call pio_swapm( nprocs, myrank, & iobuf, niodof, a2a_sendcounts, a2a_displs, a2a_sendtypes, & compbuf, ndof, a2a_recvcounts, a2a_displs, a2a_recvtypes, & IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq ) +#ifdef TIMING + call t_stopf("swapm_box_rear_io2comp_{TYPE}") +#endif endif call dealloc_check(a2a_sendcounts) call dealloc_check(a2a_displs) @@ -582,6 +618,9 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf, else +#ifdef TIMING + call t_startf("p2p_box_rear_io2comp_{TYPE}") +#endif call alloc_check(rreq, num_iotasks, 'recv requests') ! @@ -641,6 +680,9 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf, call dealloc_check(sreq,'send requests') endif +#ifdef TIMING + call t_stopf("p2p_box_rear_io2comp_{TYPE}") +#endif endif ! POINT_TO_POINT #endif /* not _MPISERIAL */ diff --git a/pio/piodarray.F90.in b/pio/piodarray.F90.in index b9517be8855d..e808455ac0d1 100644 --- a/pio/piodarray.F90.in +++ b/pio/piodarray.F90.in @@ -503,6 +503,7 @@ contains File%iosystem%comp_rank,' UseRearranger: ',UseRearranger,iodesc%glen, iodesc%iomap%start, len #ifdef TIMING call t_startf("pio_rearrange_write") + call t_startf("pre_pio_write_nf") #endif if(UseRearranger) then if (IOproc) then @@ -638,6 +639,7 @@ contains endif #ifdef TIMING + call t_stopf("pre_pio_write_nf") call t_startf("pio_write_nf") #endif ierr = write_nf(File,IOBUF,varDesc,iodesc,start,count, request) @@ -648,11 +650,17 @@ contains call dealloc_check(count) if(IOPROC) then +#ifdef TIMING + call t_startf("post_pio_write_nf") +#endif if(file%iotype==pio_iotype_pnetcdf) then call add_data_to_buffer(File, IOBUF, request) else if(Userearranger) then deallocate(iobuf) end if +#ifdef TIMING + call t_stopf("post_pio_write_nf") +#endif end if ! call MPI_Barrier(File%iosystem%comp_comm,ierr) @@ -1249,7 +1257,13 @@ contains this_buffsize = size(iobuf)*c_sizeof(iobuf(1)) file%buffsize=file%buffsize+this_buffsize total_buffsize = total_buffsize+this_buffsize +#ifdef TIMING + call t_startf("allred_add_data_to_buf") +#endif call MPI_ALLREDUCE(total_buffsize,maxbuffsize,1,MPI_INTEGER,MPI_MAX,file%iosystem%io_comm, mpierr) +#ifdef TIMING + call t_stopf("allred_add_data_to_buf") +#endif if(maxbuffsize > pio_buffer_size_limit) then call darray_write_complete(File)