Skip to content

Commit

Permalink
Merge branch 'worleyph/utils/timing_lib_and_instr_update' (PR #92)
Browse files Browse the repository at this point in the history
a) enabling perf_mod to enable/disable GPTL timer calls

The perf_mod wrapper on the GPTL timing library controls the
enabling/disabling of t_startf/t_stopf calls in a number of ways.
However, this control does not currently extend to direct calls of
the underlying GPTLstart/stop routines. This modification corrects
this, allowing, for example, GPTL-instrumented versions of Trilinos
to have timers enabled/disabled in the same way as in the rest of ACME.

b) updating timing events and associated logic in drv, pio, and cam

Recent updates changed the logical structure of the driver routines,
necessitating corresponding changes in the performance timing logic.
Performance experiments have also identified the utility of some
additional timers in the PIO library. Finally, this change fills another
hole in the timer coverage of potentially significant performance events
(prim_printstate).

Only changes performance timing data output

[BFB]

PG-59, PG-57
  • Loading branch information
jayeshkrishna committed Feb 17, 2015
2 parents cd337e2 + 339d87d commit c5fa8d4
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 31 deletions.
11 changes: 10 additions & 1 deletion models/atm/cam/src/dynamics/se/share/prim_state_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ module prim_state_mod
! ------------------------------
use reduction_mod, only : parallelmax,parallelmin
! ------------------------------
use perf_mod, only: t_startf, t_stopf
! ------------------------------
implicit none
private
character(len=*), private, parameter :: massfname = "mass.out"
Expand Down Expand Up @@ -145,6 +147,8 @@ subroutine prim_printstate(elem, tl,hybrid,hvcoord,nets,nete, fvm)
integer :: n0, nm1, pnm1, np1
integer :: npts,n,q

call t_startf('prim_printstate')

if (hybrid%masterthread) then
write(iulog,*) "nstep=",tl%nstep," time=",Time_at(tl%nstep)/(24*3600)," [day]"
end if
Expand Down Expand Up @@ -428,7 +432,10 @@ subroutine prim_printstate(elem, tl,hybrid,hvcoord,nets,nete, fvm)
endif


if (tl%nstep < tl%nstep0) return
if (tl%nstep < tl%nstep0) then
call t_stopf('prim_printstate')
return
endif

! ====================================================================
!
Expand Down Expand Up @@ -682,6 +689,8 @@ subroutine prim_printstate(elem, tl,hybrid,hvcoord,nets,nete, fvm)
time0=time1
endif

call t_stopf('prim_printstate')

end subroutine prim_printstate


Expand Down
20 changes: 9 additions & 11 deletions models/drv/driver/ccsm_comp_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,12 @@ subroutine ccsm_pre_init1()
iam=comp_comm_iam(it))
if (iamroot_CPLID) output_perf = .true.

! Timer initialization (has to be after mpi init)
call t_initf(NLFileName, LogPrint=.false., mpicom=mpicom_GLOID, &
MasterTask=iamroot_GLOID)
call t_startf('DRIVER_INIT')
call t_startf('ccsm_pre_init1')

if (iamin_CPLID) complist = trim(complist)//' cpl'

comp_id(it) = CPLID
Expand Down Expand Up @@ -725,6 +731,9 @@ subroutine ccsm_pre_init1()
write(logunit,'(2A)') subname,' ESMF_INTERFACE is set'
#endif
endif

call t_stopf('ccsm_pre_init1')
call t_stopf('DRIVER_INIT')
!
! When using io servers (pio_async_interface=.true.) the server tasks do not return from
! shr_pio_init2
Expand All @@ -747,19 +756,10 @@ subroutine ccsm_pre_init2()

if (iamroot_CPLID) call seq_ccsm_printlogheader()

!----------------------------------------------------------
!| Timer initialization (has to be after mpi init)
!----------------------------------------------------------

call t_initf(NLFileName, LogPrint=.false., mpicom=mpicom_GLOID, &
MasterTask=iamroot_GLOID)

if (iamin_CPLID) then
call seq_io_cpl_init()
endif

call t_startf('DRIVER_INIT')

!----------------------------------------------------------
!| Memory test
!----------------------------------------------------------
Expand Down Expand Up @@ -1931,8 +1931,6 @@ subroutine ccsm_init()
call shr_sys_flush(logunit)
endif

call t_stopf ('DRIVER_INIT')

end subroutine ccsm_init

!===============================================================================
Expand Down
21 changes: 21 additions & 0 deletions models/drv/driver/ccsm_driver.F90
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,33 @@ program ccsm_driver
!--------------------------------------------------------------------------
! Setup and initialize the communications and logging.
!--------------------------------------------------------------------------
! Timer initialization has to be after mpi init, so called inside of
! ccsm_pre_init1, as are t_startf for first two timers. When using pio async
! option, servers do not return from ccsm_pre_init1, so t_stopf timers
! are also called within ccsm_pre_init1.
!--------------------------------------------------------------------------
! call t_startf('DRIVER_INIT')
! call t_startf('ccsm_pre_init1')
call ccsm_pre_init1()
! call t_stopf('ccsm_pre_init1')
! call t_stopf('DRIVER_INIT')

call t_startf('DRIVER_INIT')
!--------------------------------------------------------------------------
! Initialize ESMF. This is done outside of the ESMF_INTERFACE ifdef
! because it is needed for the time manager, even if the ESMF_INTERFACE
! is not used.
!--------------------------------------------------------------------------
call t_startf('ESMF_Initialize')
call ESMF_Initialize()
call t_stopf('ESMF_Initialize')

!--------------------------------------------------------------------------
! Read in the configuration information and initialize the time manager.
!--------------------------------------------------------------------------
call t_startf('ccsm_pre_init2')
call ccsm_pre_init2()
call t_stopf('ccsm_pre_init2')

#ifdef USE_ESMF_LIB

Expand All @@ -74,8 +88,11 @@ program ccsm_driver
! cap component.
!--------------------------------------------------------------------------

call t_startf('ccsm_init_esmf')
call ESMF_CplCompInitialize(drvcomp, rc=localrc)
if (localrc /= 0) call shr_sys_abort('failed to esmf initialize')
call t_stopf('ccsm_init_esmf')
call t_stopf('DRIVER_INIT')

call ESMF_CplCompRun(drvcomp, rc=localrc)
if (localrc /= 0) call shr_sys_abort('failed to esmf run')
Expand All @@ -89,7 +106,11 @@ program ccsm_driver
! If ESMF is not defined, then just call the initialize, run and finalize
! routines directly.
!--------------------------------------------------------------------------
call t_startf('ccsm_init')
call ccsm_init()
call t_stopf('ccsm_init')
call t_stopf('DRIVER_INIT')

call ccsm_run()
call ccsm_final()

Expand Down
47 changes: 47 additions & 0 deletions models/utils/pio/box_rearrange.F90.in
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ module box_rearrange
#else
use pio_support, only : piodie, Debug, DebugIO, CheckMPIReturn, pio_fc_gather_offset

#endif
#ifdef TIMING
use perf_mod, only : t_startf, t_stopf !_EXTERNAL
#endif
use alloc_mod, only : alloc_check, dealloc_check
use pio_spmd_utils, only : pio_swapm
Expand Down Expand Up @@ -296,16 +299,31 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, &
#ifdef _USE_ALLTOALLW
if (pio_option == COLLECTIVE) then

#ifdef TIMING
call t_startf("a2a_box_rear_comp2io_{TYPE}")
#endif
call MPI_ALLTOALLW(src, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
dest, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, ierror )
#ifdef TIMING
call t_stopf("a2a_box_rear_comp2io_{TYPE}")
#endif
call CheckMPIReturn('box_rearrange', ierror)

else
#endif

#ifdef TIMING
call t_startf("swapm_box_rear_comp2io_{TYPE}")
#endif
call pio_swapm( nprocs, myrank, &
src, ndof, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
dest, niodof, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq )
#ifdef TIMING
call t_stopf("swapm_box_rear_comp2io_{TYPE}")
#endif

#ifdef _USE_ALLTOALLW
endif
#endif
Expand All @@ -324,6 +342,9 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, &
endif
#endif

#ifdef TIMING
call t_startf("p2p_box_rear_comp2io_{TYPE}")
#endif
!
! send data from comp procs
!
Expand Down Expand Up @@ -379,6 +400,10 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, &

call dealloc_check(sreq, 'send requests')

#ifdef TIMING
call t_stopf("p2p_box_rear_comp2io_{TYPE}")
#endif

#if DEBUG_BARRIER
call MPI_BARRIER(IOsystem%union_comm,ierror)
call CheckMPIReturn(subName,ierror)
Expand Down Expand Up @@ -562,16 +587,32 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf,

#ifdef _USE_ALLTOALLW
if (pio_option == COLLECTIVE) then

#ifdef TIMING
call t_startf("a2a_box_rear_io2comp_{TYPE}")
#endif
call MPI_ALLTOALLW(iobuf, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
compbuf, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, ierror )
#ifdef TIMING
call t_stopf("a2a_box_rear_io2comp_{TYPE}")
#endif
call CheckMPIReturn(subName, ierror)

else
#endif

#ifdef TIMING
call t_startf("swapm_box_rear_io2comp_{TYPE}")
#endif
call pio_swapm( nprocs, myrank, &
iobuf, niodof, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
compbuf, ndof, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq )
#ifdef TIMING
call t_stopf("swapm_box_rear_io2comp_{TYPE}")
#endif

#ifdef _USE_ALLTOALLW
endif
#endif
Expand All @@ -583,6 +624,9 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf,

else

#ifdef TIMING
call t_startf("p2p_box_rear_io2comp_{TYPE}")
#endif
call alloc_check(rreq, num_iotasks, 'recv requests')

!
Expand Down Expand Up @@ -642,6 +686,9 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf,

call dealloc_check(sreq,'send requests')
endif
#ifdef TIMING
call t_stopf("p2p_box_rear_io2comp_{TYPE}")
#endif

endif ! POINT_TO_POINT
#endif /* not _MPISERIAL */
Expand Down
14 changes: 14 additions & 0 deletions models/utils/pio/piodarray.F90.in
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ contains
endif ! if(UseRearranger)
#ifdef TIMING
call t_stopf("pio_rearrange_write")
call t_startf("pre_pio_write_nf")
#endif

if (IOproc) then
Expand Down Expand Up @@ -602,6 +603,7 @@ contains
endif

#ifdef TIMING
call t_stopf("pre_pio_write_nf")
call t_startf("pio_write_nf")
#endif
ierr = write_nf(File,IOBUF,varDesc,iodesc,start,count, request)
Expand All @@ -612,11 +614,17 @@ contains
call dealloc_check(count)

if(IOPROC) then
#ifdef TIMING
call t_startf("post_pio_write_nf")
#endif
if(file%iotype==pio_iotype_pnetcdf) then
call add_data_to_buffer(File, IOBUF, request)
else if(Userearranger) then
deallocate(iobuf)
end if
#ifdef TIMING
call t_stopf("post_pio_write_nf")
#endif
end if

! call MPI_Barrier(File%iosystem%comp_comm,ierr)
Expand Down Expand Up @@ -1205,7 +1213,13 @@ contains
this_buffsize = size(iobuf)*c_sizeof(iobuf(1))
file%buffsize=file%buffsize+this_buffsize
total_buffsize = total_buffsize+this_buffsize
#ifdef TIMING
call t_startf("allred_add_data_to_buf")
#endif
call MPI_ALLREDUCE(total_buffsize,maxbuffsize,1,MPI_INTEGER,MPI_MAX,file%iosystem%io_comm, mpierr)
#ifdef TIMING
call t_stopf("allred_add_data_to_buf")
#endif

if(maxbuffsize > pio_buffer_size_limit) then
call darray_write_complete(File)
Expand Down
40 changes: 22 additions & 18 deletions models/utils/timing/perf_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -622,16 +622,13 @@ subroutine t_startf(event, handle)
!
!-----------------------------------------------------------------------
!
if ((timing_initialized) .and. &
(timing_disable_depth .eq. 0) .and. &
(cur_timing_detail .le. timing_detail_limit)) then

if ( present (handle) ) then
ierr = GPTLstart_handle(event, handle)
else
ierr = GPTLstart(event)
endif
if (.not. timing_initialized) return
if (timing_disable_depth > 0) return

if ( present (handle) ) then
ierr = GPTLstart_handle(event, handle)
else
ierr = GPTLstart(event)
endif

return
Expand Down Expand Up @@ -660,16 +657,13 @@ subroutine t_stopf(event, handle)
!
!-----------------------------------------------------------------------
!
if ((timing_initialized) .and. &
(timing_disable_depth .eq. 0) .and. &
(cur_timing_detail .le. timing_detail_limit)) then

if ( present (handle) ) then
ierr = GPTLstop_handle(event, handle)
else
ierr = GPTLstop(event)
endif
if (.not. timing_initialized) return
if (timing_disable_depth > 0) return

if ( present (handle) ) then
ierr = GPTLstop_handle(event, handle)
else
ierr = GPTLstop(event)
endif

return
Expand Down Expand Up @@ -774,6 +768,16 @@ subroutine t_adj_detailf(detail_adjustment)
if (omp_in_parallel()) return
#endif

! using disable/enable to implement timing_detail logic so also control
! direct GPTL calls (such as occur in Trilinos library)
if ((cur_timing_detail <= timing_detail_limit) .and. &
(cur_timing_detail + detail_adjustment > timing_detail_limit)) then
call t_disablef()
elseif ((cur_timing_detail > timing_detail_limit) .and. &
(cur_timing_detail + detail_adjustment <= timing_detail_limit)) then
call t_enablef()
endif

cur_timing_detail = cur_timing_detail + detail_adjustment

return
Expand Down
2 changes: 1 addition & 1 deletion models/utils/timing/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#define MAX_STACK 128

/* longest timer name allowed (probably safe to just change) */
#define MAX_CHARS 63
#define MAX_CHARS 127

/*
** max allowable number of PAPI counters, or derived events. For convenience,
Expand Down

0 comments on commit c5fa8d4

Please sign in to comment.