Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

modifying enable/disable logic in timing library, and updating timers in drv/pio/atm #92

Merged
merged 4 commits into from
Feb 17, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion models/atm/cam/src/dynamics/se/share/prim_state_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ module prim_state_mod
! ------------------------------
use reduction_mod, only : parallelmax,parallelmin
! ------------------------------
use perf_mod, only: t_startf, t_stopf
! ------------------------------
implicit none
private
character(len=*), private, parameter :: massfname = "mass.out"
Expand Down Expand Up @@ -145,6 +147,8 @@ subroutine prim_printstate(elem, tl,hybrid,hvcoord,nets,nete, fvm)
integer :: n0, nm1, pnm1, np1
integer :: npts,n,q

call t_startf('prim_printstate')

if (hybrid%masterthread) then
write(iulog,*) "nstep=",tl%nstep," time=",Time_at(tl%nstep)/(24*3600)," [day]"
end if
Expand Down Expand Up @@ -428,7 +432,10 @@ subroutine prim_printstate(elem, tl,hybrid,hvcoord,nets,nete, fvm)
endif


if (tl%nstep < tl%nstep0) return
if (tl%nstep < tl%nstep0) then
call t_stopf('prim_printstate')
return
endif

! ====================================================================
!
Expand Down Expand Up @@ -682,6 +689,8 @@ subroutine prim_printstate(elem, tl,hybrid,hvcoord,nets,nete, fvm)
time0=time1
endif

call t_stopf('prim_printstate')

end subroutine prim_printstate


Expand Down
20 changes: 9 additions & 11 deletions models/drv/driver/ccsm_comp_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,12 @@ subroutine ccsm_pre_init1()
iam=comp_comm_iam(it))
if (iamroot_CPLID) output_perf = .true.

! Timer initialization (has to be after mpi init)
call t_initf(NLFileName, LogPrint=.false., mpicom=mpicom_GLOID, &
MasterTask=iamroot_GLOID)
call t_startf('DRIVER_INIT')
call t_startf('ccsm_pre_init1')

if (iamin_CPLID) complist = trim(complist)//' cpl'

comp_id(it) = CPLID
Expand Down Expand Up @@ -725,6 +731,9 @@ subroutine ccsm_pre_init1()
write(logunit,'(2A)') subname,' ESMF_INTERFACE is set'
#endif
endif

call t_stopf('ccsm_pre_init1')
call t_stopf('DRIVER_INIT')
!
! When using io servers (pio_async_interface=.true.) the server tasks do not return from
! shr_pio_init2
Expand All @@ -747,19 +756,10 @@ subroutine ccsm_pre_init2()

if (iamroot_CPLID) call seq_ccsm_printlogheader()

!----------------------------------------------------------
!| Timer initialization (has to be after mpi init)
!----------------------------------------------------------

call t_initf(NLFileName, LogPrint=.false., mpicom=mpicom_GLOID, &
MasterTask=iamroot_GLOID)

if (iamin_CPLID) then
call seq_io_cpl_init()
endif

call t_startf('DRIVER_INIT')

!----------------------------------------------------------
!| Memory test
!----------------------------------------------------------
Expand Down Expand Up @@ -1931,8 +1931,6 @@ subroutine ccsm_init()
call shr_sys_flush(logunit)
endif

call t_stopf ('DRIVER_INIT')

end subroutine ccsm_init

!===============================================================================
Expand Down
21 changes: 21 additions & 0 deletions models/drv/driver/ccsm_driver.F90
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,33 @@ program ccsm_driver
!--------------------------------------------------------------------------
! Setup and initialize the communications and logging.
!--------------------------------------------------------------------------
! Timer initialization must happen after MPI initialization, so t_initf is
! called inside ccsm_pre_init1, as are the t_startf calls for the first two
! timers. When using the pio async option, the servers do not return from
! ccsm_pre_init1, so the matching t_stopf calls are also made inside
! ccsm_pre_init1.
!--------------------------------------------------------------------------
! call t_startf('DRIVER_INIT')
! call t_startf('ccsm_pre_init1')
call ccsm_pre_init1()
! call t_stopf('ccsm_pre_init1')
! call t_stopf('DRIVER_INIT')

call t_startf('DRIVER_INIT')
!--------------------------------------------------------------------------
! Initialize ESMF. This is done outside of the ESMF_INTERFACE ifdef
! because it is needed for the time manager, even if the ESMF_INTERFACE
! is not used.
!--------------------------------------------------------------------------
call t_startf('ESMF_Initialize')
call ESMF_Initialize()
call t_stopf('ESMF_Initialize')

!--------------------------------------------------------------------------
! Read in the configuration information and initialize the time manager.
!--------------------------------------------------------------------------
call t_startf('ccsm_pre_init2')
call ccsm_pre_init2()
call t_stopf('ccsm_pre_init2')

#ifdef USE_ESMF_LIB

Expand All @@ -74,8 +88,11 @@ program ccsm_driver
! cap component.
!--------------------------------------------------------------------------

call t_startf('ccsm_init_esmf')
call ESMF_CplCompInitialize(drvcomp, rc=localrc)
if (localrc /= 0) call shr_sys_abort('failed to esmf initialize')
call t_stopf('ccsm_init_esmf')
call t_stopf('DRIVER_INIT')

call ESMF_CplCompRun(drvcomp, rc=localrc)
if (localrc /= 0) call shr_sys_abort('failed to esmf run')
Expand All @@ -89,7 +106,11 @@ program ccsm_driver
! If ESMF is not defined, then just call the initialize, run and finalize
! routines directly.
!--------------------------------------------------------------------------
call t_startf('ccsm_init')
call ccsm_init()
call t_stopf('ccsm_init')
call t_stopf('DRIVER_INIT')

call ccsm_run()
call ccsm_final()

Expand Down
47 changes: 47 additions & 0 deletions models/utils/pio/box_rearrange.F90.in
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ module box_rearrange
#else
use pio_support, only : piodie, Debug, DebugIO, CheckMPIReturn, pio_fc_gather_offset

#endif
#ifdef TIMING
use perf_mod, only : t_startf, t_stopf !_EXTERNAL
#endif
use alloc_mod, only : alloc_check, dealloc_check
use pio_spmd_utils, only : pio_swapm
Expand Down Expand Up @@ -296,16 +299,31 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, &
#ifdef _USE_ALLTOALLW
if (pio_option == COLLECTIVE) then

#ifdef TIMING
call t_startf("a2a_box_rear_comp2io_{TYPE}")
#endif
call MPI_ALLTOALLW(src, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
dest, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, ierror )
#ifdef TIMING
call t_stopf("a2a_box_rear_comp2io_{TYPE}")
#endif
call CheckMPIReturn('box_rearrange', ierror)

else
#endif

#ifdef TIMING
call t_startf("swapm_box_rear_comp2io_{TYPE}")
#endif
call pio_swapm( nprocs, myrank, &
src, ndof, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
dest, niodof, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq )
#ifdef TIMING
call t_stopf("swapm_box_rear_comp2io_{TYPE}")
#endif

#ifdef _USE_ALLTOALLW
endif
#endif
Expand All @@ -324,6 +342,9 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, &
endif
#endif

#ifdef TIMING
call t_startf("p2p_box_rear_comp2io_{TYPE}")
#endif
!
! send data from comp procs
!
Expand Down Expand Up @@ -379,6 +400,10 @@ subroutine box_rearrange_comp2io_{TYPE} (IOsystem, ioDesc, s1, src, niodof, &

call dealloc_check(sreq, 'send requests')

#ifdef TIMING
call t_stopf("p2p_box_rear_comp2io_{TYPE}")
#endif

#if DEBUG_BARRIER
call MPI_BARRIER(IOsystem%union_comm,ierror)
call CheckMPIReturn(subName,ierror)
Expand Down Expand Up @@ -562,16 +587,32 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf,

#ifdef _USE_ALLTOALLW
if (pio_option == COLLECTIVE) then

#ifdef TIMING
call t_startf("a2a_box_rear_io2comp_{TYPE}")
#endif
call MPI_ALLTOALLW(iobuf, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
compbuf, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, ierror )
#ifdef TIMING
call t_stopf("a2a_box_rear_io2comp_{TYPE}")
#endif
call CheckMPIReturn(subName, ierror)

else
#endif

#ifdef TIMING
call t_startf("swapm_box_rear_io2comp_{TYPE}")
#endif
call pio_swapm( nprocs, myrank, &
iobuf, niodof, a2a_sendcounts, a2a_displs, a2a_sendtypes, &
compbuf, ndof, a2a_recvcounts, a2a_displs, a2a_recvtypes, &
IOsystem%union_comm, pio_hs, pio_isend, pio_maxreq )
#ifdef TIMING
call t_stopf("swapm_box_rear_io2comp_{TYPE}")
#endif

#ifdef _USE_ALLTOALLW
endif
#endif
Expand All @@ -583,6 +624,9 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf,

else

#ifdef TIMING
call t_startf("p2p_box_rear_io2comp_{TYPE}")
#endif
call alloc_check(rreq, num_iotasks, 'recv requests')

!
Expand Down Expand Up @@ -642,6 +686,9 @@ subroutine box_rearrange_io2comp_{TYPE} (IOsystem,ioDesc,s1, iobuf,s2, compbuf,

call dealloc_check(sreq,'send requests')
endif
#ifdef TIMING
call t_stopf("p2p_box_rear_io2comp_{TYPE}")
#endif

endif ! POINT_TO_POINT
#endif /* not _MPISERIAL */
Expand Down
14 changes: 14 additions & 0 deletions models/utils/pio/piodarray.F90.in
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ contains
endif ! if(UseRearranger)
#ifdef TIMING
call t_stopf("pio_rearrange_write")
call t_startf("pre_pio_write_nf")
#endif

if (IOproc) then
Expand Down Expand Up @@ -602,6 +603,7 @@ contains
endif

#ifdef TIMING
call t_stopf("pre_pio_write_nf")
call t_startf("pio_write_nf")
#endif
ierr = write_nf(File,IOBUF,varDesc,iodesc,start,count, request)
Expand All @@ -612,11 +614,17 @@ contains
call dealloc_check(count)

if(IOPROC) then
#ifdef TIMING
call t_startf("post_pio_write_nf")
#endif
if(file%iotype==pio_iotype_pnetcdf) then
call add_data_to_buffer(File, IOBUF, request)
else if(Userearranger) then
deallocate(iobuf)
end if
#ifdef TIMING
call t_stopf("post_pio_write_nf")
#endif
end if

! call MPI_Barrier(File%iosystem%comp_comm,ierr)
Expand Down Expand Up @@ -1205,7 +1213,13 @@ contains
this_buffsize = size(iobuf)*c_sizeof(iobuf(1))
file%buffsize=file%buffsize+this_buffsize
total_buffsize = total_buffsize+this_buffsize
#ifdef TIMING
call t_startf("allred_add_data_to_buf")
#endif
call MPI_ALLREDUCE(total_buffsize,maxbuffsize,1,MPI_INTEGER,MPI_MAX,file%iosystem%io_comm, mpierr)
#ifdef TIMING
call t_stopf("allred_add_data_to_buf")
#endif

if(maxbuffsize > pio_buffer_size_limit) then
call darray_write_complete(File)
Expand Down
40 changes: 22 additions & 18 deletions models/utils/timing/perf_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -622,16 +622,13 @@ subroutine t_startf(event, handle)
!
!-----------------------------------------------------------------------
!
if ((timing_initialized) .and. &
(timing_disable_depth .eq. 0) .and. &
(cur_timing_detail .le. timing_detail_limit)) then

if ( present (handle) ) then
ierr = GPTLstart_handle(event, handle)
else
ierr = GPTLstart(event)
endif
if (.not. timing_initialized) return
if (timing_disable_depth > 0) return

if ( present (handle) ) then
ierr = GPTLstart_handle(event, handle)
else
ierr = GPTLstart(event)
endif

return
Expand Down Expand Up @@ -660,16 +657,13 @@ subroutine t_stopf(event, handle)
!
!-----------------------------------------------------------------------
!
if ((timing_initialized) .and. &
(timing_disable_depth .eq. 0) .and. &
(cur_timing_detail .le. timing_detail_limit)) then

if ( present (handle) ) then
ierr = GPTLstop_handle(event, handle)
else
ierr = GPTLstop(event)
endif
if (.not. timing_initialized) return
if (timing_disable_depth > 0) return

if ( present (handle) ) then
ierr = GPTLstop_handle(event, handle)
else
ierr = GPTLstop(event)
endif

return
Expand Down Expand Up @@ -774,6 +768,16 @@ subroutine t_adj_detailf(detail_adjustment)
if (omp_in_parallel()) return
#endif

! Implement the timing_detail logic with t_disablef/t_enablef so that direct
! GPTL calls (such as those made by the Trilinos library) are controlled too.
if ((cur_timing_detail <= timing_detail_limit) .and. &
(cur_timing_detail + detail_adjustment > timing_detail_limit)) then
call t_disablef()
elseif ((cur_timing_detail > timing_detail_limit) .and. &
(cur_timing_detail + detail_adjustment <= timing_detail_limit)) then
call t_enablef()
endif

cur_timing_detail = cur_timing_detail + detail_adjustment

return
Expand Down
2 changes: 1 addition & 1 deletion models/utils/timing/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#define MAX_STACK 128

/* longest timer name allowed (probably safe to just change) */
#define MAX_CHARS 63
#define MAX_CHARS 127

/*
** max allowable number of PAPI counters, or derived events. For convenience,
Expand Down