From 2e299cfeeccf1a587addaac42a2c0ecdbb4d1df2 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Wed, 30 Oct 2024 01:58:46 -0700 Subject: [PATCH] most of the review comments addressed --- src/trans/gpu/algor/growing_allocator_mod.F90 | 2 +- src/trans/gpu/algor/hicblas_mod.F90 | 1 - src/trans/gpu/algor/hicfft.hip.cpp | 2 +- src/trans/gpu/external/setup_trans.F90 | 10 +++++----- src/trans/gpu/internal/ledir_mod.F90 | 2 +- src/trans/gpu/internal/leinv_mod.F90 | 4 ++-- src/trans/gpu/internal/prfi1b_mod.F90 | 2 +- src/trans/gpu/internal/spnsde_mod.F90 | 2 +- src/trans/gpu/internal/trltom_pack_unpack.F90 | 6 +++--- src/trans/gpu/internal/trmtol_pack_unpack.F90 | 4 ++-- src/trans/gpu/internal/updspb_mod.F90 | 2 +- src/trans/gpu/internal/uvtvd_mod.F90 | 2 +- src/trans/gpu/internal/vdtuv_mod.F90 | 2 +- 13 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/trans/gpu/algor/growing_allocator_mod.F90 b/src/trans/gpu/algor/growing_allocator_mod.F90 index 7e7919b4..db869e48 100644 --- a/src/trans/gpu/algor/growing_allocator_mod.F90 +++ b/src/trans/gpu/algor/growing_allocator_mod.F90 @@ -88,7 +88,7 @@ SUBROUTINE DESTROY_GROWING_ALLOCATOR(ALLOC) IMPLICIT NONE TYPE(GROWING_ALLOCATION_TYPE) :: ALLOC INTEGER :: I - IF (ALLOCATED(ALLOC%PTR)) THEN + IF (ASSOCIATED(ALLOC%PTR)) THEN DO I = 1, ALLOC%FREE_FUNCS_SZ CALL ALLOC%FREE_FUNCS(I)%FUNC(ALLOC%PTR, & SIZE(ALLOC%PTR, 1, C_SIZE_T)) diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index fecf9324..5341cff2 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -64,7 +64,6 @@ SUBROUTINE HIP_SGEMM_BATCHED( & END SUBROUTINE HIP_SGEMM_BATCHED END INTERFACE INTERFACE - PUBLIC CLEAN_GEMM SUBROUTINE CLEAN_GEMM(RESOL_ID) BIND(C, NAME="clean_gemm") USE ISO_C_BINDING INTEGER(KIND=C_INT), INTENT(IN), VALUE :: RESOL_ID diff --git a/src/trans/gpu/algor/hicfft.hip.cpp b/src/trans/gpu/algor/hicfft.hip.cpp index a1329372..488610d7 100644 --- a/src/trans/gpu/algor/hicfft.hip.cpp +++ b/src/trans/gpu/algor/hicfft.hip.cpp @@ -60,7 +60,7 @@ template class hicfft_plan { hicfft_plan(hipfftHandle handle_, int offset_) : handle_ptr(new hipfftHandle{handle_}, [](auto ptr) { - fftSafeCall(cufftDestroy(*ptr)); + fftSafeCall(hipfftDestroy(*ptr)); delete ptr; }), offset(offset_) {} diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index 591ef25a..84d60f29 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -506,11 +506,11 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& #ifdef OMPGPU WRITE(NOUT,*) 'Using OpenMP offloading' #endif -WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAS', SIZEOF(FG%ZAS) -WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAA', SIZEOF(FG%ZAA) -WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAS0', SIZEOF(FG%ZAS0) -WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAA0', SIZEOF(FG%ZAA0) -WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZEPSNM', SIZEOF(FG%ZEPSNM) +WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAS', C_SIZEOF(FG%ZAS(1,1,1))*SIZE(FG%ZAS) +WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAA', C_SIZEOF(FG%ZAA(1,1,1))*SIZE(FG%ZAA) +WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAS0', C_SIZEOF(FG%ZAS0(1,1))*SIZE(FG%ZAS0) +WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZAA0', C_SIZEOF(FG%ZAA0(1,1))*SIZE(FG%ZAA0) +WRITE(NOUT,'(A10,":",I9,"B")') 'FG%ZEPSNM', C_SIZEOF(FG%ZEPSNM(1,1))*SIZE(FG%ZEPSNM) IF (IMLOC0(1) > 0) THEN #ifdef ACCGPU diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 110f00dc..0c1ee9e3 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -158,7 +158,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) #ifdef ACCGPU !$ACC DATA & !$ACC& PRESENT(ZINPS,ZINPA,ZOUT,ZINPS0,ZINPA0,ZOUT0) & - !$ACC& PRESENT(D_MYMS,D_NUMP,R_NTMAX,R_NSMAX,G_NDGLU) & + !$ACC& PRESENT(D,D_MYMS,D_NUMP,R,R_NTMAX,R_NSMAX) & !$ACC& PRESENT(ZAA,ZAS,POA1,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2) #endif diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 0ef6b569..7d7d378c 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -153,10 +153,10 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(D_MYMS,D_NUMP,G_NDGLU) & + !$ACC DATA PRESENT(D,D_MYMS,D_NUMP) & !$ACC& PRESENT(ZINP,ZOUTS,ZOUTA,ZINP0,ZOUTS0,ZOUTA0) & !$ACC& PRESENT(ZAA,ZAS,PIA) & - !$ACC& PRESENT(R_NSMAX,G_NDGLU,D_OFFSETS_GEMM2) + !$ACC& PRESENT(R,R_NSMAX,D_OFFSETS_GEMM2) #endif ! READ 2:NSMAX+3 diff --git a/src/trans/gpu/internal/prfi1b_mod.F90 b/src/trans/gpu/internal/prfi1b_mod.F90 index 23ddd21b..83b7b8b9 100755 --- a/src/trans/gpu/internal/prfi1b_mod.F90 +++ b/src/trans/gpu/internal/prfi1b_mod.F90 @@ -81,7 +81,7 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) #ifdef ACCGPU !$ACC DATA & - !$ACC& PRESENT(D_NUMP,R_NSMAX,D_MYMS,D_NASM0) & + !$ACC& PRESENT(D,D_NUMP,R,R_NSMAX,D_MYMS,D_NASM0) & !$ACC& PRESENT(PIA) & !$ACC& PRESENT(PSPEC) ASYNC(1) #endif diff --git a/src/trans/gpu/internal/spnsde_mod.F90 b/src/trans/gpu/internal/spnsde_mod.F90 index 002c9376..e7c7283b 100755 --- a/src/trans/gpu/internal/spnsde_mod.F90 +++ b/src/trans/gpu/internal/spnsde_mod.F90 @@ -85,7 +85,7 @@ SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT (R_NTMAX, D_MYMS) & +!$ACC& PRESENT (R,R_NTMAX, D,D_MYMS) & !$ACC& PRESENT (D_NUMP,PEPSNM, PF, PNSD) ASYNC(1) #endif #ifdef OMPGPU diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 0a157a99..0a6bb4c2 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -98,7 +98,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(G_NMEN,D_NPNTGTB0,FOUBUF_IN,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS,G_NLOEN, R_NSMAX) ASYNC(1) + !$ACC DATA PRESENT(G,G_NMEN,D,D_NPNTGTB0,FOUBUF_IN,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS,G_NLOEN, R,R_NSMAX) ASYNC(1) #endif ! scale results and move into next transformation buffer @@ -232,8 +232,8 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP #ifdef ACCGPU !$ACC DATA & !$ACC& PRESENT(ZINPS,ZINPA,ZINPS0,ZINPA0) & - !$ACC& PRESENT(F_RW,F_RACTHE) & - !$ACC& PRESENT(D_MYMS,D_NUMP,R_NDGNH,R_NDGL,G_NDGLU) & + !$ACC& PRESENT(F,F_RW,F_RACTHE) & + !$ACC& PRESENT(D,D_MYMS,D_NUMP,R,R_NDGNH,R_NDGL,G_NDGLU) & !$ACC& PRESENT(D_NPNTGTB1,D_OFFSETS_GEMM1,FOUBUF) !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,ISL,IGLS,OFFSET1,OFFSET2,JGL,PAIA,PAIS) & diff --git a/src/trans/gpu/internal/trmtol_pack_unpack.F90 b/src/trans/gpu/internal/trmtol_pack_unpack.F90 index 82cc907a..0dd9d3a8 100755 --- a/src/trans/gpu/internal/trmtol_pack_unpack.F90 +++ b/src/trans/gpu/internal/trmtol_pack_unpack.F90 @@ -127,7 +127,7 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(D_MYMS,D_NPNTGTB1,D_NUMP,G_NDGLU,R_NDGNH,R_NDGL) & + !$ACC DATA PRESENT(D,D_MYMS,D_NPNTGTB1,D_NUMP,G,G_NDGLU,R,R_NDGNH,R_NDGL) & !$ACC& PRESENT(ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_IN,D_OFFSETS_GEMM1) #endif @@ -254,7 +254,7 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN #ifdef OMPGPU #endif #ifdef ACCGPU -!$ACC DATA PRESENT(G_NLOEN,G_NMEN,D_NPNTGTB0,FOUBUF,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS) ASYNC(1) +!$ACC DATA PRESENT(G,G_NLOEN,G_NMEN,D,D_NPNTGTB0,FOUBUF,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS) ASYNC(1) #endif OFFSET_VAR=D_NPTRLS(MYSETW) diff --git a/src/trans/gpu/internal/updspb_mod.F90 b/src/trans/gpu/internal/updspb_mod.F90 index 9daf794e..938a42fc 100755 --- a/src/trans/gpu/internal/updspb_mod.F90 +++ b/src/trans/gpu/internal/updspb_mod.F90 @@ -96,7 +96,7 @@ SUBROUTINE UPDSPB(KFIELD,POA,PSPEC,KFLDPTR) !loop over wavenumber #ifdef ACCGPU - !$ACC DATA PRESENT(PSPEC,POA,R_NTMAX,D_NUMP,D_MYMS,D_NASM0) ASYNC(1) + !$ACC DATA PRESENT(PSPEC,POA,R,R_NTMAX,D,D_NUMP,D_MYMS,D_NASM0) ASYNC(1) #endif #ifdef OMPGPU !WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! diff --git a/src/trans/gpu/internal/uvtvd_mod.F90 b/src/trans/gpu/internal/uvtvd_mod.F90 index a920bb53..421ebb16 100755 --- a/src/trans/gpu/internal/uvtvd_mod.F90 +++ b/src/trans/gpu/internal/uvtvd_mod.F90 @@ -86,7 +86,7 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT(D_MYMS,D_NUMP,R_NTMAX) & +!$ACC& PRESENT(D,D_MYMS,D_NUMP,R,R_NTMAX) & !$ACC& PRESENT(ZEPSNM,PU,PV,PVOR,PDIV) ASYNC(1) #endif #ifdef OMPGPU diff --git a/src/trans/gpu/internal/vdtuv_mod.F90 b/src/trans/gpu/internal/vdtuv_mod.F90 index eeb687f1..d9e90053 100755 --- a/src/trans/gpu/internal/vdtuv_mod.F90 +++ b/src/trans/gpu/internal/vdtuv_mod.F90 @@ -89,7 +89,7 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT(R_NTMAX,D_MYMS,D_NUMP,F_RLAPIN) & +!$ACC& PRESENT(R,R_NTMAX,D,D_MYMS,D_NUMP,F,F_RLAPIN) & !$ACC& PRESENT(PEPSNM, PVOR, PDIV) & !$ACC& PRESENT(PU, PV) #endif