From 7b98ba99d595a0760da12895b1a63959e881182c Mon Sep 17 00:00:00 2001 From: Ben Hourahine Date: Mon, 27 Dec 2021 13:54:15 +0000 Subject: [PATCH 1/4] gemr2d matrix copy/redistribute operation Includes work around for rank mismatch in generic calls --- CHANGELOG.rst | 8 +++ lib/blacs.fpp | 39 +++++++++++- lib/blacsfx.fpp | 54 ++++++++++++++++ test/CMakeLists.txt | 1 + test/test_gemr2d.f90 | 144 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 test/test_gemr2d.f90 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c393df8..90234f6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,14 @@ Change Log Notable project changes in various releases. +Unreleased +========== + +Added +----- + +* p?gemr2d routines + 1.0.2 ===== diff --git a/lib/blacs.fpp b/lib/blacs.fpp index 592d057..cf65225 100644 --- a/lib/blacs.fpp +++ b/lib/blacs.fpp @@ -9,9 +9,21 @@ module blacs_module public :: blacs_pinfo, blacs_get, blacs_gridinfo, blacs_gridinit public :: blacs_barrier, blacs_exit, blacs_abort, blacs_pnum - public :: gebs2d, gebr2d, gesd2d, gerv2d, gsum2d + public :: gebs2d, gebr2d, gesd2d, gerv2d, gsum2d, gemr2d + + + !> Performs 2d copy from data in matrix to another, potentially with different distribution + !> patterns. + !> See BLACS documentation for details. + interface gemr2d + #:for TYPE in TYPES + #:set TYPEABBREV = TYPE_ABBREVS[TYPE] + module procedure ${TYPEABBREV}$gemr2d + #:endfor + end interface gemr2d interface + !> Returns the number of processes available for use. !! \see BLACS documentation for details. subroutine blacs_pinfo(id, nproc) @@ -80,6 +92,7 @@ module blacs_module integer, intent(in) :: ictxt, pnum integer, intent(out) :: prow, pcol end subroutine blacs_pcoord + end interface !########################################################################## @@ -163,6 +176,7 @@ module blacs_module #:enddef blacs_gsum2d_interface + !########################################################################## !########################################################################## !########################################################################## @@ -217,5 +231,26 @@ module blacs_module #:endfor end interface gsum2d -end module blacs_module +contains +#:for TYPE in TYPES + #:set TYPEABBREV = TYPE_ABBREVS[TYPE] + #:set FTYPE = FORTRAN_TYPES[TYPE] + #:set PREFIX = TYPE_ABBREVS[TYPE] + !> Interface for ${TYPEABBREV}$ case of the p?gemr2d routine, explictly wrapped to work around the + !> lack of assumed size in interfaces (Fortran2018) + subroutine ${TYPEABBREV}$gemr2d(mm, nn, aa, ia, ja, descA, bb, ib, jb, descB, ictxt) + integer, intent(in) :: mm, nn, ia, ja, ib, jb + integer, intent(in) :: descA(DLEN_), descB(DLEN_) + ${FTYPE}$, intent(in) :: aa(:,:) + ${FTYPE}$, intent(inout) :: bb(:,:) + integer, intent(in) :: ictxt + external p${TYPEABBREV}$gemr2d + + call p${TYPEABBREV}$gemr2d(mm, nn, aa, ia, ja, descA, bb, ib, jb, descB, ictxt) + + end subroutine ${TYPEABBREV}$gemr2d + +#:endfor + +end module blacs_module diff --git a/lib/blacsfx.fpp b/lib/blacsfx.fpp index 6a786d1..79dcb37 100644 --- a/lib/blacsfx.fpp +++ b/lib/blacsfx.fpp @@ -16,6 +16,7 @@ module blacsfx_module public :: blacsfx_gebs, blacsfx_gebr public :: blacsfx_gesd, blacsfx_gerv public :: blacsfx_gsum + public :: blacsfx_gemr2d public :: blacsfx_barrier public :: blacsfx_pinfo, blacsfx_pcoord, blacsfx_pnum, blacsfx_exit @@ -65,6 +66,12 @@ module blacsfx_module #:endfor end interface blacsfx_gsum + interface blacsfx_gemr2d + #:for TYPE in TYPES + #:set TYPEABBREV = TYPE_ABBREVS[TYPE] + module procedure blacsfx_gemr2d_${TYPEABBREV}$ + #:endfor + end interface blacsfx_gemr2d contains @@ -500,6 +507,47 @@ contains #:endfor #:endfor + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!! Matrix copy/redistribution +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +#:def blacsfx_gemr2d_template(SUFFIX, TYPE) + + !> Copies/redistributes matrix (${TYPE}$). + !! \param mm number of rows of AA to copy. + !! \param mm number of columns of AA to copy. + !! \param aa distributed matrix AA from which to copy. + !! \param ia first row of AA from which to copy. + !! \param ja first column of AA from which to copy. + !! \param descA BLACS descriptor for source matrix. + !! \param bb distributed matrix BB into which data is copied. + !! \param ib first row of BB at which to copy. + !! \param jb first column of BB at which to copy. + !! \param descB BLACS descriptor for destination matrix. + !! \param ictxt Context for for union of all processes holding A or B + !! \see BLACS documentation (routine p?gemr2d). + subroutine blacsfx_gemr2d_${SUFFIX}$(mm, nn, aa, ia, ja, descA, bb, ib, jb, descB, ictxt) + integer, intent(in) :: descA(DLEN_) + integer, intent(in) :: descB(DLEN_) + ${TYPE}$, intent(in) :: aa(:,:) + ${TYPE}$, intent(inout) :: bb(:,:) + integer, intent(in) :: mm, nn, ia, ja, ib, jb, ictxt + + ! AA and BB should be references to starting corner of matrices + call gemr2d(mm, nn, aa, ia, ja, descA, bb, ib, jb, descB, ictxt) + + end subroutine blacsfx_gemr2d_${SUFFIX}$ + +#:enddef blacsfx_gemr2d_template + +#:for TYPE in TYPES + #:set FTYPE = FORTRAN_TYPES[TYPE] + #:set SUFFIX = TYPE_ABBREVS[TYPE] + $:blacsfx_gemr2d_template(SUFFIX, FTYPE) +#:endfor + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!! Barrier !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -520,6 +568,9 @@ contains end subroutine blacsfx_barrier +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!! Grid information +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !> Delivers process information. !! @@ -567,6 +618,9 @@ contains end function blacsfx_pnum +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!! Stop +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !> Stops BLACS communication. !! diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b94b611..75fcd69 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,6 +5,7 @@ set(targets set(common-dep-targets test_det test_diag + test_gemr2d test_linecomm test_psyr_pher test_svd diff --git a/test/test_gemr2d.f90 b/test/test_gemr2d.f90 new file mode 100644 index 0000000..2dbe2f0 --- /dev/null +++ b/test/test_gemr2d.f90 @@ -0,0 +1,144 @@ +!> Testing copy between matrix patterns +program test_gemr2d + use, intrinsic :: iso_fortran_env, stdout => output_unit, stdin => input_unit + use test_common_module + use libscalapackfx_module + use blacsfx_module + implicit none + + ! Block sizes for matrix A (using an extremely small value for test purposes) + integer, parameter :: mbA = 2 + integer, parameter :: nbA = 2 + + call main() + +contains + + ! number elements in global matrix + pure function numelem(iGlob, jGlob, mm) + integer, intent(in) :: iGlob, jGlob, mm + integer :: numelem + numelem = jGlob + (iGlob-1) * mm + end function numelem + + subroutine main() + type(blacsgrid) :: mygridA, mygridB + integer, allocatable :: AA(:,:), BB(:,:) + integer :: descA(DLEN_), descB(DLEN_) + integer :: nprow, npcol, mm, nn, iproc, nproc, ii, jj, iGlob, jGlob + integer, allocatable :: iErr(:) + + ! Block size for matrix B + integer :: mbB + integer :: nbB + + ! grid information + call blacsfx_pinfo(iproc, nproc) + + ! approximately square grid for group A + do npcol = int(sqrt(real(nproc, dp))), nproc + if (mod(nproc, npcol) == 0) then + exit + end if + end do + nprow = nproc / npcol + call mygridA%initgrid(nprow, npcol) + call mygridB%initgrid(1, nproc) + if (mygridA%lead) then + write(stdout, "(A,2(1X,I0))") "# source processor grid:", nprow, npcol + end if + if (mygridB%lead) then + write(stdout, "(A,2(1X,I0))") "# destination processor grid:", 1, nproc + end if + + if (mygridA%lead) then + write(stdOut, *)'# Matrix size to re-distrbute?' + read(stdin, *) mm, nn + call blacsfx_gebs(mygridA, mm) + call blacsfx_gebs(mygridA, nn) + else + call blacsfx_gebr(mygridA, mm) + call blacsfx_gebr(mygridA, nn) + end if + + ! Block size for matrix B + mbB = mm + nbB = 1 + + if (mygridA%lead) then + write(stdout, "(A,2(1X,I0))") "# A processor grid:", nprow, npcol + write(stdout, "(A,1X,I0,1X,I0)") "# A block size:", mbA, nbA + end if + if (mygridB%lead) then + write(stdout, "(A,2(1X,I0))") "# B processor grid:", nprow, npcol + write(stdout, "(A,1X,I0,1X,I0)") "# B block size:", mbB, nbB + end if + + call scalafx_creatematrix(mygridA, mm, nn, mbA, nbA, AA, descA) + call scalafx_creatematrix(mygridB, mm, nn, mbB, nbB, BB, descB) + + AA(:,:) = 0 + BB(:,:) = 0 + + write(stdOut,"(A,I0,A,I0,',',I0,A)")'Processor ', mygridA%iproc,' holds A(', shape(AA),& + & ') elements' + + write(stdOut,"(A,I0,A,I0,',',I0,A)")'Processor ', mygridB%iproc,' holds B(', shape(BB),& + & ') elements' + + ! number off elements in A + do ii = 1, size(AA, dim=2) + iGlob = scalafx_indxl2g(ii, descA(NB_), mygridA%mycol, descA(CSRC_), mygridA%ncol) + do jj = 1, size(AA, dim=1) + jGlob = scalafx_indxl2g(jj, descA(MB_), mygridA%myrow, descA(RSRC_), mygridA%nrow) + AA(jj,ii) = numelem(iGlob, jGlob, mm) + end do + end do + + ! A -> B + call blacsfx_gemr2d(mm, nn, aa, 1, 1, descA, bb, 1, 1, descB, mygridA%ctxt) + + aa(:,:) = 0 + + ! B -> A + call blacsfx_gemr2d(mm, nn, bb, 1, 1, descB, aa, 1, 1, descA, mygridA%ctxt) + + allocate(iErr(0:nProc-1)) + iErr(:) = 0 + ! check it's what is expected + if (all(shape(AA) > [0,0])) then + lpCol: do ii = 1, size(AA, dim=2) + iGlob = scalafx_indxl2g(ii, descA(NB_), mygridA%mycol, descA(CSRC_), mygridA%ncol) + if (iGlob > nn) then + cycle lpCol + end if + lpRow: do jj = 1, size(AA, dim=1) + jGlob = scalafx_indxl2g(jj, descA(MB_), mygridA%myrow, descA(RSRC_), mygridA%nrow) + if (jGlob > mm) then + cycle lpRow + end if + if (AA(jj,ii) /= numelem(iGlob, jGlob, mm)) then + write(stdOut,*) "Error on processor ", mygridA%iproc, " Mismatch in element ", jGlob,& + & iGlob + iErr(mygridA%iproc) = numelem(iGlob, jGlob, mm) + exit lpcol + end if + end do lpRow + end do lpCol + end if + + call blacsfx_gsum(mygridA, iErr) + + if (mygridB%lead) then + if (any(iErr /= 0)) then + write(stdOut,*)"Errors for matrix elements" + write(stdOut,*) iErr + end if + end if + + ! Finish blacs. + call blacsfx_exit() + + end subroutine main + +end program test_gemr2d From 98abeb8af6fc7c6dd3811bb50246b822226a88ec Mon Sep 17 00:00:00 2001 From: Ben Hourahine Date: Wed, 20 Apr 2022 23:20:21 +0100 Subject: [PATCH 2/4] syr2k and her2k routines --- lib/pblas.fpp | 44 +++++++++++++++++++++++++++++ lib/pblasfx.fpp | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/lib/pblas.fpp b/lib/pblas.fpp index e5e4cf6..fb4ad85 100644 --- a/lib/pblas.fpp +++ b/lib/pblas.fpp @@ -51,6 +51,37 @@ + +! ************************************************************************ +! *** psyr2k / pher2k +! ************************************************************************ + +#:def interface_psyr2k_pher2k_template(COMMENT, NAME, TYPE, KIND) + + !> Symmetric/hermitian rank-k update (${COMMENT}$). + subroutine ${NAME}$(uplo, trans, nn, kk, alpha, aa, ia, ja, desca, bb, ib, jb, descb, beta, cc,& + & ic, jc, descc) + import + character, intent(in) :: uplo, trans + integer, intent(in) :: nn, kk + real(${KIND}$), intent(in) :: alpha + integer, intent(in) :: desca(*) + ${TYPE}$(${KIND}$), intent(in) :: aa(desca(LLD_), *) + integer, intent(in) :: ia, ja + integer, intent(in) :: descb(*) + ${TYPE}$(${KIND}$), intent(in) :: bb(descb(LLD_), *) + integer, intent(in) :: ib, jb + real(${KIND}$), intent(in) :: beta + integer, intent(in) :: descc(*) + ${TYPE}$(${KIND}$), intent(inout) :: cc(descc(LLD_), *) + integer, intent(in) :: ic, jc + end subroutine ${NAME}$ + +#:enddef interface_psyr2k_pher2k_template + + + + ! ************************************************************************ ! *** psymv / phemv ! ************************************************************************ @@ -194,6 +225,7 @@ module pblas_module public :: psyr, pher public :: psyrk, pherk + public :: psyr2k, pher2k public :: psymv, phemv public :: psymm, phemm public :: pgemm @@ -225,6 +257,18 @@ module pblas_module @:interface_psyrk_pherk_template(dcomplex, pzherk, complex, dp) end interface pherk + !> Symmetric rank-k update. + interface psyr2k + @:interface_psyr2k_pher2k_template(real, pssyr2k, real, sp) + @:interface_psyr2k_pher2k_template(dreal, pdsyr2k, real, dp) + end interface psyr2k + + !> Hermitian rank-k update. + interface pher2k + @:interface_psyr2k_pher2k_template(complex, pcher2k, complex, sp) + @:interface_psyr2k_pher2k_template(dcomplex, pzher2k, complex, dp) + end interface pher2k + !> Symmetric matrix vector product interface psymv @:interface_psymv_phemv_template(real, pssymv, real, sp) diff --git a/lib/pblasfx.fpp b/lib/pblasfx.fpp index 892dd3d..4976dd0 100644 --- a/lib/pblasfx.fpp +++ b/lib/pblasfx.fpp @@ -137,6 +137,66 @@ +#!************************************************************************ +#!*** psyr2k/pher2k +#!************************************************************************ + +#:def pblasfx_psyr2k_pher2k_template(SUFFIX, TYPE, KIND, FUNCTION, NAME) + + !> Symmetric/Hermitian rank-2k update. + !! \param aa Matrix to update with. + !! \param desca Descriptor of aa. + !! \param bb Matrix to update with. + !! \param descb Descriptor of bb. + !! \param cc Matrix to be updated. + !! \param desccc Descriptor of cc. + !! \param uplo "U" for for upper, "L" for lower triangle matrix (default: "L"). + !! \param trans "N" for normal, "T" for transposed aa (default: "N"). + !! \param alpha Prefactor. + subroutine pblasfx_${SUFFIX}$(aa, desca, bb, descb, cc, descc, uplo, trans, alpha, beta,& + & nn, kk, ia, ja, ib, jb, ic, jc) + ${TYPE}$(${KIND}$), intent(in) :: aa(:,:) + integer, intent(in) :: desca(DLEN_) + ${TYPE}$(${KIND}$), intent(in) :: bb(:,:) + integer, intent(in) :: descb(DLEN_) + ${TYPE}$(${KIND}$), intent(inout) :: cc(:,:) + integer, intent(in) :: descc(DLEN_) + character, intent(in), optional :: uplo, trans + real(${KIND}$), intent(in), optional :: alpha, beta + integer, intent(in), optional :: nn, kk + integer, intent(in), optional :: ia, ja, ib, jb, ic, jc + + real(${KIND}$) :: alpha0, beta0 + character :: uplo0, trans0 + integer :: nn0, kk0, ia0, ja0, ib0, jb0, ic0, jc0 + + @:inoptflags(alpha0, alpha, real(1, kind=${KIND}$)) + @:inoptflags(beta0, beta, real(0, kind=${KIND}$)) + @:inoptflags(uplo0, uplo, "L") + @:inoptflags(trans0, trans, "N") + if (trans0 == "N") then + @:inoptflags(nn0, nn, desca(M_)) + @:inoptflags(kk0, kk, desca(N_)) + else + @:inoptflags(nn0, nn, desca(N_)) + @:inoptflags(kk0, kk, desca(M_)) + end if + @:inoptflags(ia0, ia, 1) + @:inoptflags(ja0, ja, 1) + @:inoptflags(ib0, ib, 1) + @:inoptflags(jb0, jb, 1) + @:inoptflags(ic0, ic, 1) + @:inoptflags(jc0, jc, 1) + call ${NAME}$(uplo0, trans0, nn0, kk0, alpha0, aa, ia0, ja0, desca, bb, ib0, jb0, descb, beta0,& + & cc, ic0, jc0, descc) + + end subroutine pblasfx_${SUFFIX}$ + +#:enddef pblasfx_psyr2k_pher2k_template + + + + #! ************************************************************************ #! *** psymm/phemm #! ************************************************************************ @@ -520,6 +580,7 @@ module pblasfx_module public :: pblasfx_psyr, pblasfx_pher public :: pblasfx_psyrk, pblasfx_pherk + public :: pblasfx_psyr2k, pblasfx_pher2k public :: pblasfx_psymv, pblasfx_phemv public :: pblasfx_psymm, pblasfx_phemm public :: pblasfx_pgemm @@ -543,6 +604,14 @@ module pblasfx_module module procedure pblasfx_pherk_complex, pblasfx_pherk_dcomplex end interface pblasfx_pherk + interface pblasfx_psyr2k + module procedure pblasfx_psyr2k_real, pblasfx_psyr2k_dreal + end interface pblasfx_psyr2k + + interface pblasfx_pher2k + module procedure pblasfx_pher2k_complex, pblasfx_pher2k_dcomplex + end interface pblasfx_pher2k + interface pblasfx_psymv module procedure pblasfx_psymv_real, pblasfx_psymv_dreal end interface pblasfx_psymv @@ -593,6 +662,11 @@ contains @:pblasfx_psyrk_pherk_template(pherk_complex, complex, sp, cmplx, pherk) @:pblasfx_psyrk_pherk_template(pherk_dcomplex, complex, dp, cmplx, pherk) + @:pblasfx_psyr2k_pher2k_template(psyr2k_real, real, sp, real, psyr2k) + @:pblasfx_psyr2k_pher2k_template(psyr2k_dreal, real, dp, real, psyr2k) + @:pblasfx_psyr2k_pher2k_template(pher2k_complex, complex, sp, cmplx, pher2k) + @:pblasfx_psyr2k_pher2k_template(pher2k_dcomplex, complex, dp, cmplx, pher2k) + @:pblasfx_psymv_phemv_template(psymv_real, real, sp, real, psymv) @:pblasfx_psymv_phemv_template(psymv_dreal, real, dp, real, psymv) @:pblasfx_psymv_phemv_template(phemv_complex, complex, sp, cmplx, phemv) From d34c9e5101dbb15b9d3dbcb4dc01c3470e3ea61b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Mon, 2 May 2022 17:22:05 +0200 Subject: [PATCH 3/4] Bump version number --- CHANGELOG.rst | 4 ++-- CMakeLists.txt | 2 +- doc/doxygen/Doxyfile | 2 +- doc/sphinx/conf.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 90234f6..7fe22bb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,8 +4,8 @@ Change Log Notable project changes in various releases. -Unreleased -========== +1.1 +=== Added ----- diff --git a/CMakeLists.txt b/CMakeLists.txt index 02d0c28..54a1c2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ include(ScalapackFxUtils) include(${CMAKE_CURRENT_SOURCE_DIR}/config.cmake) -project(ScalapackFx VERSION 1.0.2 LANGUAGES Fortran) +project(ScalapackFx VERSION 1.1.0 LANGUAGES Fortran) setup_build_type() diff --git a/doc/doxygen/Doxyfile b/doc/doxygen/Doxyfile index e7df34c..02397b8 100644 --- a/doc/doxygen/Doxyfile +++ b/doc/doxygen/Doxyfile @@ -32,7 +32,7 @@ PROJECT_NAME = "ScaLAPACKFX" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = "1.0.2" +PROJECT_NUMBER = "1.1.0" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer diff --git a/doc/sphinx/conf.py b/doc/sphinx/conf.py index 712a4d7..e2b1490 100644 --- a/doc/sphinx/conf.py +++ b/doc/sphinx/conf.py @@ -45,10 +45,10 @@ # built documents. # # The short X.Y version. -version = '1.0' +version = '1.1' # The full version, including alpha/beta/rc tags. -release = '1.0.2' +release = '1.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 5e9857db1d13205feb9b4abe3af428354938fe9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 6 May 2022 20:58:23 +0200 Subject: [PATCH 4/4] Update ChangeLog --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7fe22bb..21adf6a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,9 @@ Added * p?gemr2d routines +* psyr2k and pher2k routines + + 1.0.2 =====