diff --git a/cime_config/config_grids.xml b/cime_config/config_grids.xml index add53e750c94..b3d1cf9f7aae 100755 --- a/cime_config/config_grids.xml +++ b/cime_config/config_grids.xml @@ -2936,7 +2936,7 @@ 7268 1 - $DIN_LOC_ROOT/share/domains/domain.ocn.oQU240wLI.160929.nc + $DIN_LOC_ROOT/share/domains/domain.ocn.oQU240wLI_mask.160929.nc oQU240wLI is an MPAS ocean mesh with quasi-uniform 240 km grid cells, nominally 2 degree resolution. Additionally, it has ocean under landice cavities.: diff --git a/cime_config/testmods_dirs/config_pes_tests.xml b/cime_config/testmods_dirs/config_pes_tests.xml index 590c0e6846d2..28c71b7b5259 100644 --- a/cime_config/testmods_dirs/config_pes_tests.xml +++ b/cime_config/testmods_dirs/config_pes_tests.xml @@ -28,6 +28,19 @@ + + + tests+anvil: default, 4 nodes x MAX_MPITASKS_PER_NODE mpi x 1 omp @ root 0 + + -4 + -4 + -4 + -4 + -4 + -4 + + + @@ -57,7 +70,26 @@ + + + test+anvil: any compset on ne4 grid -- 4 nodes pure-MPI + + 108 + 108 + 108 + 36 + 36 + 36 + + + 108 + 108 + 108 + + + + @@ -75,6 +107,62 @@ + + + tests+anvil: pelayout for tri-grid BGC tests with EAM+DOCN + + -8 + -8 + -8 + -8 + -8 + -8 + + + + --compset WCYCL* --res ne30pg2_EC30to60E2r2 on 16 nodes pure-MPI, ~2.7 sypd + + 396 + 396 + 396 + 396 + 180 + 396 + + + 396 + + + + anvil: --compset BGC* --res ne30pg2_r05_EC30to60E2r2 on 30 nodes pure-MPI, ~3 sypd + + 675 + 684 + 684 + 684 + 396 + 684 + + + 684 + + + + + + + + "anvil, GPMPAS-JRA compset, 6 nodes" + + -6 + -6 + -6 + -6 + -6 + -6 + + + @@ -93,5 +181,68 @@ + + + tests+anvil: --compset WCYCL1850 --res northamericax4v1pg2_WC14to60E2r3 on 20 nodes pure-MPI, ~0.8 sypd + + 468 + 468 + 468 + 468 + 252 + 468 + + + 468 + + + + + + + + + tests+anvil: 10x36x1--res conusx4v1_r05_oECv3 --compset F2010 + + -10 + -10 + -10 + -10 + -10 + -10 + + + + + + + + tests+anvil 8x36x1 for F-cases on anvil, to fix testing issues that default to 144 pes on 4 nodes + + -8 + -8 + -8 + -8 + -8 + -8 + + + + + + + + + tests+anvil: elm: anvil PEs for grid l%360x720cru + + -8 + -8 + -8 + -8 + -8 + -8 + + + diff --git a/components/eam/src/physics/crm/samxx/samxx_const.h b/components/eam/src/physics/crm/samxx/samxx_const.h index eda787384e7d..7eb82ca2166a 100644 --- a/components/eam/src/physics/crm/samxx/samxx_const.h +++ b/components/eam/src/physics/crm/samxx/samxx_const.h @@ -209,8 +209,6 @@ int constexpr index_cloud_ice = 0; real constexpr rhor = 1000.; // Density of water, kg/m3 real constexpr rhos = 100.; // Density of snow, kg/m3 real constexpr rhog = 400.; // Density of graupel, kg/m3 -real constexpr tbgmin = 253.16; // Minimum temperature for cloud water., K -real constexpr tbgmax = 273.16; // Maximum temperature for cloud ice, K real constexpr tprmin = 268.16; // Minimum temperature for rain, K real constexpr tprmax = 283.16; // Maximum temperature for snow+graupel, K real constexpr tgrmin = 223.16; // Minimum temperature for snow, K @@ -231,11 +229,38 @@ real constexpr nzeros = 3.e6; // Intersept coeff. for snow real constexpr nzerog = 4.e6; // Intersept coeff. for graupel real constexpr qp_threshold = 1.e-8; // minimal rain/snow water content -real constexpr qcw0 = 1.e-3; -real constexpr qci0 = 1.e-4; real constexpr alphaelq = 1.e-3; real constexpr betaelq = 1.e-3; +//------------------------------------------------------------------------------ +// MMF climate tuning parameters +//------------------------------------------------------------------------------ +#ifdef MMF_TMN +real constexpr tbgmin = MMF_TMN; +#else +real constexpr tbgmin = 253.16; // Min temperature for cloud liq, K +#endif + +#ifdef MMF_TMX +real constexpr tbgmax = MMF_TMX; +#else +real constexpr tbgmax = 273.16; // Max temperature for cloud ice, K +#endif + +#ifdef MMF_QCW +real constexpr qcw0 = MMF_QCW; +#else +real constexpr qcw0 = 1.e-3; // liq autoconversion threshold +#endif + +#ifdef MMF_QCI +real constexpr qci0 = MMF_QCI; +#else +real constexpr qci0 = 1.e-4; // ice autoconversion threshold +#endif +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + real constexpr crm_accel_coef = 1.0/( (real) nx * (real) ny ); int constexpr plev = PLEV; diff --git a/components/elm/src/data_types/ColumnDataType.F90 b/components/elm/src/data_types/ColumnDataType.F90 index 05cb214d3731..01971982d124 100644 --- a/components/elm/src/data_types/ColumnDataType.F90 +++ b/components/elm/src/data_types/ColumnDataType.F90 @@ -1616,8 +1616,8 @@ subroutine col_ws_init(this, begc, endc, h2osno_input, snow_depth_input, watsat_ if (j > nlevbed) then this%h2osoi_vol(c,j) = 0.0_r8 else - if (use_fates_planthydro .or. use_hydrstress) then - this%h2osoi_vol(c,j) = 0.70_r8*watsat_input(c,j) !0.15_r8 to avoid very dry conditions that cause errors in FATES HYDRO + if (use_fates .or. use_hydrstress) then + this%h2osoi_vol(c,j) = 0.70_r8*watsat_input(c,j) !0.15_r8 to avoid very dry conditions that cause errors in FATES else this%h2osoi_vol(c,j) = 0.15_r8 endif diff --git a/components/mpas-ocean/bld/build-namelist b/components/mpas-ocean/bld/build-namelist index 76da422dc997..79b527e4f402 100755 --- a/components/mpas-ocean/bld/build-namelist +++ b/components/mpas-ocean/bld/build-namelist @@ -782,7 +782,7 @@ if ($OCN_ISMF eq 'coupled') { } elsif ($OCN_ISMF eq 'internal') { add_default($nl, 'config_land_ice_flux_mode', 'val'=>"standalone"); } else { - add_default($nl, 'config_land_ice_flux_mode', 'val'=>"pressure_only"); + add_default($nl, 'config_land_ice_flux_mode'); } add_default($nl, 'config_land_ice_flux_formulation'); add_default($nl, 'config_land_ice_flux_useHollandJenkinsAdvDiff'); diff --git a/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml b/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml index e2bacaff9779..21ae2bafc47c 100644 --- a/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml +++ b/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml @@ -333,12 +333,12 @@ 'off' -'standalone' -'standalone' -'standalone' -'standalone' -'standalone' -'standalone' +'pressure_only' +'pressure_only' +'pressure_only' +'pressure_only' +'pressure_only' +'pressure_only' 'Jenkins' .false. 10.0 diff --git a/components/mpas-ocean/cime_config/buildnml b/components/mpas-ocean/cime_config/buildnml index dda7531576ab..0061f4ff75e9 100755 --- a/components/mpas-ocean/cime_config/buildnml +++ b/components/mpas-ocean/cime_config/buildnml @@ -90,10 +90,10 @@ def buildnml(case, caseroot, compname): decomp_prefix = 'mpas-o.graph.info.' restoring_file = 'sss.monthlyClimatology.PHC2_salx_040803.oEC60to30v3wLI.nc' analysis_mask_file = 'masks_SingleRegionAtlanticWTransportTransects_EC60to30v3wLI_171116.nc' - ic_date = '171031' + ic_date = '230220' ic_prefix = 'oEC60to30v3wLI60lev' if ocn_ic_mode == 'spunup': - ic_date = '171116' + ic_date = '230220' ic_prefix = 'oEC60to30v3wLI60lev.restart_theta_year26' elif ocn_grid == 'ECwISC30to60E1r2': @@ -101,7 +101,7 @@ def buildnml(case, caseroot, compname): decomp_prefix = 'mpas-o.graph.info.' restoring_file = 'sss.PHC2_monthlyClimatology.ECwISC30to60E1r02.200408.nc' analysis_mask_file = 'ECwISC30to60E1r02_transportTransects.nc' - ic_date = '200408' + ic_date = '230220' ic_prefix = 'ocean.ECwISC30to60E1r2' if ocn_ic_mode == 'spunup': logger.warning("WARNING: The specified compset is requesting ocean ICs spunup from a G-case") @@ -128,7 +128,7 @@ def buildnml(case, caseroot, compname): elif ocn_grid == 'oQU240wLI': decomp_date = '160929' decomp_prefix = 'mpas-o.graph.info.' - ic_date = '160929' + ic_date = '230220' ic_prefix = 'ocean.QU.240wLI' if ocn_ic_mode == 'spunup': logger.warning("WARNING: The specified compset is requesting ocean ICs spunup from a G-case") @@ -159,7 +159,7 @@ def buildnml(case, caseroot, compname): decomp_prefix = 'mpas-o.graph.info.' restoring_file = 'sss.monthlyClimatology.PHC2_salx.2004_08_03.180503.nc' analysis_mask_file = 'masks_SingleRegionAtlanticWTransportTransects_RRS30to10v3wLI.nc' - ic_date = '171109' + ic_date = '230220' ic_prefix = 'oRRS30to10v3wLI' if ocn_ic_mode == 'spunup': logger.warning("WARNING: The specified compset is requesting ocean ICs spunup from a G-case") @@ -261,10 +261,10 @@ def buildnml(case, caseroot, compname): decomp_prefix = 'mpas-o.graph.info.' restoring_file = 'sss.PHC2_monthlyClimatology.SOwISC12to60E2r4_nomask.210120.nc' analysis_mask_file = 'SOwISC12to60E2r4_mocBasinsAndTransects20210623.nc' - ic_date = '210107' + ic_date = '230220' ic_prefix = 'ocean.SOwISC12to60E2r4' if ocn_ic_mode == 'spunup': - ic_date = '210203' + ic_date = '230220' ic_prefix = 'mpaso.SOwISC12to60E2r4.rstFromG-anvil' elif ocn_grid == 'ECwISC30to60E2r1': @@ -272,10 +272,10 @@ def buildnml(case, caseroot, compname): decomp_prefix = 'mpas-o.graph.info.' restoring_file = 'sss.PHC2_monthlyClimatology.ECwISC30to60E2r1_nomask.201221.nc' analysis_mask_file = 'ECwISC30to60E2r1_mocBasinsAndTransects20210623.nc' - ic_date = '210413' + ic_date = '230220' ic_prefix = 'ocean.ECwISC30to60E2r1' if ocn_ic_mode == 'spunup': - ic_date = '210414' + ic_date = '230220' ic_prefix = 'mpaso.ECwISC30to60E2r1.rstFromG-anvil' diff --git a/components/mpas-ocean/driver/ocn_comp_mct.F b/components/mpas-ocean/driver/ocn_comp_mct.F index 9c47dc25715b..71fa9a14d061 100644 --- a/components/mpas-ocean/driver/ocn_comp_mct.F +++ b/components/mpas-ocean/driver/ocn_comp_mct.F @@ -790,10 +790,9 @@ end subroutine xml_stream_get_attributes call t_stopf ('mpaso_mct_init') call mpas_pool_get_config(domain % configs, 'config_land_ice_flux_mode', config_land_ice_flux_mode) - if ( trim(config_land_ice_flux_mode) .eq. 'pressure_only' ) then - call seq_infodata_PutData( infodata, ocn_prognostic=.true., ocnrof_prognostic=.true., & - ocn_c2_glcshelf=.false.) - else if ( trim(config_land_ice_flux_mode) .eq. 'standalone' ) then + if ( trim(config_land_ice_flux_mode) == 'off' .or. & + trim(config_land_ice_flux_mode) == 'pressure_only' .or. & + trim(config_land_ice_flux_mode) == 'standalone' ) then call seq_infodata_PutData( infodata, ocn_prognostic=.true., ocnrof_prognostic=.true., & ocn_c2_glcshelf=.false.) else if ( trim(config_land_ice_flux_mode) .eq. 'coupled' ) then @@ -2784,7 +2783,8 @@ subroutine ocn_export_mct(o2x_o, errorCode) !{{{ !JW o2x_o % rAttr(index_o2x_So_htv, n) = landIceHeatTransferVelocity(i) !JW o2x_o % rAttr(index_o2x_So_stv, n) = landIceSaltTransferVelocity(i) - if ( trim(config_land_ice_flux_mode) .ne. 'pressure_only' ) then + if ( trim(config_land_ice_flux_mode) .eq. 'standalone' .or. & + trim(config_land_ice_flux_mode) .eq. 'coupled' ) then o2x_o % rAttr(index_o2x_So_blt, n) = landIceBoundaryLayerTracers(indexBLT,i) o2x_o % rAttr(index_o2x_So_bls, n) = landIceBoundaryLayerTracers(indexBLS,i) o2x_o % rAttr(index_o2x_So_htv, n) = landIceTracerTransferVelocities(indexHeatTrans,i) diff --git a/components/mpas-ocean/src/Registry.xml b/components/mpas-ocean/src/Registry.xml index ec873e494282..c95693a04bb0 100644 --- a/components/mpas-ocean/src/Registry.xml +++ b/components/mpas-ocean/src/Registry.xml @@ -1129,14 +1129,26 @@ possible_values=".true. or .false." /> - + + + @@ -1640,6 +1652,8 @@ + + + + + + @@ -1990,6 +2008,7 @@ immutable="true"> + @@ -2106,6 +2125,8 @@ + + @@ -2742,8 +2763,8 @@ missing_value="FILLVAL" missing_value_mask="edgeMask" packages="forwardMode;analysisMode" /> - + + + = 0.0_RKIND)) cycle + if ((landIceFloatingMask(iCell) == 0) .or. (landIceFreshwaterFlux(iCell) >= 0.0_RKIND)) cycle landIceInterfaceTracers(indexIS,iCell) = freezeInterfaceSalinity(iCell) landIceInterfaceTracers(indexIT,iCell) = freezeInterfaceTemperature(iCell) @@ -593,7 +593,7 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, & end do else ! not using Holland and Jenkins advection/diffusion call compute_melt_fluxes( & - landIceMask, & + landIceFloatingMask, & landIceBoundaryLayerTracers(indexBLT,:), & landIceBoundaryLayerTracers(indexBLS,:), & landIceTracerTransferVelocities(indexHeatTrans,:), & @@ -613,13 +613,13 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, & end if end if - ! modulate the fluxes by the landIceFraction + ! modulate the fluxes by the landIceFloatingFraction do iCell = 1, nCells - if (landIceMask(iCell) == 0) cycle + if (landIceFloatingMask(iCell) == 0) cycle - landIceFreshwaterFlux(iCell) = landIceFraction(iCell)*landIceFreshwaterFlux(iCell) - landIceHeatFlux(iCell) = landIceFraction(iCell)*landIceHeatFlux(iCell) - heatFluxToLandIce(iCell) = landIceFraction(iCell)*heatFluxToLandIce(iCell) + landIceFreshwaterFlux(iCell) = landIceFloatingFraction(iCell)*landIceFreshwaterFlux(iCell) + landIceHeatFlux(iCell) = landIceFloatingFraction(iCell)*landIceHeatFlux(iCell) + heatFluxToLandIce(iCell) = landIceFloatingFraction(iCell)*heatFluxToLandIce(iCell) end do endif ! jenkinsOn or hollandJenkinsOn diff --git a/components/mpas-ocean/src/shared/mpas_ocn_tendency.F b/components/mpas-ocean/src/shared/mpas_ocn_tendency.F index 869f771f6fca..47f016faf2bd 100644 --- a/components/mpas-ocean/src/shared/mpas_ocn_tendency.F +++ b/components/mpas-ocean/src/shared/mpas_ocn_tendency.F @@ -351,7 +351,7 @@ subroutine ocn_tend_vel(domain, tendPool, statePool, forcingPool, & !$acc density, normRelVortEdge, montgomeryPotential, pressure, & !$acc thermExpCoeff, salineContractCoeff, tangentialVelocity, & !$acc layerThickEdgeFlux, kineticEnergyCell, sfcFlxAttCoeff, potentialDensity, & - !$acc vertAleTransportTop, vertViscTopOfEdge, wettingVelocity) + !$acc vertAleTransportTop, vertViscTopOfEdge, wettingVelocityFactor) !$acc enter data & !$acc copyin(tendVel, sfcStress, sfcStressMag, & !$acc ssh, normalVelocity, & @@ -452,15 +452,14 @@ subroutine ocn_tend_vel(domain, tendPool, statePool, forcingPool, & #ifdef MPAS_OPENACC !$acc parallel loop collapse(2) & - !$acc present(tendVel, wettingVelocity) + !$acc present(tendVel, wettingVelocityFactor) #else !$omp parallel !$omp do schedule(runtime) private(k) #endif do iEdge = 1, nEdgesAll do k=1,nVertLevels - tendVel(k,iEdge) = tendVel(k,iEdge)* & - (1.0_RKIND - wettingVelocity(k,iEdge)) + tendVel(k,iEdge) = tendVel(k,iEdge) * (1.0_RKIND - wettingVelocityFactor(k,iEdge)) end do end do #ifndef MPAS_OPENACC diff --git a/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F b/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F index 6b1bef26feeb..8e64618a27ad 100644 --- a/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F +++ b/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F @@ -442,6 +442,10 @@ subroutine init_land_ice_forcing(domain)!{{{ "landIceFraction", "land_ice_forcing", "timeVaryingForcing", "landIceFractionForcing", "linear", & config_time_varying_land_ice_forcing_reference_time, config_time_varying_land_ice_forcing_interval) + call mpas_forcing_init_field(domain % streamManager, forcingGroupHead, "ocn_land_ice_forcing", & + "landIceFloatingFraction", "land_ice_forcing", "timeVaryingForcing", "landIceFloatingFractionForcing", "linear", & + config_time_varying_land_ice_forcing_reference_time, config_time_varying_land_ice_forcing_interval) + call mpas_forcing_init_field(domain % streamManager, forcingGroupHead, "ocn_land_ice_forcing", & "landIceDraft", "land_ice_forcing", "timeVaryingForcing", "landIceDraftForcing", "linear", & config_time_varying_land_ice_forcing_reference_time, config_time_varying_land_ice_forcing_interval) @@ -495,6 +499,8 @@ subroutine land_ice_forcing(streamManager, domain, simulationClock)!{{{ real(kind=RKIND), dimension(:), pointer :: & landIceFractionForcing, & landIceFraction, & + landIceFloatingFractionForcing, & + landIceFloatingFraction, & landIcePressureForcing, & landIcePressure, & landIceDraftForcing, & @@ -525,15 +531,18 @@ subroutine land_ice_forcing(streamManager, domain, simulationClock)!{{{ call MPAS_pool_get_dimension(mesh, "nCells", nCells) call MPAS_pool_get_array(timeVaryingForcingPool, "landIceFractionForcing", landIceFractionForcing) + call MPAS_pool_get_array(timeVaryingForcingPool, "landIceFloatingFractionForcing", landIceFloatingFractionForcing) call MPAS_pool_get_array(timeVaryingForcingPool, "landIcePressureForcing", landIcePressureForcing) call MPAS_pool_get_array(timeVaryingForcingPool, "landIceDraftForcing", landIceDraftForcing) call MPAS_pool_get_array(forcingPool, "landIceFraction", landIceFraction) + call MPAS_pool_get_array(forcingPool, "landIceFloatingFraction", landIceFloatingFraction) call MPAS_pool_get_array(forcingPool, "landIcePressure", landIcePressure) call MPAS_pool_get_array(forcingPool, "landIceDraft", landIceDraft) do iCell = 1, nCells landIceFraction(iCell) = landIceFractionForcing(iCell) + landIceFloatingFraction(iCell) = landIceFloatingFractionForcing(iCell) landIcePressure(iCell) = landIcePressureForcing(iCell) landIceDraft(iCell) = landIceDraftForcing(iCell) diff --git a/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F b/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F index 1f8fb440c287..60eb0eb184c8 100644 --- a/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F +++ b/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F @@ -189,7 +189,7 @@ subroutine ocn_tracer_ideal_age_init(domain,err)!{{{ if (config_use_idealAgeTracers_idealAge_forcing) then ! set mask = 0 for open ocean - ! set mask = 1 under ice shelves + ! set mask = 1 under land ice idealAgeMask = 0.0_RKIND if (associated(landIceMask)) then where (landIceMask /= 0) idealAgeMask(1,:) = 1.0_RKIND diff --git a/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F b/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F index 1d96c5c8c047..a288a6eaf829 100644 --- a/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F +++ b/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F @@ -32,6 +32,7 @@ module ocn_wetting_drying use ocn_diagnostics use ocn_diagnostics_variables use ocn_gm + use ocn_mesh implicit none private @@ -108,16 +109,13 @@ subroutine ocn_wetting_drying_verify( block , minHeight, err)!{{{ !----------------------------------------------------------------- type (mpas_pool_type), pointer :: statePool, meshPool, tendPool - integer, dimension(:), pointer :: minLevelCell, maxLevelCell real (kind=RKIND), dimension(:), pointer :: sshSubcycleNew - real (kind=RKIND), dimension(:), pointer :: bottomDepth integer, pointer :: nCellsSolve integer :: iCell, k integer :: debugUnit real (kind=RKIND), dimension(:,:), pointer :: layerThicknessCur real (kind=RKIND), dimension(:,:), pointer :: layerThicknessNew real (kind=RKIND), dimension(:,:), pointer :: layerThicknessTend - real (kind=RKIND), dimension(:), pointer :: lonCell, latCell real (kind=RKIND) :: minThickness, layerThick character (len=StrKIND) :: debugFilename @@ -134,13 +132,8 @@ subroutine ocn_wetting_drying_verify( block , minHeight, err)!{{{ call mpas_pool_get_array(statePool, 'layerThickness', layerThicknessCur, timeLevel=1) call mpas_pool_get_array(statePool, 'layerThickness', layerThicknessNew, timeLevel=2) call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCellsSolve) - call mpas_pool_get_array(meshPool, 'minLevelCell', minLevelCell) - call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell) call mpas_pool_get_array(tendPool, 'layerThickness', layerThicknessTend) call mpas_pool_get_array(statePool, 'sshSubcycle', sshSubcycleNew, 2) - call mpas_pool_get_array(meshPool, 'bottomDepth', bottomDepth) - call mpas_pool_get_array(meshPool, 'lonCell', lonCell) - call mpas_pool_get_array(meshPool, 'latCell', latCell) err = 0 @@ -151,8 +144,11 @@ subroutine ocn_wetting_drying_verify( block , minHeight, err)!{{{ do iCell = 1, nCellsSolve do k = minLevelCell(iCell), maxLevelCell(iCell) ! use ssh as a proxy too for baroclinic mode - if (trim(config_time_integrator) == 'split_explicit' .or. trim(config_time_integrator) == 'split_implicit') then - layerThick = min(layerThicknessNew(k, iCell), (sshSubcycleNew(iCell)+bottomDepth(iCell))/maxLevelCell(iCell)) + ! Note: wetting-drying currently not supported for either of these time integration methods + if (trim(config_time_integrator) == 'split_explicit' .or. & + trim(config_time_integrator) == 'split_implicit') then + layerThick = min(layerThicknessNew(k, iCell), & + (sshSubcycleNew(iCell) + bottomDepth(iCell))/maxLevelCell(iCell)) else layerThick = layerThicknessNew(k, iCell) end if @@ -237,72 +233,61 @@ subroutine ocn_prevent_drying_rk4(block, dt, rkSubstepWeight, config_zero_drying type (mpas_pool_type), pointer :: tendPool - type (mpas_pool_type), pointer :: meshPool type (mpas_pool_type), pointer :: statePool type (mpas_pool_type), pointer :: provisStatePool + real (kind=RKIND), dimension(:), pointer :: ssh real (kind=RKIND), dimension(:, :), pointer :: layerThicknessCur real (kind=RKIND), dimension(:, :), pointer :: layerThicknessProvis real (kind=RKIND), dimension(:, :), pointer :: normalVelocity - integer, dimension(:), pointer :: minLevelEdgeTop - integer, dimension(:), pointer :: maxLevelEdgeTop - integer, dimension(:), pointer :: maxLevelEdgeBot - integer, pointer :: nEdges integer :: iEdge, k err = 0 call mpas_pool_get_subpool(block % structs, 'tend', tendPool) - call mpas_pool_get_subpool(block % structs, 'mesh', meshPool) call mpas_pool_get_subpool(block % structs, 'state', statePool) call mpas_pool_get_subpool(block % structs, 'provis_state', provisStatePool) + call mpas_pool_get_array(statePool, 'ssh', ssh, 1) call mpas_pool_get_array(statePool, 'normalVelocity', normalVelocity, 1) ! use thickness at n because constraint is h_n + dt*T_h > h_min call mpas_pool_get_array(statePool, 'layerThickness', layerThicknessCur, 1) call mpas_pool_get_array(provisStatePool, 'layerThickness', layerThicknessProvis, 1) - call mpas_pool_get_dimension(block % dimensions, 'nEdges', nEdges) - call mpas_pool_get_array(meshPool, 'minLevelEdgeTop', minLevelEdgeTop) - call mpas_pool_get_array(meshPool, 'maxLevelEdgeTop', maxLevelEdgeTop) - call mpas_pool_get_array(meshPool, 'maxLevelEdgeBot', maxLevelEdgeBot) !$omp parallel !$omp do schedule(runtime) - do iEdge = 1, nEdges - wettingVelocity(:, iEdge) = 0.0_RKIND + do iEdge = 1, nEdgesAll + wettingVelocityFactor(:, iEdge) = 0.0_RKIND end do !$omp end do !$omp end parallel - ! ensure cells stay wet by selectively damping cells with a damping tendency to make sure tendency doesn't dry cells + ! ensure cells stay wet by selectively damping cells with a damping tendency to make + ! sure tendency doesn't dry cells - call ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, & - normalTransportVelocity, rkSubstepWeight, wettingVelocity, err) + call ocn_wetting_drying_wettingVelocity(layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, & + normalTransportVelocity, rkSubstepWeight, wettingVelocityFactor, err) - ! prevent drying from happening with selective wettingVelocity - !$omp parallel - !$omp do schedule(runtime) private(k) - do iEdge = 1, nEdges - do k = minLevelEdgeTop(iEdge), maxLevelEdgeBot(iEdge) - if (abs(normalTransportVelocity(k,iEdge) + wettingVelocity(k,iEdge)) < eps) then - ! prevent spurious flux for close to zero values - normalTransportVelocity(k, iEdge) = 0.0_RKIND - normalVelocity(k, iEdge) = 0.0_RKIND - else if (abs(normalTransportVelocity(k,iEdge) + wettingVelocity(k,iEdge)) <= abs(normalTransportVelocity(k,iEdge))) then - normalTransportVelocity(k, iEdge) = normalTransportVelocity(k, iEdge) + wettingVelocity(k, iEdge) - normalVelocity(k, iEdge) = normalVelocity(k, iEdge) + wettingVelocity(k, iEdge) - end if - - if (abs(wettingVelocity(k, iEdge)) > 0.0_RKIND .and. config_zero_drying_velocity) then - normalTransportVelocity(k, iEdge) = 0.0_RKIND - normalVelocity(k, iEdge) = 0.0_RKIND - end if + ! prevent drying from happening with selective wettingVelocityFactor + if (config_zero_drying_velocity) then + !$omp parallel + !$omp do schedule(runtime) private(k) + do iEdge = 1, nEdgesAll + do k = minLevelEdgeTop(iEdge), maxLevelEdgeBot(iEdge) + + if (abs(wettingVelocityFactor(k, iEdge)) > 0.0_RKIND) then + normalTransportVelocity(k, iEdge) = (1.0_RKIND - & + wettingVelocityFactor(k, iEdge)) * normalTransportVelocity(k, iEdge) + normalVelocity(k, iEdge) = (1.0_RKIND - & + wettingVelocityFactor(k, iEdge)) * normalVelocity(k, iEdge) + end if + end do end do - end do - !$omp end do - !$omp end parallel + !$omp end do + !$omp end parallel + end if end subroutine ocn_prevent_drying_rk4 !}}} @@ -319,9 +304,9 @@ end subroutine ocn_prevent_drying_rk4 !}}} !> to prevent cells from drying. ! !----------------------------------------------------------------------- - subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, & + subroutine ocn_wetting_drying_wettingVelocity(layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, & - normalVelocity, dt, wettingVelocity, err)!{{{ + normalVelocity, dt, wettingVelocityFactor, err)!{{{ !----------------------------------------------------------------- ! @@ -329,9 +314,6 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, laye ! !----------------------------------------------------------------- - type (mpas_pool_type), intent(in) :: & - meshPool !< Input: horizonal mesh information - real (kind=RKIND), dimension(:,:), intent(in) :: & layerThicknessCur !< Input: layer thickness at old time @@ -354,7 +336,7 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, laye !----------------------------------------------------------------- real (kind=RKIND), dimension(:,:), intent(inout) :: & - wettingVelocity !< Input/Output: velocity wettingVelocityency + wettingVelocityFactor !< Input/Output: velocity wettingVelocityFactor !----------------------------------------------------------------- ! @@ -370,74 +352,71 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, laye ! !----------------------------------------------------------------- - integer :: iEdge, iCell, k, i - integer, pointer :: nVertLevels, nCells - integer, dimension(:), pointer :: nEdgesOnCell - integer, dimension(:), pointer :: minLevelCell, maxLevelCell - integer, dimension(:), pointer :: minLevelEdgeTop, maxLevelEdgeBot - integer, dimension(:,:), pointer :: edgesOnCell - integer, dimension(:,:), pointer :: edgeSignOnCell + integer :: cell1, cell2, iEdge, iCell, k, i - real (kind=RKIND) :: divOutFlux - real (kind=RKIND) :: invAreaCell + real (kind=RKIND) :: divFlux, divOutFlux real (kind=RKIND) :: layerThickness - real (kind=RKIND), dimension(:), pointer :: dvEdge - real (kind=RKIND), dimension(:), pointer :: areaCell + real (kind=RKIND) :: hCrit, hRampMin, hEdgeTotal + + character (len=100) :: log_string err = 0 - call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) - call mpas_pool_get_array(meshPool, 'areaCell', areaCell) - call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) - call mpas_pool_get_array(meshPool, 'edgeSignOnCell', edgeSignOnCell) - call mpas_pool_get_array(meshPool, 'minLevelCell', minLevelCell) - call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell) - call mpas_pool_get_array(meshPool, 'minLevelEdgeTop', minLevelEdgeTop) - call mpas_pool_get_array(meshPool, 'maxLevelEdgeBot', maxLevelEdgeBot) - call mpas_pool_get_array(meshPool, 'dvEdge', dvEdge) - call mpas_pool_get_dimension(meshPool, 'nCells', nCells) - call mpas_pool_get_dimension(meshPool, 'nVertLevels', nVertLevels) + hCrit = config_drying_min_cell_height + + if (.not. config_zero_drying_velocity) return ! need predicted transport velocity to limit drying flux !$omp parallel - !$omp do schedule(runtime) private(invAreaCell, i, iEdge, k, divOutFlux, layerThickness) - do iCell = 1, nCells - invAreaCell = 1.0_RKIND / areaCell(iCell) - ! can switch with maxLevelEdgeBot(iEdge) + !$omp do schedule(runtime) private(i, iEdge, k, divOutFlux, layerThickness) + do iCell = 1, nCellsAll do k = minLevelCell(iCell), maxLevelCell(iCell) divOutFlux = 0.0_RKIND layerThickness = min(layerThicknessProvis(k, iCell), layerThicknessCur(k, iCell)) do i = 1, nEdgesOnCell(iCell) iEdge = edgesOnCell(i, iCell) - if (k <= maxLevelEdgeBot(iEdge) .or. k >= minLevelEdgeTop(iEdge)) then + if (k <= maxLevelEdgeTop(iEdge) .and. k >= minLevelEdgeBot(iEdge)) then ! only consider divergence flux leaving the cell if ( normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) < 0.0_RKIND ) then - divOutFlux = divOutFlux + normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) & - * layerThickEdgeFlux(k, iEdge) * dvEdge(iEdge) * invAreaCell + divOutFlux = divOutFlux + & + normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) * & + layerThickEdgeFlux(k, iEdge) * dvEdge(iEdge) * & + invAreaCell(iCell) end if end if end do + layerThickness = layerThickness + dt * divOutFlux - ! if layer thickness is too small, limit divergence flux outwards with opposite velocity - if ((layerThickness + dt*divOutFlux ) <= (config_drying_min_cell_height + config_drying_safety_height)) then - ! limit divOutFlux out of cell to keep it wet - divOutFlux = abs(divOutFlux) - divOutFlux = (layerThickness - (config_drying_min_cell_height + eps)) / (dt*divOutFlux + eps) - + ! if layer thickness is too small, limit divergence flux outwards with + ! opposite velocity + if (layerThickness <= & + hCrit + config_drying_safety_height) then + do i = 1, nEdgesOnCell(iCell) + iEdge = edgesOnCell(i, iCell) + if (k <= maxLevelEdgeBot(iEdge) .and. k >= minLevelEdgeTop(iEdge)) then + if ( normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) <= 0.0_RKIND ) then + wettingVelocityFactor(k, iEdge) = 1.0_RKIND + end if + end if + end do + elseif (config_zero_drying_velocity_ramp .and. & + (layerThickness > & + hCrit + config_drying_safety_height) .and. & + (layerThickness <= config_zero_drying_velocity_ramp_hmax)) then + + hRampMin = config_zero_drying_velocity_ramp_hmin + ! Following O'Dea et al. (2021), if total upwinded wct is less than + ! 2*critical thickness, apply damping at each edge do i = 1, nEdgesOnCell(iCell) iEdge = edgesOnCell(i, iCell) - if (k <= maxLevelEdgeBot(iEdge) .or. k >= minLevelEdgeTop(iEdge)) then + if (k <= maxLevelEdgeBot(iEdge) .and. k >= minLevelEdgeTop(iEdge)) then if ( normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) <= 0.0_RKIND ) then - ! each outgoing velocity is penalized (but not the incoming, wetting velocities) - ! square the fractional term to make values near zero go to zero much quicker (to prevent threshold from being hit) - wettingVelocity(k, iEdge) = - (min(max(0.0_RKIND, 1.0_RKIND - (divOutFlux*divOutFlux)), 1.0_RKIND)) * normalVelocity(k, iEdge) - ! just go with simple boolean approach for zero wetting velocity for debugging purposes - if (config_zero_drying_velocity) then - wettingVelocity(k, iEdge) = 1.0_RKIND - end if + wettingVelocityFactor(k, iEdge) = 1.0_RKIND - & + tanh(50.0_RKIND * (layerThickness - hRampMin)/hRampMin) end if end if end do + end if end do diff --git a/share/util/shr_reprosum_mod.F90 b/share/util/shr_reprosum_mod.F90 index cde01474fdea..06631657cfee 100644 --- a/share/util/shr_reprosum_mod.F90 +++ b/share/util/shr_reprosum_mod.F90 @@ -282,10 +282,10 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & ! models use the same base, e.g. ! radix(1.0_r8) == radix(1_i8) ! for real*8 and integer*8. If not, then the alternative algorithm -! mentioned above and described below is used instead. The integer -! representation must also have enough digits for the potential growth -! of the sum for each level, so could conceivably be too small for a -! large number of summands. +! DDPDD mentioned above and described below is used instead. The +! integer representation must also have enough digits for the +! potential growth of the sum for each level, so could conceivably be +! too small for a large number of summands. ! ! Upper bounds on the total number of summands and on all intermediate ! sums are calculated as @@ -322,26 +322,38 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & ! conversion of the vector of integer representation to a floating ! point value may be subject to rounding errors. Before the ! conversion, the vector of integers is adjusted so that all elements -! have the same sign, eliminating the possibility of catastrophic -! cancellation. These are integer operations, so no accuracy is -! lost. Next, each element of the vector of integers is converted to a -! floating point value and added into the intermediate sum, in -! smallest to largest order (in absolute value). Any rounding error is -! in the least significant digit for each intermediate sum, and the -! exponents for each summand are monotonically increasing, so the -! rounding errors do not accumulate. Thus, the relative difference -! between the 'exact' value and that calculated by this algorithm will -! be approximately machine epsilon for 64-bit real values, and any -! difference will be restricted to the 16th significant digit in a -! decimal representation. This does mean that the values calculated -! with the integer*8 internal representation could differ from those -! calculated using the integer*4 internal representation, but the -! differences will be in the least significant digits. Similarly, the -! results may change (in the least significant digits) with a change -! in system, compiler, or compiler flags. However, for a fixed -! internal representation, processor architecture, compiler, and -! compiler options, the result will (still) be reproducible with -! respect to MPI task and OpenMP thread counts. +! have the same sign, and so that the value, in absolute value, at a +! given level is strictly less than what can be represented at the +! next lower level (larger exponent) and strictly greater than what +! can represented at the next higher level (smaller exponent). Since +! all elements have the same sign, the sign is set to positive +! temporarily and then restored when the conversion to floating point +! is complete. These are all integer operations, so no accuracy is +! lost. These adjustments eliminate the possibility of catastrophic +! cancellation. Also, when converting the individual elements to +! floating point values and summing them, the summation is now +! equivalent to concatenating the digits in the mantissas for the +! component summands. In consequence, in the final step when each +! element of this modified vector of integers is converted to a +! floating point value and added into the intermediate sum, any +! rounding is limited to the least significant digit representable +! in the final floating point sum. +! +! Any such rounding error will be sensitive to the particular floating +! values generated from the integer vector, and so will be +! sensitive to the number of levels in the vector and the implicit +! exponent associated with each level, which are themselves functions +! of the numbers of MPI tasks and OpenMP threads and the number of +! digits representable in an integer. To avoid this sensitivity, +! (effectively) generate a new integer vector in which each component +! integer has a fixed number of significant digits (e.g., +! digits(1.0_r8)) and generate the floating point values from these +! before summing. (See comments in code for more details.) This +! creates a sequence of floating point values to be summed that are +! independent of, for example, the numbers of MPI tasks and OpenMP +! threads or whether using integer*8 or integer*4 internal +! representations in the integer vector, and thus ensure +! reproducibility with respect to these options. ! ! Description of optional parameters for integer vector algorithm: !----------------------------------------------------------------- @@ -1135,7 +1147,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & ! ! Local workspace ! - integer, parameter :: max_jlevel = & + integer, parameter :: max_svlevel_factor = & 1 + (digits(1_i8)/digits(1.0_r8)) integer(i8) :: i8_arr_tlsum_level(-(extra_levels-1):max_level,nflds,omp_nthreads) @@ -1149,9 +1161,10 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & integer(i8) :: i8_arr_gsum_level((max_level+extra_levels+2)*nflds) ! integer vector representing global ! sum - integer(i8) :: IX_8 ! integer representation of current - ! jlevels of X_8 ('part' of - ! i8_arr_gsum_level) + integer(i8) :: i8_gsum_level(-(extra_levels-1):max_level) + ! integer vector representing global + ! sum for one field + integer(i8) :: IX_8 ! integer representation of r8 value integer(i8) :: i8_sign ! sign global sum integer(i8) :: i8_radix ! radix for i8 variables (and r8 ! variables by earlier if-test) @@ -1164,7 +1177,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & integer :: isum_beg(omp_nthreads), isum_end(omp_nthreads) ! range of summand indices for each ! OpenMP thread - integer :: ifld, isum, ithread + integer :: ifld, isum, ithread, jlevel ! loop variables integer :: arr_exp ! exponent of summand integer :: arr_shift ! exponent used to generate integer @@ -1178,13 +1191,28 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & integer :: min_level ! index of minimum levels (including ! extra levels) for i8_arr_tlsum_level integer :: ioffset ! offset(ifld) - integer :: jlevel ! number of floating point 'pieces' - ! extracted from a given i8 integer + integer :: svlevel ! number of summands in summand_vector integer :: ierr ! MPI error return - integer :: LX(max_jlevel) ! exponent of X_8 (see below) + integer :: LX ! exponent of X_8 (see below) integer :: veclth ! total length of i8_arr_lsum_level - integer :: sum_digits ! lower bound on number of significant - ! in integer expansion of sum + integer :: i8_digit_count ! number of digits in integer + ! expansion of sum + integer :: i8_begin_level ! level starting from in + ! creating next 'exactly representable' + ! floating point value from modified + ! integer expansion of the sum + integer :: i8_trunc_level ! level at which the number of digits in + ! the modified integer expansion of the + ! sum exceeds the number of representable + ! digits in the floating point sum + integer :: i8_trunc_loc ! location of last digit at i8_trunc_level + ! in the modified integer expansion of the + ! sum that is representable in the floating + ! point sum + integer(i8) :: i8_trunc_level_rem + ! truncated digits at i8_trunc_level + ! in the modified integer expansion + ! of the sum integer :: curr_exp ! exponent of partial sum during ! reconstruction from integer vector integer :: corr_exp ! exponent of current summand in @@ -1193,18 +1221,21 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & real(r8) :: arr_frac ! fraction of summand real(r8) :: arr_remainder ! part of summand remaining after ! current level of integer expansion - real(r8) :: X_8(max_jlevel) ! r8 vector representation of current + real(r8) :: X_8 ! r8 representation of current ! i8_arr_gsum_level - real(r8) :: RX_8 ! r8 representation of difference - ! between current i8_arr_gsum_level - ! and current jlevels of X_8 - ! (== IX_8). Also used in final - ! scaling step - - logical :: first ! flag used to indicate that just - ! beginning reconstruction of sum - ! from integer vector - + real(r8) :: RX_8 ! r8 representation of (other) + ! integers used in calculation. + real(r8) :: summand_vector((max_level+extra_levels)*max_svlevel_factor) + ! vector of r8 values generated from + ! integer vector representation to be + ! summed to generate global sum + + logical :: first_stepd_iteration + ! flag used to indicate whether first + ! time through process of converting + ! vector of integers into a floating + ! point value, as it requires + ! special logic ! !------------------------------------------------------------------------ ! Save radix of i8 variables in an i8 variable @@ -1312,7 +1343,6 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & endif enddo - ! Postprocess integer vector to eliminate possibility of overflow ! during subsequent sum over threads and tasks, as per earlier ! comment on logic behind definition of max_nsummands. If value at a @@ -1425,27 +1455,79 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & call t_stopf("repro_sum_allr_i8") #endif + call t_startf('repro_sum_finalsum') ! Construct global sum from integer vector representation: ! 1) arr_max_shift is the shift applied to fraction(arr_gmax) . -! When shifting back, need to 'add back in' true arr_gmax exponent. This was -! removed implicitly by working only with the fraction . -! 2) Want to add levels into sum in reverse order (smallest to largest). However, -! even this can generate floating point rounding errors if signs of integers -! alternate. To avoid this, do some arithmetic with integer vectors so that all -! components have the same sign. This should keep relative difference between -! using different integer sizes (e.g. i8 and i4) to machine epsilon -! 3) Assignment to X_8 will usually lose accuracy since maximum integer -! size is greater than the max number of 'digits' in r8 value (if xmax_nsummands -! correction not very large). Calculate remainder and add in first (since -! smaller). One correction is sufficient for r8 (53 digits) and i8 (63 digits). -! For r4 (24 digits) may need to correct twice. Code is written in a general -! fashion, to work no matter how many corrections are necessary (assuming -! max_jlevel parameter calculation is correct). +! When shifting back, need to 'add back in' the true arr_gmax exponent. +! This was removed implicitly by working only with the fraction. +! 2) To avoid the possibility of catastrophic cancellation, and +! an unacceptable floating point rounding error, can do some arithmetic +! with the integer vector so that all components have the same sign. +! 3) If convert each integer in the integer vector to a floating +! point value and then add these together, smallest to largest, to +! calculate the final sum, there may be roundoff error in the least +! significant digit. This error will be sensitive to the particular +! floating values generated from the integer vector, and so will be +! sensitive to the number of levels in the vector and the implicit +! exponent associated with each level. So this approach is not +! guaranteed to be reproducible with respect to a change in the +! number of MPI tasks and OpenMP threads (as this changes the +! definition of max_nsummands, and thus also arr_max_shift). It is +! also not guaranteed to be reproducible with respect to changing +! the integer size, e.g. from i8 to i4, as this also changes +! arr_max_shift. However, can eliminate this potential loss of +! reproducibility by taking the following steps. +! a) Manipulate the integer vector so that +! i) the component values do not 'overlap', that is, the value +! represented by a component is strictly less than the value +! represented by the least significant digit in the previous +! component, and +! ii) all components are positive (saving the sign to be restored +! to the final result). +! b) Identify the digit in the resulting integer vector that is the +! last representable in the floating point representation, then +! truncate the vector at this point, i.e., all digits of lesser +! significance in the given component and all components +! representing digits of lesser significance (call this the +! remainder). +! c) Convert each integer component in the modified integer vector +! to its corresponding floating point value and sum the +! sequence. (Order is unimportant, as explained below, but here +! add largest to smallest.) +! d) Repeat (b) and (c) for the remainder (recursively, as +! necessary). +! e) Sum all floating point numbers generated by step (c), smallest +! to largest. +! f) Restore the sign. +! With the manipulations in (a) and (b), the summation in (c) is +! equivalent to concatenating the digits in the mantissas for the +! component summands, so rounding is irrelevant (so far). Repeating +! this with the remainder(s) generates a sequence of 'exact' +! floating point numbers. Summing these can still generate a +! rounding error in the least significant digit in the largest +! floating point value (which is the last representable digit in the +! final result), but the floating point values being summed and +! order of summation are independent of the number of levels and +! implicit exponents, so reproducibility is ensured. +! +! Note that assignment of an i8 integer value to an r8 floating point +! variable in step (c) can lead to a loss of accuracy because the +! maximum number of digits in the i8 integer can be greater than the +! maximum number of digits representable in the r8 variable (if the +! xmax_nsummands correction is not very large). With the same sign +! and nonoverlapping properties of the integer components, these lost +! digits will also not be representable in the final sum. The process +! described above of truncating at this last representable digit, and +! then separately generating floating point value(s) for the +! remainder, takes care of this automatically. Similar reasoning +! applies to r4 floating point values with either i8 or i4 integer +! components. recompute = .false. do ifld=1,nflds arr_gsum(ifld) = 0.0_r8 ioffset = offset(ifld) + svlevel = 0 ! If validate is .true., test whether the summand upper bound ! was exceeded on any of the MPI tasks @@ -1456,9 +1538,17 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & endif if (.not. recompute) then +! Copy integer vector for current field from i8_arr_gsum_level, so that +! can be modified without changing i8_arr_gsum_level. (Preserving +! i8_arr_gsum_level unchanged is not necessary, but is convenient for debugging +! and makes indexing clearer and less error prone.) + i8_gsum_level(:) = 0_i8 + do ilevel=min_level,max_levels(ifld) + i8_gsum_level(ilevel) = i8_arr_gsum_level(ioffset+ilevel) + enddo -! Preprocess integer vector: -! a) If value larger than or equal to (radix(1_i8)**arr_max_shift), +! Preprocess integer vector (as described in 3(a) above): +! i) If value larger than or equal to (radix(1_i8)**arr_max_shift), ! add this 'overlap' to the value at the next smaller level ! in the vector, resulting in nonoverlapping ranges for each ! component. @@ -1489,144 +1579,248 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & ! -(extra_levels-1), does not have to be less than ! (radix(1_i8)**arr_max_shift) for this 'nonoverlap' property to ! hold. - do ilevel=max_levels(ifld),min_level+1,-1 - if (abs(i8_arr_gsum_level(ioffset+ilevel)) >= & - (i8_radix**arr_max_shift)) then - - IX_8 = i8_arr_gsum_level(ioffset+ilevel) & + do ilevel=max_levels(ifld),min_level+1,-1 + if (abs(i8_gsum_level(ilevel)) >= & + (i8_radix**arr_max_shift)) then + + IX_8 = i8_gsum_level(ilevel) & / (i8_radix**arr_max_shift) - i8_arr_gsum_level(ioffset+ilevel-1) = & - i8_arr_gsum_level(ioffset+ilevel-1) + IX_8 - + i8_gsum_level(ilevel-1) = & + i8_gsum_level(ilevel-1) + IX_8 + IX_8 = IX_8*(i8_radix**arr_max_shift) - i8_arr_gsum_level(ioffset+ilevel) = & - i8_arr_gsum_level(ioffset+ilevel) - IX_8 - endif - enddo -! b) Working consecutively from the first level with a nonzero value -! up to level max_levels(ifld), subtract +/- 1 from level with -! larger exponent (e.g., ilevel) and add +/- -! (i8_radix**arr_max_shift) to level with smaller exponent -! (ilevel+1), when necessary, so that the value at ilevel+1 -! has the same sign as the value at ilevel. Treat a zero value at -! ilevel+1 as always a different sign from the value at ilevel so -! that the process always makes this nonzero. (Otherwise, the -! wrong sign could be reintroduced by subtracting from a zero -! value at the next step.) When finished with the process values -! at all levels are either greater than or equal to zero or all -! are less than or equal to zero. Note that this can decrease (but -! not increase) the absolute value at level -(extra_levels-1) by -! 1. All other levels are now less than or equal to -! (radix(1_i8)**arr_max_shift) in absolute value rather than -! strictly less than. - ilevel = min_level - do while ((i8_arr_gsum_level(ioffset+ilevel) == 0_i8) & - .and. (ilevel < max_levels(ifld))) - ilevel = ilevel + 1 - enddo -! - if (ilevel < max_levels(ifld)) then - if (i8_arr_gsum_level(ioffset+ilevel) > 0_i8) then - i8_sign = 1_i8 - else - i8_sign = -1_i8 - endif - do jlevel=ilevel,max_levels(ifld)-1 - if ((sign(1_i8,i8_arr_gsum_level(ioffset+jlevel)) & - /= sign(1_i8,i8_arr_gsum_level(ioffset+jlevel+1))) & - .or. (i8_arr_gsum_level(ioffset+jlevel+1) == 0_i8)) then - i8_arr_gsum_level(ioffset+jlevel) = & - i8_arr_gsum_level(ioffset+jlevel) - i8_sign - i8_arr_gsum_level(ioffset+jlevel+1) = & - i8_arr_gsum_level(ioffset+jlevel+1) & - + i8_sign*(i8_radix**arr_max_shift) - endif - enddo + i8_gsum_level(ilevel) = & + i8_gsum_level(ilevel) - IX_8 + endif + enddo + +! ii) Working consecutively from the first level with a nonzero value +! up to level max_levels(ifld), subtract +/- 1 from level with +! larger exponent (e.g., ilevel) and add +/- +! (i8_radix**arr_max_shift) to level with smaller exponent +! (ilevel+1), when necessary, so that the value at ilevel+1 +! has the same sign as the value at ilevel. Treat a zero value at +! ilevel+1 as always a different sign from the value at ilevel so +! that the process always makes this nonzero. (Otherwise, the +! wrong sign could be reintroduced by subtracting from a zero +! value at the next step.) When finished with the process values +! at all levels are either greater than or equal to zero or all +! are less than or equal to zero. Note that this can decrease +! (but not increase) the absolute value at level +! -(extra_levels-1) by 1. All other levels are now less than or +! equal to (radix(1_i8)**arr_max_shift) in absolute value rather +! than strictly less than. + ilevel = min_level + do while ((i8_gsum_level(ilevel) == 0_i8) & + .and. (ilevel < max_levels(ifld))) + ilevel = ilevel + 1 + enddo +! + if (i8_gsum_level(ilevel) < 0_i8) then + i8_sign = -1_i8 + else + i8_sign = 1_i8 + endif +! + if (ilevel < max_levels(ifld)) then + do jlevel=ilevel,max_levels(ifld)-1 + if ((sign(1_i8,i8_gsum_level(jlevel)) & + /= sign(1_i8,i8_gsum_level(jlevel+1)))& + .or. (i8_gsum_level(jlevel+1) == 0_i8)) then + i8_gsum_level(jlevel) = & + i8_gsum_level(jlevel) - i8_sign + i8_gsum_level(jlevel+1) = & + i8_gsum_level(jlevel+1) & + + i8_sign*(i8_radix**arr_max_shift) + endif + enddo endif -! Start with maximum shift, and work up to larger values - arr_shift = arr_gmax_exp(ifld) & - - max_levels(ifld)*arr_max_shift - curr_exp = 0 - first = .true. - do ilevel=max_levels(ifld),min_level,-1 - - if (i8_arr_gsum_level(ioffset+ilevel) /= 0_i8) then - jlevel = 1 - -! r8 representation of higher order bits in integer - X_8(jlevel) = i8_arr_gsum_level(ioffset+ilevel) - LX(jlevel) = exponent(X_8(jlevel)) - -! Calculate remainder - IX_8 = int(X_8(jlevel),i8) - RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8) - -! Repeat using remainder - do while (RX_8 /= 0.0_r8) - jlevel = jlevel + 1 - X_8(jlevel) = RX_8 - LX(jlevel) = exponent(RX_8) - IX_8 = IX_8 + int(RX_8,i8) - RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8) - enddo +! iii) If 'same sign' is negative, then change to positive +! temporarily. + if (i8_sign < 0_i8) then + do jlevel=ilevel,max_levels(ifld) + i8_gsum_level(jlevel) = -i8_gsum_level(jlevel) + enddo + endif + +! iv) Nonoverlap property can be lost after imposition of same sign +! over components. Reintroduce this property (retaining same sign +! property). Note that carryover is never more than '1' to the +! next smaller level, so, again, no intermediate or final sums +! will exceed the maximum value representable in i8, including +! level -(extra_levels-1) as long as digits(1_i8) >= 4. + do ilevel=max_levels(ifld),min_level+1,-1 + if (abs(i8_gsum_level(ilevel)) >= & + (i8_radix**arr_max_shift)) then + + IX_8 = i8_gsum_level(ilevel)/ & + (i8_radix**arr_max_shift) + i8_gsum_level(ilevel-1) = & + i8_gsum_level(ilevel-1) + IX_8 + + IX_8 = IX_8*(i8_radix**arr_max_shift) + i8_gsum_level(ilevel) = & + i8_gsum_level(ilevel) - IX_8 + endif + enddo -! Add in contributions, smaller to larger, rescaling for each -! addition to guarantee that exponent of working summand is always -! larger than minexponent - do while (jlevel > 0) - if (first) then - curr_exp = LX(jlevel) + arr_shift - arr_gsum(ifld) = fraction(X_8(jlevel)) - first = .false. +! Step 3(d): iterate over steps 3(b) and 3(c), truncating integer +! vector to 'fit' into a floating point value, then repeating with +! remainder + first_stepd_iteration = .true. + arr_shift = arr_gmax_exp(ifld) - (min_level)*arr_max_shift + i8_digit_count = 0 + i8_begin_level = min_level + do while (i8_begin_level <= max_levels(ifld)) + +! Determine at which level the total number of integer digits equals +! or exceeds the number of digits representable in the floating point +! sum. Then determine which digit at this level is the last +! representable in the floating point sum. Note that this location +! (i8_trunc_loc) is zero-based, i.e. smallest digit is at location +! 0. Note that the exponent is a count of the number of digits for the +! first nonzero level. All subsequent levels contribute arr_max_shift +! digits. + i8_trunc_loc = 0 + i8_trunc_level = max_levels(ifld) + do ilevel=i8_begin_level,max_levels(ifld) + if (first_stepd_iteration) then +! Special logic for first time through. Subsequent iterations treat +! leading zeroes as significant. + if (i8_digit_count == 0) then + if (i8_gsum_level(ilevel) /= 0_i8) then + X_8 = i8_gsum_level(ilevel) + LX = exponent(X_8) +! Note that even if i8_gsum_level(ilevel) is truncated when assigned +! to X_8, the exponent LX will still capture the original number of +! digits. + else + LX = 0 + endif else - corr_exp = curr_exp - (LX(jlevel) + arr_shift) - arr_gsum(ifld) = fraction(X_8(jlevel)) & - + scale(arr_gsum(ifld),corr_exp) - curr_exp = LX(jlevel) + arr_shift + LX = arr_max_shift endif - jlevel = jlevel - 1 - enddo + else +! If i8_digit_count /= 0 during the first iteration +! (ilevel == i8_begin_level), then there is a remainder left at the +! previous i8_trunc_level and LX should be set to zero for this +! iteration. + if ((ilevel == i8_begin_level) .and. (i8_digit_count /= 0)) then + LX = 0 + else + LX = arr_max_shift + endif + endif + if (i8_digit_count + LX >= digits(1.0_r8)) then + i8_trunc_level = ilevel + i8_trunc_loc = (i8_digit_count + LX) - digits(1.0_r8) + exit + else + i8_digit_count = i8_digit_count + LX + endif + enddo + first_stepd_iteration = .false. + +! Truncate at i8_trunc_loc as needed and determine what the remainder +! is. + if (i8_trunc_loc == 0) then +! No truncation is necessary, and remainder is just the components +! for the remaining levels + i8_trunc_level_rem = 0 + else +! Shift right to identify the digits to be preserved and truncate +! there + IX_8 = i8_gsum_level(i8_trunc_level)/ & + (i8_radix**i8_trunc_loc) +! Shift left to put digits in the correct location (right fill with +! zeroes) + IX_8 = IX_8*(i8_radix**i8_trunc_loc) +! Calculate local remainder + i8_trunc_level_rem = (i8_gsum_level(i8_trunc_level) - IX_8) +! Update level with the truncated value + i8_gsum_level(i8_trunc_level) = IX_8 + endif + +! Calculate floating point value corresponding to modified integer +! vector. Note that, by construction, i8 integer value will fit into +! r8 floating point value, so do not need to test for this. + svlevel = svlevel + 1 + summand_vector(svlevel) = 0.0_r8 + do ilevel=i8_begin_level,i8_trunc_level + if (i8_gsum_level(ilevel) /= 0_i8) then + +! Convert integer to floating point representation + X_8 = i8_gsum_level(ilevel) + LX = exponent(X_8) + +! Add to vector of floating point summands, scaling first if exponent +! is too small to apply directly + curr_exp = LX + arr_shift + if (curr_exp >= MINEXPONENT(1.0_r8)) then + summand_vector(svlevel) = & + summand_vector(svlevel) + set_exponent(X_8,curr_exp) + else + RX_8 = set_exponent(X_8, & + curr_exp-MINEXPONENT(1.0_r8)) + summand_vector(svlevel) = & + summand_vector(svlevel) + scale(RX_8,MINEXPONENT(1.0_r8)) + endif + + endif + +! Note that same arr_shift should be used for next 'step 3(d)' +! iteration if i8_trunc_loc > 0. + if ((ilevel < i8_trunc_level) .or. (i8_trunc_loc == 0)) then + arr_shift = arr_shift - arr_max_shift + endif + + enddo + if (i8_trunc_loc == 0) then + i8_digit_count = 0 + i8_begin_level = i8_trunc_level + 1 + else + i8_digit_count = i8_trunc_loc + i8_begin_level = i8_trunc_level + i8_gsum_level(i8_trunc_level) = i8_trunc_level_rem endif + + enddo - arr_shift = arr_shift + arr_max_shift +! Step 3(e): sum vector of floating point values, smallest to largest + arr_gsum(ifld) = 0.0_r8 + do jlevel=svlevel,1,-1 + arr_gsum(ifld) = arr_gsum(ifld) + summand_vector(jlevel) enddo -! Apply final exponent correction, scaling first if exponent is too small -! to apply directly - corr_exp = curr_exp + exponent(arr_gsum(ifld)) - if (corr_exp >= MINEXPONENT(1.0_r8)) then - arr_gsum(ifld) = set_exponent(arr_gsum(ifld),corr_exp) - else - RX_8 = set_exponent(arr_gsum(ifld), & - corr_exp-MINEXPONENT(1.0_r8)) - arr_gsum(ifld) = scale(RX_8,MINEXPONENT(1.0_r8)) - endif +! Step 3(f): restore the sign + arr_gsum(ifld) = i8_sign*arr_gsum(ifld) -! If validate is .true. and some precision lost, test whether 'too much' -! was lost, due to too loose an upper bound, too stringent a limit on number -! of levels of expansion, cancellation, .... Calculated by comparing lower -! bound on number of sigificant digits with number of digits in 1.0_r8 . +! If validate is .true. and some precision lost, test whether 'too +! much' was lost, due to too loose an upper bound, too stringent a +! limit on number of levels of expansion, cancellation, ... +! Calculated by comparing lower bound on number of significant digits +! with number of digits in 1.0_r8 . if (validate) then if (i8_arr_gsum_level(ioffset-voffset+2) /= 0_i8) then -! Find first nonzero level and use exponent for this level, then assume all -! subsequent levels contribute arr_max_shift digits. - sum_digits = 0 +! Find first nonzero level and use exponent for this level, then +! assume all subsequent levels contribute arr_max_shift digits. + i8_digit_count = 0 do ilevel=min_level,max_levels(ifld) - if (sum_digits == 0) then + if (i8_digit_count == 0) then if (i8_arr_gsum_level(ioffset+ilevel) /= 0_i8) then - X_8(1) = i8_arr_gsum_level(ioffset+ilevel) - LX(1) = exponent(X_8(1)) - sum_digits = LX(1) + X_8 = i8_arr_gsum_level(ioffset+ilevel) + LX = exponent(X_8) + i8_digit_count = LX endif else - sum_digits = sum_digits + arr_max_shift + i8_digit_count = i8_digit_count + arr_max_shift endif enddo - if (sum_digits < digits(1.0_r8)) then + if (i8_digit_count < digits(1.0_r8)) then recompute = .true. endif endif @@ -1635,6 +1829,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & endif enddo + call t_stopf('repro_sum_finalsum') end subroutine shr_reprosum_int @@ -1723,7 +1918,6 @@ logical function shr_reprosum_tolExceeded (name, nflds, master, & shr_reprosum_tolExceeded = .true. endif - end function shr_reprosum_tolExceeded ! @@ -1848,7 +2042,6 @@ subroutine DDPDD (dda, ddb, len, itype) ddb(i) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 ) enddo - end subroutine DDPDD ! !------------------------------------------------------------------------