diff --git a/cime_config/config_grids.xml b/cime_config/config_grids.xml
index add53e750c94..b3d1cf9f7aae 100755
--- a/cime_config/config_grids.xml
+++ b/cime_config/config_grids.xml
@@ -2936,7 +2936,7 @@
7268
1
- $DIN_LOC_ROOT/share/domains/domain.ocn.oQU240wLI.160929.nc
+ $DIN_LOC_ROOT/share/domains/domain.ocn.oQU240wLI_mask.160929.nc
oQU240wLI is an MPAS ocean mesh with quasi-uniform 240 km grid cells, nominally 2 degree resolution. Additionally, it has ocean under landice cavities.:
diff --git a/cime_config/testmods_dirs/config_pes_tests.xml b/cime_config/testmods_dirs/config_pes_tests.xml
index 590c0e6846d2..28c71b7b5259 100644
--- a/cime_config/testmods_dirs/config_pes_tests.xml
+++ b/cime_config/testmods_dirs/config_pes_tests.xml
@@ -28,6 +28,19 @@
+
+
+ tests+anvil: default, 4 nodes x MAX_MPITASKS_PER_NODE mpi x 1 omp @ root 0
+
+ -4
+ -4
+ -4
+ -4
+ -4
+ -4
+
+
+
@@ -57,7 +70,26 @@
+
+
+ test+anvil: any compset on ne4 grid -- 4 nodes pure-MPI
+
+ 108
+ 108
+ 108
+ 36
+ 36
+ 36
+
+
+ 108
+ 108
+ 108
+
+
+
+
@@ -75,6 +107,62 @@
+
+
+ tests+anvil: pelayout for tri-grid BGC tests with EAM+DOCN
+
+ -8
+ -8
+ -8
+ -8
+ -8
+ -8
+
+
+
+ --compset WCYCL* --res ne30pg2_EC30to60E2r2 on 16 nodes pure-MPI, ~2.7 sypd
+
+ 396
+ 396
+ 396
+ 396
+ 180
+ 396
+
+
+ 396
+
+
+
+ anvil: --compset BGC* --res ne30pg2_r05_EC30to60E2r2 on 30 nodes pure-MPI, ~3 sypd
+
+ 675
+ 684
+ 684
+ 684
+ 396
+ 684
+
+
+ 684
+
+
+
+
+
+
+
+ "anvil, GPMPAS-JRA compset, 6 nodes"
+
+ -6
+ -6
+ -6
+ -6
+ -6
+ -6
+
+
+
@@ -93,5 +181,68 @@
+
+
+ tests+anvil: --compset WCYCL1850 --res northamericax4v1pg2_WC14to60E2r3 on 20 nodes pure-MPI, ~0.8 sypd
+
+ 468
+ 468
+ 468
+ 468
+ 252
+ 468
+
+
+ 468
+
+
+
+
+
+
+
+
+ tests+anvil: 10x36x1--res conusx4v1_r05_oECv3 --compset F2010
+
+ -10
+ -10
+ -10
+ -10
+ -10
+ -10
+
+
+
+
+
+
+
+ tests+anvil 8x36x1 for F-cases on anvil, to fix testing issues that default to 144 pes on 4 nodes
+
+ -8
+ -8
+ -8
+ -8
+ -8
+ -8
+
+
+
+
+
+
+
+
+ tests+anvil: elm: anvil PEs for grid l%360x720cru
+
+ -8
+ -8
+ -8
+ -8
+ -8
+ -8
+
+
+
diff --git a/components/eam/src/physics/crm/samxx/samxx_const.h b/components/eam/src/physics/crm/samxx/samxx_const.h
index eda787384e7d..7eb82ca2166a 100644
--- a/components/eam/src/physics/crm/samxx/samxx_const.h
+++ b/components/eam/src/physics/crm/samxx/samxx_const.h
@@ -209,8 +209,6 @@ int constexpr index_cloud_ice = 0;
real constexpr rhor = 1000.; // Density of water, kg/m3
real constexpr rhos = 100.; // Density of snow, kg/m3
real constexpr rhog = 400.; // Density of graupel, kg/m3
-real constexpr tbgmin = 253.16; // Minimum temperature for cloud water., K
-real constexpr tbgmax = 273.16; // Maximum temperature for cloud ice, K
real constexpr tprmin = 268.16; // Minimum temperature for rain, K
real constexpr tprmax = 283.16; // Maximum temperature for snow+graupel, K
real constexpr tgrmin = 223.16; // Minimum temperature for snow, K
@@ -231,11 +229,38 @@ real constexpr nzeros = 3.e6; // Intersept coeff. for snow
real constexpr nzerog = 4.e6; // Intersept coeff. for graupel
real constexpr qp_threshold = 1.e-8; // minimal rain/snow water content
-real constexpr qcw0 = 1.e-3;
-real constexpr qci0 = 1.e-4;
real constexpr alphaelq = 1.e-3;
real constexpr betaelq = 1.e-3;
+//------------------------------------------------------------------------------
+// MMF climate tuning parameters
+//------------------------------------------------------------------------------
+#ifdef MMF_TMN
+real constexpr tbgmin = MMF_TMN;
+#else
+real constexpr tbgmin = 253.16; // Min temperature for cloud liq, K
+#endif
+
+#ifdef MMF_TMX
+real constexpr tbgmax = MMF_TMX;
+#else
+real constexpr tbgmax = 273.16; // Max temperature for cloud ice, K
+#endif
+
+#ifdef MMF_QCW
+real constexpr qcw0 = MMF_QCW;
+#else
+real constexpr qcw0 = 1.e-3; // liq autoconversion threshold
+#endif
+
+#ifdef MMF_QCI
+real constexpr qci0 = MMF_QCI;
+#else
+real constexpr qci0 = 1.e-4; // ice autoconversion threshold
+#endif
+//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+
real constexpr crm_accel_coef = 1.0/( (real) nx * (real) ny );
int constexpr plev = PLEV;
diff --git a/components/elm/src/data_types/ColumnDataType.F90 b/components/elm/src/data_types/ColumnDataType.F90
index 05cb214d3731..01971982d124 100644
--- a/components/elm/src/data_types/ColumnDataType.F90
+++ b/components/elm/src/data_types/ColumnDataType.F90
@@ -1616,8 +1616,8 @@ subroutine col_ws_init(this, begc, endc, h2osno_input, snow_depth_input, watsat_
if (j > nlevbed) then
this%h2osoi_vol(c,j) = 0.0_r8
else
- if (use_fates_planthydro .or. use_hydrstress) then
- this%h2osoi_vol(c,j) = 0.70_r8*watsat_input(c,j) !0.15_r8 to avoid very dry conditions that cause errors in FATES HYDRO
+ if (use_fates .or. use_hydrstress) then
+ this%h2osoi_vol(c,j) = 0.70_r8*watsat_input(c,j) !0.15_r8 to avoid very dry conditions that cause errors in FATES
else
this%h2osoi_vol(c,j) = 0.15_r8
endif
diff --git a/components/mpas-ocean/bld/build-namelist b/components/mpas-ocean/bld/build-namelist
index 76da422dc997..79b527e4f402 100755
--- a/components/mpas-ocean/bld/build-namelist
+++ b/components/mpas-ocean/bld/build-namelist
@@ -782,7 +782,7 @@ if ($OCN_ISMF eq 'coupled') {
} elsif ($OCN_ISMF eq 'internal') {
add_default($nl, 'config_land_ice_flux_mode', 'val'=>"standalone");
} else {
- add_default($nl, 'config_land_ice_flux_mode', 'val'=>"pressure_only");
+ add_default($nl, 'config_land_ice_flux_mode');
}
add_default($nl, 'config_land_ice_flux_formulation');
add_default($nl, 'config_land_ice_flux_useHollandJenkinsAdvDiff');
diff --git a/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml b/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml
index e2bacaff9779..21ae2bafc47c 100644
--- a/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml
+++ b/components/mpas-ocean/bld/namelist_files/namelist_defaults_mpaso.xml
@@ -333,12 +333,12 @@
'off'
-'standalone'
-'standalone'
-'standalone'
-'standalone'
-'standalone'
-'standalone'
+'pressure_only'
+'pressure_only'
+'pressure_only'
+'pressure_only'
+'pressure_only'
+'pressure_only'
'Jenkins'
.false.
10.0
diff --git a/components/mpas-ocean/cime_config/buildnml b/components/mpas-ocean/cime_config/buildnml
index dda7531576ab..0061f4ff75e9 100755
--- a/components/mpas-ocean/cime_config/buildnml
+++ b/components/mpas-ocean/cime_config/buildnml
@@ -90,10 +90,10 @@ def buildnml(case, caseroot, compname):
decomp_prefix = 'mpas-o.graph.info.'
restoring_file = 'sss.monthlyClimatology.PHC2_salx_040803.oEC60to30v3wLI.nc'
analysis_mask_file = 'masks_SingleRegionAtlanticWTransportTransects_EC60to30v3wLI_171116.nc'
- ic_date = '171031'
+ ic_date = '230220'
ic_prefix = 'oEC60to30v3wLI60lev'
if ocn_ic_mode == 'spunup':
- ic_date = '171116'
+ ic_date = '230220'
ic_prefix = 'oEC60to30v3wLI60lev.restart_theta_year26'
elif ocn_grid == 'ECwISC30to60E1r2':
@@ -101,7 +101,7 @@ def buildnml(case, caseroot, compname):
decomp_prefix = 'mpas-o.graph.info.'
restoring_file = 'sss.PHC2_monthlyClimatology.ECwISC30to60E1r02.200408.nc'
analysis_mask_file = 'ECwISC30to60E1r02_transportTransects.nc'
- ic_date = '200408'
+ ic_date = '230220'
ic_prefix = 'ocean.ECwISC30to60E1r2'
if ocn_ic_mode == 'spunup':
logger.warning("WARNING: The specified compset is requesting ocean ICs spunup from a G-case")
@@ -128,7 +128,7 @@ def buildnml(case, caseroot, compname):
elif ocn_grid == 'oQU240wLI':
decomp_date = '160929'
decomp_prefix = 'mpas-o.graph.info.'
- ic_date = '160929'
+ ic_date = '230220'
ic_prefix = 'ocean.QU.240wLI'
if ocn_ic_mode == 'spunup':
logger.warning("WARNING: The specified compset is requesting ocean ICs spunup from a G-case")
@@ -159,7 +159,7 @@ def buildnml(case, caseroot, compname):
decomp_prefix = 'mpas-o.graph.info.'
restoring_file = 'sss.monthlyClimatology.PHC2_salx.2004_08_03.180503.nc'
analysis_mask_file = 'masks_SingleRegionAtlanticWTransportTransects_RRS30to10v3wLI.nc'
- ic_date = '171109'
+ ic_date = '230220'
ic_prefix = 'oRRS30to10v3wLI'
if ocn_ic_mode == 'spunup':
logger.warning("WARNING: The specified compset is requesting ocean ICs spunup from a G-case")
@@ -261,10 +261,10 @@ def buildnml(case, caseroot, compname):
decomp_prefix = 'mpas-o.graph.info.'
restoring_file = 'sss.PHC2_monthlyClimatology.SOwISC12to60E2r4_nomask.210120.nc'
analysis_mask_file = 'SOwISC12to60E2r4_mocBasinsAndTransects20210623.nc'
- ic_date = '210107'
+ ic_date = '230220'
ic_prefix = 'ocean.SOwISC12to60E2r4'
if ocn_ic_mode == 'spunup':
- ic_date = '210203'
+ ic_date = '230220'
ic_prefix = 'mpaso.SOwISC12to60E2r4.rstFromG-anvil'
elif ocn_grid == 'ECwISC30to60E2r1':
@@ -272,10 +272,10 @@ def buildnml(case, caseroot, compname):
decomp_prefix = 'mpas-o.graph.info.'
restoring_file = 'sss.PHC2_monthlyClimatology.ECwISC30to60E2r1_nomask.201221.nc'
analysis_mask_file = 'ECwISC30to60E2r1_mocBasinsAndTransects20210623.nc'
- ic_date = '210413'
+ ic_date = '230220'
ic_prefix = 'ocean.ECwISC30to60E2r1'
if ocn_ic_mode == 'spunup':
- ic_date = '210414'
+ ic_date = '230220'
ic_prefix = 'mpaso.ECwISC30to60E2r1.rstFromG-anvil'
diff --git a/components/mpas-ocean/driver/ocn_comp_mct.F b/components/mpas-ocean/driver/ocn_comp_mct.F
index 9c47dc25715b..71fa9a14d061 100644
--- a/components/mpas-ocean/driver/ocn_comp_mct.F
+++ b/components/mpas-ocean/driver/ocn_comp_mct.F
@@ -790,10 +790,9 @@ end subroutine xml_stream_get_attributes
call t_stopf ('mpaso_mct_init')
call mpas_pool_get_config(domain % configs, 'config_land_ice_flux_mode', config_land_ice_flux_mode)
- if ( trim(config_land_ice_flux_mode) .eq. 'pressure_only' ) then
- call seq_infodata_PutData( infodata, ocn_prognostic=.true., ocnrof_prognostic=.true., &
- ocn_c2_glcshelf=.false.)
- else if ( trim(config_land_ice_flux_mode) .eq. 'standalone' ) then
+ if ( trim(config_land_ice_flux_mode) == 'off' .or. &
+ trim(config_land_ice_flux_mode) == 'pressure_only' .or. &
+ trim(config_land_ice_flux_mode) == 'standalone' ) then
call seq_infodata_PutData( infodata, ocn_prognostic=.true., ocnrof_prognostic=.true., &
ocn_c2_glcshelf=.false.)
else if ( trim(config_land_ice_flux_mode) .eq. 'coupled' ) then
@@ -2784,7 +2783,8 @@ subroutine ocn_export_mct(o2x_o, errorCode) !{{{
!JW o2x_o % rAttr(index_o2x_So_htv, n) = landIceHeatTransferVelocity(i)
!JW o2x_o % rAttr(index_o2x_So_stv, n) = landIceSaltTransferVelocity(i)
- if ( trim(config_land_ice_flux_mode) .ne. 'pressure_only' ) then
+ if ( trim(config_land_ice_flux_mode) .eq. 'standalone' .or. &
+ trim(config_land_ice_flux_mode) .eq. 'coupled' ) then
o2x_o % rAttr(index_o2x_So_blt, n) = landIceBoundaryLayerTracers(indexBLT,i)
o2x_o % rAttr(index_o2x_So_bls, n) = landIceBoundaryLayerTracers(indexBLS,i)
o2x_o % rAttr(index_o2x_So_htv, n) = landIceTracerTransferVelocities(indexHeatTrans,i)
diff --git a/components/mpas-ocean/src/Registry.xml b/components/mpas-ocean/src/Registry.xml
index ec873e494282..c95693a04bb0 100644
--- a/components/mpas-ocean/src/Registry.xml
+++ b/components/mpas-ocean/src/Registry.xml
@@ -1129,14 +1129,26 @@
possible_values=".true. or .false."
/>
-
+
+
+
@@ -1640,6 +1652,8 @@
+
+
+
+
+
+
@@ -1990,6 +2008,7 @@
immutable="true">
+
@@ -2106,6 +2125,8 @@
+
+
@@ -2742,8 +2763,8 @@
missing_value="FILLVAL" missing_value_mask="edgeMask"
packages="forwardMode;analysisMode"
/>
-
+
+
+
= 0.0_RKIND)) cycle
+ if ((landIceFloatingMask(iCell) == 0) .or. (landIceFreshwaterFlux(iCell) >= 0.0_RKIND)) cycle
landIceInterfaceTracers(indexIS,iCell) = freezeInterfaceSalinity(iCell)
landIceInterfaceTracers(indexIT,iCell) = freezeInterfaceTemperature(iCell)
@@ -593,7 +593,7 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, &
end do
else ! not using Holland and Jenkins advection/diffusion
call compute_melt_fluxes( &
- landIceMask, &
+ landIceFloatingMask, &
landIceBoundaryLayerTracers(indexBLT,:), &
landIceBoundaryLayerTracers(indexBLS,:), &
landIceTracerTransferVelocities(indexHeatTrans,:), &
@@ -613,13 +613,13 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, &
end if
end if
- ! modulate the fluxes by the landIceFraction
+ ! modulate the fluxes by the landIceFloatingFraction
do iCell = 1, nCells
- if (landIceMask(iCell) == 0) cycle
+ if (landIceFloatingMask(iCell) == 0) cycle
- landIceFreshwaterFlux(iCell) = landIceFraction(iCell)*landIceFreshwaterFlux(iCell)
- landIceHeatFlux(iCell) = landIceFraction(iCell)*landIceHeatFlux(iCell)
- heatFluxToLandIce(iCell) = landIceFraction(iCell)*heatFluxToLandIce(iCell)
+ landIceFreshwaterFlux(iCell) = landIceFloatingFraction(iCell)*landIceFreshwaterFlux(iCell)
+ landIceHeatFlux(iCell) = landIceFloatingFraction(iCell)*landIceHeatFlux(iCell)
+ heatFluxToLandIce(iCell) = landIceFloatingFraction(iCell)*heatFluxToLandIce(iCell)
end do
endif ! jenkinsOn or hollandJenkinsOn
diff --git a/components/mpas-ocean/src/shared/mpas_ocn_tendency.F b/components/mpas-ocean/src/shared/mpas_ocn_tendency.F
index 869f771f6fca..47f016faf2bd 100644
--- a/components/mpas-ocean/src/shared/mpas_ocn_tendency.F
+++ b/components/mpas-ocean/src/shared/mpas_ocn_tendency.F
@@ -351,7 +351,7 @@ subroutine ocn_tend_vel(domain, tendPool, statePool, forcingPool, &
!$acc density, normRelVortEdge, montgomeryPotential, pressure, &
!$acc thermExpCoeff, salineContractCoeff, tangentialVelocity, &
!$acc layerThickEdgeFlux, kineticEnergyCell, sfcFlxAttCoeff, potentialDensity, &
- !$acc vertAleTransportTop, vertViscTopOfEdge, wettingVelocity)
+ !$acc vertAleTransportTop, vertViscTopOfEdge, wettingVelocityFactor)
!$acc enter data &
!$acc copyin(tendVel, sfcStress, sfcStressMag, &
!$acc ssh, normalVelocity, &
@@ -452,15 +452,14 @@ subroutine ocn_tend_vel(domain, tendPool, statePool, forcingPool, &
#ifdef MPAS_OPENACC
!$acc parallel loop collapse(2) &
- !$acc present(tendVel, wettingVelocity)
+ !$acc present(tendVel, wettingVelocityFactor)
#else
!$omp parallel
!$omp do schedule(runtime) private(k)
#endif
do iEdge = 1, nEdgesAll
do k=1,nVertLevels
- tendVel(k,iEdge) = tendVel(k,iEdge)* &
- (1.0_RKIND - wettingVelocity(k,iEdge))
+ tendVel(k,iEdge) = tendVel(k,iEdge) * (1.0_RKIND - wettingVelocityFactor(k,iEdge))
end do
end do
#ifndef MPAS_OPENACC
diff --git a/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F b/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F
index 6b1bef26feeb..8e64618a27ad 100644
--- a/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F
+++ b/components/mpas-ocean/src/shared/mpas_ocn_time_varying_forcing.F
@@ -442,6 +442,10 @@ subroutine init_land_ice_forcing(domain)!{{{
"landIceFraction", "land_ice_forcing", "timeVaryingForcing", "landIceFractionForcing", "linear", &
config_time_varying_land_ice_forcing_reference_time, config_time_varying_land_ice_forcing_interval)
+ call mpas_forcing_init_field(domain % streamManager, forcingGroupHead, "ocn_land_ice_forcing", &
+ "landIceFloatingFraction", "land_ice_forcing", "timeVaryingForcing", "landIceFloatingFractionForcing", "linear", &
+ config_time_varying_land_ice_forcing_reference_time, config_time_varying_land_ice_forcing_interval)
+
call mpas_forcing_init_field(domain % streamManager, forcingGroupHead, "ocn_land_ice_forcing", &
"landIceDraft", "land_ice_forcing", "timeVaryingForcing", "landIceDraftForcing", "linear", &
config_time_varying_land_ice_forcing_reference_time, config_time_varying_land_ice_forcing_interval)
@@ -495,6 +499,8 @@ subroutine land_ice_forcing(streamManager, domain, simulationClock)!{{{
real(kind=RKIND), dimension(:), pointer :: &
landIceFractionForcing, &
landIceFraction, &
+ landIceFloatingFractionForcing, &
+ landIceFloatingFraction, &
landIcePressureForcing, &
landIcePressure, &
landIceDraftForcing, &
@@ -525,15 +531,18 @@ subroutine land_ice_forcing(streamManager, domain, simulationClock)!{{{
call MPAS_pool_get_dimension(mesh, "nCells", nCells)
call MPAS_pool_get_array(timeVaryingForcingPool, "landIceFractionForcing", landIceFractionForcing)
+ call MPAS_pool_get_array(timeVaryingForcingPool, "landIceFloatingFractionForcing", landIceFloatingFractionForcing)
call MPAS_pool_get_array(timeVaryingForcingPool, "landIcePressureForcing", landIcePressureForcing)
call MPAS_pool_get_array(timeVaryingForcingPool, "landIceDraftForcing", landIceDraftForcing)
call MPAS_pool_get_array(forcingPool, "landIceFraction", landIceFraction)
+ call MPAS_pool_get_array(forcingPool, "landIceFloatingFraction", landIceFloatingFraction)
call MPAS_pool_get_array(forcingPool, "landIcePressure", landIcePressure)
call MPAS_pool_get_array(forcingPool, "landIceDraft", landIceDraft)
do iCell = 1, nCells
landIceFraction(iCell) = landIceFractionForcing(iCell)
+ landIceFloatingFraction(iCell) = landIceFloatingFractionForcing(iCell)
landIcePressure(iCell) = landIcePressureForcing(iCell)
landIceDraft(iCell) = landIceDraftForcing(iCell)
diff --git a/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F b/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F
index 1f8fb440c287..60eb0eb184c8 100644
--- a/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F
+++ b/components/mpas-ocean/src/shared/mpas_ocn_tracer_ideal_age.F
@@ -189,7 +189,7 @@ subroutine ocn_tracer_ideal_age_init(domain,err)!{{{
if (config_use_idealAgeTracers_idealAge_forcing) then
! set mask = 0 for open ocean
- ! set mask = 1 under ice shelves
+ ! set mask = 1 under land ice
idealAgeMask = 0.0_RKIND
if (associated(landIceMask)) then
where (landIceMask /= 0) idealAgeMask(1,:) = 1.0_RKIND
diff --git a/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F b/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F
index 1d96c5c8c047..a288a6eaf829 100644
--- a/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F
+++ b/components/mpas-ocean/src/shared/mpas_ocn_wetting_drying.F
@@ -32,6 +32,7 @@ module ocn_wetting_drying
use ocn_diagnostics
use ocn_diagnostics_variables
use ocn_gm
+ use ocn_mesh
implicit none
private
@@ -108,16 +109,13 @@ subroutine ocn_wetting_drying_verify( block , minHeight, err)!{{{
!-----------------------------------------------------------------
type (mpas_pool_type), pointer :: statePool, meshPool, tendPool
- integer, dimension(:), pointer :: minLevelCell, maxLevelCell
real (kind=RKIND), dimension(:), pointer :: sshSubcycleNew
- real (kind=RKIND), dimension(:), pointer :: bottomDepth
integer, pointer :: nCellsSolve
integer :: iCell, k
integer :: debugUnit
real (kind=RKIND), dimension(:,:), pointer :: layerThicknessCur
real (kind=RKIND), dimension(:,:), pointer :: layerThicknessNew
real (kind=RKIND), dimension(:,:), pointer :: layerThicknessTend
- real (kind=RKIND), dimension(:), pointer :: lonCell, latCell
real (kind=RKIND) :: minThickness, layerThick
character (len=StrKIND) :: debugFilename
@@ -134,13 +132,8 @@ subroutine ocn_wetting_drying_verify( block , minHeight, err)!{{{
call mpas_pool_get_array(statePool, 'layerThickness', layerThicknessCur, timeLevel=1)
call mpas_pool_get_array(statePool, 'layerThickness', layerThicknessNew, timeLevel=2)
call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCellsSolve)
- call mpas_pool_get_array(meshPool, 'minLevelCell', minLevelCell)
- call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell)
call mpas_pool_get_array(tendPool, 'layerThickness', layerThicknessTend)
call mpas_pool_get_array(statePool, 'sshSubcycle', sshSubcycleNew, 2)
- call mpas_pool_get_array(meshPool, 'bottomDepth', bottomDepth)
- call mpas_pool_get_array(meshPool, 'lonCell', lonCell)
- call mpas_pool_get_array(meshPool, 'latCell', latCell)
err = 0
@@ -151,8 +144,11 @@ subroutine ocn_wetting_drying_verify( block , minHeight, err)!{{{
do iCell = 1, nCellsSolve
do k = minLevelCell(iCell), maxLevelCell(iCell)
! use ssh as a proxy too for baroclinic mode
- if (trim(config_time_integrator) == 'split_explicit' .or. trim(config_time_integrator) == 'split_implicit') then
- layerThick = min(layerThicknessNew(k, iCell), (sshSubcycleNew(iCell)+bottomDepth(iCell))/maxLevelCell(iCell))
+ ! Note: wetting-drying currently not supported for either of these time integration methods
+ if (trim(config_time_integrator) == 'split_explicit' .or. &
+ trim(config_time_integrator) == 'split_implicit') then
+ layerThick = min(layerThicknessNew(k, iCell), &
+ (sshSubcycleNew(iCell) + bottomDepth(iCell))/maxLevelCell(iCell))
else
layerThick = layerThicknessNew(k, iCell)
end if
@@ -237,72 +233,61 @@ subroutine ocn_prevent_drying_rk4(block, dt, rkSubstepWeight, config_zero_drying
type (mpas_pool_type), pointer :: tendPool
- type (mpas_pool_type), pointer :: meshPool
type (mpas_pool_type), pointer :: statePool
type (mpas_pool_type), pointer :: provisStatePool
+ real (kind=RKIND), dimension(:), pointer :: ssh
real (kind=RKIND), dimension(:, :), pointer :: layerThicknessCur
real (kind=RKIND), dimension(:, :), pointer :: layerThicknessProvis
real (kind=RKIND), dimension(:, :), pointer :: normalVelocity
- integer, dimension(:), pointer :: minLevelEdgeTop
- integer, dimension(:), pointer :: maxLevelEdgeTop
- integer, dimension(:), pointer :: maxLevelEdgeBot
- integer, pointer :: nEdges
integer :: iEdge, k
err = 0
call mpas_pool_get_subpool(block % structs, 'tend', tendPool)
- call mpas_pool_get_subpool(block % structs, 'mesh', meshPool)
call mpas_pool_get_subpool(block % structs, 'state', statePool)
call mpas_pool_get_subpool(block % structs, 'provis_state', provisStatePool)
+ call mpas_pool_get_array(statePool, 'ssh', ssh, 1)
call mpas_pool_get_array(statePool, 'normalVelocity', normalVelocity, 1)
! use thickness at n because constraint is h_n + dt*T_h > h_min
call mpas_pool_get_array(statePool, 'layerThickness', layerThicknessCur, 1)
call mpas_pool_get_array(provisStatePool, 'layerThickness', layerThicknessProvis, 1)
- call mpas_pool_get_dimension(block % dimensions, 'nEdges', nEdges)
- call mpas_pool_get_array(meshPool, 'minLevelEdgeTop', minLevelEdgeTop)
- call mpas_pool_get_array(meshPool, 'maxLevelEdgeTop', maxLevelEdgeTop)
- call mpas_pool_get_array(meshPool, 'maxLevelEdgeBot', maxLevelEdgeBot)
!$omp parallel
!$omp do schedule(runtime)
- do iEdge = 1, nEdges
- wettingVelocity(:, iEdge) = 0.0_RKIND
+ do iEdge = 1, nEdgesAll
+ wettingVelocityFactor(:, iEdge) = 0.0_RKIND
end do
!$omp end do
!$omp end parallel
- ! ensure cells stay wet by selectively damping cells with a damping tendency to make sure tendency doesn't dry cells
+ ! ensure cells stay wet by selectively damping cells with a damping tendency to make
+ ! sure tendency doesn't dry cells
- call ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, &
- normalTransportVelocity, rkSubstepWeight, wettingVelocity, err)
+ call ocn_wetting_drying_wettingVelocity(layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, &
+ normalTransportVelocity, rkSubstepWeight, wettingVelocityFactor, err)
- ! prevent drying from happening with selective wettingVelocity
- !$omp parallel
- !$omp do schedule(runtime) private(k)
- do iEdge = 1, nEdges
- do k = minLevelEdgeTop(iEdge), maxLevelEdgeBot(iEdge)
- if (abs(normalTransportVelocity(k,iEdge) + wettingVelocity(k,iEdge)) < eps) then
- ! prevent spurious flux for close to zero values
- normalTransportVelocity(k, iEdge) = 0.0_RKIND
- normalVelocity(k, iEdge) = 0.0_RKIND
- else if (abs(normalTransportVelocity(k,iEdge) + wettingVelocity(k,iEdge)) <= abs(normalTransportVelocity(k,iEdge))) then
- normalTransportVelocity(k, iEdge) = normalTransportVelocity(k, iEdge) + wettingVelocity(k, iEdge)
- normalVelocity(k, iEdge) = normalVelocity(k, iEdge) + wettingVelocity(k, iEdge)
- end if
-
- if (abs(wettingVelocity(k, iEdge)) > 0.0_RKIND .and. config_zero_drying_velocity) then
- normalTransportVelocity(k, iEdge) = 0.0_RKIND
- normalVelocity(k, iEdge) = 0.0_RKIND
- end if
+ ! prevent drying from happening with selective wettingVelocityFactor
+ if (config_zero_drying_velocity) then
+ !$omp parallel
+ !$omp do schedule(runtime) private(k)
+ do iEdge = 1, nEdgesAll
+ do k = minLevelEdgeTop(iEdge), maxLevelEdgeBot(iEdge)
+
+ if (abs(wettingVelocityFactor(k, iEdge)) > 0.0_RKIND) then
+ normalTransportVelocity(k, iEdge) = (1.0_RKIND - &
+ wettingVelocityFactor(k, iEdge)) * normalTransportVelocity(k, iEdge)
+ normalVelocity(k, iEdge) = (1.0_RKIND - &
+ wettingVelocityFactor(k, iEdge)) * normalVelocity(k, iEdge)
+ end if
+ end do
end do
- end do
- !$omp end do
- !$omp end parallel
+ !$omp end do
+ !$omp end parallel
+ end if
end subroutine ocn_prevent_drying_rk4 !}}}
@@ -319,9 +304,9 @@ end subroutine ocn_prevent_drying_rk4 !}}}
!> to prevent cells from drying.
!
!-----------------------------------------------------------------------
- subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, &
+ subroutine ocn_wetting_drying_wettingVelocity(layerThickEdgeFlux, layerThicknessCur, layerThicknessProvis, &
- normalVelocity, dt, wettingVelocity, err)!{{{
+ normalVelocity, dt, wettingVelocityFactor, err)!{{{
!-----------------------------------------------------------------
!
@@ -329,9 +314,6 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, laye
!
!-----------------------------------------------------------------
- type (mpas_pool_type), intent(in) :: &
- meshPool !< Input: horizonal mesh information
-
real (kind=RKIND), dimension(:,:), intent(in) :: &
layerThicknessCur !< Input: layer thickness at old time
@@ -354,7 +336,7 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, laye
!-----------------------------------------------------------------
real (kind=RKIND), dimension(:,:), intent(inout) :: &
- wettingVelocity !< Input/Output: velocity wettingVelocityency
+ wettingVelocityFactor !< Input/Output: velocity wettingVelocityFactor
!-----------------------------------------------------------------
!
@@ -370,74 +352,71 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThickEdgeFlux, laye
!
!-----------------------------------------------------------------
- integer :: iEdge, iCell, k, i
- integer, pointer :: nVertLevels, nCells
- integer, dimension(:), pointer :: nEdgesOnCell
- integer, dimension(:), pointer :: minLevelCell, maxLevelCell
- integer, dimension(:), pointer :: minLevelEdgeTop, maxLevelEdgeBot
- integer, dimension(:,:), pointer :: edgesOnCell
- integer, dimension(:,:), pointer :: edgeSignOnCell
+ integer :: cell1, cell2, iEdge, iCell, k, i
- real (kind=RKIND) :: divOutFlux
- real (kind=RKIND) :: invAreaCell
+ real (kind=RKIND) :: divFlux, divOutFlux
real (kind=RKIND) :: layerThickness
- real (kind=RKIND), dimension(:), pointer :: dvEdge
- real (kind=RKIND), dimension(:), pointer :: areaCell
+ real (kind=RKIND) :: hCrit, hRampMin, hEdgeTotal
+
+ character (len=100) :: log_string
err = 0
- call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell)
- call mpas_pool_get_array(meshPool, 'areaCell', areaCell)
- call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell)
- call mpas_pool_get_array(meshPool, 'edgeSignOnCell', edgeSignOnCell)
- call mpas_pool_get_array(meshPool, 'minLevelCell', minLevelCell)
- call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell)
- call mpas_pool_get_array(meshPool, 'minLevelEdgeTop', minLevelEdgeTop)
- call mpas_pool_get_array(meshPool, 'maxLevelEdgeBot', maxLevelEdgeBot)
- call mpas_pool_get_array(meshPool, 'dvEdge', dvEdge)
- call mpas_pool_get_dimension(meshPool, 'nCells', nCells)
- call mpas_pool_get_dimension(meshPool, 'nVertLevels', nVertLevels)
+ hCrit = config_drying_min_cell_height
+
+ if (.not. config_zero_drying_velocity) return
! need predicted transport velocity to limit drying flux
!$omp parallel
- !$omp do schedule(runtime) private(invAreaCell, i, iEdge, k, divOutFlux, layerThickness)
- do iCell = 1, nCells
- invAreaCell = 1.0_RKIND / areaCell(iCell)
- ! can switch with maxLevelEdgeBot(iEdge)
+ !$omp do schedule(runtime) private(i, iEdge, k, divOutFlux, layerThickness)
+ do iCell = 1, nCellsAll
do k = minLevelCell(iCell), maxLevelCell(iCell)
divOutFlux = 0.0_RKIND
layerThickness = min(layerThicknessProvis(k, iCell), layerThicknessCur(k, iCell))
do i = 1, nEdgesOnCell(iCell)
iEdge = edgesOnCell(i, iCell)
- if (k <= maxLevelEdgeBot(iEdge) .or. k >= minLevelEdgeTop(iEdge)) then
+ if (k <= maxLevelEdgeTop(iEdge) .and. k >= minLevelEdgeBot(iEdge)) then
! only consider divergence flux leaving the cell
if ( normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) < 0.0_RKIND ) then
- divOutFlux = divOutFlux + normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) &
- * layerThickEdgeFlux(k, iEdge) * dvEdge(iEdge) * invAreaCell
+ divOutFlux = divOutFlux + &
+ normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) * &
+ layerThickEdgeFlux(k, iEdge) * dvEdge(iEdge) * &
+ invAreaCell(iCell)
end if
end if
end do
+ layerThickness = layerThickness + dt * divOutFlux
- ! if layer thickness is too small, limit divergence flux outwards with opposite velocity
- if ((layerThickness + dt*divOutFlux ) <= (config_drying_min_cell_height + config_drying_safety_height)) then
- ! limit divOutFlux out of cell to keep it wet
- divOutFlux = abs(divOutFlux)
- divOutFlux = (layerThickness - (config_drying_min_cell_height + eps)) / (dt*divOutFlux + eps)
-
+ ! if layer thickness is too small, limit divergence flux outwards with
+ ! opposite velocity
+ if (layerThickness <= &
+ hCrit + config_drying_safety_height) then
+ do i = 1, nEdgesOnCell(iCell)
+ iEdge = edgesOnCell(i, iCell)
+ if (k <= maxLevelEdgeBot(iEdge) .and. k >= minLevelEdgeTop(iEdge)) then
+ if ( normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) <= 0.0_RKIND ) then
+ wettingVelocityFactor(k, iEdge) = 1.0_RKIND
+ end if
+ end if
+ end do
+ elseif (config_zero_drying_velocity_ramp .and. &
+ (layerThickness > &
+ hCrit + config_drying_safety_height) .and. &
+ (layerThickness <= config_zero_drying_velocity_ramp_hmax)) then
+
+ hRampMin = config_zero_drying_velocity_ramp_hmin
+ ! Following O'Dea et al. (2021), if total upwinded wct is less than
+ ! 2*critical thickness, apply damping at each edge
do i = 1, nEdgesOnCell(iCell)
iEdge = edgesOnCell(i, iCell)
- if (k <= maxLevelEdgeBot(iEdge) .or. k >= minLevelEdgeTop(iEdge)) then
+ if (k <= maxLevelEdgeBot(iEdge) .and. k >= minLevelEdgeTop(iEdge)) then
if ( normalVelocity(k, iEdge) * edgeSignOnCell(i, iCell) <= 0.0_RKIND ) then
- ! each outgoing velocity is penalized (but not the incoming, wetting velocities)
- ! square the fractional term to make values near zero go to zero much quicker (to prevent threshold from being hit)
- wettingVelocity(k, iEdge) = - (min(max(0.0_RKIND, 1.0_RKIND - (divOutFlux*divOutFlux)), 1.0_RKIND)) * normalVelocity(k, iEdge)
- ! just go with simple boolean approach for zero wetting velocity for debugging purposes
- if (config_zero_drying_velocity) then
- wettingVelocity(k, iEdge) = 1.0_RKIND
- end if
+ wettingVelocityFactor(k, iEdge) = 1.0_RKIND - &
+ tanh(50.0_RKIND * (layerThickness - hRampMin)/hRampMin)
end if
end if
end do
+
end if
end do
diff --git a/share/util/shr_reprosum_mod.F90 b/share/util/shr_reprosum_mod.F90
index cde01474fdea..06631657cfee 100644
--- a/share/util/shr_reprosum_mod.F90
+++ b/share/util/shr_reprosum_mod.F90
@@ -282,10 +282,10 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, &
! models use the same base, e.g.
! radix(1.0_r8) == radix(1_i8)
! for real*8 and integer*8. If not, then the alternative algorithm
-! mentioned above and described below is used instead. The integer
-! representation must also have enough digits for the potential growth
-! of the sum for each level, so could conceivably be too small for a
-! large number of summands.
+! DDPDD mentioned above and described below is used instead. The
+! integer representation must also have enough digits for the
+! potential growth of the sum for each level, so could conceivably be
+! too small for a large number of summands.
!
! Upper bounds on the total number of summands and on all intermediate
! sums are calculated as
@@ -322,26 +322,38 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, &
! conversion of the vector of integer representation to a floating
! point value may be subject to rounding errors. Before the
! conversion, the vector of integers is adjusted so that all elements
-! have the same sign, eliminating the possibility of catastrophic
-! cancellation. These are integer operations, so no accuracy is
-! lost. Next, each element of the vector of integers is converted to a
-! floating point value and added into the intermediate sum, in
-! smallest to largest order (in absolute value). Any rounding error is
-! in the least significant digit for each intermediate sum, and the
-! exponents for each summand are monotonically increasing, so the
-! rounding errors do not accumulate. Thus, the relative difference
-! between the 'exact' value and that calculated by this algorithm will
-! be approximately machine epsilon for 64-bit real values, and any
-! difference will be restricted to the 16th significant digit in a
-! decimal representation. This does mean that the values calculated
-! with the integer*8 internal representation could differ from those
-! calculated using the integer*4 internal representation, but the
-! differences will be in the least significant digits. Similarly, the
-! results may change (in the least significant digits) with a change
-! in system, compiler, or compiler flags. However, for a fixed
-! internal representation, processor architecture, compiler, and
-! compiler options, the result will (still) be reproducible with
-! respect to MPI task and OpenMP thread counts.
+! have the same sign, and so that the value, in absolute value, at a
+! given level is strictly less than what can be represented at the
+! next lower level (larger exponent) and strictly greater than what
+! can represented at the next higher level (smaller exponent). Since
+! all elements have the same sign, the sign is set to positive
+! temporarily and then restored when the conversion to floating point
+! is complete. These are all integer operations, so no accuracy is
+! lost. These adjustments eliminate the possibility of catastrophic
+! cancellation. Also, when converting the individual elements to
+! floating point values and summing them, the summation is now
+! equivalent to concatenating the digits in the mantissas for the
+! component summands. In consequence, in the final step when each
+! element of this modified vector of integers is converted to a
+! floating point value and added into the intermediate sum, any
+! rounding is limited to the least significant digit representable
+! in the final floating point sum.
+!
+! Any such rounding error will be sensitive to the particular floating
+! values generated from the integer vector, and so will be
+! sensitive to the number of levels in the vector and the implicit
+! exponent associated with each level, which are themselves functions
+! of the numbers of MPI tasks and OpenMP threads and the number of
+! digits representable in an integer. To avoid this sensitivity,
+! (effectively) generate a new integer vector in which each component
+! integer has a fixed number of significant digits (e.g.,
+! digits(1.0_r8)) and generate the floating point values from these
+! before summing. (See comments in code for more details.) This
+! creates a sequence of floating point values to be summed that are
+! independent of, for example, the numbers of MPI tasks and OpenMP
+! threads or whether using integer*8 or integer*4 internal
+! representations in the integer vector, and thus ensure
+! reproducibility with respect to these options.
!
! Description of optional parameters for integer vector algorithm:
!-----------------------------------------------------------------
@@ -1135,7 +1147,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
!
! Local workspace
!
- integer, parameter :: max_jlevel = &
+ integer, parameter :: max_svlevel_factor = &
1 + (digits(1_i8)/digits(1.0_r8))
integer(i8) :: i8_arr_tlsum_level(-(extra_levels-1):max_level,nflds,omp_nthreads)
@@ -1149,9 +1161,10 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
integer(i8) :: i8_arr_gsum_level((max_level+extra_levels+2)*nflds)
! integer vector representing global
! sum
- integer(i8) :: IX_8 ! integer representation of current
- ! jlevels of X_8 ('part' of
- ! i8_arr_gsum_level)
+ integer(i8) :: i8_gsum_level(-(extra_levels-1):max_level)
+ ! integer vector representing global
+ ! sum for one field
+ integer(i8) :: IX_8 ! integer representation of r8 value
integer(i8) :: i8_sign ! sign global sum
integer(i8) :: i8_radix ! radix for i8 variables (and r8
! variables by earlier if-test)
@@ -1164,7 +1177,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
integer :: isum_beg(omp_nthreads), isum_end(omp_nthreads)
! range of summand indices for each
! OpenMP thread
- integer :: ifld, isum, ithread
+ integer :: ifld, isum, ithread, jlevel
! loop variables
integer :: arr_exp ! exponent of summand
integer :: arr_shift ! exponent used to generate integer
@@ -1178,13 +1191,28 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
integer :: min_level ! index of minimum levels (including
! extra levels) for i8_arr_tlsum_level
integer :: ioffset ! offset(ifld)
- integer :: jlevel ! number of floating point 'pieces'
- ! extracted from a given i8 integer
+ integer :: svlevel ! number of summands in summand_vector
integer :: ierr ! MPI error return
- integer :: LX(max_jlevel) ! exponent of X_8 (see below)
+ integer :: LX ! exponent of X_8 (see below)
integer :: veclth ! total length of i8_arr_lsum_level
- integer :: sum_digits ! lower bound on number of significant
- ! in integer expansion of sum
+ integer :: i8_digit_count ! number of digits in integer
+ ! expansion of sum
+ integer :: i8_begin_level ! level starting from in
+ ! creating next 'exactly representable'
+ ! floating point value from modified
+ ! integer expansion of the sum
+ integer :: i8_trunc_level ! level at which the number of digits in
+ ! the modified integer expansion of the
+ ! sum exceeds the number of representable
+ ! digits in the floating point sum
+ integer :: i8_trunc_loc ! location of last digit at i8_trunc_level
+ ! in the modified integer expansion of the
+ ! sum that is representable in the floating
+ ! point sum
+ integer(i8) :: i8_trunc_level_rem
+ ! truncated digits at i8_trunc_level
+ ! in the modified integer expansion
+ ! of the sum
integer :: curr_exp ! exponent of partial sum during
! reconstruction from integer vector
integer :: corr_exp ! exponent of current summand in
@@ -1193,18 +1221,21 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
real(r8) :: arr_frac ! fraction of summand
real(r8) :: arr_remainder ! part of summand remaining after
! current level of integer expansion
- real(r8) :: X_8(max_jlevel) ! r8 vector representation of current
+ real(r8) :: X_8 ! r8 representation of current
! i8_arr_gsum_level
- real(r8) :: RX_8 ! r8 representation of difference
- ! between current i8_arr_gsum_level
- ! and current jlevels of X_8
- ! (== IX_8). Also used in final
- ! scaling step
-
- logical :: first ! flag used to indicate that just
- ! beginning reconstruction of sum
- ! from integer vector
-
+ real(r8) :: RX_8 ! r8 representation of (other)
+ ! integers used in calculation.
+ real(r8) :: summand_vector((max_level+extra_levels)*max_svlevel_factor)
+ ! vector of r8 values generated from
+ ! integer vector representation to be
+ ! summed to generate global sum
+
+ logical :: first_stepd_iteration
+ ! flag used to indicate whether first
+ ! time through process of converting
+ ! vector of integers into a floating
+ ! point value, as it requires
+ ! special logic
!
!------------------------------------------------------------------------
! Save radix of i8 variables in an i8 variable
@@ -1312,7 +1343,6 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
endif
enddo
-
! Postprocess integer vector to eliminate possibility of overflow
! during subsequent sum over threads and tasks, as per earlier
! comment on logic behind definition of max_nsummands. If value at a
@@ -1425,27 +1455,79 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
call t_stopf("repro_sum_allr_i8")
#endif
+ call t_startf('repro_sum_finalsum')
! Construct global sum from integer vector representation:
! 1) arr_max_shift is the shift applied to fraction(arr_gmax) .
-! When shifting back, need to 'add back in' true arr_gmax exponent. This was
-! removed implicitly by working only with the fraction .
-! 2) Want to add levels into sum in reverse order (smallest to largest). However,
-! even this can generate floating point rounding errors if signs of integers
-! alternate. To avoid this, do some arithmetic with integer vectors so that all
-! components have the same sign. This should keep relative difference between
-! using different integer sizes (e.g. i8 and i4) to machine epsilon
-! 3) Assignment to X_8 will usually lose accuracy since maximum integer
-! size is greater than the max number of 'digits' in r8 value (if xmax_nsummands
-! correction not very large). Calculate remainder and add in first (since
-! smaller). One correction is sufficient for r8 (53 digits) and i8 (63 digits).
-! For r4 (24 digits) may need to correct twice. Code is written in a general
-! fashion, to work no matter how many corrections are necessary (assuming
-! max_jlevel parameter calculation is correct).
+! When shifting back, need to 'add back in' the true arr_gmax exponent.
+! This was removed implicitly by working only with the fraction.
+! 2) To avoid the possibility of catastrophic cancellation, and
+! an unacceptable floating point rounding error, can do some arithmetic
+! with the integer vector so that all components have the same sign.
+! 3) If convert each integer in the integer vector to a floating
+! point value and then add these together, smallest to largest, to
+! calculate the final sum, there may be roundoff error in the least
+! significant digit. This error will be sensitive to the particular
+! floating values generated from the integer vector, and so will be
+! sensitive to the number of levels in the vector and the implicit
+! exponent associated with each level. So this approach is not
+! guaranteed to be reproducible with respect to a change in the
+! number of MPI tasks and OpenMP threads (as this changes the
+! definition of max_nsummands, and thus also arr_max_shift). It is
+! also not guaranteed to be reproducible with respect to changing
+! the integer size, e.g. from i8 to i4, as this also changes
+! arr_max_shift. However, can eliminate this potential loss of
+! reproducibility by taking the following steps.
+! a) Manipulate the integer vector so that
+! i) the component values do not 'overlap', that is, the value
+! represented by a component is strictly less than the value
+! represented by the least significant digit in the previous
+! component, and
+! ii) all components are positive (saving the sign to be restored
+! to the final result).
+! b) Identify the digit in the resulting integer vector that is the
+! last representable in the floating point representation, then
+! truncate the vector at this point, i.e., all digits of lesser
+! significance in the given component and all components
+! representing digits of lesser significance (call this the
+! remainder).
+! c) Convert each integer component in the modified integer vector
+! to its corresponding floating point value and sum the
+! sequence. (Order is unimportant, as explained below, but here
+! add largest to smallest.)
+! d) Repeat (b) and (c) for the remainder (recursively, as
+! necessary).
+! e) Sum all floating point numbers generated by step (c), smallest
+! to largest.
+! f) Restore the sign.
+! With the manipulations in (a) and (b), the summation in (c) is
+! equivalent to concatenating the digits in the mantissas for the
+! component summands, so rounding is irrelevant (so far). Repeating
+! this with the remainder(s) generates a sequence of 'exact'
+! floating point numbers. Summing these can still generate a
+! rounding error in the least significant digit in the largest
+! floating point value (which is the last representable digit in the
+! final result), but the floating point values being summed and
+! order of summation are independent of the number of levels and
+! implicit exponents, so reproducibility is ensured.
+!
+! Note that assignment of an i8 integer value to an r8 floating point
+! variable in step (c) can lead to a loss of accuracy because the
+! maximum number of digits in the i8 integer can be greater than the
+! maximum number of digits representable in the r8 variable (if the
+! xmax_nsummands correction is not very large). With the same sign
+! and nonoverlapping properties of the integer components, these lost
+! digits will also not be representable in the final sum. The process
+! described above of truncating at this last representable digit, and
+! then separately generating floating point value(s) for the
+! remainder, takes care of this automatically. Similar reasoning
+! applies to r4 floating point values with either i8 or i4 integer
+! components.
recompute = .false.
do ifld=1,nflds
arr_gsum(ifld) = 0.0_r8
ioffset = offset(ifld)
+ svlevel = 0
! If validate is .true., test whether the summand upper bound
! was exceeded on any of the MPI tasks
@@ -1456,9 +1538,17 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
endif
if (.not. recompute) then
+! Copy integer vector for current field from i8_arr_gsum_level, so that
+! can be modified without changing i8_arr_gsum_level. (Preserving
+! i8_arr_gsum_level unchanged is not necessary, but is convenient for debugging
+! and makes indexing clearer and less error prone.)
+ i8_gsum_level(:) = 0_i8
+ do ilevel=min_level,max_levels(ifld)
+ i8_gsum_level(ilevel) = i8_arr_gsum_level(ioffset+ilevel)
+ enddo
-! Preprocess integer vector:
-! a) If value larger than or equal to (radix(1_i8)**arr_max_shift),
+! Preprocess integer vector (as described in 3(a) above):
+! i) If value larger than or equal to (radix(1_i8)**arr_max_shift),
! add this 'overlap' to the value at the next smaller level
! in the vector, resulting in nonoverlapping ranges for each
! component.
@@ -1489,144 +1579,248 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
! -(extra_levels-1), does not have to be less than
! (radix(1_i8)**arr_max_shift) for this 'nonoverlap' property to
! hold.
- do ilevel=max_levels(ifld),min_level+1,-1
- if (abs(i8_arr_gsum_level(ioffset+ilevel)) >= &
- (i8_radix**arr_max_shift)) then
-
- IX_8 = i8_arr_gsum_level(ioffset+ilevel) &
+ do ilevel=max_levels(ifld),min_level+1,-1
+ if (abs(i8_gsum_level(ilevel)) >= &
+ (i8_radix**arr_max_shift)) then
+
+ IX_8 = i8_gsum_level(ilevel) &
/ (i8_radix**arr_max_shift)
- i8_arr_gsum_level(ioffset+ilevel-1) = &
- i8_arr_gsum_level(ioffset+ilevel-1) + IX_8
-
+ i8_gsum_level(ilevel-1) = &
+ i8_gsum_level(ilevel-1) + IX_8
+
IX_8 = IX_8*(i8_radix**arr_max_shift)
- i8_arr_gsum_level(ioffset+ilevel) = &
- i8_arr_gsum_level(ioffset+ilevel) - IX_8
- endif
- enddo
-! b) Working consecutively from the first level with a nonzero value
-! up to level max_levels(ifld), subtract +/- 1 from level with
-! larger exponent (e.g., ilevel) and add +/-
-! (i8_radix**arr_max_shift) to level with smaller exponent
-! (ilevel+1), when necessary, so that the value at ilevel+1
-! has the same sign as the value at ilevel. Treat a zero value at
-! ilevel+1 as always a different sign from the value at ilevel so
-! that the process always makes this nonzero. (Otherwise, the
-! wrong sign could be reintroduced by subtracting from a zero
-! value at the next step.) When finished with the process values
-! at all levels are either greater than or equal to zero or all
-! are less than or equal to zero. Note that this can decrease (but
-! not increase) the absolute value at level -(extra_levels-1) by
-! 1. All other levels are now less than or equal to
-! (radix(1_i8)**arr_max_shift) in absolute value rather than
-! strictly less than.
- ilevel = min_level
- do while ((i8_arr_gsum_level(ioffset+ilevel) == 0_i8) &
- .and. (ilevel < max_levels(ifld)))
- ilevel = ilevel + 1
- enddo
-!
- if (ilevel < max_levels(ifld)) then
- if (i8_arr_gsum_level(ioffset+ilevel) > 0_i8) then
- i8_sign = 1_i8
- else
- i8_sign = -1_i8
- endif
- do jlevel=ilevel,max_levels(ifld)-1
- if ((sign(1_i8,i8_arr_gsum_level(ioffset+jlevel)) &
- /= sign(1_i8,i8_arr_gsum_level(ioffset+jlevel+1))) &
- .or. (i8_arr_gsum_level(ioffset+jlevel+1) == 0_i8)) then
- i8_arr_gsum_level(ioffset+jlevel) = &
- i8_arr_gsum_level(ioffset+jlevel) - i8_sign
- i8_arr_gsum_level(ioffset+jlevel+1) = &
- i8_arr_gsum_level(ioffset+jlevel+1) &
- + i8_sign*(i8_radix**arr_max_shift)
- endif
- enddo
+ i8_gsum_level(ilevel) = &
+ i8_gsum_level(ilevel) - IX_8
+ endif
+ enddo
+
+! ii) Working consecutively from the first level with a nonzero value
+! up to level max_levels(ifld), subtract +/- 1 from level with
+! larger exponent (e.g., ilevel) and add +/-
+! (i8_radix**arr_max_shift) to level with smaller exponent
+! (ilevel+1), when necessary, so that the value at ilevel+1
+! has the same sign as the value at ilevel. Treat a zero value at
+! ilevel+1 as always a different sign from the value at ilevel so
+! that the process always makes this nonzero. (Otherwise, the
+! wrong sign could be reintroduced by subtracting from a zero
+! value at the next step.) When finished with the process values
+! at all levels are either greater than or equal to zero or all
+! are less than or equal to zero. Note that this can decrease
+! (but not increase) the absolute value at level
+! -(extra_levels-1) by 1. All other levels are now less than or
+! equal to (radix(1_i8)**arr_max_shift) in absolute value rather
+! than strictly less than.
+ ilevel = min_level
+ do while ((i8_gsum_level(ilevel) == 0_i8) &
+ .and. (ilevel < max_levels(ifld)))
+ ilevel = ilevel + 1
+ enddo
+!
+ if (i8_gsum_level(ilevel) < 0_i8) then
+ i8_sign = -1_i8
+ else
+ i8_sign = 1_i8
+ endif
+!
+ if (ilevel < max_levels(ifld)) then
+ do jlevel=ilevel,max_levels(ifld)-1
+ if ((sign(1_i8,i8_gsum_level(jlevel)) &
+ /= sign(1_i8,i8_gsum_level(jlevel+1)))&
+ .or. (i8_gsum_level(jlevel+1) == 0_i8)) then
+ i8_gsum_level(jlevel) = &
+ i8_gsum_level(jlevel) - i8_sign
+ i8_gsum_level(jlevel+1) = &
+ i8_gsum_level(jlevel+1) &
+ + i8_sign*(i8_radix**arr_max_shift)
+ endif
+ enddo
endif
-! Start with maximum shift, and work up to larger values
- arr_shift = arr_gmax_exp(ifld) &
- - max_levels(ifld)*arr_max_shift
- curr_exp = 0
- first = .true.
- do ilevel=max_levels(ifld),min_level,-1
-
- if (i8_arr_gsum_level(ioffset+ilevel) /= 0_i8) then
- jlevel = 1
-
-! r8 representation of higher order bits in integer
- X_8(jlevel) = i8_arr_gsum_level(ioffset+ilevel)
- LX(jlevel) = exponent(X_8(jlevel))
-
-! Calculate remainder
- IX_8 = int(X_8(jlevel),i8)
- RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8)
-
-! Repeat using remainder
- do while (RX_8 /= 0.0_r8)
- jlevel = jlevel + 1
- X_8(jlevel) = RX_8
- LX(jlevel) = exponent(RX_8)
- IX_8 = IX_8 + int(RX_8,i8)
- RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8)
- enddo
+! iii) If 'same sign' is negative, then change to positive
+! temporarily.
+ if (i8_sign < 0_i8) then
+ do jlevel=ilevel,max_levels(ifld)
+ i8_gsum_level(jlevel) = -i8_gsum_level(jlevel)
+ enddo
+ endif
+
+! iv) Nonoverlap property can be lost after imposition of same sign
+! over components. Reintroduce this property (retaining same sign
+! property). Note that carryover is never more than '1' to the
+! next smaller level, so, again, no intermediate or final sums
+! will exceed the maximum value representable in i8, including
+! level -(extra_levels-1) as long as digits(1_i8) >= 4.
+ do ilevel=max_levels(ifld),min_level+1,-1
+ if (abs(i8_gsum_level(ilevel)) >= &
+ (i8_radix**arr_max_shift)) then
+
+ IX_8 = i8_gsum_level(ilevel)/ &
+ (i8_radix**arr_max_shift)
+ i8_gsum_level(ilevel-1) = &
+ i8_gsum_level(ilevel-1) + IX_8
+
+ IX_8 = IX_8*(i8_radix**arr_max_shift)
+ i8_gsum_level(ilevel) = &
+ i8_gsum_level(ilevel) - IX_8
+ endif
+ enddo
-! Add in contributions, smaller to larger, rescaling for each
-! addition to guarantee that exponent of working summand is always
-! larger than minexponent
- do while (jlevel > 0)
- if (first) then
- curr_exp = LX(jlevel) + arr_shift
- arr_gsum(ifld) = fraction(X_8(jlevel))
- first = .false.
+! Step 3(d): iterate over steps 3(b) and 3(c), truncating integer
+! vector to 'fit' into a floating point value, then repeating with
+! remainder
+ first_stepd_iteration = .true.
+ arr_shift = arr_gmax_exp(ifld) - (min_level)*arr_max_shift
+ i8_digit_count = 0
+ i8_begin_level = min_level
+ do while (i8_begin_level <= max_levels(ifld))
+
+! Determine at which level the total number of integer digits equals
+! or exceeds the number of digits representable in the floating point
+! sum. Then determine which digit at this level is the last
+! representable in the floating point sum. Note that this location
+! (i8_trunc_loc) is zero-based, i.e. smallest digit is at location
+! 0. Note that the exponent is a count of the number of digits for the
+! first nonzero level. All subsequent levels contribute arr_max_shift
+! digits.
+ i8_trunc_loc = 0
+ i8_trunc_level = max_levels(ifld)
+ do ilevel=i8_begin_level,max_levels(ifld)
+ if (first_stepd_iteration) then
+! Special logic for first time through. Subsequent iterations treat
+! leading zeroes as significant.
+ if (i8_digit_count == 0) then
+ if (i8_gsum_level(ilevel) /= 0_i8) then
+ X_8 = i8_gsum_level(ilevel)
+ LX = exponent(X_8)
+! Note that even if i8_gsum_level(ilevel) is truncated when assigned
+! to X_8, the exponent LX will still capture the original number of
+! digits.
+ else
+ LX = 0
+ endif
else
- corr_exp = curr_exp - (LX(jlevel) + arr_shift)
- arr_gsum(ifld) = fraction(X_8(jlevel)) &
- + scale(arr_gsum(ifld),corr_exp)
- curr_exp = LX(jlevel) + arr_shift
+ LX = arr_max_shift
endif
- jlevel = jlevel - 1
- enddo
+ else
+! If i8_digit_count /= 0 during the first iteration
+! (ilevel == i8_begin_level), then there is a remainder left at the
+! previous i8_trunc_level and LX should be set to zero for this
+! iteration.
+ if ((ilevel == i8_begin_level) .and. (i8_digit_count /= 0)) then
+ LX = 0
+ else
+ LX = arr_max_shift
+ endif
+ endif
+ if (i8_digit_count + LX >= digits(1.0_r8)) then
+ i8_trunc_level = ilevel
+ i8_trunc_loc = (i8_digit_count + LX) - digits(1.0_r8)
+ exit
+ else
+ i8_digit_count = i8_digit_count + LX
+ endif
+ enddo
+ first_stepd_iteration = .false.
+
+! Truncate at i8_trunc_loc as needed and determine what the remainder
+! is.
+ if (i8_trunc_loc == 0) then
+! No truncation is necessary, and remainder is just the components
+! for the remaining levels
+ i8_trunc_level_rem = 0
+ else
+! Shift right to identify the digits to be preserved and truncate
+! there
+ IX_8 = i8_gsum_level(i8_trunc_level)/ &
+ (i8_radix**i8_trunc_loc)
+! Shift left to put digits in the correct location (right fill with
+! zeroes)
+ IX_8 = IX_8*(i8_radix**i8_trunc_loc)
+! Calculate local remainder
+ i8_trunc_level_rem = (i8_gsum_level(i8_trunc_level) - IX_8)
+! Update level with the truncated value
+ i8_gsum_level(i8_trunc_level) = IX_8
+ endif
+
+! Calculate floating point value corresponding to modified integer
+! vector. Note that, by construction, i8 integer value will fit into
+! r8 floating point value, so do not need to test for this.
+ svlevel = svlevel + 1
+ summand_vector(svlevel) = 0.0_r8
+ do ilevel=i8_begin_level,i8_trunc_level
+ if (i8_gsum_level(ilevel) /= 0_i8) then
+
+! Convert integer to floating point representation
+ X_8 = i8_gsum_level(ilevel)
+ LX = exponent(X_8)
+
+! Add to vector of floating point summands, scaling first if exponent
+! is too small to apply directly
+ curr_exp = LX + arr_shift
+ if (curr_exp >= MINEXPONENT(1.0_r8)) then
+ summand_vector(svlevel) = &
+ summand_vector(svlevel) + set_exponent(X_8,curr_exp)
+ else
+ RX_8 = set_exponent(X_8, &
+ curr_exp-MINEXPONENT(1.0_r8))
+ summand_vector(svlevel) = &
+ summand_vector(svlevel) + scale(RX_8,MINEXPONENT(1.0_r8))
+ endif
+
+ endif
+
+! Note that same arr_shift should be used for next 'step 3(d)'
+! iteration if i8_trunc_loc > 0.
+ if ((ilevel < i8_trunc_level) .or. (i8_trunc_loc == 0)) then
+ arr_shift = arr_shift - arr_max_shift
+ endif
+
+ enddo
+ if (i8_trunc_loc == 0) then
+ i8_digit_count = 0
+ i8_begin_level = i8_trunc_level + 1
+ else
+ i8_digit_count = i8_trunc_loc
+ i8_begin_level = i8_trunc_level
+ i8_gsum_level(i8_trunc_level) = i8_trunc_level_rem
endif
+
+ enddo
- arr_shift = arr_shift + arr_max_shift
+! Step 3(e): sum vector of floating point values, smallest to largest
+ arr_gsum(ifld) = 0.0_r8
+ do jlevel=svlevel,1,-1
+ arr_gsum(ifld) = arr_gsum(ifld) + summand_vector(jlevel)
enddo
-! Apply final exponent correction, scaling first if exponent is too small
-! to apply directly
- corr_exp = curr_exp + exponent(arr_gsum(ifld))
- if (corr_exp >= MINEXPONENT(1.0_r8)) then
- arr_gsum(ifld) = set_exponent(arr_gsum(ifld),corr_exp)
- else
- RX_8 = set_exponent(arr_gsum(ifld), &
- corr_exp-MINEXPONENT(1.0_r8))
- arr_gsum(ifld) = scale(RX_8,MINEXPONENT(1.0_r8))
- endif
+! Step 3(f): restore the sign
+ arr_gsum(ifld) = i8_sign*arr_gsum(ifld)
-! If validate is .true. and some precision lost, test whether 'too much'
-! was lost, due to too loose an upper bound, too stringent a limit on number
-! of levels of expansion, cancellation, .... Calculated by comparing lower
-! bound on number of sigificant digits with number of digits in 1.0_r8 .
+! If validate is .true. and some precision lost, test whether 'too
+! much' was lost, due to too loose an upper bound, too stringent a
+! limit on number of levels of expansion, cancellation, ...
+! Calculated by comparing lower bound on number of significant digits
+! with number of digits in 1.0_r8 .
if (validate) then
if (i8_arr_gsum_level(ioffset-voffset+2) /= 0_i8) then
-! Find first nonzero level and use exponent for this level, then assume all
-! subsequent levels contribute arr_max_shift digits.
- sum_digits = 0
+! Find first nonzero level and use exponent for this level, then
+! assume all subsequent levels contribute arr_max_shift digits.
+ i8_digit_count = 0
do ilevel=min_level,max_levels(ifld)
- if (sum_digits == 0) then
+ if (i8_digit_count == 0) then
if (i8_arr_gsum_level(ioffset+ilevel) /= 0_i8) then
- X_8(1) = i8_arr_gsum_level(ioffset+ilevel)
- LX(1) = exponent(X_8(1))
- sum_digits = LX(1)
+ X_8 = i8_arr_gsum_level(ioffset+ilevel)
+ LX = exponent(X_8)
+ i8_digit_count = LX
endif
else
- sum_digits = sum_digits + arr_max_shift
+ i8_digit_count = i8_digit_count + arr_max_shift
endif
enddo
- if (sum_digits < digits(1.0_r8)) then
+ if (i8_digit_count < digits(1.0_r8)) then
recompute = .true.
endif
endif
@@ -1635,6 +1829,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
endif
enddo
+ call t_stopf('repro_sum_finalsum')
end subroutine shr_reprosum_int
@@ -1723,7 +1918,6 @@ logical function shr_reprosum_tolExceeded (name, nflds, master, &
shr_reprosum_tolExceeded = .true.
endif
-
end function shr_reprosum_tolExceeded
!
@@ -1848,7 +2042,6 @@ subroutine DDPDD (dda, ddb, len, itype)
ddb(i) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 )
enddo
-
end subroutine DDPDD
!
!------------------------------------------------------------------------