diff --git a/share/util/shr_reprosum_mod.F90 b/share/util/shr_reprosum_mod.F90
index cde01474fdea..06631657cfee 100644
--- a/share/util/shr_reprosum_mod.F90
+++ b/share/util/shr_reprosum_mod.F90
@@ -282,10 +282,10 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands,     &
 ! models use the same base, e.g.  
 !  radix(1.0_r8) == radix(1_i8)
 ! for real*8 and integer*8. If not, then the alternative algorithm
-! mentioned above and described below is used instead. The integer
-! representation must also have enough digits for the potential growth
-! of the sum for each level, so could conceivably be too small for a
-! large number of summands. 
+! DDPDD mentioned above and described below is used instead. The
+! integer representation must also have enough digits for the
+! potential growth of the sum for each level, so could conceivably be
+! too small for a large number of summands. 
 !
 ! Upper bounds on the total number of summands and on all intermediate
 ! sums are calculated as
@@ -322,26 +322,38 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands,     &
 ! conversion of the vector of integer representation to a floating
 ! point value may be subject to rounding errors. Before the
 ! conversion, the vector of integers is adjusted so that all elements
-! have the same sign, eliminating the possibility of catastrophic
-! cancellation. These are integer operations, so no accuracy is
-! lost. Next, each element of the vector of integers is converted to a
-! floating point value and added into the intermediate sum, in
-! smallest to largest order (in absolute value). Any rounding error is
-! in the least significant digit for each intermediate sum, and the
-! exponents for each summand are monotonically increasing, so the
-! rounding errors do not accumulate. Thus, the relative difference
-! between the 'exact' value and that calculated by this algorithm will
-! be approximately machine epsilon for 64-bit real values, and any
-! difference will be restricted to the 16th significant digit in a
-! decimal representation. This does mean that the values calculated
-! with the integer*8 internal representation could differ from those
-! calculated using the integer*4 internal representation, but the
-! differences will be in the least significant digits. Similarly, the
-! results may change (in the least significant digits) with a change
-! in system, compiler, or compiler flags. However, for a fixed
-! internal representation, processor architecture, compiler, and
-! compiler options, the result will (still) be reproducible with
-! respect to MPI task and OpenMP thread counts.
+! have the same sign, and so that the value, in absolute value, at a
+! given level is strictly less than what can be represented at the
+! next lower level (larger exponent) and strictly greater than what
+! can represented at the next higher level (smaller exponent). Since
+! all elements have the same sign, the sign is set to positive
+! temporarily and then restored when the conversion to floating point
+! is complete. These are all integer operations, so no accuracy is
+! lost. These adjustments eliminate the possibility of catastrophic
+! cancellation. Also, when converting the individual elements to
+! floating point values and summing them, the summation is now
+! equivalent to concatenating the digits in the mantissas for the
+! component summands. In consequence, in the final step when each
+! element of this modified vector of integers is converted to a
+! floating point value and added into the intermediate sum, any
+! rounding is limited to the least significant digit representable
+! in the final floating point sum.
+!
+! Any such rounding error will be sensitive to the particular floating
+! values generated from the integer vector, and so will be 
+! sensitive to the number of levels in the vector and the implicit
+! exponent associated with each level, which are themselves functions
+! of the numbers of MPI tasks and OpenMP threads and the number of
+! digits representable in an integer. To avoid this sensitivity,
+! (effectively) generate a new integer vector in which each component
+! integer has a fixed number of significant digits (e.g.,
+! digits(1.0_r8)) and generate the floating point values from these
+! before summing. (See comments in code for more details.) This
+! creates a sequence of floating point values to be summed that are
+! independent of, for example, the numbers of MPI tasks and OpenMP
+! threads or whether using integer*8 or integer*4 internal
+! representations in the integer vector, and thus ensure
+! reproducibility with respect to these options.
 !
 ! Description of optional parameters for integer vector algorithm:
 !-----------------------------------------------------------------
@@ -1135,7 +1147,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
 !
 ! Local workspace
 !
-      integer, parameter  :: max_jlevel = &
+      integer, parameter  :: max_svlevel_factor = &
                                 1 + (digits(1_i8)/digits(1.0_r8))
 
       integer(i8) :: i8_arr_tlsum_level(-(extra_levels-1):max_level,nflds,omp_nthreads)
@@ -1149,9 +1161,10 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
       integer(i8) :: i8_arr_gsum_level((max_level+extra_levels+2)*nflds)
                                    ! integer vector representing global
                                    !  sum
-      integer(i8) :: IX_8          ! integer representation of current
-                                   !  jlevels of X_8 ('part' of
-                                   !  i8_arr_gsum_level)
+      integer(i8) :: i8_gsum_level(-(extra_levels-1):max_level)
+                                   ! integer vector representing global
+                                   !  sum for one field
+      integer(i8) :: IX_8          ! integer representation of r8 value
       integer(i8) :: i8_sign       ! sign global sum
       integer(i8) :: i8_radix      ! radix for i8 variables (and r8
                                    !  variables by earlier if-test)
@@ -1164,7 +1177,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
       integer :: isum_beg(omp_nthreads), isum_end(omp_nthreads)
                                    ! range of summand indices for each
                                    !  OpenMP thread
-      integer :: ifld, isum, ithread
+      integer :: ifld, isum, ithread, jlevel
                                    ! loop variables
       integer :: arr_exp           ! exponent of summand
       integer :: arr_shift         ! exponent used to generate integer
@@ -1178,13 +1191,28 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
       integer :: min_level         ! index of minimum levels (including
                                    !  extra levels) for i8_arr_tlsum_level
       integer :: ioffset           ! offset(ifld)
-      integer :: jlevel            ! number of floating point 'pieces'
-                                   !  extracted from a given i8 integer
+      integer :: svlevel           ! number of summands in summand_vector
       integer :: ierr              ! MPI error return
-      integer :: LX(max_jlevel)    ! exponent of X_8 (see below)
+      integer :: LX                ! exponent of X_8 (see below)
       integer :: veclth            ! total length of i8_arr_lsum_level
-      integer :: sum_digits        ! lower bound on number of significant
-                                   !  in integer expansion of sum
+      integer :: i8_digit_count    ! number of digits in integer
+                                   !  expansion of sum
+      integer :: i8_begin_level    ! level starting from in
+                                   !  creating next 'exactly representable'
+                                   !  floating point value from modified
+                                   !  integer expansion of the sum
+      integer :: i8_trunc_level    ! level at which the number of digits in
+                                   !  the modified integer expansion of the
+                                   !  sum exceeds the number of representable
+                                   !  digits in the floating point sum
+      integer :: i8_trunc_loc      ! location of last digit at i8_trunc_level
+                                   !  in the modified integer expansion of the
+                                   !  sum that is representable in the floating
+                                   !  point sum
+      integer(i8) :: i8_trunc_level_rem
+                                   ! truncated digits at i8_trunc_level
+                                   !  in the modified integer expansion
+                                   !  of the sum
       integer :: curr_exp          ! exponent of partial sum during
                                    !  reconstruction from integer vector
       integer :: corr_exp          ! exponent of current summand in
@@ -1193,18 +1221,21 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
       real(r8) :: arr_frac         ! fraction of summand
       real(r8) :: arr_remainder    ! part of summand remaining after
                                    !  current level of integer expansion
-      real(r8) :: X_8(max_jlevel)  ! r8 vector representation of current
+      real(r8) :: X_8              ! r8 representation of current
                                    !  i8_arr_gsum_level
-      real(r8) :: RX_8             ! r8 representation of difference
-                                   !  between current i8_arr_gsum_level
-                                   !  and current jlevels of X_8
-                                   !  (== IX_8). Also used in final
-                                   !  scaling step
-
-      logical :: first             ! flag used to indicate that just
-                                   !  beginning reconstruction of sum
-                                   !  from integer vector
-
+      real(r8) :: RX_8             ! r8 representation of (other)
+                                   !  integers used in calculation.
+      real(r8) :: summand_vector((max_level+extra_levels)*max_svlevel_factor)
+                                   ! vector of r8 values generated from
+                                   !  integer vector representation to be
+                                   !  summed to generate global sum
+
+      logical :: first_stepd_iteration
+                                   ! flag used to indicate whether first
+                                   !  time through process of converting
+                                   !  vector of integers into a floating
+                                   !  point value, as it requires
+                                   !  special logic
 !
 !------------------------------------------------------------------------
 ! Save radix of i8 variables in an i8 variable
@@ -1312,7 +1343,6 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
             endif
 
           enddo
-
 ! Postprocess integer vector to eliminate possibility of overflow
 ! during subsequent sum over threads and tasks, as per earlier
 ! comment on logic behind definition of max_nsummands. If value at a
@@ -1425,27 +1455,79 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
       call t_stopf("repro_sum_allr_i8")
 #endif
 
+      call t_startf('repro_sum_finalsum')
 ! Construct global sum from integer vector representation:
 !  1) arr_max_shift is the shift applied to fraction(arr_gmax) .
-!   When shifting back, need to 'add back in' true arr_gmax exponent. This was
-!   removed implicitly by working only with the fraction .
-!  2) Want to add levels into sum in reverse order (smallest to largest). However,
-!   even this can generate floating point rounding errors if signs of integers
-!   alternate. To avoid this, do some arithmetic with integer vectors so that all
-!   components have the same sign. This should keep relative difference between
-!   using different integer sizes (e.g. i8 and i4) to machine epsilon
-!  3) Assignment to X_8 will usually lose accuracy since maximum integer
-!   size is greater than the max number of 'digits' in r8 value (if xmax_nsummands
-!   correction not very large). Calculate remainder and add in first (since
-!   smaller). One correction is sufficient for r8 (53 digits) and i8 (63 digits).
-!   For r4 (24 digits) may need to correct twice. Code is written in a general
-!   fashion, to work no matter how many corrections are necessary (assuming
-!   max_jlevel parameter calculation is correct).
+!   When shifting back, need to 'add back in' the true arr_gmax exponent.
+!   This was removed implicitly by working only with the fraction.
+!  2) To avoid the possibility of catastrophic cancellation, and
+!   an unacceptable floating point rounding error, can do some arithmetic
+!   with the integer vector so that all components have the same sign.
+!  3) If convert each integer in the integer vector to a floating
+!   point value and then add these together, smallest to largest, to
+!   calculate the final sum, there may be roundoff error in the least
+!   significant digit. This error will be sensitive to the particular
+!   floating values generated from the integer vector, and so will be
+!   sensitive to the number of levels in the vector and the implicit
+!   exponent associated with each level. So this approach is not
+!   guaranteed to be reproducible with respect to a change in the
+!   number of MPI tasks and OpenMP threads (as this changes the
+!   definition of max_nsummands, and thus also arr_max_shift). It is
+!   also not guaranteed to be reproducible with respect to changing
+!   the integer size, e.g. from i8 to i4, as this also changes
+!   arr_max_shift. However, can eliminate this potential loss of
+!   reproducibility by taking the following steps. 
+!   a) Manipulate the integer vector so that
+!    i)  the component values do not 'overlap', that is, the value
+!        represented by a component is strictly less than the value
+!        represented by the least significant digit in the previous
+!        component, and 
+!    ii) all components are positive (saving the sign to be restored
+!        to the final result).
+!   b) Identify the digit in the resulting integer vector that is the
+!      last representable in the floating point representation, then
+!      truncate the vector at this point, i.e., all digits of lesser
+!      significance in the given component and all components
+!      representing digits of lesser significance (call this the
+!      remainder). 
+!   c) Convert each integer component in the modified integer vector
+!      to its corresponding floating point value and sum the
+!      sequence. (Order is unimportant, as explained below, but here
+!      add largest to smallest.) 
+!   d) Repeat (b) and (c) for the remainder (recursively, as
+!      necessary).
+!   e) Sum all floating point numbers generated by step (c), smallest
+!      to largest. 
+!   f) Restore the sign.
+!   With the manipulations in (a) and (b), the summation in (c) is
+!   equivalent to concatenating the digits in the mantissas for the
+!   component summands, so rounding is irrelevant (so far). Repeating
+!   this with the remainder(s) generates a sequence of 'exact'
+!   floating point numbers. Summing these can still generate a
+!   rounding error in the least significant digit in the largest
+!   floating point value (which is the last representable digit in the
+!   final result), but the floating point values being summed and
+!   order of summation are independent of the number of levels and
+!   implicit exponents, so reproducibility is ensured.
+!  
+!  Note that assignment of an i8 integer value to an r8 floating point
+!  variable in step (c) can lead to a loss of accuracy because the
+!  maximum number of digits in the i8 integer can be greater than the
+!  maximum number of digits representable in the r8 variable (if the
+!  xmax_nsummands correction is not very large). With the same sign
+!  and nonoverlapping properties of the integer components, these lost
+!  digits will also not be representable in the final sum. The process
+!  described above of truncating at this last representable digit, and
+!  then separately generating floating point value(s) for the
+!  remainder, takes care of this automatically. Similar reasoning
+!  applies to r4 floating point values with either i8 or i4 integer
+!  components.
 
       recompute = .false.
       do ifld=1,nflds
          arr_gsum(ifld) = 0.0_r8
          ioffset = offset(ifld)
+         svlevel = 0
 
 ! If validate is .true., test whether the summand upper bound
 ! was exceeded on any of the MPI tasks
@@ -1456,9 +1538,17 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
          endif
 
          if (.not. recompute) then
+! Copy integer vector for current field from i8_arr_gsum_level, so that
+! can be modified without changing i8_arr_gsum_level. (Preserving
+! i8_arr_gsum_level unchanged is not necessary, but is convenient for debugging
+! and makes indexing clearer and less error prone.)
+            i8_gsum_level(:) = 0_i8
+            do ilevel=min_level,max_levels(ifld)
+               i8_gsum_level(ilevel) = i8_arr_gsum_level(ioffset+ilevel)
+            enddo
 
-! Preprocess integer vector:
-!  a) If value larger than or equal to (radix(1_i8)**arr_max_shift),
+! Preprocess integer vector (as described in 3(a) above):
+!  i) If value larger than or equal to (radix(1_i8)**arr_max_shift),
 !     add this 'overlap' to the value at the next smaller level
 !     in the vector, resulting in nonoverlapping ranges for each
 !     component.
@@ -1489,144 +1579,248 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
 !     -(extra_levels-1), does not have to be less than
 !     (radix(1_i8)**arr_max_shift) for this 'nonoverlap' property to
 !     hold. 
-           do ilevel=max_levels(ifld),min_level+1,-1
-              if (abs(i8_arr_gsum_level(ioffset+ilevel)) >= &
-                     (i8_radix**arr_max_shift)) then
-
-                  IX_8 = i8_arr_gsum_level(ioffset+ilevel) &
+            do ilevel=max_levels(ifld),min_level+1,-1
+               if (abs(i8_gsum_level(ilevel)) >= &
+                      (i8_radix**arr_max_shift)) then
+                  
+                  IX_8 = i8_gsum_level(ilevel) &
                          / (i8_radix**arr_max_shift)
-                  i8_arr_gsum_level(ioffset+ilevel-1) = &
-                     i8_arr_gsum_level(ioffset+ilevel-1) + IX_8
-
+                  i8_gsum_level(ilevel-1) = &
+                     i8_gsum_level(ilevel-1) + IX_8
+                  
                   IX_8 = IX_8*(i8_radix**arr_max_shift)
-                  i8_arr_gsum_level(ioffset+ilevel)   = &
-                     i8_arr_gsum_level(ioffset+ilevel) - IX_8
-              endif
-           enddo
-!  b) Working consecutively from the first level with a nonzero value
-!     up to level max_levels(ifld), subtract +/- 1 from level with
-!     larger exponent (e.g., ilevel) and add  +/-
-!     (i8_radix**arr_max_shift) to level with smaller exponent
-!     (ilevel+1), when necessary, so that the value at ilevel+1
-!     has the same sign as the value at ilevel. Treat a zero value at
-!     ilevel+1 as always a different sign from the value at ilevel so
-!     that the process always makes this nonzero. (Otherwise, the
-!     wrong sign could be reintroduced by subtracting from a zero
-!     value at the next step.) When finished with the process values
-!     at all levels are either greater than or equal to zero or all
-!     are less than or equal to zero. Note that this can decrease (but
-!     not increase) the absolute value at level -(extra_levels-1) by
-!     1. All other levels are now less than or equal to
-!     (radix(1_i8)**arr_max_shift) in absolute value rather than
-!     strictly less than. 
-           ilevel = min_level
-           do while ((i8_arr_gsum_level(ioffset+ilevel) == 0_i8) &
-                     .and. (ilevel < max_levels(ifld)))
-             ilevel = ilevel + 1
-           enddo
-!
-           if (ilevel < max_levels(ifld)) then
-              if (i8_arr_gsum_level(ioffset+ilevel) > 0_i8) then
-                 i8_sign = 1_i8
-              else
-                 i8_sign = -1_i8
-              endif
-              do jlevel=ilevel,max_levels(ifld)-1
-                 if ((sign(1_i8,i8_arr_gsum_level(ioffset+jlevel)) &
-                      /= sign(1_i8,i8_arr_gsum_level(ioffset+jlevel+1))) &
-                     .or. (i8_arr_gsum_level(ioffset+jlevel+1) == 0_i8)) then
-                    i8_arr_gsum_level(ioffset+jlevel)   = &
-                       i8_arr_gsum_level(ioffset+jlevel) - i8_sign
-                    i8_arr_gsum_level(ioffset+jlevel+1) = &
-                       i8_arr_gsum_level(ioffset+jlevel+1) &
-                       + i8_sign*(i8_radix**arr_max_shift)
-                 endif
-              enddo
+                  i8_gsum_level(ilevel)   = &
+                     i8_gsum_level(ilevel) - IX_8
+               endif
+            enddo
+
+!  ii) Working consecutively from the first level with a nonzero value
+!      up to level max_levels(ifld), subtract +/- 1 from level with
+!      larger exponent (e.g., ilevel) and add  +/-
+!      (i8_radix**arr_max_shift) to level with smaller exponent
+!      (ilevel+1), when necessary, so that the value at ilevel+1
+!      has the same sign as the value at ilevel. Treat a zero value at
+!      ilevel+1 as always a different sign from the value at ilevel so
+!      that the process always makes this nonzero. (Otherwise, the
+!      wrong sign could be reintroduced by subtracting from a zero
+!      value at the next step.) When finished with the process values
+!      at all levels are either greater than or equal to zero or all
+!      are less than or equal to zero. Note that this can decrease
+!      (but not increase) the absolute value at level
+!      -(extra_levels-1) by 1. All other levels are now less than or
+!      equal to (radix(1_i8)**arr_max_shift) in absolute value rather
+!      than strictly less than. 
+            ilevel = min_level
+            do while ((i8_gsum_level(ilevel) == 0_i8) &
+                      .and. (ilevel < max_levels(ifld)))
+               ilevel = ilevel + 1
+            enddo
+!
+            if (i8_gsum_level(ilevel) < 0_i8) then
+               i8_sign = -1_i8
+            else
+               i8_sign = 1_i8
+            endif
+!            
+            if (ilevel < max_levels(ifld)) then
+               do jlevel=ilevel,max_levels(ifld)-1
+                  if ((sign(1_i8,i8_gsum_level(jlevel)) &
+                       /= sign(1_i8,i8_gsum_level(jlevel+1)))&
+                      .or. (i8_gsum_level(jlevel+1) == 0_i8)) then
+                     i8_gsum_level(jlevel)   = &
+                       i8_gsum_level(jlevel) - i8_sign
+                     i8_gsum_level(jlevel+1) = &
+                        i8_gsum_level(jlevel+1) &
+                        + i8_sign*(i8_radix**arr_max_shift)
+                  endif
+               enddo
             endif
 
-! Start with maximum shift, and work up to larger values
-            arr_shift = arr_gmax_exp(ifld) &
-                        - max_levels(ifld)*arr_max_shift
-            curr_exp = 0
-            first = .true.
-            do ilevel=max_levels(ifld),min_level,-1
-
-               if (i8_arr_gsum_level(ioffset+ilevel) /= 0_i8) then
-                  jlevel = 1
-
-! r8 representation of higher order bits in integer
-                  X_8(jlevel) = i8_arr_gsum_level(ioffset+ilevel)
-                  LX(jlevel)  = exponent(X_8(jlevel))
-
-! Calculate remainder
-                  IX_8 = int(X_8(jlevel),i8)
-                  RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8)
-
-! Repeat using remainder
-                  do while (RX_8 /= 0.0_r8)
-                     jlevel = jlevel + 1
-                     X_8(jlevel) = RX_8
-                     LX(jlevel) = exponent(RX_8)
-                     IX_8 = IX_8 + int(RX_8,i8)
-                     RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8)
-                  enddo
+!  iii) If 'same sign' is negative, then change to positive
+!       temporarily. 
+            if (i8_sign < 0_i8) then
+               do jlevel=ilevel,max_levels(ifld)
+                  i8_gsum_level(jlevel) = -i8_gsum_level(jlevel)
+               enddo
+            endif
+
+!  iv) Nonoverlap property can be lost after imposition of same sign
+!      over components. Reintroduce this property (retaining same sign
+!      property). Note that carryover is never more than '1' to the
+!      next smaller level, so, again, no intermediate or final sums
+!      will exceed the maximum value representable in i8, including
+!      level -(extra_levels-1) as long as digits(1_i8) >= 4. 
+            do ilevel=max_levels(ifld),min_level+1,-1
+               if (abs(i8_gsum_level(ilevel)) >= &
+                      (i8_radix**arr_max_shift)) then
+                  
+                  IX_8 = i8_gsum_level(ilevel)/ &
+                         (i8_radix**arr_max_shift)
+                  i8_gsum_level(ilevel-1) = &
+                     i8_gsum_level(ilevel-1) + IX_8
+                  
+                  IX_8 = IX_8*(i8_radix**arr_max_shift)
+                  i8_gsum_level(ilevel)   = &
+                     i8_gsum_level(ilevel) - IX_8
+               endif
+            enddo
 
-! Add in contributions, smaller to larger, rescaling for each
-! addition to guarantee that exponent of working summand is always
-! larger than minexponent
-                  do while (jlevel > 0)
-                     if (first) then
-                        curr_exp = LX(jlevel) + arr_shift
-                        arr_gsum(ifld) = fraction(X_8(jlevel))
-                        first = .false.
+! Step 3(d): iterate over steps 3(b) and 3(c), truncating integer
+! vector to 'fit' into a floating point value, then repeating with
+! remainder
+            first_stepd_iteration = .true.
+            arr_shift = arr_gmax_exp(ifld) - (min_level)*arr_max_shift
+            i8_digit_count = 0
+            i8_begin_level = min_level
+            do while (i8_begin_level <= max_levels(ifld))
+   
+! Determine at which level the total number of integer digits equals
+! or exceeds the number of digits representable in the floating point
+! sum. Then determine which digit at this level is the last
+! representable in the floating point sum. Note that this location
+! (i8_trunc_loc) is zero-based, i.e. smallest digit is at location
+! 0. Note that the exponent is a count of the number of digits for the
+! first nonzero level. All subsequent levels contribute arr_max_shift
+! digits.
+               i8_trunc_loc = 0
+               i8_trunc_level = max_levels(ifld)
+               do ilevel=i8_begin_level,max_levels(ifld)
+                  if (first_stepd_iteration) then
+! Special logic for first time through. Subsequent iterations treat
+! leading zeroes as significant.                  
+                     if (i8_digit_count == 0) then
+                        if (i8_gsum_level(ilevel) /= 0_i8) then
+                           X_8 = i8_gsum_level(ilevel)
+                           LX  = exponent(X_8)
+! Note that even if i8_gsum_level(ilevel) is truncated when assigned
+! to X_8, the exponent LX will still capture the original number of
+! digits.
+                        else
+                           LX = 0
+                        endif
                      else
-                        corr_exp = curr_exp - (LX(jlevel) + arr_shift)
-                        arr_gsum(ifld) = fraction(X_8(jlevel)) &
-                                       + scale(arr_gsum(ifld),corr_exp)
-                        curr_exp = LX(jlevel) + arr_shift
+                        LX = arr_max_shift
                      endif
-                     jlevel = jlevel - 1
-                  enddo
+                  else
+! If i8_digit_count /= 0 during the first iteration
+! (ilevel == i8_begin_level), then there is a remainder left at the
+! previous i8_trunc_level and LX should be set to zero for this
+! iteration.
+                     if ((ilevel == i8_begin_level) .and. (i8_digit_count /= 0)) then
+                        LX = 0
+                     else
+                        LX = arr_max_shift
+                     endif
+                  endif
+                  if (i8_digit_count + LX >= digits(1.0_r8)) then
+                     i8_trunc_level = ilevel
+                     i8_trunc_loc = (i8_digit_count + LX) - digits(1.0_r8)
+                     exit
+                  else
+                     i8_digit_count = i8_digit_count + LX
+                  endif
+               enddo
+               first_stepd_iteration = .false.
+
+! Truncate at i8_trunc_loc as needed and determine what the remainder
+! is. 
+               if (i8_trunc_loc == 0) then
+! No truncation is necessary, and remainder is just the components
+! for the remaining levels
+                  i8_trunc_level_rem = 0
+               else
+! Shift right to identify the digits to be preserved and truncate
+! there
+                  IX_8 = i8_gsum_level(i8_trunc_level)/ &
+                         (i8_radix**i8_trunc_loc)
+! Shift left to put digits in the correct location (right fill with
+! zeroes)
+                  IX_8 = IX_8*(i8_radix**i8_trunc_loc)
+! Calculate local remainder
+                  i8_trunc_level_rem = (i8_gsum_level(i8_trunc_level) - IX_8)
+! Update level with the truncated value
+                  i8_gsum_level(i8_trunc_level) = IX_8
+               endif
+                  
+! Calculate floating point value corresponding to modified integer
+! vector. Note that, by construction, i8 integer value will fit into
+! r8 floating point value, so do not need to test for this.
+               svlevel = svlevel + 1
+               summand_vector(svlevel) = 0.0_r8
+               do ilevel=i8_begin_level,i8_trunc_level
+                  if (i8_gsum_level(ilevel) /= 0_i8) then
+
+! Convert integer to floating point representation
+                     X_8 = i8_gsum_level(ilevel)
+                     LX  = exponent(X_8)
+
+! Add to vector of floating point summands, scaling first if exponent
+! is too small to apply directly
+                     curr_exp = LX + arr_shift
+                     if (curr_exp >= MINEXPONENT(1.0_r8)) then
+                        summand_vector(svlevel) = &
+                           summand_vector(svlevel) + set_exponent(X_8,curr_exp)
+                     else
+                        RX_8 = set_exponent(X_8, &
+                                            curr_exp-MINEXPONENT(1.0_r8))
+                        summand_vector(svlevel) = &
+                           summand_vector(svlevel) + scale(RX_8,MINEXPONENT(1.0_r8))
+                     endif
+                     
+                  endif
+
+! Note that same arr_shift should be used for next 'step 3(d)'
+! iteration if i8_trunc_loc > 0.
+                  if ((ilevel < i8_trunc_level) .or. (i8_trunc_loc == 0)) then
+                     arr_shift = arr_shift - arr_max_shift
+                  endif
+
+               enddo
 
+               if (i8_trunc_loc == 0) then
+                  i8_digit_count = 0
+                  i8_begin_level = i8_trunc_level + 1
+               else
+                  i8_digit_count = i8_trunc_loc
+                  i8_begin_level = i8_trunc_level
+                  i8_gsum_level(i8_trunc_level) = i8_trunc_level_rem
                endif
+            
+            enddo
 
-               arr_shift = arr_shift + arr_max_shift
+! Step 3(e): sum vector of floating point values, smallest to largest
+            arr_gsum(ifld) = 0.0_r8
+            do jlevel=svlevel,1,-1
+               arr_gsum(ifld) = arr_gsum(ifld) + summand_vector(jlevel)
             enddo
 
-! Apply final exponent correction, scaling first if exponent is too small
-! to apply directly
-            corr_exp = curr_exp + exponent(arr_gsum(ifld))
-            if (corr_exp >= MINEXPONENT(1.0_r8)) then
-               arr_gsum(ifld) = set_exponent(arr_gsum(ifld),corr_exp)
-            else
-               RX_8 = set_exponent(arr_gsum(ifld), &
-                                   corr_exp-MINEXPONENT(1.0_r8))
-               arr_gsum(ifld) = scale(RX_8,MINEXPONENT(1.0_r8))
-            endif
+! Step 3(f): restore the sign
+            arr_gsum(ifld) = i8_sign*arr_gsum(ifld)
 
-! If validate is .true. and some precision lost, test whether 'too much'
-! was lost, due to too loose an upper bound, too stringent a limit on number
-! of levels of expansion, cancellation, .... Calculated by comparing lower
-! bound on number of sigificant digits with number of digits in 1.0_r8 .
+! If validate is .true. and some precision lost, test whether 'too
+! much' was lost, due to too loose an upper bound, too stringent a
+! limit on number of levels of expansion, cancellation, ...
+! Calculated by comparing lower bound on number of significant digits
+! with number of digits in 1.0_r8 .
             if (validate) then
                if (i8_arr_gsum_level(ioffset-voffset+2) /= 0_i8) then
 
-! Find first nonzero level and use exponent for this level, then assume all
-! subsequent levels contribute arr_max_shift digits.
-                  sum_digits = 0
+! Find first nonzero level and use exponent for this level, then
+! assume all subsequent levels contribute arr_max_shift digits.
+                  i8_digit_count = 0
                   do ilevel=min_level,max_levels(ifld)
-                     if (sum_digits == 0) then
+                     if (i8_digit_count == 0) then
                         if (i8_arr_gsum_level(ioffset+ilevel) /= 0_i8) then
-                           X_8(1) = i8_arr_gsum_level(ioffset+ilevel)
-                           LX(1)  = exponent(X_8(1))
-                           sum_digits = LX(1)
+                           X_8 = i8_arr_gsum_level(ioffset+ilevel)
+                           LX  = exponent(X_8)
+                           i8_digit_count = LX
                         endif
                      else
-                        sum_digits = sum_digits + arr_max_shift
+                        i8_digit_count = i8_digit_count + arr_max_shift
                      endif
                   enddo
 
-                  if (sum_digits < digits(1.0_r8)) then
+                  if (i8_digit_count < digits(1.0_r8)) then
                      recompute = .true.
                   endif
                endif
@@ -1635,6 +1829,7 @@ subroutine shr_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, &
          endif
 
       enddo
+      call t_stopf('repro_sum_finalsum')
 
    end subroutine shr_reprosum_int
 
@@ -1723,7 +1918,6 @@ logical function shr_reprosum_tolExceeded (name, nflds, master, &
          shr_reprosum_tolExceeded = .true.
        endif
 
-
    end function shr_reprosum_tolExceeded
 
 !
@@ -1848,7 +2042,6 @@ subroutine DDPDD (dda, ddb, len, itype)
          ddb(i) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 )
       enddo
 
-
    end subroutine DDPDD
 !
 !------------------------------------------------------------------------