Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor lazy_regrid.py for wflow diagnostics #2024

Merged
merged 13 commits into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions esmvaltool/diag_scripts/hydrology/compute_chunks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Re-chunk the time dimension, to be used by the regrid processor.

For large cubes, regridding to a high resolution grid increases the size
of the data. To reduce memory use, we re-chunk the time dimension.

Related iris issue:
https://github.com/SciTools/iris/issues/3808
"""
import numpy as np


def compute_chunks(src, tgt):
    """Compute the chunk sizes needed to regrid src to tgt.

    The time dimension of ``src`` is split into blocks, sized so that the
    data of one block on the horizontal grid of ``tgt`` is on the order of
    50 MB. Latitude and longitude of ``src`` are each kept as one chunk.

    Parameters
    ----------
    src:
        Source cube; must provide ``dtype`` and ``coord(name)`` for
        'time', 'latitude' and 'longitude'.
    tgt:
        Target cube; must provide ``coord(name)`` for 'latitude' and
        'longitude'.

    Returns
    -------
    tuple
        ``(time_chunks, (src_nlat, ), (src_nlon, ))`` where the entries of
        ``time_chunks`` add up to the number of time steps in ``src``.
    """
    block_bytes = 50 * (1 << 20)  # 50 MB block size

    if src.dtype == np.float32:
        dtype_bytes = 4  # size of float32 in bytes
    else:
        dtype_bytes = 8  # size of float64 in bytes

    ntime = src.coord('time').shape[0]
    tgt_nlat = tgt.coord('latitude').shape[0]
    tgt_nlon = tgt.coord('longitude').shape[0]

    # Define blocks along the time dimension
    min_nblocks = int(ntime * tgt_nlat * tgt_nlon * dtype_bytes / block_bytes)
    min_nblocks = max(min_nblocks, 1)

    # A single time step on the target grid can already exceed the block
    # size, in which case more blocks than time steps would be requested.
    # Never go below one time step per chunk; otherwise the chunk length
    # would be zero and the remainder computation would divide by zero.
    timefull = max(ntime // min_nblocks, 1)

    # Number of full-sized chunks, and the length of the trailing partial
    # chunk (zero when the time axis divides evenly).
    nfullblocks, timepart = divmod(ntime, timefull)

    time_chunks = (timefull, ) * nfullblocks
    if timepart > 0:
        time_chunks += (timepart, )

    src_chunks = (
        time_chunks,
        (src.coord('latitude').shape[0], ),
        (src.coord('longitude').shape[0], ),
    )
    return src_chunks
103 changes: 0 additions & 103 deletions esmvaltool/diag_scripts/hydrology/lazy_regrid.py

This file was deleted.

24 changes: 16 additions & 8 deletions esmvaltool/diag_scripts/hydrology/wflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import logging
from pathlib import Path

import iris
import numpy as np
from osgeo import gdal
import iris
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am in awe about how Codacy is not complaining about this import order 😁

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Me too, but I already asked Sarah to check something like that lately and it turns out I was the one that's wrong... This one is even more strange though!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm, the prospector did not complain about it, and the pylint in vscode says: third party import "from esmvalcore.preprocessor import regrid" should be placed before "import iris"pylint.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's possible prospector didn't run the pylint bit and the CI let it pass - it does that from time to time when there are lots of commits

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks better now 👍


from esmvalcore.preprocessor import regrid
from esmvaltool.diag_scripts.hydrology.derive_evspsblpot import debruin_pet
from esmvaltool.diag_scripts.hydrology.lazy_regrid import lazy_regrid
from esmvaltool.diag_scripts.hydrology.compute_chunks import compute_chunks
from esmvaltool.diag_scripts.shared import (ProvenanceLogger,
get_diagnostic_filename,
group_metadata, run_diagnostic)
Expand Down Expand Up @@ -39,6 +40,13 @@ def create_provenance_record():
return record


def rechunk_and_regrid(src, tgt, scheme):
    """Regrid cube src onto the grid of cube tgt after rechunking it.

    The lazy data of ``src`` is rechunked in place with the chunk sizes
    returned by ``compute_chunks(src, tgt)``; the result of
    ``regrid(src, tgt, scheme)`` is returned.
    """
    chunk_sizes = compute_chunks(src, tgt)
    rechunked = src.lazy_data().rechunk(chunk_sizes)
    # Assign back onto the cube so the regridder sees the new chunking.
    src.data = rechunked
    regridded = regrid(src, tgt, scheme)
    return regridded


def get_input_cubes(metadata):
"""Create a dict with all (preprocessed) input files."""
provenance = create_provenance_record()
Expand Down Expand Up @@ -97,7 +105,7 @@ def regrid_temperature(src_temp, src_height, target_height, scheme):
src_slt = src_temp.copy(data=src_temp.core_data() + src_dtemp.core_data())

# Interpolate sea-level temperature to target grid
target_slt = lazy_regrid(src_slt, target_height, scheme)
target_slt = rechunk_and_regrid(src_slt, target_height, scheme)

# Convert sea-level temperature to new target elevation
target_dtemp = lapse_rate_correction(target_height)
Expand Down Expand Up @@ -216,7 +224,7 @@ def main(cfg):

logger.info("Processing variable precipitation_flux")
scheme = cfg['regrid']
pr_dem = lazy_regrid(all_vars['pr'], dem, scheme)
pr_dem = rechunk_and_regrid(all_vars['pr'], dem, scheme)

logger.info("Processing variable temperature")
tas_dem = regrid_temperature(
Expand All @@ -229,12 +237,12 @@ def main(cfg):
logger.info("Processing variable potential evapotranspiration")
if 'evspsblpot' in all_vars:
pet = all_vars['evspsblpot']
pet_dem = lazy_regrid(pet, dem, scheme)
pet_dem = rechunk_and_regrid(pet, dem, scheme)
else:
logger.info("Potential evapotransporation not available, deriving")
psl_dem = lazy_regrid(all_vars['psl'], dem, scheme)
rsds_dem = lazy_regrid(all_vars['rsds'], dem, scheme)
rsdt_dem = lazy_regrid(all_vars['rsdt'], dem, scheme)
psl_dem = rechunk_and_regrid(all_vars['psl'], dem, scheme)
rsds_dem = rechunk_and_regrid(all_vars['rsds'], dem, scheme)
rsdt_dem = rechunk_and_regrid(all_vars['rsdt'], dem, scheme)
pet_dem = debruin_pet(
tas=tas_dem,
psl=psl_dem,
Expand Down
10 changes: 5 additions & 5 deletions esmvaltool/recipes/hydrology/recipe_wflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
documentation:
description: |
Pre-processes climate data for the WFlow hydrological model.

authors:
- kalverla_peter
- camphuijsen_jaro
- alidoost_sarah
- aerts_jerom
- andela_bouwe

projects:
- ewatercycle

references:
- acknow_project

Expand All @@ -40,7 +40,7 @@ diagnostics:
mip: day
preprocessor: rough_cutout
start_year: 1990
end_year: 1990
end_year: 2001
pr: *daily_var
# evspsblpot: # doesn't exist for ERA-Interim.
# Reconstruct evspsblpot using:
Expand All @@ -53,5 +53,5 @@ diagnostics:
script:
script: hydrology/wflow.py
basin: Meuse
dem_file: 'wflow/wflow_dem_Meuse.nc'
dem_file: 'wflow_parameterset/meuse/staticmaps/wflow_dem.map'
regrid: area_weighted