Commit 02d387c: "Review changes."
pp-mo committed Aug 20, 2019
1 parent f35bbd5
Showing 3 changed files with 33 additions and 23 deletions.
43 changes: 21 additions & 22 deletions lib/iris/_lazy_data.py
@@ -31,8 +31,8 @@

 import dask
 import dask.array as da
-import dask.array.core
 import dask.config
+import dask.utils

 import numpy as np
 import numpy.ma as ma
@@ -101,27 +101,17 @@ def _optimum_chunksize(chunks, shape,
         "chunks = [c[0] for c in normalise_chunks('auto', ...)]".

     """
-    # Return chunks unchanged, for types of invocation we don't comprehend.
-    if (any(elem <= 0 for elem in shape) or
-            not isinstance(chunks, Iterable) or
-            len(chunks) != len(shape)):
-        # Don't modify chunks for special values like -1, (0,), 'auto',
-        # or if shape contains 0 or -1 (like raw landsea-mask data proxies).
-        return chunks
-
     # Set the chunksize limit.
     if limit is None:
         # Fetch the default 'optimal' chunksize from the dask config.
         limit = dask.config.get('array.chunk-size')
         # Convert to bytes
-        limit = da.core.parse_bytes(limit)
+        limit = dask.utils.parse_bytes(limit)

     point_size_limit = limit / dtype.itemsize

     # Create result chunks, starting with a copy of the input.
     result = list(chunks)
-    if shape is None:
-        shape = result[:]

     if np.prod(result) < point_size_limit:
         # If size is less than maximum, expand the chunks, multiplying later
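For reference, the replacement line above converts dask's human-readable chunk-size setting into the byte count that `_optimum_chunksize` works with. A minimal sketch of that conversion (assuming only that dask and numpy are installed; the float32 dtype is illustrative):

```python
import dask
import dask.utils

import numpy as np

# The dask default chunk size is a human-readable string, e.g. '128MiB'.
limit = dask.config.get('array.chunk-size')

# dask.utils.parse_bytes converts that string to a plain byte count,
# as the '+' line in the hunk above now does.
limit_bytes = dask.utils.parse_bytes(limit)

# Dividing by the itemsize gives the point-count limit used in the code
# above (here for float32 data, i.e. 4 bytes per point).
point_size_limit = limit_bytes / np.dtype('f4').itemsize
print(limit, '->', limit_bytes, 'bytes ->', point_size_limit, 'points')
```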
@@ -155,32 +145,41 @@ def as_lazy_data(data, chunks=None, asarray=False):
     Args:

-    * data:
-        An array. This will be converted to a dask array.
+    * data (array-like):
+        An indexable object with 'shape', 'dtype' and 'ndim' properties.
+        This will be converted to a dask array.

     Kwargs:

-    * chunks:
-        Describes how the created dask array should be split up. Defaults to a
-        value first defined in biggus (being `8 * 1024 * 1024 * 2`).
-        For more information see
-        http://dask.pydata.org/en/latest/array-creation.html#chunks.
+    * chunks (list of int):
+        If present, a source chunk shape, e.g. for a chunked netcdf variable.

-    * asarray:
+    * asarray (bool):
         If True, then chunks will be converted to instances of `ndarray`.
         Set to False (default) to pass passed chunks through unchanged.

     Returns:
         The input array converted to a dask array.

+    .. note::
+        The result chunk size is a multiple of 'chunks', if given, up to the
+        dask default chunksize, i.e. `dask.config.get('array.chunk-size')`,
+        or the full data shape if that is smaller.
+        If 'chunks' is not given, the result has chunks of the full data shape,
+        but reduced by a factor if that exceeds the dask default chunksize.
+
     """
     if chunks is None:
         # No existing chunks : Make a chunk the shape of the entire input array
         # (but we will subdivide it if too big).
         chunks = list(data.shape)

-    # Expand or reduce the basic chunk shape to an optimum size.
-    chunks = _optimum_chunksize(chunks, shape=data.shape, dtype=data.dtype)
+    # Adjust chunk size for better dask performance,
+    # NOTE: but only if no shape dimension is zero, so that we can handle the
+    # PPDataProxy of "raw" landsea-masked fields, which have a shape of (0, 0).
+    if all(elem > 0 for elem in data.shape):
+        # Expand or reduce the basic chunk shape to an optimum size.
+        chunks = _optimum_chunksize(chunks, shape=data.shape, dtype=data.dtype)

     if isinstance(data, ma.core.MaskedConstant):
         data = ma.masked_array(data.data, mask=data.mask)

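A side note on the final context lines: `numpy.ma.masked` is a 0-d `MaskedConstant` singleton, which the code re-wraps as an ordinary masked array before handing it to dask. A small illustration in plain numpy (no iris needed):

```python
import numpy.ma as ma

constant = ma.masked                  # the MaskedConstant singleton
print(type(constant).__name__)        # 'MaskedConstant'

# Re-wrap as an ordinary 0-d masked array, as the code above does.
array = ma.masked_array(constant.data, mask=constant.mask)
print(array.shape, bool(array.mask))  # () True
```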
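Taken together, the `as_lazy_data` changes mean that a supplied source chunking is scaled towards the dask default chunk size, while a shape containing zero now bypasses `_optimum_chunksize` entirely. A rough usage sketch, assuming an iris checkout with this commit applied (exact chunk sizes depend on the dask config):

```python
import numpy as np

from iris._lazy_data import as_lazy_data

# A small source chunking (e.g. from a chunked netcdf variable) is
# expanded to a multiple of itself, up to the dask chunk-size limit
# or the full array shape, whichever is smaller.
data = np.zeros((1000, 1000), dtype='f4')
lazy = as_lazy_data(data, chunks=(100, 100))
print(lazy.chunksize)                 # some multiple of (100, 100)

# A shape with a zero dimension, like a "raw" landsea-masked PP field,
# skips chunk optimisation and converts as-is.
empty = np.zeros((0, 0), dtype='f4')
print(as_lazy_data(empty).shape)      # (0, 0)
```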
4 changes: 3 additions & 1 deletion lib/iris/fileformats/netcdf.py
@@ -510,8 +510,10 @@ def _get_cf_var_data(cf_var, filename):
                             netCDF4.default_fillvals[cf_var.dtype.str[1:]])
     proxy = NetCDFDataProxy(cf_var.shape, dtype, filename, cf_var.cf_name,
                             fill_value)
+    # Get the chunking specified for the variable : this is either a shape, or
+    # maybe the string "contiguous".
     chunks = cf_var.cf_data.chunking()
-    # Chunks can be an iterable, or `'contiguous'`.
+    # In the "contiguous" case, pass chunks=None to 'as_lazy_data'.
     if chunks == 'contiguous':
         chunks = None
     return as_lazy_data(proxy, chunks=chunks)
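For context, `chunking()` is the underlying netCDF4-python API: it returns either a list of chunk lengths, one per dimension, or the string 'contiguous'. A sketch of the behaviour the new comments describe (the file and variable names here are hypothetical):

```python
import netCDF4

# Hypothetical example file and variable, for illustration only.
dataset = netCDF4.Dataset('example.nc')
var = dataset.variables['air_temperature']

chunks = var.chunking()    # e.g. [10, 180, 360], or 'contiguous'
if chunks == 'contiguous':
    # Contiguous on-disk storage: let as_lazy_data choose chunks itself.
    chunks = None
print(chunks)
```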
9 changes: 9 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_as_lazy_data.py
@@ -137,6 +137,15 @@ def test_large_specific_chunk_passthrough(self):
                                      dtype=np.dtype('f4'))])
         self.assertEqual(result.shape, huge_test_shape)

+    def test_shapeless_data(self):
+        # Check that chunk optimisation is skipped if shape contains a zero.
+        limitcall_patch = self.patch('iris._lazy_data._optimum_chunksize')
+        test_shape = (2, 1, 0, 2)
+        data = self._dummydata(test_shape)
+        result = as_lazy_data(data, chunks=test_shape)
+        self.assertFalse(limitcall_patch.called)
+        self.assertEqual(result.shape, test_shape)
+

 if __name__ == '__main__':
     tests.main()
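The `self.patch` call in the new test is iris's test-class wrapper around `unittest.mock.patch`. A standalone equivalent of the same check might look like this (a plain numpy array stands in for the test class's `_dummydata` helper):

```python
import unittest.mock as mock

import numpy as np

from iris._lazy_data import as_lazy_data

test_shape = (2, 1, 0, 2)
data = np.zeros(test_shape, dtype='f4')

# Patch _optimum_chunksize so we can detect whether it gets called.
with mock.patch('iris._lazy_data._optimum_chunksize') as opt_patch:
    result = as_lazy_data(data, chunks=test_shape)

# The zero-length dimension means chunk optimisation is skipped entirely,
# and the dask array keeps the original shape.
assert not opt_patch.called
assert result.shape == test_shape
```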
