
Commit cdc23ab

Review changes.

pp-mo committed Aug 20, 2019
1 parent f35bbd5 commit cdc23ab
Showing 3 changed files with 33 additions and 27 deletions.
47 changes: 21 additions & 26 deletions lib/iris/_lazy_data.py
@@ -23,16 +23,12 @@
from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

try:  # Python 3
    from collections.abc import Iterable
except ImportError:  # Python 2
    from collections import Iterable
from functools import wraps

import dask
import dask.array as da
import dask.array.core
import dask.config
import dask.utils

import numpy as np
import numpy.ma as ma
@@ -101,27 +97,17 @@ def _optimum_chunksize(chunks, shape,
"chunks = [c[0] for c in normalise_chunks('auto', ...)]".
"""
# Return chunks unchanged, for types of invocation we don't comprehend.
if (any(elem <= 0 for elem in shape) or
not isinstance(chunks, Iterable) or
len(chunks) != len(shape)):
# Don't modify chunks for special values like -1, (0,), 'auto',
# or if shape contains 0 or -1 (like raw landsea-mask data proxies).
return chunks

# Set the chunksize limit.
if limit is None:
# Fetch the default 'optimal' chunksize from the dask config.
limit = dask.config.get('array.chunk-size')
# Convert to bytes
limit = da.core.parse_bytes(limit)
limit = dask.utils.parse_bytes(limit)

point_size_limit = limit / dtype.itemsize

# Create result chunks, starting with a copy of the input.
result = list(chunks)
if shape is None:
shape = result[:]

if np.prod(result) < point_size_limit:
# If size is less than maximum, expand the chunks, multiplying later
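
As an aside (not part of the commit), the expansion step this hunk leads into can be sketched roughly as follows. This is an illustrative re-implementation under stated assumptions, not the Iris code: it assumes the "multiply later (inner) dims first" strategy the comment above describes, and the function name and default limit are invented for the example.

import numpy as np

def sketch_optimum_chunksize(chunks, shape,
                             limit_bytes=8 * 1024 * 1024 * 2, itemsize=4):
    # Illustrative sketch only -- not the Iris implementation.
    # How many array points fit within the byte limit?
    point_size_limit = limit_bytes / itemsize
    result = list(chunks)
    if np.prod(result) < point_size_limit:
        # Chunk is smaller than the limit: grow it, expanding the later
        # (inner) dimensions first, but never beyond the full 'shape'.
        for i_dim in range(len(result) - 1, -1, -1):
            factor = int(point_size_limit // np.prod(result))
            if factor <= 1:
                break
            result[i_dim] = min(shape[i_dim], result[i_dim] * factor)
    return result

# For example, a (1, 10, 20) source chunk in a (50, 100, 2000) array:
print(sketch_optimum_chunksize((1, 10, 20), (50, 100, 2000)))
# --> [20, 100, 2000]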
@@ -155,32 +141,41 @@ def as_lazy_data(data, chunks=None, asarray=False):
    Args:

    * data:
        An array. This will be converted to a dask array.
    * data (array-like):
        An indexable object with 'shape', 'dtype' and 'ndim' properties.
        This will be converted to a dask array.

    Kwargs:

    * chunks:
        Describes how the created dask array should be split up. Defaults to a
        value first defined in biggus (being `8 * 1024 * 1024 * 2`).
        For more information see
        http://dask.pydata.org/en/latest/array-creation.html#chunks.
    * chunks (list of int):
        If present, a source chunk shape, e.g. for a chunked netcdf variable.
    * asarray:
    * asarray (bool):
        If True, then chunks will be converted to instances of `ndarray`.
        Set to False (default) to pass chunks through unchanged.

    Returns:
        The input array converted to a dask array.

    .. note::
        The result chunk size is a multiple of 'chunks', if given, up to the
        dask default chunksize, i.e. `dask.config.get('array.chunk-size')`,
        or the full data shape if that is smaller.
        If 'chunks' is not given, the result has chunks of the full data shape,
        but reduced by a factor if that exceeds the dask default chunksize.

    """
    if chunks is None:
        # No existing chunks : Make a chunk the shape of the entire input array
        # (but we will subdivide it if too big).
        chunks = list(data.shape)

    # Expand or reduce the basic chunk shape to an optimum size.
    chunks = _optimum_chunksize(chunks, shape=data.shape, dtype=data.dtype)
    # Adjust chunk size for better dask performance,
    # NOTE: but only if no shape dimension is zero, so that we can handle the
    # PPDataProxy of "raw" landsea-masked fields, which have a shape of (0, 0).
    if all(elem > 0 for elem in data.shape):
        # Expand or reduce the basic chunk shape to an optimum size.
        chunks = _optimum_chunksize(chunks, shape=data.shape, dtype=data.dtype)

    if isinstance(data, ma.core.MaskedConstant):
        data = ma.masked_array(data.data, mask=data.mask)
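For orientation (not part of the diff), the revised `as_lazy_data` behaviour can be exercised like this -- a sketch, assuming an Iris installation including this change and dask's default 'array.chunk-size':

import numpy as np
from iris._lazy_data import as_lazy_data

# Small array, no source chunking given: the starting chunk is the full
# shape, which is already below the dask size limit, so it is kept.
small = np.zeros((100, 100), dtype='f4')
print(as_lazy_data(small).chunks)  # --> ((100,), (100,))

# A shape containing zero (e.g. a "raw" landsea-masked PP field proxy)
# now skips chunk optimisation entirely instead of mis-handling it.
empty = np.zeros((0, 0), dtype='f4')
print(as_lazy_data(empty).shape)  # --> (0, 0)
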
4 changes: 3 additions & 1 deletion lib/iris/fileformats/netcdf.py
@@ -510,8 +510,10 @@ def _get_cf_var_data(cf_var, filename):
                            netCDF4.default_fillvals[cf_var.dtype.str[1:]])
    proxy = NetCDFDataProxy(cf_var.shape, dtype, filename, cf_var.cf_name,
                            fill_value)
    # Get the chunking specified for the variable : this is either a shape, or
    # maybe the string "contiguous".
    chunks = cf_var.cf_data.chunking()
    # Chunks can be an iterable, or `'contiguous'`.
    # In the "contiguous" case, pass chunks=None to 'as_lazy_data'.
    if chunks == 'contiguous':
        chunks = None
    return as_lazy_data(proxy, chunks=chunks)
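
For context (a sketch, not part of the change): netCDF4's `Variable.chunking()` returns either a per-dimension chunk list or the string 'contiguous', which is why the translation to chunks=None is needed before calling 'as_lazy_data'. The file and variable names here are hypothetical:

import netCDF4

ds = netCDF4.Dataset('example.nc')     # hypothetical file
var = ds.variables['air_temperature']  # hypothetical variable name

chunks = var.chunking()  # e.g. [120, 30, 40], or the string 'contiguous'
if chunks == 'contiguous':
    # No on-disk chunking: let as_lazy_data start from the full shape.
    chunks = None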
9 changes: 9 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_as_lazy_data.py
@@ -137,6 +137,15 @@ def test_large_specific_chunk_passthrough(self):
                                           dtype=np.dtype('f4'))])
        self.assertEqual(result.shape, huge_test_shape)

    def test_shapeless_data(self):
        # Check that chunk optimisation is skipped if shape contains a zero.
        limitcall_patch = self.patch('iris._lazy_data._optimum_chunksize')
        test_shape = (2, 1, 0, 2)
        data = self._dummydata(test_shape)
        result = as_lazy_data(data, chunks=test_shape)
        self.assertFalse(limitcall_patch.called)
        self.assertEqual(result.shape, test_shape)


if __name__ == '__main__':
    tests.main()
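
The new test relies on the iris test helper `self.patch`; a roughly equivalent standalone version, using `unittest.mock` (or the `mock` backport on Python 2) and a plain numpy array in place of `self._dummydata`, might look like this:

import numpy as np
from unittest import mock

from iris._lazy_data import as_lazy_data

def test_shapeless_data_standalone():
    test_shape = (2, 1, 0, 2)
    data = np.zeros(test_shape, dtype='f4')
    with mock.patch('iris._lazy_data._optimum_chunksize') as opt_patch:
        result = as_lazy_data(data, chunks=test_shape)
    # The zero-length dimension means chunk optimisation is skipped.
    assert not opt_patch.called
    assert result.shape == test_shape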
