Skip to content

Commit

Permalink
Rename chunking routine and pass dtype.
Browse files: browse the repository at this point in the history
  • Loading branch information
pp-mo committed Jul 26, 2019
1 parent f4d77c4 commit 2082639
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 18 deletions.
8 changes: 4 additions & 4 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ def is_lazy_data(data):
return result


def _optimise_chunksize(chunks, shape,
limit=None,
dtype=np.dtype('f4')):
def _optimum_chunksize(chunks, shape,
limit=None,
dtype=np.dtype('f4')):
"""
Reduce or increase an initial chunk shape to get close to a chosen ideal
size, while prioritising the splitting of the earlier (outer) dimensions
Expand Down Expand Up @@ -177,7 +177,7 @@ def as_lazy_data(data, chunks=None, asarray=False):
chunks = list(data.shape)

# Expand or reduce the basic chunk shape to an optimum size.
chunks = _optimise_chunksize(chunks, shape=data.shape)
chunks = _optimum_chunksize(chunks, shape=data.shape, dtype=data.dtype)

if isinstance(data, ma.core.MaskedConstant):
data = ma.masked_array(data.data, mask=data.mask)
Expand Down
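8 changes: 4 additions & 4 deletions (file path missing from this view; presumably the unit-test module for iris.fileformats.netcdf._get_cf_var_data)
Original file line number Diff line number Diff line change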
Expand Up @@ -26,7 +26,7 @@
from dask.array import Array as dask_array
import numpy as np

from iris._lazy_data import _optimise_chunksize
from iris._lazy_data import _optimum_chunksize
import iris.fileformats.cf
from iris.fileformats.netcdf import _get_cf_var_data
from iris.tests import mock
Expand All @@ -36,7 +36,7 @@ class Test__get_cf_var_data(tests.IrisTest):
def setUp(self):
self.filename = 'DUMMY'
self.shape = (300000, 240, 200)
self.expected_chunks = _optimise_chunksize(self.shape, self.shape)
self.expected_chunks = _optimum_chunksize(self.shape, self.shape)

def _make(self, chunksizes):
cf_data = mock.Mock(_FillValue=None)
Expand All @@ -59,12 +59,12 @@ def test_cf_data_chunks(self):
cf_var = self._make(chunks)
lazy_data = _get_cf_var_data(cf_var, self.filename)
lazy_data_chunks = [c[0] for c in lazy_data.chunks]
expected_chunks = _optimise_chunksize(chunks, self.shape)
expected_chunks = _optimum_chunksize(chunks, self.shape)
self.assertArrayEqual(lazy_data_chunks, expected_chunks)

def test_cf_data_no_chunks(self):
# No chunks means chunks are calculated from the array's shape by
# `iris._lazy_data._optimise_chunksize()`.
# `iris._lazy_data._optimum_chunksize()`.
chunks = None
cf_var = self._make(chunks)
lazy_data = _get_cf_var_data(cf_var, self.filename)
Expand Down
20 changes: 10 additions & 10 deletions lib/iris/tests/unit/lazy_data/test_as_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import numpy as np
import numpy.ma as ma

from iris._lazy_data import as_lazy_data, _optimise_chunksize
from iris._lazy_data import as_lazy_data, _optimum_chunksize
from iris.tests import mock


Expand Down Expand Up @@ -84,8 +84,8 @@ def test_chunk_size_limiting(self):
]
err_fmt = 'Result of optimising chunks {} was {}, expected {}'
for (shape, expected) in given_shapes_and_resulting_chunks:
chunks = _optimise_chunksize(shape, shape,
limit=self.FIXED_CHUNKSIZE_LIMIT)
chunks = _optimum_chunksize(shape, shape,
limit=self.FIXED_CHUNKSIZE_LIMIT)
msg = err_fmt.format(shape, chunks, expected)
self.assertEqual(chunks, expected, msg)

Expand All @@ -100,23 +100,23 @@ def test_chunk_size_expanding(self):
]
err_fmt = 'Result of optimising shape={};chunks={} was {}, expected {}'
for (shape, fullshape, expected) in given_shapes_and_resulting_chunks:
chunks = _optimise_chunksize(chunks=shape, shape=fullshape,
limit=self.FIXED_CHUNKSIZE_LIMIT)
chunks = _optimum_chunksize(chunks=shape, shape=fullshape,
limit=self.FIXED_CHUNKSIZE_LIMIT)
msg = err_fmt.format(fullshape, shape, chunks, expected)
self.assertEqual(chunks, expected, msg)

def test_default_chunksize(self):
# Check that the "ideal" chunksize is taken from the dask config.
with dask.config.set({'array.chunk-size': '20b'}):
chunks = _optimise_chunksize((1, 8),
shape=(400, 20),
dtype=np.dtype('f4'))
chunks = _optimum_chunksize((1, 8),
shape=(400, 20),
dtype=np.dtype('f4'))
self.assertEqual(chunks, (1, 4))

def test_default_chunks_limiting(self):
# Check that chunking is still controlled when no specific 'chunks'
# is passed.
limitcall_patch = self.patch('iris._lazy_data._optimise_chunksize')
limitcall_patch = self.patch('iris._lazy_data._optimum_chunksize')
test_shape = (3, 2, 4)
data = self._dummydata(test_shape)
as_lazy_data(data)
Expand All @@ -125,7 +125,7 @@ def test_default_chunks_limiting(self):

def test_large_specific_chunk_passthrough(self):
# Check that even a too-large specific 'chunks' arg is honoured.
limitcall_patch = self.patch('iris._lazy_data._optimise_chunksize')
limitcall_patch = self.patch('iris._lazy_data._optimum_chunksize')
huge_test_shape = (1001, 1002, 1003, 1004)
data = self._dummydata(huge_test_shape)
result = as_lazy_data(data, chunks=huge_test_shape)
Expand Down

0 comments on commit 2082639

Please sign in to comment.