Skip to content

Commit

Permalink
Merge pull request #1 from Mikejmnez/new_branch
Browse files Browse the repository at this point in the history
moved definitions to zarr backends
  • Loading branch information
Miguel Jimenez authored May 22, 2020
2 parents 89a780b + 6f6eb23 commit 62893ab
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 49 deletions.
51 changes: 2 additions & 49 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ def maybe_decode_store(store, lock=False):
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
ds2._file_obj = ds._file_obj

else: # file is zarr!
else:

# adapted from Dataset.Chunk() and taken from open_zarr
if not isinstance(chunks, (int, dict)):
Expand All @@ -579,58 +579,11 @@ def maybe_decode_store(store, lock=False):
if isinstance(chunks, int):
chunks = dict.fromkeys(ds.dims, chunks)

if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
chunks = dict(zip(ds.dims, chunks))

def get_chunk(name, var, chunks):
chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))

# Coordinate labels aren't chunked
if var.ndim == 1 and var.dims[0] == name:
return chunk_spec

if chunks == "auto":
return chunk_spec

for dim in var.dims:
if dim in chunks:
spec = chunks[dim]
if isinstance(spec, int):
spec = (spec,)
if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
if any(s % chunk_spec[dim] for s in spec):
warnings.warn(
"Specified Dask chunks %r would "
"separate Zarr chunk shape %r for "
"dimension %r. This significantly "
"degrades performance. Consider "
"rechunking after loading instead."
% (chunks[dim], chunk_spec[dim], dim),
stacklevel=2,
)
chunk_spec[dim] = chunks[dim]
return chunk_spec

def maybe_chunk(name, var, chunks):
chunk_spec = get_chunk(name, var, chunks)

if (var.ndim > 0) and (chunk_spec is not None):
# does this cause any data to be read?
token2 = tokenize(name, var._data)
name2 = "zarr-%s" % token2
var = var.chunk(chunk_spec, name=name2, lock=None)
if overwrite_encoded_chunks and var.chunks is not None:
var.encoding["chunks"] = tuple(x[0] for x in var.chunk)
return var
else:
return var

variables = {k: maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
variables = {k: backends.ZarrStore.open_group.maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
ds2 = ds._replace_vars_and_dims(variables)
return ds2
else:
ds2 = ds

return ds2


Expand Down
42 changes: 42 additions & 0 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,48 @@ def encode_variable(self, variable):
def encode_attribute(self, a):
return encode_zarr_attr_value(a)


def get_chunk(name, var, chunks):
chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))

# Coordinate labels aren't chunked
if var.ndim == 1 and var.dims[0] == name:
return chunk_spec

if chunks == "auto":
return chunk_spec

for dim in var.dims:
if dim in chunks:
spec = chunks[dim]
if isinstance(spec, int):
spec = (spec,)
if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
if any(s % chunk_spec[dim] for s in spec):
warnings.warn(
"Specified Dask chunks %r would "
"separate Zarr chunk shape %r for "
"dimension %r. This significantly "
"degrades performance. Consider "
"rechunking after loading instead."
% (chunks[dim], chunk_spec[dim], dim),
stacklevel=2,
)
chunk_spec[dim] = chunks[dim]
return chunk_spec

def maybe_chunk(name, var, chunks):
chunk_spec = get_chunk(name, var, chunks)

if (var.ndim > 0) and (chunk_spec is not None):
# does this cause any data to be read?
token2 = tokenize(name, var._data)
name2 = "zarr-%s" % token2
var = var.chunk(chunk_spec, name=name2, lock=None)
if overwrite_encoded_chunks and var.chunks is not None:
var.encoding["chunks"] = tuple(x[0] for x in var.chunk)
return var

def store(
self,
variables,
Expand Down

0 comments on commit 62893ab

Please sign in to comment.