
Getitems: support meta_array #1131

Merged: 33 commits, merged Apr 13, 2023

Commits
c268c63
Use _chunk_getitems() always
madsbk Sep 12, 2022
c7023f9
Implement getitems() always
madsbk Sep 12, 2022
89fa599
FSStore.getitems(): accept meta_array and on_error
madsbk Sep 12, 2022
f05ee3a
getitems(): handle on_error="omit"
madsbk Sep 12, 2022
0eed377
Removed the `on_error` argument
madsbk Sep 13, 2022
e578051
remove redundant check
madsbk Sep 13, 2022
03c97a8
getitems(): use Sequence instead of Iterable
madsbk Sep 13, 2022
8ba463c
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Sep 15, 2022
dfa731b
Merge branch 'main' into getitems
madsbk Sep 22, 2022
8e753d6
Merge branch 'main' into getitems
jakirkham Sep 23, 2022
f05edb1
Merge branch 'main' into getitems
jakirkham Sep 28, 2022
1d2b6ea
Typo
madsbk Oct 7, 2022
f87cb60
Merge branch 'main' into getitems
jakirkham Oct 7, 2022
eea7466
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Oct 11, 2022
05be1d4
Introduce a contexts argument
madsbk Oct 12, 2022
af54f7e
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Oct 12, 2022
5513d6f
CountingDict: impl. getitems()
madsbk Oct 12, 2022
ad46ccc
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Oct 12, 2022
40ecba0
added test_getitems()
madsbk Oct 12, 2022
0f224d4
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Oct 24, 2022
61d7a03
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Nov 4, 2022
81549f5
Introduce Context
madsbk Nov 4, 2022
2bfe68a
doc
madsbk Nov 4, 2022
4e9c39e
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Dec 6, 2022
a250f64
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Mar 13, 2023
02fc80d
support the new get_partial_values() method
madsbk Mar 13, 2023
61981f2
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Mar 13, 2023
d0afcde
Resolve conflict with get_partial_values()
madsbk Mar 13, 2023
7e01831
Merge branch 'main' of github.com:zarr-developers/zarr-python into ge…
madsbk Apr 11, 2023
d9838ef
make contexts keyword-only
madsbk Apr 11, 2023
c3ee95f
Introduce ConstantMap
madsbk Apr 11, 2023
ec5f396
use typing.Mapping
madsbk Apr 11, 2023
a1d3520
test_constant_map
madsbk Apr 11, 2023
31 changes: 30 additions & 1 deletion zarr/_storage/store.py
@@ -2,10 +2,11 @@
import os
from collections.abc import MutableMapping
from string import ascii_letters, digits
from typing import Any, List, Mapping, Optional, Union
from typing import Any, Sequence, List, Mapping, Optional, Union

from zarr.meta import Metadata2, Metadata3
from zarr.util import normalize_storage_path
from zarr.context import Context

# v2 store keys
array_meta_key = '.zarray'
@@ -129,6 +130,34 @@ def _ensure_store(store: Any):
f"wrap it in Zarr.storage.KVStore. Got {store}"
)

def getitems(
self, keys: Sequence[str], contexts: Mapping[str, Context] = {}
) -> Mapping[str, Any]:
"""Retrieve data from multiple keys.

Parameters
----------
keys : Sequence[str]
The keys to retrieve
contexts : Mapping[str, Context]
A mapping of keys to their context. Each context is a mapping of store
specific information. E.g. a context could be a dict telling the store
the preferred output array type: `{"meta_array": cupy.empty(())}`
Comment on lines +144 to +147
Contributor:

I don't understand why the context needs to be key specific. If a user wants to load data into the GPU, surely they want to do that for the entire read, not just some chunks. It seems like we could simplify this a lot by just providing one single context to getitems, rather than a mapping.

There is also likely a performance cost to doing it this way, no? If you're reading 1000s of chunks at a time, it seems silly to be carrying around all these big mappings to the same context.

Contributor Author (@madsbk, Mar 10, 2023):

Agree, this is to support the generic context approach. Although, it is possible to limit the performance impact by using a custom Mapping that returns the same value for all keys.
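
For illustration, such a constant mapping might look like the sketch below (hypothetical code, not the PR's; commit c3ee95f later introduces a `ConstantMap` along these lines, whose actual implementation may differ):

```python
from collections.abc import Mapping


class ConstantMap(Mapping):
    """Read-only mapping that returns the same value for every key.

    Avoids materializing one dict entry per chunk key when all chunks
    share a single context.
    """

    def __init__(self, keys, constant):
        self._keys = frozenset(keys)
        self._constant = constant

    def __getitem__(self, key):
        if key not in self._keys:
            raise KeyError(key)
        return self._constant

    def __iter__(self):
        return iter(self._keys)

    def __len__(self):
        return len(self._keys)


contexts = ConstantMap(["0", "1", "2"], {"meta_array": "device-array"})
assert contexts["1"] == {"meta_array": "device-array"}
```

Every lookup returns the one shared context object, so the cost is independent of how many chunk keys are read.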

Also notice, the current code implements the context approach where the meta_array argument is embedded within the context argument. We could rollback to my original approach where getitems() takes the meta_array as an argument directly:

    def getitems(
        self, keys: Sequence[str], meta_array: NDArrayLike
    ) -> Mapping[str, Any]:

Contributor:

I think we should not try to cover generic context behavior for hypothetical use cases right now. We should strive to make the API and implementation as simple as possible to achieve the specific goal.

At the same time, users of this API should recognize that it is experimental and may change in the future. I fully support developing the context concept more generically...I just don't think this PR should be blocked on that.


Returns
-------
Mapping
A collection mapping the input keys to their results.

Notes
-----
This default implementation uses __getitem__() to read each key sequentially and
ignores contexts. Override this method to implement concurrent reads of multiple
keys and/or to utilize the contexts.
"""

return {k: self[k] for k in keys if k in self}
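
As a sketch of how a store might act on these per-key contexts when overriding getitems(), consider this toy store (`DeviceHintStore` and its attributes are made-up illustrations, not zarr classes; it only records the hint rather than decoding into the requested array type):

```python
from typing import Any, Mapping, Sequence


class DeviceHintStore:
    """Toy dict-backed store whose getitems() inspects per-key contexts.

    Purely illustrative: a real zarr store would subclass BaseStore and
    decode each value into the array type hinted by ``meta_array``;
    here we only record which keys carried a hint.
    """

    def __init__(self, data):
        self._data = dict(data)
        self.hinted_keys = []

    def getitems(
        self, keys: Sequence[str], contexts: Mapping[str, Any] = {}
    ) -> Mapping[str, Any]:
        for k in keys:
            # Contexts are optional and per-key; fall back to an empty one.
            if "meta_array" in contexts.get(k, {}):
                self.hinted_keys.append(k)
        # Same shape as the default implementation: missing keys are omitted.
        return {k: self._data[k] for k in keys if k in self._data}


store = DeviceHintStore({"0": b"\x00", "1": b"\x01"})
out = store.getitems(["0", "1", "2"], contexts={"1": {"meta_array": object()}})
assert sorted(out) == ["0", "1"]
```
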
Member:

Know fsspec-based storage layers are using a dict return value, but am wondering if we should be doing something different (like returning an iterable). Asking since this would make the read blocking vs. a bit more lazy. The latter can be useful when working with operations that take a bit longer (like reading from the cloud or parallelizing several reads)

cc @martindurant (who also may have thoughts)

Member:

The point of it being blocking, is that many keys may be being fetched concurrently. If you make it an iterator, you lose that, unless you have something that can wait on an async iterator, which in turn has first-completed working.

Contributor Author:

I agree with both of you.
The API specifies a return type of Mapping[str, Any], so it is possible to return a lazy mapping that only reads self[k] when accessed. But let's do that in a follow-up PR?
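
A lazy mapping along those lines might defer each read until the key is accessed (a sketch only; the follow-up PR mentioned here may look quite different):

```python
from collections.abc import Mapping


class LazyGetitems(Mapping):
    """Map each key to store[key], deferring the read until access."""

    def __init__(self, store, keys):
        self._store = store
        # Membership is resolved up front so ``k in result`` stays cheap.
        self._keys = [k for k in keys if k in store]

    def __getitem__(self, key):
        if key not in self._keys:
            raise KeyError(key)
        return self._store[key]  # the actual (possibly slow) read happens here

    def __iter__(self):
        return iter(self._keys)

    def __len__(self):
        return len(self._keys)


backing = {"a": b"1", "b": b"2"}
lazy = LazyGetitems(backing, ["a", "b", "missing"])
assert list(lazy) == ["a", "b"]
assert lazy["b"] == b"2"
```

Note the caveat raised above still applies: deferring reads this way gives up the chance to fetch all keys concurrently in one batch.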

Member:

Understood, and will be interested in how that looks.

By the way, I mentioned elsewhere the possibility of passing key-specific metadata to the get function; that would happen in this same signature. I wonder if you have any use for an array where only some of it is destined for the GPU (perhaps because that data already exists there and doesn't need loading at all).

Member:

Do you have an example of what they would look like, @martindurant?

Member:

As noted above, think this discussion gets hairy enough it should be broken out into an issue (likely two) and discussed separately.

Contributor Author:

> As noted above, think this discussion gets hairy enough it should be broken out into an issue (likely two) and discussed separately.

If we want to go the contexts route, we don't need a meta_array argument

Member:

Does this also imply that getitem() should have a contexts parameter?

Contributor Author:

> Does this also imply that getitem() should have a contexts parameter?

Good point, yes getitem() should also take a contexts parameter

Contributor Author:

Wait a second, there is no getitem() and _chunk_getitem() anymore :)



class Store(BaseStore):
"""Abstract store class used by implementations following the Zarr v2 spec.
19 changes: 19 additions & 0 deletions zarr/context.py
@@ -0,0 +1,19 @@

from typing import TypedDict

from numcodecs.compat import NDArrayLike


class Context(TypedDict, total=False):
""" A context for component-specific information

All keys are optional. Any component reading the context must provide
a default implementation in the case a key cannot be found.

Items
-----
meta_array : array-like, optional
An array-like instance to use for determining the preferred output
array type.
"""
meta_array: NDArrayLike
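
To illustrate the fallback contract the docstring describes, a component reading this context might do something like the following (a sketch using numpy as the stand-in array-like; `Context` and `preferred_empty` are re-declared locally so the snippet is self-contained):

```python
from typing import TypedDict

import numpy as np


class Context(TypedDict, total=False):  # mirrors zarr.context.Context
    meta_array: np.ndarray


def preferred_empty(context: Context, shape):
    # The reader supplies its own default when the key is absent,
    # as the docstring above requires.
    meta = context.get("meta_array", np.empty(()))
    return np.empty_like(meta, shape=shape)


# With an empty context the fallback (a plain numpy array) is used;
# passing e.g. a cupy array as meta_array would allocate on the device instead.
out = preferred_empty({}, (2, 3))
assert out.shape == (2, 3)
```
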
84 changes: 24 additions & 60 deletions zarr/core.py
@@ -1257,20 +1257,13 @@ def _get_selection(self, indexer, out=None, fields=None):
else:
check_array_shape('out', out, out_shape)

# iterate over chunks
if not hasattr(self.chunk_store, "getitems") or \
Member:

❤️ for removing hasattr hacks in general. 👍

any(map(lambda x: x == 0, self.shape)):
# sequentially get one key at a time from storage
for chunk_coords, chunk_selection, out_selection in indexer:

# load chunk selection into output array
self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
drop_axes=indexer.drop_axes, fields=fields)
else:
# allow storage to get multiple items at once
if math.prod(out_shape) > 0:
# get chunks
lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
self._chunk_getitems(lchunk_coords, lchunk_selection, out, lout_selection,
drop_axes=indexer.drop_axes, fields=fields)
self._chunk_getitems(
lchunk_coords, lchunk_selection, out, lout_selection,
drop_axes=indexer.drop_axes, fields=fields
)

if out.shape:
return out
@@ -1930,76 +1923,44 @@ def _process_chunk(
# store selected data in output
out[out_selection] = tmp

def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
Member:

Is this so private that no one could have made use of it in a subclass?

Contributor Author:

Yes, AFAICT.

Member:

Yeah that's my understanding as well. Or at least by prefixing with _ we have warned users this is an implementation detail (subject to change) that they shouldn't rely on.

Member:

@jakirkham: this pre-dates me, so I defer. But if it's not documented somewhere we might want to review if that holds across the board. For me, _x is typically valid for subclassing by developers, otherwise it would be a __x. (i.e. public, protected, private in Java-parlance)

Member:

It's a Python thing generally (not specific to Zarr).

Member:

Single underscore is by far the most common thing to do, to show intent rather than enforce any privateness (which double underscore doesn't do either).

drop_axes=None, fields=None):
"""Obtain part or whole of a chunk.
def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
drop_axes=None, fields=None):
"""Obtain part or whole of chunks.

Parameters
----------
chunk_coords : tuple of ints
Indices of the chunk.
chunk_selection : selection
Location of region within the chunk to extract.
chunk_coords : list of tuple of ints
Indices of the chunks.
chunk_selection : list of selections
Location of region within the chunks to extract.
out : ndarray
Array to store result in.
out_selection : selection
Location of region within output array to store results in.
out_selection : list of selections
Location of regions within output array to store results in.
drop_axes : tuple of ints
Axes to squeeze out of the chunk.
fields
TODO

"""
out_is_ndarray = True
try:
out = ensure_ndarray_like(out)
except TypeError:
out_is_ndarray = False

assert len(chunk_coords) == len(self._cdata_shape)

# obtain key for chunk
ckey = self._chunk_key(chunk_coords)

try:
# obtain compressed data for chunk
cdata = self.chunk_store[ckey]

except KeyError:
# chunk not initialized
if self._fill_value is not None:
if fields:
fill_value = self._fill_value[fields]
else:
fill_value = self._fill_value
out[out_selection] = fill_value

else:
self._process_chunk(out, cdata, chunk_selection, drop_axes,
out_is_ndarray, fields, out_selection)

def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
drop_axes=None, fields=None):
"""As _chunk_getitem, but for lists of chunks

This gets called where the storage supports ``getitems``, so that
it can decide how to fetch the keys, allowing concurrency.
"""
out_is_ndarray = True
try:
out = ensure_ndarray_like(out)
except TypeError: # pragma: no cover
out_is_ndarray = False

# Keys to retrieve
ckeys = [self._chunk_key(ch) for ch in lchunk_coords]

partial_read_decode = False
# Check if we can do a partial read
if (
self._partial_decompress
and self._compressor
and self._compressor.codec_id == "blosc"
and hasattr(self._compressor, "decode_partial")
and not fields
and self.dtype != object
and hasattr(self.chunk_store, "getitems")
):
partial_read_decode = True
cdatas = {
@@ -2008,8 +1969,11 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
if ckey in self.chunk_store
}
else:
partial_read_decode = False
cdatas = self.chunk_store.getitems(ckeys, on_error="omit")
contexts = {}
if not isinstance(self._meta_array, np.ndarray):
contexts = {k: {"meta_array": self._meta_array} for k in ckeys}
cdatas = self.chunk_store.getitems(ckeys, contexts=contexts)

for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection):
if ckey in cdatas:
self._process_chunk(
8 changes: 6 additions & 2 deletions zarr/storage.py
@@ -31,7 +31,7 @@
from os import scandir
from pickle import PicklingError
from threading import Lock, RLock
from typing import Optional, Union, List, Tuple, Dict, Any
from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any
import uuid
import time

@@ -42,6 +42,7 @@
ensure_contiguous_ndarray_like
)
from numcodecs.registry import codec_registry
from zarr.context import Context

from zarr.errors import (
MetadataError,
@@ -1361,7 +1362,10 @@ def _normalize_key(self, key):

return key.lower() if self.normalize_keys else key

def getitems(self, keys, **kwargs):
def getitems(
self, keys: Sequence[str], contexts: Mapping[str, Context] = {}
) -> Mapping[str, Any]:

keys_transformed = [self._normalize_key(key) for key in keys]
results = self.map.getitems(keys_transformed, on_error="omit")
# The function calling this method may not recognize the transformed keys
28 changes: 28 additions & 0 deletions zarr/tests/test_storage.py
@@ -9,6 +9,7 @@
import tempfile
from contextlib import contextmanager
from pickle import PicklingError
from typing import Any, Mapping, Sequence
from zipfile import ZipFile

import numpy as np
@@ -20,6 +21,7 @@
import zarr
from zarr._storage.store import _get_hierarchy_metadata
from zarr.codecs import BZ2, AsType, Blosc, Zlib
from zarr.context import Context
from zarr.convenience import consolidate_metadata
from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataError
from zarr.hierarchy import group
@@ -2572,3 +2574,29 @@ def test_meta_prefix_6853():

fixtures = group(store=DirectoryStore(str(fixture)))
assert list(fixtures.arrays())


def test_getitems_contexts():

class MyStore(CountingDict):
def __init__(self):
super().__init__()
self.last_contexts = None

def getitems(
self, keys: Sequence[str], contexts: Mapping[str, Context] = {}
) -> Mapping[str, Any]:
self.last_contexts = contexts
return {k: self.wrapped[k] for k in keys if k in self.wrapped}

store = MyStore()
z = zarr.create(shape=(10,), store=store)

# By default, no contexts are given to the store's getitems()
z[0]
assert len(store.last_contexts) == 0

# Setting a non-default meta_array will create contexts for the store's getitems()
z._meta_array = "my_meta_array"
z[0]
assert store.last_contexts == {'0': {'meta_array': 'my_meta_array'}}
9 changes: 9 additions & 0 deletions zarr/tests/util.py
@@ -1,6 +1,8 @@
import collections
import os
import tempfile
from typing import Any, Mapping, Sequence
from zarr.context import Context

from zarr.storage import Store
from zarr._storage.v3 import StoreV3
@@ -42,6 +44,13 @@ def __delitem__(self, key):
self.counter['__delitem__', key] += 1
del self.wrapped[key]

def getitems(
self, keys: Sequence[str], contexts: Mapping[str, Context] = {}
) -> Mapping[str, Any]:
for key in keys:
self.counter['__getitem__', key] += 1
return {k: self.wrapped[k] for k in keys if k in self.wrapped}


class CountingDictV3(CountingDict, StoreV3):
pass