From ddfcc78dbc7f92d7430f048ba34d1bae3554c92d Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 25 May 2022 18:37:58 +0200 Subject: [PATCH 1/6] use zarr to validate attrs --- xarray/backends/api.py | 3 +-- xarray/backends/zarr.py | 13 +++++++++++-- xarray/tests/test_backends.py | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5dd486e952e..a9ee7ea8001 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1556,9 +1556,8 @@ def to_zarr( f"'w-', 'a' and 'r+', but mode={mode!r}" ) - # validate Dataset keys, DataArray names, and attr keys/values + # validate Dataset keys, DataArray names _validate_dataset_names(dataset) - _validate_attrs(dataset) if region is not None: _validate_region(dataset, region) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 104f8aca58f..cc37c1b8190 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -320,6 +320,15 @@ def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim) ) +def _put_attrs(zarr_obj, attrs): + for key, value in attrs.items(): + try: + zarr_obj.attrs[key] = value + except TypeError as e: + raise TypeError(f"Invalid attr {key!r}: {value!r}. {e!s}") from e + return zarr_obj + + class ZarrStore(AbstractWritableDataStore): """Store for reading and writing data via zarr""" @@ -479,7 +488,7 @@ def set_dimensions(self, variables, unlimited_dims=None): ) def set_attributes(self, attributes): - self.zarr_group.attrs.put(attributes) + _put_attrs(self.zarr_group, attributes) def encode_variable(self, variable): variable = encode_zarr_variable(variable) @@ -618,7 +627,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No zarr_array = self.zarr_group.create( name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding ) - zarr_array.attrs.put(encoded_attrs) + zarr_array = _put_attrs(zarr_array, encoded_attrs) write_region = self._write_region if self._write_region is not None else {} write_region = {dim: write_region.get(dim, slice(None)) for dim in dims} diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 62c7c1aac31..8172de66631 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2443,6 +2443,22 @@ def test_write_read_select_write(self): with self.create_zarr_target() as final_store: ds_sel.to_zarr(final_store, mode="w") + @pytest.mark.parametrize("obj", [Dataset(), DataArray(name="foo")]) + def test_attributes(self, obj): + obj = obj.copy() + + obj.attrs["good"] = {"key": "value"} + ds = obj if isinstance(obj, Dataset) else obj.to_dataset() + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target) + assert_identical(ds, xr.open_zarr(store_target)) + + obj.attrs["bad"] = DataArray() + ds = obj if isinstance(obj, Dataset) else obj.to_dataset() + with self.create_zarr_target() as store_target: + with pytest.raises(TypeError, match=r"Invalid attr 'bad'"): + ds.to_zarr(store_target) + @requires_zarr class TestZarrDictStore(ZarrBase): From c3863080a274078950102cb6d01bf364e5b66053 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 1 Jun 2022 22:49:26 +0200 Subject: [PATCH 2/6] remove for loop --- xarray/backends/zarr.py | 10 +++++----- xarray/tests/test_backends.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index cc37c1b8190..d7f7e9cb078 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -321,11 +321,11 @@ def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim) def _put_attrs(zarr_obj, attrs): - for key, value in attrs.items(): - try: - zarr_obj.attrs[key] = value - except TypeError as e: - raise TypeError(f"Invalid attr {key!r}: {value!r}. {e!s}") from e + """Raise a more informative error message for invalid attrs.""" + try: + zarr_obj.attrs.put(attrs) + except TypeError as e: + raise TypeError(f"Invalid attrs. {e!s}") from e return zarr_obj diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8172de66631..45812edbdeb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2456,7 +2456,7 @@ def test_attributes(self, obj): obj.attrs["bad"] = DataArray() ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: - with pytest.raises(TypeError, match=r"Invalid attr 'bad'"): + with pytest.raises(TypeError, match=r"Invalid attrs."): ds.to_zarr(store_target) From a3516854305bf7725fdb2cc81107b753dd35c24c Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 1 Jun 2022 23:00:17 +0200 Subject: [PATCH 3/6] update docs --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c9ee52f3da0..8536bfa8357 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -93,6 +93,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- :py:meth:`Dataset.to_zarr` now allows to write all attribute types supported by `zarr-python`. + By `Mattia Almansi `_. - Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and ensure it skips missing values for float dtypes (consistent with other methods). This should not change the behavior (:pull:`6303`). From e2605e5f0d9f04093a218bb628b3c25229dc0bc7 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 1 Jun 2022 23:29:44 +0200 Subject: [PATCH 4/6] Update xarray/backends/zarr.py Co-authored-by: Deepak Cherian --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d7f7e9cb078..889551125c9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -325,7 +325,7 @@ def _put_attrs(zarr_obj, attrs): try: zarr_obj.attrs.put(attrs) except TypeError as e: - raise TypeError(f"Invalid attrs. {e!s}") from e + raise TypeError(f"Invalid attribute in Dataset.attrs.") from e return zarr_obj From c8918dfcd79adda4f120c9b9c1f725ed31636417 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 1 Jun 2022 23:33:44 +0200 Subject: [PATCH 5/6] fix flake8 --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 889551125c9..1b8b7ee81e7 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -325,7 +325,7 @@ def _put_attrs(zarr_obj, attrs): try: zarr_obj.attrs.put(attrs) except TypeError as e: - raise TypeError(f"Invalid attribute in Dataset.attrs.") from e + raise TypeError("Invalid attribute in Dataset.attrs.") from e return zarr_obj From cc6786ed8d63d264ce736518097501a9fdf557d0 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 1 Jun 2022 23:39:41 +0200 Subject: [PATCH 6/6] fix tests --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 45812edbdeb..6f92b26b0c9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2456,7 +2456,7 @@ def test_attributes(self, obj): obj.attrs["bad"] = DataArray() ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: - with pytest.raises(TypeError, match=r"Invalid attrs."): + with pytest.raises(TypeError, match=r"Invalid attribute in Dataset.attrs."): ds.to_zarr(store_target)