diff --git a/.github/stale.yml b/.github/stale.yml index f4057844d01..bb8b88505c1 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,7 +1,7 @@ # Configuration for probot-stale - https://github.com/probot/stale # Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 700 # start with a large number and reduce shortly +daysUntilStale: 600 # start with a large number and reduce shortly # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. @@ -31,6 +31,9 @@ markComment: | If this issue remains relevant, please comment here or remove the `stale` label; otherwise it will be marked as closed automatically +closeComment: | + The stalebot didn't hear anything for a while, so it closed this. Please reopen if this is still an issue. + # Comment to post when removing the stale label. # unmarkComment: > # Your comment here. @@ -40,8 +43,7 @@ markComment: | # Your comment here. # Limit the number of actions per hour, from 1-30. 
Default is 30 -limitPerRun: 1 # start with a small number - +limitPerRun: 2 # start with a small number # Limit to only `issues` or `pulls` # only: issues diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 6d482445f96..034ffee40ad 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -67,7 +67,7 @@ jobs: cp benchmarks/README_CI.md benchmarks.log .asv/results/ working-directory: ${{ env.ASV_DIR }} - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: asv-benchmark-results-${{ runner.os }} diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index ef1666359fe..f2542ab52d5 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -109,7 +109,7 @@ jobs: $PYTEST_EXTRA_FLAGS - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + uses: codecov/codecov-action@v3.0.0 with: file: ./coverage.xml flags: unittests,${{ matrix.env }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 205265b8c54..a5c1a2de5ad 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,13 +98,13 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Test results for ${{ runner.os }}-${{ matrix.python-version }} path: pytest.xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + uses: codecov/codecov-action@v3.0.0 with: file: ./coverage.xml flags: unittests @@ -118,7 +118,7 @@ jobs: if: github.repository == 'pydata/xarray' steps: - name: Upload - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index c88cf556a50..9cad271ce6f 100644 --- a/.github/workflows/pypi-release.yaml +++ 
b/.github/workflows/pypi-release.yaml @@ -41,7 +41,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: releases path: dist @@ -54,7 +54,7 @@ jobs: name: Install Python with: python-version: 3.8 - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: releases path: dist @@ -85,7 +85,7 @@ jobs: if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: releases path: dist diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 6091306ed8b..81d1c7db4b8 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -92,7 +92,7 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository == 'pydata/xarray' - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: output-${{ matrix.python-version }}-log path: output-${{ matrix.python-version }}-log @@ -114,7 +114,7 @@ jobs: - uses: actions/setup-python@v3 with: python-version: "3.x" - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: path: /tmp/workspace/logs - name: Move all log files into a single directory diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c84a0549774..1341708e5f8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,9 @@ New Features elements which trigger summarization rather than full repr in (numpy) array detailed views of the html repr (:pull:`6400`). By `Benoît Bovy <https://github.com/benbovy>`_. +- Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and + :py:meth:`Variable.chunk`. (:pull:`6471`) + By `Tom Nicholas <https://github.com/TomNicholas>`_. 
Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/conventions.py b/xarray/conventions.py index ae915069947..ebe95b62721 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -770,7 +770,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): # this will copy coordinates from encoding to attrs if "coordinates" in attrs # after the next line, "coordinates" is never in encoding # we get support for attrs["coordinates"] for free. - coords_str = pop_to(encoding, attrs, "coordinates") + coords_str = pop_to(encoding, attrs, "coordinates") or attrs.get("coordinates") if not coords_str and variable_coordinates[name]: coordinates_text = " ".join( str(coord_name) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index df1e096b021..2cf78fa7c61 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1113,6 +1113,7 @@ def chunk( name_prefix: str = "xarray-", token: str = None, lock: bool = False, + **chunks_kwargs: Any, ) -> DataArray: """Coerce this array's data into a dask arrays with the given chunks. @@ -1136,13 +1137,28 @@ def chunk( lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. Returns ------- chunked : xarray.DataArray """ - if isinstance(chunks, (tuple, list)): + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. 
Use instead '{}'", + category=FutureWarning, + ) + chunks = {} + + if isinstance(chunks, (float, str, int)): + chunks = dict.fromkeys(self.dims, chunks) + elif isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks)) + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") ds = self._to_temp_dataset().chunk( chunks, name_prefix=name_prefix, token=token, lock=lock diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index adf4de716d7..5bf0cc0ac69 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2000,6 +2000,7 @@ def chunk( name_prefix: str = "xarray-", token: str = None, lock: bool = False, + **chunks_kwargs: Any, ) -> Dataset: """Coerce all arrays in this dataset into dask arrays with the given chunks. @@ -2013,7 +2014,7 @@ def chunk( Parameters ---------- - chunks : int, "auto" or mapping of hashable to int, optional + chunks : int, tuple of int, "auto" or mapping of hashable to int, optional Chunk sizes along each dimension, e.g., ``5``, ``"auto"``, or ``{"x": 5, "y": 5}``. name_prefix : str, optional @@ -2023,6 +2024,9 @@ def chunk( lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided Returns ------- @@ -2034,7 +2038,7 @@ def chunk( Dataset.chunksizes xarray.unify_chunks """ - if chunks is None: + if chunks is None and chunks_kwargs is None: warnings.warn( "None value for 'chunks' is deprecated. " "It will raise an error in the future. 
Use instead '{}'", @@ -2044,6 +2048,8 @@ def chunk( if isinstance(chunks, (Number, str, int)): chunks = dict.fromkeys(self.dims, chunks) + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") bad_dims = chunks.keys() - self.dims.keys() if bad_dims: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index a0f5bfdcf27..efdbe10d5ef 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -266,7 +266,7 @@ def either_dict_or_kwargs( kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: - if pos_kwargs is None: + if pos_kwargs is None or pos_kwargs == {}: # Need an explicit cast to appease mypy due to invariance; see # https://github.com/python/mypy/issues/6228 return cast(Mapping[Hashable, T], kw_kwargs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a21cf8c2d97..05c70390b46 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,7 @@ import numbers import warnings from datetime import timedelta -from typing import TYPE_CHECKING, Any, Hashable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Hashable, Literal, Mapping, Sequence import numpy as np import pandas as pd @@ -1012,7 +1012,19 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: _array_counter = itertools.count() - def chunk(self, chunks={}, name=None, lock=False): + def chunk( + self, + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, + name: str = None, + lock: bool = False, + **chunks_kwargs: Any, + ) -> Variable: """Coerce this array's data into a dask array with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1034,6 +1046,9 @@ def chunk(self, chunks={}, name=None, lock=False): lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. 
+ **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. Returns ------- @@ -1049,6 +1064,11 @@ def chunk(self, chunks={}, name=None, lock=False): ) chunks = {} + if isinstance(chunks, (float, str, int, tuple, list)): + pass # dask.array.from_array can handle these directly + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + if utils.is_dict_like(chunks): chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 83e560e7208..d5d8e00d45f 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -128,6 +128,25 @@ def test_multidimensional_coordinates(self) -> None: # Should not have any global coordinates. assert "coordinates" not in attrs + def test_var_with_coord_attr(self) -> None: + # regression test for GH6310 + # don't overwrite user-defined "coordinates" attributes + orig = Dataset( + {"values": ("time", np.zeros(2), {"coordinates": "time lon lat"})}, + coords={ + "time": ("time", np.zeros(2)), + "lat": ("time", np.zeros(2)), + "lon": ("time", np.zeros(2)), + }, + ) + # Encode the coordinates, as they would be in a netCDF output file. + enc, attrs = conventions.encode_dataset_coordinates(orig) + # Make sure we have the right coordinates for each variable. + values_coords = enc["values"].attrs.get("coordinates", "") + assert set(values_coords.split()) == {"time", "lat", "lon"} + # Should not have any global coordinates. 
+ assert "coordinates" not in attrs + def test_do_not_overwrite_user_coordinates(self) -> None: orig = Dataset( coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])}, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 65efb3a732c..b8c9edd7258 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -804,6 +804,11 @@ def test_chunk(self): assert isinstance(blocked.data, da.Array) assert "testname_" in blocked.data.name + # test kwargs form of chunks + blocked = unblocked.chunk(dim_0=3, dim_1=3) + assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + def test_isel(self): assert_identical(self.dv[0], self.dv.isel(x=0)) assert_identical(self.dv, self.dv.isel(x=slice(None))) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5f368375fc0..25adda2df84 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -921,6 +921,9 @@ def test_chunk(self): expected_chunks = {"dim1": (8,), "dim2": (9,), "dim3": (10,)} assert reblocked.chunks == expected_chunks + # test kwargs form of chunks + assert data.chunk(**expected_chunks).chunks == expected_chunks + def get_dask_names(ds): return {k: v.data.name for k, v in ds.items()} @@ -947,7 +950,7 @@ def get_dask_names(ds): new_dask_names = get_dask_names(reblocked) assert reblocked.chunks == expected_chunks assert_identical(reblocked, data) - # recuhnking with same chunk sizes should not change names + # rechunking with same chunk sizes should not change names for k, v in new_dask_names.items(): assert v == orig_dask_names[k] diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index b8e2f6f4582..0168f19b921 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2154,6 +2154,40 @@ def test_coarsen_keep_attrs(self, operation="mean"): class TestVariableWithDask(VariableSubclassobjects): cls = staticmethod(lambda *args: 
Variable(*args).chunk()) + def test_chunk(self): + unblocked = Variable(["dim_0", "dim_1"], np.ones((3, 4))) + assert unblocked.chunks is None + + blocked = unblocked.chunk() + assert blocked.chunks == ((3,), (4,)) + first_dask_name = blocked.data.name + + blocked = unblocked.chunk(chunks=((2, 1), (2, 2))) + assert blocked.chunks == ((2, 1), (2, 2)) + assert blocked.data.name != first_dask_name + + blocked = unblocked.chunk(chunks=(3, 3)) + assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + + # name doesn't change when rechunking by same amount + # this fails if ReprObject doesn't have __dask_tokenize__ defined + assert unblocked.chunk(2).data.name == unblocked.chunk(2).data.name + + assert blocked.load().chunks is None + + # Check that kwargs are passed + import dask.array as da + + blocked = unblocked.chunk(name="testname_") + assert isinstance(blocked.data, da.Array) + assert "testname_" in blocked.data.name + + # test kwargs form of chunks + blocked = unblocked.chunk(dim_0=3, dim_1=3) + assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + @pytest.mark.xfail def test_0d_object_array_with_list(self): super().test_0d_object_array_with_list()