From 9c6b457bd022ee4beba395bb06e52a766dc18078 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 23 Oct 2019 18:28:20 -0700 Subject: [PATCH 1/4] Use cftime master for upstream-dev build (#3439) --- ci/azure/install.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index 2911e227172..fee886ba804 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -25,8 +25,7 @@ steps: git+https://github.com/dask/dask \ git+https://github.com/dask/distributed \ git+https://github.com/zarr-developers/zarr \ - git+https://github.com/Unidata/cftime.git@refs/pull/127/merge - # git+https://github.com/Unidata/cftime # FIXME PR 127 not merged yet + git+https://github.com/Unidata/cftime condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies From 35c75f557a510b7e81c70fcaad8cad419a12ee2e Mon Sep 17 00:00:00 2001 From: Jon Thielen Date: Wed, 23 Oct 2019 23:25:42 -0500 Subject: [PATCH 2/4] Update Terminology page to account for multidimensional coordinates (#3410) * Update Terminology page to account for multidimensional coordinates * Update xarray-docs conda environment file * Add change to whats-new.rst * Modify example description for multidimensional coords based on suggestion --- doc/contributing.rst | 4 ++-- doc/terminology.rst | 6 +++--- doc/whats-new.rst | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index 66e8377600e..028ec47e014 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -286,12 +286,12 @@ How to build the *xarray* documentation Requirements ~~~~~~~~~~~~ Make sure to follow the instructions on :ref:`creating a development environment above <contributing.dev_env>`, but -to build the docs you need to use the environment file ``doc/environment.yml``. +to build the docs you need to use the environment file ``ci/requirements/doc.yml``. .. 
code-block:: none # Create and activate the docs environment - conda env create -f doc/environment.yml + conda env create -f ci/requirements/doc.yml conda activate xarray-docs # or with older versions of Anaconda: diff --git a/doc/terminology.rst b/doc/terminology.rst index 138a99740fe..4ee56190d5f 100644 --- a/doc/terminology.rst +++ b/doc/terminology.rst @@ -27,15 +27,15 @@ Terminology ---- -**Coordinate:** An array that labels a dimension of another ``DataArray``. Loosely, the coordinate array's values can be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be assigned multiple coordinate arrays. However, only one coordinate array can be a assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general. +**Coordinate:** An array that labels a dimension or set of dimensions of another ``DataArray``. In the usual one-dimensional case, the coordinate array's values can loosely be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be labeled by multiple coordinate arrays. However, only one coordinate array can be assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general. ---- -**Dimension coordinate:** A coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. 
Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``. +**Dimension coordinate:** A one-dimensional coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``. ---- -**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.dims`` but a dimension name *not* in ``arr.dims``. These coordinate arrays are useful for auxiliary labeling. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the assigned dimensions in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``. +**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but *not* in ``arr.dims``. These coordinate arrays can be one-dimensional or multidimensional, and they are useful for auxiliary labeling. As an example, multidimensional coordinates are often used in geoscience datasets when :doc:`the data's physical coordinates (such as latitude and longitude) differ from their logical coordinates <examples/multidimensional-coords>`. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. 
Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the corresponding dimension(s) in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``. ---- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0f4d0c10f1f..9d3e64badb8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,6 +52,8 @@ Documentation :py:meth:`Dataset.resample` and explicitly state that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin <https://github.com/keewis>`_. +- Update the terminology page to address multidimensional coordinates. (:pull:`3410`) + By `Jon Thielen <https://github.com/jthielen>`_. Internal Changes ~~~~~~~~~~~~~~~~ From 52e4ef1a79ecf51691887e3c4bf3994be9a231ee Mon Sep 17 00:00:00 2001 From: crusaderky Date: Thu, 24 Oct 2019 12:43:35 +0100 Subject: [PATCH 3/4] Hack around #3440 (#3442) --- ci/azure/install.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index fee886ba804..8da0ac1b5de 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -16,7 +16,7 @@ steps: --pre \ --upgrade \ matplotlib \ - pandas \ + pandas=0.26.0.dev0+628.g03c1a3db2 \ # FIXME https://github.com/pydata/xarray/issues/3440 scipy # numpy \ # FIXME https://github.com/pydata/xarray/issues/3409 pip install \ From 652dd3ca77dd19bbd1ab21fe556340c1904ec382 Mon Sep 17 00:00:00 2001 From: crusaderky Date: Thu, 24 Oct 2019 13:53:20 +0100 Subject: [PATCH 4/4] minor lint tweaks (#3429) * pyflakes 2.1.1 and f-strings * Tweaks from mypy 0.740 (will skip to 0.750: mypy#7735) * black -t py36 * isort * fix tests --- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/zarr.py | 2 +- xarray/core/accessor_dt.py | 4 ++- xarray/core/arithmetic.py | 2 +- xarray/core/coordinates.py | 2 +- xarray/core/dataset.py | 10 +++---- xarray/core/indexing.py | 51 +++++++++++++--------------------- xarray/core/missing.py | 8 +++--- xarray/core/resample.py | 2 +- xarray/core/rolling.py | 5 ++-- 
xarray/plot/__init__.py | 2 +- xarray/plot/facetgrid.py | 8 +++--- 
xarray/tests/test_dataarray.py | 6 ++-- xarray/tutorial.py | 2 +- 14 files changed, 47 insertions(+), 59 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 92c29502994..51ed512f98b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -245,7 +245,7 @@ def prepare_variable( dtype=dtype, dimensions=variable.dims, fillvalue=fillvalue, - **kwargs + **kwargs, ) else: nc4_var = self.ds[name] diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d924e1da4fc..6d4ebb02a11 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -467,7 +467,7 @@ def open_zarr( drop_variables=None, consolidated=False, overwrite_encoded_chunks=False, - **kwargs + **kwargs, ): """Load and decode a dataset from a Zarr store. diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 832eb88c5fa..aff6fbc6691 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -178,7 +178,9 @@ def __init__(self, obj): ) self._obj = obj - def _tslib_field_accessor(name, docstring=None, dtype=None): + def _tslib_field_accessor( # type: ignore + name: str, docstring: str = None, dtype: np.dtype = None + ): def f(self, dtype=dtype): if dtype is None: dtype = self._obj.dtype diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 137db034c95..571dfbe70ed 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -76,7 +76,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): dataset_join=dataset_join, dataset_fill_value=np.nan, kwargs=kwargs, - dask="allowed" + dask="allowed", ) # this has no runtime function - these are listed so IDEs know these diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 0c11e8efa38..eb2ceb1be07 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -367,7 +367,7 @@ def remap_label_indexers( indexers: Mapping[Hashable, Any] = None, method: str = None, tolerance=None, - 
**indexers_kwargs: Any + **indexers_kwargs: Any, ) -> Tuple[dict, dict]: # TODO more precise return type after annotations in indexing """Remap indexers from obj.coords. If indexer is an instance of DataArray and it has coordinate, then this coordinate diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3fdde8fa4e3..12d5cbdc9f3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2967,15 +2967,13 @@ def expand_dims( for a in axis: if a < -result_ndim or result_ndim - 1 < a: raise IndexError( - "Axis {a} is out of bounds of the expanded" - " dimension size {dim}.".format( - a=a, v=k, dim=result_ndim - ) + f"Axis {a} of variable {k} is out of bounds of the " + f"expanded dimension size {result_ndim}" ) axis_pos = [a if a >= 0 else result_ndim + a for a in axis] if len(axis_pos) != len(set(axis_pos)): - raise ValueError("axis should not contain duplicate" " values.") + raise ValueError("axis should not contain duplicate values") # We need to sort them to make sure `axis` equals to the # axis positions of the result array. 
zip_axis_dim = sorted(zip(axis_pos, dim.items())) @@ -3131,7 +3129,7 @@ def reorder_levels( coord = self._variables[dim] index = self.indexes[dim] if not isinstance(index, pd.MultiIndex): - raise ValueError("coordinate %r has no MultiIndex" % dim) + raise ValueError(f"coordinate {dim} has no MultiIndex") new_index = index.reorder_levels(order) variables[dim] = IndexVariable(coord.dims, new_index) indexes[dim] = new_index diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 3d2e634eaa8..b9809a8d2b9 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -59,8 +59,7 @@ def _sanitize_slice_element(x): if isinstance(x, np.ndarray): if x.ndim != 0: raise ValueError( - "cannot use non-scalar arrays in a slice for " - "xarray indexing: {}".format(x) + f"cannot use non-scalar arrays in a slice for xarray indexing: {x}" ) x = x[()] @@ -128,9 +127,9 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No # unlike pandas, in xarray we never want to silently convert a # slice indexer into an array indexer raise KeyError( - "cannot represent labeled-based slice indexer for " - "dimension %r with a slice over integer positions; " - "the index is unsorted or non-unique" % index_name + "cannot represent labeled-based slice indexer for dimension " + f"{index_name!r} with a slice over integer positions; the index is " + "unsorted or non-unique" ) elif is_dict_like(label): @@ -190,7 +189,7 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No ) indexer = get_indexer_nd(index, label, method, tolerance) if np.any(indexer < 0): - raise KeyError("not all values found in index %r" % index_name) + raise KeyError(f"not all values found in index {index_name!r}") return indexer, new_index @@ -208,7 +207,7 @@ def get_dim_indexers(data_obj, indexers): if k not in data_obj.dims and k not in data_obj._level_coords ] if invalid: - raise ValueError("dimensions or multi-index levels %r do not exist" % invalid) + 
raise ValueError(f"dimensions or multi-index levels {invalid!r} do not exist") level_indexers = defaultdict(dict) dim_indexers = {} @@ -223,8 +222,8 @@ def get_dim_indexers(data_obj, indexers): for dim, level_labels in level_indexers.items(): if dim_indexers.get(dim, False): raise ValueError( - "cannot combine multi-index level indexers " - "with an indexer for dimension %s" % dim + "cannot combine multi-index level indexers with an indexer for " + f"dimension {dim}" ) dim_indexers[dim] = level_labels @@ -326,7 +325,7 @@ def tuple(self): return self._key def __repr__(self): - return "{}({})".format(type(self).__name__, self.tuple) + return f"{type(self).__name__}({self.tuple})" def as_integer_or_none(value): @@ -362,9 +361,7 @@ def __init__(self, key): k = as_integer_slice(k) else: raise TypeError( - "unexpected indexer type for {}: {!r}".format( - type(self).__name__, k - ) + f"unexpected indexer type for {type(self).__name__}: {k!r}" ) new_key.append(k) @@ -395,20 +392,17 @@ def __init__(self, key): elif isinstance(k, np.ndarray): if not np.issubdtype(k.dtype, np.integer): raise TypeError( - "invalid indexer array, does not have " - "integer dtype: {!r}".format(k) + f"invalid indexer array, does not have integer dtype: {k!r}" ) if k.ndim != 1: raise TypeError( - "invalid indexer array for {}, must have " - "exactly 1 dimension: ".format(type(self).__name__, k) + f"invalid indexer array for {type(self).__name__}; must have " + f"exactly 1 dimension: {k!r}" ) k = np.asarray(k, dtype=np.int64) else: raise TypeError( - "unexpected indexer type for {}: {!r}".format( - type(self).__name__, k - ) + f"unexpected indexer type for {type(self).__name__}: {k!r}" ) new_key.append(k) @@ -439,8 +433,7 @@ def __init__(self, key): elif isinstance(k, np.ndarray): if not np.issubdtype(k.dtype, np.integer): raise TypeError( - "invalid indexer array, does not have " - "integer dtype: {!r}".format(k) + f"invalid indexer array, does not have integer dtype: {k!r}" ) if ndim is None: 
ndim = k.ndim @@ -448,14 +441,12 @@ def __init__(self, key): ndims = [k.ndim for k in key if isinstance(k, np.ndarray)] raise ValueError( "invalid indexer key: ndarray arguments " - "have different numbers of dimensions: {}".format(ndims) + f"have different numbers of dimensions: {ndims}" ) k = np.asarray(k, dtype=np.int64) else: raise TypeError( - "unexpected indexer type for {}: {!r}".format( - type(self).__name__, k - ) + f"unexpected indexer type for {type(self).__name__}: {k!r}" ) new_key.append(k) @@ -574,9 +565,7 @@ def __setitem__(self, key, value): self.array[full_key] = value def __repr__(self): - return "{}(array={!r}, key={!r})".format( - type(self).__name__, self.array, self.key - ) + return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" class LazilyVectorizedIndexedArray(ExplicitlyIndexedNDArrayMixin): @@ -627,9 +616,7 @@ def __setitem__(self, key, value): ) def __repr__(self): - return "{}(array={!r}, key={!r})".format( - type(self).__name__, self.array, self.key - ) + return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" def _wrap_numpy_scalars(array): diff --git a/xarray/core/missing.py b/xarray/core/missing.py index dfe209e3f7e..77dde66484e 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -71,7 +71,7 @@ def __call__(self, x): self._yi, left=self._left, right=self._right, - **self.call_kwargs + **self.call_kwargs, ) @@ -93,7 +93,7 @@ def __init__( copy=False, bounds_error=False, order=None, - **kwargs + **kwargs, ): from scipy.interpolate import interp1d @@ -126,7 +126,7 @@ def __init__( bounds_error=False, assume_sorted=assume_sorted, copy=copy, - **self.cons_kwargs + **self.cons_kwargs, ) @@ -147,7 +147,7 @@ def __init__( order=3, nu=0, ext=None, - **kwargs + **kwargs, ): from scipy.interpolate import UnivariateSpline diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 1f2e5c0be43..998964273be 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -151,7 +151,7 
@@ def _interpolate(self, kind="linear"): assume_sorted=True, method=kind, kwargs={"bounds_error": False}, - **{self._dim: self._full_index} + **{self._dim: self._full_index}, ) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index d22c6aa7d91..f4e571a8efe 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,4 +1,5 @@ import functools +from typing import Callable import numpy as np @@ -106,7 +107,7 @@ def __repr__(self): def __len__(self): return self.obj.sizes[self.dim] - def _reduce_method(name): + def _reduce_method(name: str) -> Callable: # type: ignore array_agg_func = getattr(duck_array_ops, name) bottleneck_move_func = getattr(bottleneck, "move_" + name, None) @@ -453,7 +454,7 @@ def _numpy_or_bottleneck_reduce( array_agg_func=array_agg_func, bottleneck_move_func=bottleneck_move_func, ), - **kwargs + **kwargs, ) def construct(self, window_dim, stride=1, fill_value=dtypes.NA): diff --git a/xarray/plot/__init__.py b/xarray/plot/__init__.py index 903321228f7..86a09506824 100644 --- a/xarray/plot/__init__.py +++ b/xarray/plot/__init__.py @@ -1,6 +1,6 @@ +from .dataset_plot import scatter from .facetgrid import FacetGrid from .plot import contour, contourf, hist, imshow, line, pcolormesh, plot, step -from .dataset_plot import scatter __all__ = [ "plot", diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index ec51ff26c07..7f13ba601fe 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -294,7 +294,7 @@ def map_dataarray_line( hue=hue, add_legend=False, _labels=False, - **kwargs + **kwargs, ) self._mappables.append(mappable) @@ -376,7 +376,7 @@ def add_legend(self, **kwargs): labels=list(self._hue_var.values), title=self._hue_label, loc="center right", - **kwargs + **kwargs, ) self.figlegend = figlegend @@ -491,7 +491,7 @@ def set_titles(self, template="{coord} = {value}", maxchar=30, size=None, **kwar rotation=270, ha="left", va="center", - **kwargs + **kwargs, ) # The column titles on the top row @@ 
-590,7 +590,7 @@ def _easy_facetgrid( subplot_kws=None, ax=None, figsize=None, - **kwargs + **kwargs, ): """ Convenience method to call xarray.plot.FacetGrid from 2d plotting methods diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d05a02ae705..a3a2f55f6cc 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1534,11 +1534,11 @@ def test_expand_dims_error(self): # Should not pass the already existing dimension. array.expand_dims(dim=["x"]) # raise if duplicate - with raises_regex(ValueError, "duplicate values."): + with raises_regex(ValueError, "duplicate values"): array.expand_dims(dim=["y", "y"]) - with raises_regex(ValueError, "duplicate values."): + with raises_regex(ValueError, "duplicate values"): array.expand_dims(dim=["y", "z"], axis=[1, 1]) - with raises_regex(ValueError, "duplicate values."): + with raises_regex(ValueError, "duplicate values"): array.expand_dims(dim=["y", "z"], axis=[2, -2]) # out of bounds error, axis must be in [-4, 3] diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 88ca8d3ab4f..e99c0632fe8 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -32,7 +32,7 @@ def open_dataset( cache_dir=_default_cache_dir, github_url="https://github.com/pydata/xarray-data", branch="master", - **kws + **kws, ): """ Open a dataset from the online repository (requires internet).