diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 29ce46c8c68..e950baed5e0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -102,12 +102,11 @@ def _get_default_engine_netcdf(): def _get_default_engine(path: str, allow_remote: bool = False): if allow_remote and is_remote_uri(path): - engine = _get_default_engine_remote_uri() + return _get_default_engine_remote_uri() elif path.endswith(".gz"): - engine = _get_default_engine_gz() + return _get_default_engine_gz() else: - engine = _get_default_engine_netcdf() - return engine + return _get_default_engine_netcdf() def _validate_dataset_names(dataset): @@ -282,7 +281,7 @@ def _chunk_ds( mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) - name_prefix = "open_dataset-%s" % token + name_prefix = f"open_dataset-{token}" variables = {} for name, var in backend_ds.variables.items(): @@ -295,8 +294,7 @@ def _chunk_ds( name_prefix=name_prefix, token=token, ) - ds = backend_ds._replace(variables) - return ds + return backend_ds._replace(variables) def _dataset_from_backend_dataset( @@ -308,12 +306,10 @@ def _dataset_from_backend_dataset( overwrite_encoded_chunks, **extra_tokens, ): - if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) + if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}: + raise ValueError( + f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}." + ) _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: @@ -331,9 +327,8 @@ def _dataset_from_backend_dataset( ds.set_close(backend_ds._close) # Ensure source filename always stored in dataset object (GH issue #2550) - if "source" not in ds.encoding: - if isinstance(filename_or_obj, str): - ds.encoding["source"] = filename_or_obj + if "source" not in ds.encoding and isinstance(filename_or_obj, str): + ds.encoding["source"] = filename_or_obj return ds @@ -515,7 +510,6 @@ def open_dataset( **decoders, **kwargs, ) - return ds @@ -1015,8 +1009,8 @@ def to_netcdf( elif engine != "scipy": raise ValueError( "invalid engine for creating bytes with " - "to_netcdf: %r. Only the default engine " - "or engine='scipy' is supported" % engine + f"to_netcdf: {engine!r}. 
Only the default engine " + "or engine='scipy' is supported" ) if not compute: raise NotImplementedError( @@ -1037,7 +1031,7 @@ def to_netcdf( try: store_open = WRITEABLE_STORES[engine] except KeyError: - raise ValueError("unrecognized engine for to_netcdf: %r" % engine) + raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}") if format is not None: format = format.upper() @@ -1049,9 +1043,8 @@ def to_netcdf( autoclose = have_chunks and scheduler in ["distributed", "multiprocessing"] if autoclose and engine == "scipy": raise NotImplementedError( - "Writing netCDF files with the %s backend " - "is not currently supported with dask's %s " - "scheduler" % (engine, scheduler) + f"Writing netCDF files with the {engine} backend " + f"is not currently supported with dask's {scheduler} scheduler" ) target = path_or_file if path_or_file is not None else BytesIO() @@ -1061,7 +1054,7 @@ def to_netcdf( kwargs["invalid_netcdf"] = invalid_netcdf else: raise ValueError( - "unrecognized option 'invalid_netcdf' for engine %s" % engine + f"unrecognized option 'invalid_netcdf' for engine {engine}" ) store = store_open(target, mode, format, group, **kwargs) @@ -1203,7 +1196,7 @@ def save_mfdataset( Data variables: a (time) float64 0.0 0.02128 0.04255 0.06383 ... 0.9574 0.9787 1.0 >>> years, datasets = zip(*ds.groupby("time.year")) - >>> paths = ["%s.nc" % y for y in years] + >>> paths = [f"{y}.nc" for y in years] >>> xr.save_mfdataset(datasets, paths) """ if mode == "w" and len(set(paths)) < len(paths): @@ -1215,7 +1208,7 @@ def save_mfdataset( if not isinstance(obj, Dataset): raise TypeError( "save_mfdataset only supports writing Dataset " - "objects, received type %s" % type(obj) + f"objects, received type {type(obj)}" ) if groups is None: diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 24a075aa811..9e5546f052a 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -90,8 +90,7 @@ def get_dimensions(self): def get_encoding(self): dims = self.get_dimensions() - encoding = {"unlimited_dims": {k for k, v in dims.items() if v is None}} - return encoding + return {"unlimited_dims": {k for k, v in dims.items() if v is None}} class CfgribfBackendEntrypoint(BackendEntrypoint): diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 026c7e5c7db..64a245ddead 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -69,9 +69,8 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 base_delay = initial_delay * 2 ** n next_delay = base_delay + np.random.randint(base_delay) msg = ( - "getitem failed, waiting %s ms before trying again " - "(%s tries remaining). Full traceback: %s" - % (next_delay, max_retries - n, traceback.format_exc()) + f"getitem failed, waiting {next_delay} ms before trying again " + f"({max_retries - n} tries remaining). 
Full traceback: {traceback.format_exc()}" ) logger.debug(msg) time.sleep(1e-3 * next_delay) @@ -336,7 +335,7 @@ def set_dimensions(self, variables, unlimited_dims=None): if dim in existing_dims and length != existing_dims[dim]: raise ValueError( "Unable to update size for existing dimension" - "%r (%d != %d)" % (dim, length, existing_dims[dim]) + f"{dim!r} ({length} != {existing_dims[dim]})" ) elif dim not in existing_dims: is_unlimited = dim in unlimited_dims diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 84e89f80dae..9f744d0c1ef 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -37,8 +37,7 @@ class H5NetCDFArrayWrapper(BaseNetCDF4Array): def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) - variable = ds.variables[self.variable_name] - return variable + return ds.variables[self.variable_name] def __getitem__(self, key): return indexing.explicit_indexing_adapter( @@ -102,7 +101,7 @@ def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=Fal if group is None: root, group = find_root_and_group(manager) else: - if not type(manager) is h5netcdf.File: + if type(manager) is not h5netcdf.File: raise ValueError( "must supply a h5netcdf.File if the group " "argument is provided" @@ -233,11 +232,9 @@ def get_dimensions(self): return self.ds.dimensions def get_encoding(self): - encoding = {} - encoding["unlimited_dims"] = { - k for k, v in self.ds.dimensions.items() if v is None + return { + "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} } - return encoding def set_dimension(self, name, length, is_unlimited=False): if is_unlimited: @@ -266,9 +263,9 @@ def prepare_variable( "h5netcdf does not yet support setting a fill value for " "variable-length strings " "(https://github.com/shoyer/h5netcdf/issues/37). " - "Either remove '_FillValue' from encoding on variable %r " + f"Either remove '_FillValue' from encoding on variable {name!r} " "or set {'dtype': 'S1'} in encoding to use the fixed width " - "NC_CHAR type." % name + "NC_CHAR type." ) if dtype is str: diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 5303ea49381..59417336f5f 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -167,7 +167,7 @@ def locked(self): return any(lock.locked for lock in self.locks) def __repr__(self): - return "CombinedLock(%r)" % list(self.locks) + return f"CombinedLock({list(self.locks)!r})" class DummyLock: diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index a60c940c3c4..694b0d2fdd2 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -122,25 +122,23 @@ def _encode_nc4_variable(var): def _check_encoding_dtype_is_vlen_string(dtype): if dtype is not str: raise AssertionError( # pragma: no cover - "unexpected dtype encoding %r. This shouldn't happen: please " - "file a bug report at github.com/pydata/xarray" % dtype + f"unexpected dtype encoding {dtype!r}. This shouldn't happen: please " + "file a bug report at github.com/pydata/xarray" ) def _get_datatype(var, nc_format="NETCDF4", raise_on_invalid_encoding=False): if nc_format == "NETCDF4": - datatype = _nc4_dtype(var) - else: - if "dtype" in var.encoding: - encoded_dtype = var.encoding["dtype"] - _check_encoding_dtype_is_vlen_string(encoded_dtype) - if raise_on_invalid_encoding: - raise ValueError( - "encoding dtype=str for vlen strings is only supported " - "with format='NETCDF4'." 
- ) - datatype = var.dtype - return datatype + return _nc4_dtype(var) + if "dtype" in var.encoding: + encoded_dtype = var.encoding["dtype"] + _check_encoding_dtype_is_vlen_string(encoded_dtype) + if raise_on_invalid_encoding: + raise ValueError( + "encoding dtype=str for vlen strings is only supported " + "with format='NETCDF4'." + ) + return var.dtype def _nc4_dtype(var): @@ -178,7 +176,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): ds = create_group(ds, key) else: # wrap error to provide slightly more helpful message - raise OSError("group not found: %s" % key, e) + raise OSError(f"group not found: {key}", e) return ds @@ -203,7 +201,7 @@ def _force_native_endianness(var): # if endian exists, remove it from the encoding. var.encoding.pop("endian", None) # check to see if encoding has a value for endian its 'native' - if not var.encoding.get("endian", "native") == "native": + if var.encoding.get("endian", "native") != "native": raise NotImplementedError( "Attempt to write non-native endian type, " "this is not supported by the netCDF4 " @@ -270,8 +268,8 @@ def _extract_nc4_variable_encoding( invalid = [k for k in encoding if k not in valid_encodings] if invalid: raise ValueError( - "unexpected encoding parameters for %r backend: %r. Valid " - "encodings are: %r" % (backend, invalid, valid_encodings) + f"unexpected encoding parameters for {backend!r} backend: {invalid!r}. Valid " + f"encodings are: {valid_encodings!r}" ) else: for k in list(encoding): @@ -282,10 +280,8 @@ def _extract_nc4_variable_encoding( def _is_list_of_strings(value): - if np.asarray(value).dtype.kind in ["U", "S"] and np.asarray(value).size > 1: - return True - else: - return False + arr = np.asarray(value) + return arr.dtype.kind in ["U", "S"] and arr.size > 1 class NetCDF4DataStore(WritableCFDataStore): @@ -313,7 +309,7 @@ def __init__( if group is None: root, group = find_root_and_group(manager) else: - if not type(manager) is netCDF4.Dataset: + if type(manager) is not netCDF4.Dataset: raise ValueError( "must supply a root netCDF4.Dataset if the group " "argument is provided" @@ -417,25 +413,22 @@ def open_store_variable(self, name, var): return Variable(dimensions, data, attributes, encoding) def get_variables(self): - dsvars = FrozenDict( + return FrozenDict( (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() ) - return dsvars def get_attrs(self): - attrs = FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs()) - return attrs + return FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs()) def get_dimensions(self): - dims = FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) - return dims + return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) def get_encoding(self): - encoding = {} - encoding["unlimited_dims"] = { - k for k, v in self.ds.dimensions.items() if v.isunlimited() + return { + "unlimited_dims": { + k for k, v in self.ds.dimensions.items() if v.isunlimited() + } } - return encoding def set_dimension(self, name, length, is_unlimited=False): dim_length = length if not is_unlimited else None @@ -473,9 +466,9 @@ def prepare_variable( "netCDF4 does not yet support setting a fill value for " "variable-length strings " "(https://github.com/Unidata/netcdf4-python/issues/730). " - "Either remove '_FillValue' from encoding on variable %r " + f"Either remove '_FillValue' from encoding on variable {name!r} " "or set {'dtype': 'S1'} in encoding to use the fixed width " - "NC_CHAR type." % name + "NC_CHAR type." 
) encoding = _extract_nc4_variable_encoding( diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 001af0bf8e1..5fdd0534d57 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -125,8 +125,6 @@ def is_valid_nc3_name(s): """ if not isinstance(s, str): return False - if not isinstance(s, str): - s = s.decode("utf-8") num_bytes = len(s.encode("utf-8")) return ( (unicodedata.normalize("NFC", s) == s) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 23e83b0021e..d892e8761a7 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -87,10 +87,7 @@ def build_engines(pkg_entrypoints): backend_entrypoints.update(external_backend_entrypoints) backend_entrypoints = sort_backends(backend_entrypoints) set_missing_parameters(backend_entrypoints) - engines = {} - for name, backend in backend_entrypoints.items(): - engines[name] = backend() - return engines + return {name: backend() for name, backend in backend_entrypoints.items()} @functools.lru_cache(maxsize=1) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 2372468d934..25d2df9d76a 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -47,7 +47,7 @@ def _getitem(self, key): result = robust_getitem(array, key, catch=ValueError) # in some cases, pydap doesn't squeeze axes automatically like numpy axis = tuple(n for n, k in enumerate(key) if isinstance(k, integer_types)) - if result.ndim + len(axis) != array.ndim and len(axis) > 0: + if result.ndim + len(axis) != array.ndim and axis: result = np.squeeze(result, axis) return result diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index f5d9b7bf900..49a5a9ec7ae 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -389,7 +389,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc # the filename is probably an s3 bucket rather than a regular file mtime = None token = tokenize(filename, mtime, chunks) - name_prefix = "open_rasterio-%s" % token + name_prefix = f"open_rasterio-{token}" result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index c27716ea44d..9c33b172639 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -128,7 +128,7 @@ def __init__( elif format == "NETCDF3_CLASSIC": version = 1 else: - raise ValueError("invalid format for scipy.io.netcdf backend: %r" % format) + raise ValueError(f"invalid format for scipy.io.netcdf backend: {format!r}") if lock is None and mode != "r" and isinstance(filename_or_obj, str): lock = get_write_lock(filename_or_obj) @@ -174,16 +174,14 @@ def get_dimensions(self): return Frozen(self.ds.dimensions) def get_encoding(self): - encoding = {} - encoding["unlimited_dims"] = { - k for k, v in self.ds.dimensions.items() if v is None + return { + "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} } - return encoding def set_dimension(self, name, length, is_unlimited=False): if name in self.ds.dimensions: raise ValueError( - "%s does not support modifying dimensions" % type(self).__name__ + f"{type(self).__name__} does not support modifying dimensions" ) dim_length = length if not is_unlimited else None self.ds.createDimension(name, dim_length) @@ -204,12 +202,14 @@ def encode_variable(self, variable): def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None ): - if check_encoding and 
variable.encoding: - if variable.encoding != {"_FillValue": None}: - raise ValueError( - "unexpected encoding for scipy backend: %r" - % list(variable.encoding) - ) + if ( + check_encoding + and variable.encoding + and variable.encoding != {"_FillValue": None} + ): + raise ValueError( + f"unexpected encoding for scipy backend: {list(variable.encoding)}" + ) data = variable.data # nb. this still creates a numpy array in all memory, even though we diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index fef7d739d25..72c4e99265d 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -205,8 +205,8 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): dimensions = zarr_obj.attrs[dimension_key] except KeyError: raise KeyError( - "Zarr object is missing the attribute `%s`, which is " - "required for xarray to determine variable dimensions." % (dimension_key) + f"Zarr object is missing the attribute `{dimension_key}`, which is " + "required for xarray to determine variable dimensions." ) attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key]) return dimensions, attributes @@ -236,7 +236,7 @@ def extract_zarr_variable_encoding( invalid = [k for k in encoding if k not in valid_encodings] if invalid: raise ValueError( - "unexpected encoding parameters for zarr backend: %r" % invalid + f"unexpected encoding parameters for zarr backend: {invalid!r}" ) else: for k in list(encoding): @@ -380,8 +380,7 @@ def get_variables(self): ) def get_attrs(self): - attributes = dict(self.ds.attrs.asdict()) - return attributes + return dict(self.ds.attrs.asdict()) def get_dimensions(self): dimensions = {} @@ -390,16 +389,16 @@ def get_dimensions(self): for d, s in zip(v.attrs[DIMENSION_KEY], v.shape): if d in dimensions and dimensions[d] != s: raise ValueError( - "found conflicting lengths for dimension %s " - "(%d != %d)" % (d, s, dimensions[d]) + f"found conflicting lengths for dimension {d} " + f"({s} != {dimensions[d]})" ) dimensions[d] = s except KeyError: raise KeyError( - "Zarr object is missing the attribute `%s`, " + f"Zarr object is missing the attribute `{DIMENSION_KEY}`, " "which is required for xarray to determine " - "variable dimensions." % (DIMENSION_KEY) + "variable dimensions." 
) return dimensions @@ -459,7 +458,7 @@ def store( variables_without_encoding, attributes ) - if len(existing_variables) > 0: + if existing_variables: # there are variables to append # their encoding must be the same as in the store ds = open_zarr(self.ds.store, group=self.ds.path, chunks=None) @@ -700,7 +699,6 @@ def open_zarr( decode_timedelta=decode_timedelta, use_cftime=use_cftime, ) - return ds diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index c25d5296c41..c031bffb2cd 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -178,8 +178,7 @@ def _get_day_of_month(other, day_option): if day_option == "start": return 1 elif day_option == "end": - days_in_month = _days_in_month(other) - return days_in_month + return _days_in_month(other) elif day_option is None: # Note: unlike `_shift_month`, _get_day_of_month does not # allow day_option = None @@ -291,10 +290,7 @@ def roll_qtrday(other, n, month, day_option, modby=3): def _validate_month(month, default_month): - if month is None: - result_month = default_month - else: - result_month = month + result_month = default_month if month is None else month if not isinstance(result_month, int): raise TypeError( "'self.month' must be an integer value between 1 " @@ -687,11 +683,7 @@ def to_offset(freq): freq = freq_data["freq"] multiples = freq_data["multiple"] - if multiples is None: - multiples = 1 - else: - multiples = int(multiples) - + multiples = 1 if multiples is None else int(multiples) return _FREQUENCIES[freq](n=multiples) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 15f75955e00..a43724a6f31 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -255,7 +255,7 @@ def format_times( indent = first_row_offset if row == 0 else offset row_end = last_row_end if row == n_rows - 1 else intermediate_row_end times_for_row = index[row * n_per_row : (row + 1) * n_per_row] - representation = representation + format_row( + representation += format_row( times_for_row, indent=indent, separator=separator, row_end=row_end ) @@ -268,8 +268,9 @@ def format_attrs(index, separator=", "): "dtype": f"'{index.dtype}'", "length": f"{len(index)}", "calendar": f"'{index.calendar}'", + "freq": f"'{index.freq}'" if len(index) >= 3 else None, } - attrs["freq"] = f"'{index.freq}'" if len(index) >= 3 else None + attrs_str = [f"{k}={v}" for k, v in attrs.items()] attrs_str = f"{separator}".join(attrs_str) return attrs_str @@ -350,14 +351,13 @@ def __repr__(self): attrs_str = format_attrs(self) # oneliner only if smaller than display_width full_repr_str = f"{klass_name}([{datastr}], {attrs_str})" - if len(full_repr_str) <= display_width: - return full_repr_str - else: + if len(full_repr_str) > display_width: # if attrs_str too long, one per line if len(attrs_str) >= display_width - offset: attrs_str = attrs_str.replace(",", f",\n{' '*(offset-2)}") full_repr_str = f"{klass_name}([{datastr}],\n{' '*(offset-1)}{attrs_str})" - return full_repr_str + + return full_repr_str def _partial_date_slice(self, resolution, parsed): """Adapted from @@ -470,15 +470,15 @@ def get_loc(self, key, method=None, tolerance=None): def _maybe_cast_slice_bound(self, label, side, kind): """Adapted from pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound""" - if isinstance(label, str): - parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) - start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) - if self.is_monotonic_decreasing and 
len(self) > 1: - return end if side == "left" else start - return start if side == "left" else end - else: + if not isinstance(label, str): return label + parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) + start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) + if self.is_monotonic_decreasing and len(self) > 1: + return end if side == "left" else start + return start if side == "left" else end + # TODO: Add ability to use integer range outside of iloc? # e.g. series[1:5]. def get_value(self, series, key): diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py index c83c766f071..e9efef8eb7a 100644 --- a/xarray/coding/frequencies.py +++ b/xarray/coding/frequencies.py @@ -187,7 +187,7 @@ def _get_quartely_rule(self): if len(self.month_deltas) > 1: return None - if not self.month_deltas[0] % 3 == 0: + if self.month_deltas[0] % 3 != 0: return None return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index)) @@ -259,8 +259,7 @@ def month_anchor_check(dates): if calendar_end: cal = date.day == date.daysinmonth - if calendar_end: - calendar_end &= cal + calendar_end &= cal elif not calendar_start: break diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index e16e983fd8a..c217cb0c865 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -111,7 +111,7 @@ def encode(self, variable, name=None): if "char_dim_name" in encoding.keys(): char_dim_name = encoding.pop("char_dim_name") else: - char_dim_name = "string%s" % data.shape[-1] + char_dim_name = f"string{data.shape[-1]}" dims = dims + (char_dim_name,) return Variable(dims, data, attrs, encoding) @@ -140,8 +140,7 @@ def bytes_to_char(arr): chunks=arr.chunks + ((arr.dtype.itemsize,)), new_axis=[arr.ndim], ) - else: - return _numpy_bytes_to_char(arr) + return _numpy_bytes_to_char(arr) def _numpy_bytes_to_char(arr): diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 54400414ebc..9f5d1f87aee 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -75,7 +75,7 @@ def _is_standard_calendar(calendar): def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith("s"): - units = "%ss" % units + units = f"{units}s" return { "nanoseconds": "ns", "microseconds": "us", @@ -147,7 +147,7 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): result = decode_cf_datetime(example_value, units, calendar, use_cftime) except Exception: calendar_msg = ( - "the default calendar" if calendar is None else "calendar %r" % calendar + "the default calendar" if calendar is None else f"calendar {calendar!r}" ) msg = ( f"unable to decode time units {units!r} with {calendar_msg!r}. Try " @@ -370,8 +370,7 @@ def infer_timedelta_units(deltas): """ deltas = to_timedelta_unboxed(np.asarray(deltas).ravel()) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) - units = _infer_time_units_from_diff(unique_timedeltas) - return units + return _infer_time_units_from_diff(unique_timedeltas) def cftime_to_nptime(times): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 938752c4efc..1ebaab1be02 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -77,7 +77,6 @@ def __repr__(self): def lazy_elemwise_func(array, func, dtype): """Lazily apply an element-wise function to an array. 
- Parameters ---------- array : any valid value of Variable._data @@ -255,10 +254,10 @@ def encode(self, variable, name=None): if "scale_factor" in encoding or "add_offset" in encoding: dtype = _choose_float_dtype(data.dtype, "add_offset" in encoding) data = data.astype(dtype=dtype, copy=True) - if "add_offset" in encoding: - data -= pop_to(encoding, attrs, "add_offset", name=name) - if "scale_factor" in encoding: - data /= pop_to(encoding, attrs, "scale_factor", name=name) + if "add_offset" in encoding: + data -= pop_to(encoding, attrs, "add_offset", name=name) + if "scale_factor" in encoding: + data /= pop_to(encoding, attrs, "scale_factor", name=name) return Variable(dims, data, attrs, encoding) @@ -294,7 +293,7 @@ def encode(self, variable, name=None): # integer data should be treated as unsigned" if encoding.get("_Unsigned", "false") == "true": pop_to(encoding, attrs, "_Unsigned") - signed_dtype = np.dtype("i%s" % data.dtype.itemsize) + signed_dtype = np.dtype(f"i{data.dtype.itemsize}") if "_FillValue" in attrs: new_fill = signed_dtype.type(attrs["_FillValue"]) attrs["_FillValue"] = new_fill @@ -310,7 +309,7 @@ def decode(self, variable, name=None): if data.dtype.kind == "i": if unsigned == "true": - unsigned_dtype = np.dtype("u%s" % data.dtype.itemsize) + unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) data = lazy_elemwise_func(data, transform, unsigned_dtype) if "_FillValue" in attrs: @@ -318,7 +317,7 @@ def decode(self, variable, name=None): attrs["_FillValue"] = new_fill elif data.dtype.kind == "u": if unsigned == "false": - signed_dtype = np.dtype("i%s" % data.dtype.itemsize) + signed_dtype = np.dtype(f"i{data.dtype.itemsize}") transform = partial(np.asarray, dtype=signed_dtype) data = lazy_elemwise_func(data, transform, signed_dtype) if "_FillValue" in attrs: @@ -326,8 +325,8 @@ def decode(self, variable, name=None): attrs["_FillValue"] = new_fill else: warnings.warn( - "variable %r has _Unsigned attribute but is not " - "of integer type. Ignoring attribute." % name, + f"variable {name!r} has _Unsigned attribute but is not " + "of integer type. 
Ignoring attribute.", SerializationWarning, stacklevel=3, ) diff --git a/xarray/conventions.py b/xarray/conventions.py index aece572fda3..901d19bd99b 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -110,9 +110,9 @@ def maybe_encode_nonstring_dtype(var, name=None): and "missing_value" not in var.attrs ): warnings.warn( - "saving variable %s with floating " + f"saving variable {name} with floating " "point data as an integer dtype without " - "any _FillValue to use for NaNs" % name, + "any _FillValue to use for NaNs", SerializationWarning, stacklevel=10, ) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index f0e416b52e6..d50163c435b 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -81,7 +81,7 @@ def _contains_obj_type(*, pat: Any, checker: Any) -> bool: return True # If it is not an object array it can't contain compiled re - if not getattr(pat, "dtype", "no") == np.object_: + if getattr(pat, "dtype", "no") != np.object_: return False return _apply_str_ufunc(func=checker, obj=pat).all() @@ -95,7 +95,7 @@ def _contains_str_like(pat: Any) -> bool: if not hasattr(pat, "dtype"): return False - return pat.dtype.kind == "U" or pat.dtype.kind == "S" + return pat.dtype.kind in ["U", "S"] def _contains_compiled_re(pat: Any) -> bool: diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index f6e026c0109..a4794dd28f5 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -48,7 +48,7 @@ def _get_joiner(join, index_cls): # We rewrite all indexes and then use join='left' return operator.itemgetter(0) else: - raise ValueError("invalid value for join: %s" % join) + raise ValueError(f"invalid value for join: {join}") def _override_indexes(objects, all_indexes, exclude): @@ -57,16 +57,16 @@ def _override_indexes(objects, all_indexes, exclude): lengths = {index.size for index in dim_indexes} if len(lengths) != 1: raise ValueError( - "Indexes along dimension %r don't have the same length." - " Cannot use join='override'." % dim + f"Indexes along dimension {dim!r} don't have the same length." + " Cannot use join='override'." 
) objects = list(objects) for idx, obj in enumerate(objects[1:]): - new_indexes = {} - for dim in obj.xindexes: - if dim not in exclude: - new_indexes[dim] = all_indexes[dim][0] + new_indexes = { + dim: all_indexes[dim][0] for dim in obj.xindexes if dim not in exclude + } + objects[idx + 1] = obj._overwrite_indexes(new_indexes) return objects @@ -338,21 +338,17 @@ def align( labeled_size = index.size if len(unlabeled_sizes | {labeled_size}) > 1: raise ValueError( - "arguments without labels along dimension %r cannot be " - "aligned because they have different dimension size(s) %r " - "than the size of the aligned dimension labels: %r" - % (dim, unlabeled_sizes, labeled_size) + f"arguments without labels along dimension {dim!r} cannot be " + f"aligned because they have different dimension size(s) {unlabeled_sizes!r} " + f"than the size of the aligned dimension labels: {labeled_size!r}" ) - for dim in unlabeled_dim_sizes: - if dim not in all_indexes: - sizes = unlabeled_dim_sizes[dim] - if len(sizes) > 1: - raise ValueError( - "arguments without labels along dimension %r cannot be " - "aligned because they have different dimension sizes: %r" - % (dim, sizes) - ) + for dim, sizes in unlabeled_dim_sizes.items(): + if dim not in all_indexes and len(sizes) > 1: + raise ValueError( + f"arguments without labels along dimension {dim!r} cannot be " + f"aligned because they have different dimension sizes: {sizes!r}" + ) result = [] for obj in objects: @@ -486,8 +482,7 @@ def reindex_like_indexers( if other_size != target_size: raise ValueError( "different size for unlabeled " - "dimension on argument %r: %r vs %r" - % (dim, other_size, target_size) + f"dimension on argument {dim!r}: {other_size!r} vs {target_size!r}" ) return indexers @@ -575,8 +570,8 @@ def reindex_variables( if not index.is_unique: raise ValueError( - "cannot reindex or align along dimension %r because the " - "index has duplicate values" % dim + f"cannot reindex or align along dimension {dim!r} because the " + "index has duplicate values" ) int_indexer = get_indexer_nd(index, target, method, tolerance) @@ -603,9 +598,9 @@ def reindex_variables( new_size = indexers[dim].size if existing_size != new_size: raise ValueError( - "cannot reindex or align along dimension %r without an " - "index because its size %r is different from the size of " - "the new index %r" % (dim, existing_size, new_size) + f"cannot reindex or align along dimension {dim!r} without an " + f"index because its size {existing_size!r} is different from the size of " + f"the new index {new_size!r}" ) for name, var in variables.items(): @@ -756,8 +751,6 @@ def broadcast(*args, exclude=None): args = align(*args, join="outer", copy=False, exclude=exclude) dims_map, common_coords = _get_broadcast_dims_map_common_coords(args, exclude) - result = [] - for arg in args: - result.append(_broadcast_helper(arg, exclude, dims_map, common_coords)) + result = [_broadcast_helper(arg, exclude, dims_map, common_coords) for arg in args] return tuple(result) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 105e0a5a66c..7d9273670a3 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -11,8 +11,7 @@ def _infer_concat_order_from_positions(datasets): - combined_ids = dict(_infer_tile_ids_from_nested_list(datasets, ())) - return combined_ids + return dict(_infer_tile_ids_from_nested_list(datasets, ())) def _infer_tile_ids_from_nested_list(entry, current_pos): @@ -144,7 +143,7 @@ def _check_dimension_depth_tile_ids(combined_tile_ids): nesting_depths = 
[len(tile_id) for tile_id in tile_ids] if not nesting_depths: nesting_depths = [0] - if not set(nesting_depths) == {nesting_depths[0]}: + if set(nesting_depths) != {nesting_depths[0]}: raise ValueError( "The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths" diff --git a/xarray/core/common.py b/xarray/core/common.py index e4a5264d8e6..320c94972d2 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -209,11 +209,11 @@ def __init_subclass__(cls, **kwargs): if not hasattr(object.__new__(cls), "__dict__"): pass elif cls.__module__.startswith("xarray."): - raise AttributeError("%s must explicitly define __slots__" % cls.__name__) + raise AttributeError(f"{cls.__name__} must explicitly define __slots__") else: cls.__setattr__ = cls._setattr_dict warnings.warn( - "xarray subclass %s should explicitly define __slots__" % cls.__name__, + f"xarray subclass {cls.__name__} should explicitly define __slots__", FutureWarning, stacklevel=2, ) @@ -251,10 +251,9 @@ def _setattr_dict(self, name: str, value: Any) -> None: if name in self.__dict__: # Custom, non-slotted attr, or improperly assigned variable? warnings.warn( - "Setting attribute %r on a %r object. Explicitly define __slots__ " + f"Setting attribute {name!r} on a {type(self).__name__!r} object. Explicitly define __slots__ " "to suppress this warning for legitimate custom attributes and " - "raise an error when attempting variables assignments." - % (name, type(self).__name__), + "raise an error when attempting variables assignments.", FutureWarning, stacklevel=2, ) @@ -274,9 +273,8 @@ def __setattr__(self, name: str, value: Any) -> None: ): raise raise AttributeError( - "cannot set attribute %r on a %r object. Use __setitem__ style" + f"cannot set attribute {name!r} on a {type(self).__name__!r} object. Use __setitem__ style" "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." 
- % (name, type(self).__name__) ) from e def __dir__(self) -> List[str]: @@ -655,7 +653,7 @@ def pipe( func, target = func if target in kwargs: raise ValueError( - "%s is both the pipe target and a keyword argument" % target + f"{target} is both the pipe target and a keyword argument" ) kwargs[target] = self return func(*args, **kwargs) @@ -1273,8 +1271,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): if not isinstance(cond, (Dataset, DataArray)): raise TypeError( - "cond argument is %r but must be a %r or %r" - % (cond, Dataset, DataArray) + f"cond argument is {cond!r} but must be a {Dataset!r} or {DataArray!r}" ) # align so we can use integer indexing diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e12938d6965..2bc3ba921a7 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -307,7 +307,7 @@ def assert_and_return_exact_match(all_keys): if keys != first_keys: raise ValueError( "exact match required for all data variable names, " - "but %r != %r" % (keys, first_keys) + f"but {keys!r} != {first_keys!r}" ) return first_keys @@ -516,7 +516,7 @@ def unified_dim_sizes( if len(set(var.dims)) < len(var.dims): raise ValueError( "broadcasting cannot handle duplicate " - "dimensions on a variable: %r" % list(var.dims) + f"dimensions on a variable: {list(var.dims)}" ) for dim, size in zip(var.dims, var.shape): if dim not in exclude_dims: @@ -526,7 +526,7 @@ def unified_dim_sizes( raise ValueError( "operands cannot be broadcast together " "with mismatched lengths for dimension " - "%r: %s vs %s" % (dim, dim_sizes[dim], size) + f"{dim}: {dim_sizes[dim]} vs {size}" ) return dim_sizes @@ -563,8 +563,8 @@ def broadcast_compat_data( if unexpected_dims: raise ValueError( "operand to apply_ufunc encountered unexpected " - "dimensions %r on an input variable: these are core " - "dimensions on other input or output variables" % unexpected_dims + f"dimensions {unexpected_dims!r} on an input variable: these are core " + "dimensions on other input or output variables" ) # for consistency with numpy, keep broadcast dimensions to the left diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 9eca99918d4..368f8992607 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -223,8 +223,7 @@ def concat( if compat not in _VALID_COMPAT: raise ValueError( - "compat=%r invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" - % compat + f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" ) if isinstance(first_obj, DataArray): @@ -234,7 +233,7 @@ def concat( else: raise TypeError( "can only concatenate xarray Dataset and DataArray " - "objects, got %s" % type(first_obj) + f"objects, got {type(first_obj)}" ) return f( objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs @@ -293,18 +292,16 @@ def process_subset_opt(opt, subset): if opt == "different": if compat == "override": raise ValueError( - "Cannot specify both %s='different' and compat='override'." - % subset + f"Cannot specify both {subset}='different' and compat='override'." 
) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: equals[k] = None - variables = [] - for ds in datasets: - if k in ds.variables: - variables.append(ds.variables[k]) + variables = [ + ds.variables[k] for ds in datasets if k in ds.variables + ] if len(variables) == 1: # coords="different" doesn't make sense when only one object @@ -367,12 +364,12 @@ def process_subset_opt(opt, subset): if subset == "coords": raise ValueError( "some variables in coords are not coordinates on " - "the first dataset: %s" % (invalid_vars,) + f"the first dataset: {invalid_vars}" ) else: raise ValueError( "some variables in data_vars are not data variables " - "on the first dataset: %s" % (invalid_vars,) + f"on the first dataset: {invalid_vars}" ) concat_over.update(opt) @@ -439,7 +436,7 @@ def _dataset_concat( both_data_and_coords = coord_names & data_names if both_data_and_coords: raise ValueError( - "%r is a coordinate in some datasets but not others." % both_data_and_coords + f"{both_data_and_coords!r} is a coordinate in some datasets but not others." ) # we don't want the concat dimension in the result dataset yet dim_coords.pop(dim, None) @@ -507,7 +504,7 @@ def ensure_common_dims(vars): try: vars = ensure_common_dims([ds[k].variable for ds in datasets]) except KeyError: - raise ValueError("%r is not present in all datasets." % k) + raise ValueError(f"{k!r} is not present in all datasets.") combined = concat_vars(vars, dim, positions, combine_attrs=combine_attrs) assert isinstance(combined, Variable) result_vars[k] = combined @@ -519,8 +516,7 @@ def ensure_common_dims(vars): absent_coord_names = coord_names - set(result.variables) if absent_coord_names: raise ValueError( - "Variables %r are coordinates in some datasets but not others." - % absent_coord_names + f"Variables {absent_coord_names!r} are coordinates in some datasets but not others." 
) result = result.set_coords(coord_names) result.encoding = result_encoding diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 50be8a7f677..767b76d0d12 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -122,13 +122,13 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: ) cumprod_lengths = np.cumproduct(index_lengths) - if cumprod_lengths[-1] != 0: - # sizes of the repeats - repeat_counts = cumprod_lengths[-1] / cumprod_lengths - else: + if cumprod_lengths[-1] == 0: # if any factor is empty, the cartesian product is empty repeat_counts = np.zeros_like(cumprod_lengths) + else: + # sizes of the repeats + repeat_counts = cumprod_lengths[-1] / cumprod_lengths # sizes of the tiles tile_counts = np.roll(cumprod_lengths, 1) tile_counts[0] = 1 @@ -156,7 +156,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: level_list += levels names += index.names - return pd.MultiIndex(level_list, code_list, names=names) + return pd.MultiIndex(level_list, code_list, names=names) def update(self, other: Mapping[Hashable, Any]) -> None: other_vars = getattr(other, "variables", other) @@ -226,10 +226,9 @@ def merge(self, other: "Coordinates") -> "Dataset": coords, indexes = merge_coordinates_without_align([self, other]) coord_names = set(coords) - merged = Dataset._construct_direct( + return Dataset._construct_direct( variables=coords, coord_names=coord_names, indexes=indexes ) - return merged class DatasetCoordinates(Coordinates): @@ -364,13 +363,13 @@ def to_dataset(self) -> "Dataset": return Dataset._construct_direct(coords, set(coords)) def __delitem__(self, key: Hashable) -> None: - if key in self: - del self._data._coords[key] - if self._data._indexes is not None and key in self._data._indexes: - del self._data._indexes[key] - else: + if key not in self: raise KeyError(f"{key!r} is not a coordinate variable.") + del self._data._coords[key] + if self._data._indexes is not None and key in self._data._indexes: + del self._data._indexes[key] + def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() @@ -386,14 +385,11 @@ def assert_coordinate_consistent( """ for k in obj.dims: # make sure there are no conflict in dimension coordinates - if k in coords and k in obj.coords: - if not coords[k].equals(obj[k].variable): - raise IndexError( - "dimension coordinate {!r} conflicts between " - "indexed and indexing objects:\n{}\nvs.\n{}".format( - k, obj[k], coords[k] - ) - ) + if k in coords and k in obj.coords and not coords[k].equals(obj[k].variable): + raise IndexError( + f"dimension coordinate {k!r} conflicts between " + f"indexed and indexing objects:\n{obj[k]}\nvs.\n{coords[k]}" + ) def remap_label_indexers( diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 21daed1cec1..831d0d24ccb 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -98,16 +98,16 @@ def _infer_coords_and_dims( and len(coords) != len(shape) ): raise ValueError( - "coords is not dict-like, but it has %s items, " - "which does not match the %s dimensions of the " - "data" % (len(coords), len(shape)) + f"coords is not dict-like, but it has {len(coords)} items, " + f"which does not match the {len(shape)} dimensions of the " + "data" ) if isinstance(dims, str): dims = (dims,) if dims is None: - dims = ["dim_%s" % n for n in range(len(shape))] + dims = [f"dim_{n}" for n in range(len(shape))] if coords is not None and len(coords) 
== len(shape): # try to infer dimensions from coords if utils.is_dict_like(coords): @@ -125,12 +125,12 @@ def _infer_coords_and_dims( elif len(dims) != len(shape): raise ValueError( "different number of dimensions on data " - "and dims: %s vs %s" % (len(shape), len(dims)) + f"and dims: {len(shape)} vs {len(dims)}" ) else: for d in dims: if not isinstance(d, str): - raise TypeError("dimension %s is not a string" % d) + raise TypeError(f"dimension {d} is not a string") new_coords: Dict[Any, Variable] = {} @@ -147,24 +147,24 @@ def _infer_coords_and_dims( for k, v in new_coords.items(): if any(d not in dims for d in v.dims): raise ValueError( - "coordinate %s has dimensions %s, but these " + f"coordinate {k} has dimensions {v.dims}, but these " "are not a subset of the DataArray " - "dimensions %s" % (k, v.dims, dims) + f"dimensions {dims}" ) for d, s in zip(v.dims, v.shape): if s != sizes[d]: raise ValueError( - "conflicting sizes for dimension %r: " - "length %s on the data but length %s on " - "coordinate %r" % (d, sizes[d], s, k) + f"conflicting sizes for dimension {d!r}: " + f"length {sizes[d]} on the data but length {s} on " + f"coordinate {k!r}" ) if k in sizes and v.shape != (sizes[k],): raise ValueError( - "coordinate %r is a DataArray dimension, but " - "it has shape %r rather than expected shape %r " - "matching the dimension size" % (k, v.shape, (sizes[k],)) + f"coordinate {k!r} is a DataArray dimension, but " + f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} " + "matching the dimension size" ) assert_unique_multiindex_level_names(new_coords) @@ -539,8 +539,7 @@ def _to_dataset_whole( indexes = self._indexes coord_names = set(self._coords) - dataset = Dataset._construct_direct(variables, coord_names, indexes=indexes) - return dataset + return Dataset._construct_direct(variables, coord_names, indexes=indexes) def to_dataset( self, @@ -669,9 +668,8 @@ def dims(self, value): def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: if utils.is_dict_like(key): return key - else: - key = indexing.expanded_indexer(key, self.ndim) - return dict(zip(self.dims, key)) + key = indexing.expanded_indexer(key, self.ndim) + return dict(zip(self.dims, key)) @property def _level_coords(self) -> Dict[Hashable, Hashable]: @@ -823,13 +821,12 @@ def reset_coords( dataset = self.coords.to_dataset().reset_coords(names, drop) if drop: return self._replace(coords=dataset._variables) - else: - if self.name is None: - raise ValueError( - "cannot reset_coords with drop=False on an unnamed DataArrray" - ) - dataset[self.name] = self.variable - return dataset + if self.name is None: + raise ValueError( + "cannot reset_coords with drop=False on an unnamed DataArrray" + ) + dataset[self.name] = self.variable + return dataset def __dask_tokenize__(self): from dask.base import normalize_token @@ -2012,7 +2009,7 @@ def reorder_levels( coord = self._coords[dim] index = coord.to_index() if not isinstance(index, pd.MultiIndex): - raise ValueError("coordinate %r has no MultiIndex" % dim) + raise ValueError(f"coordinate {dim!r} has no MultiIndex") replace_coords[dim] = IndexVariable(coord.dims, index.reorder_levels(order)) coords = self._coords.copy() coords.update(replace_coords) @@ -2658,8 +2655,8 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: constructor = constructors[self.ndim] except KeyError: raise ValueError( - "cannot convert arrays with %s dimensions into " - "pandas objects" % self.ndim + f"cannot convert arrays with {self.ndim} dimensions into " + "pandas 
objects" ) indexes = [self.get_index(dim) for dim in self.dims] return constructor(self.values, *indexes) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 706ccbde8c4..9acae953b61 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -196,16 +196,15 @@ def calculate_dimensions(variables: Mapping[Hashable, Variable]) -> Dict[Hashabl for dim, size in zip(var.dims, var.shape): if dim in scalar_vars: raise ValueError( - "dimension %r already exists as a scalar variable" % dim + f"dimension {dim!r} already exists as a scalar variable" ) if dim not in dims: dims[dim] = size last_used[dim] = k elif dims[dim] != size: raise ValueError( - "conflicting sizes for dimension %r: " - "length %s on %r and length %s on %r" - % (dim, size, k, dims[dim], last_used[dim]) + f"conflicting sizes for dimension {dim!r}: " + f"length {size} on {k!r} and length {dims[dim]} on {last_used!r}" ) return dims @@ -245,8 +244,7 @@ def merge_indexes( and var.dims != current_index_variable.dims ): raise ValueError( - "dimension mismatch between %r %s and %r %s" - % (dim, current_index_variable.dims, n, var.dims) + f"dimension mismatch between {dim!r} {current_index_variable.dims} and {n!r} {var.dims}" ) if current_index_variable is not None and append: @@ -256,7 +254,7 @@ def merge_indexes( codes.extend(current_index.codes) levels.extend(current_index.levels) else: - names.append("%s_level_0" % dim) + names.append(f"{dim}_level_0") cat = pd.Categorical(current_index.values, ordered=True) codes.append(cat.codes) levels.append(cat.categories) @@ -733,8 +731,7 @@ def __init__( both_data_and_coords = set(data_vars) & set(coords) if both_data_and_coords: raise ValueError( - "variables %r are found in both data_vars and coords" - % both_data_and_coords + f"variables {both_data_and_coords!r} are found in both data_vars and coords" ) if isinstance(coords, Dataset): @@ -1700,7 +1697,7 @@ def reset_coords( bad_coords = set(names) & set(self.dims) if bad_coords: raise ValueError( - "cannot remove index coordinates with reset_coords: %s" % bad_coords + f"cannot remove index coordinates with reset_coords: {bad_coords}" ) obj = self.copy() obj._coord_names.difference_update(names) @@ -2050,7 +2047,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - "some chunks keys are not dimensions on this " "object: %s" % bad_dims + f"some chunks keys are not dimensions on this object: {bad_dims}" ) variables = { @@ -2408,12 +2405,12 @@ def head( if not isinstance(v, int): raise TypeError( "expected integer type indexer for " - "dimension %r, found %r" % (k, type(v)) + f"dimension {k!r}, found {type(v)!r}" ) elif v < 0: raise ValueError( "expected positive integer as indexer " - "for dimension %r, found %s" % (k, v) + f"for dimension {k!r}, found {v}" ) indexers_slices = {k: slice(val) for k, val in indexers.items()} return self.isel(indexers_slices) @@ -2454,12 +2451,12 @@ def tail( if not isinstance(v, int): raise TypeError( "expected integer type indexer for " - "dimension %r, found %r" % (k, type(v)) + f"dimension {k!r}, found {type(v)!r}" ) elif v < 0: raise ValueError( "expected positive integer as indexer " - "for dimension %r, found %s" % (k, v) + f"for dimension {k!r}, found {v}" ) indexers_slices = { k: slice(-val, None) if val != 0 else slice(val) @@ -2504,12 +2501,12 @@ def thin( if not isinstance(v, int): raise TypeError( "expected integer type indexer for " - "dimension %r, found %r" % (k, type(v)) + f"dimension {k!r}, found {type(v)!r}" ) elif v < 0: raise 
ValueError( "expected positive integer as indexer " - "for dimension %r, found %s" % (k, v) + f"for dimension {k!r}, found {v}" ) elif v == 0: raise ValueError("step cannot be zero") @@ -2830,7 +2827,7 @@ def _reindex( bad_dims = [d for d in indexers if d not in self.dims] if bad_dims: - raise ValueError("invalid reindex dimensions: %s" % bad_dims) + raise ValueError(f"invalid reindex dimensions: {bad_dims}") variables, indexes = alignment.reindex_variables( self.variables, @@ -3209,8 +3206,8 @@ def rename( for k in name_dict.keys(): if k not in self and k not in self.dims: raise ValueError( - "cannot rename %r because it is not a " - "variable or dimension in this dataset" % k + f"cannot rename {k!r} because it is not a " + "variable or dimension in this dataset" ) variables, coord_names, dims, indexes = self._rename_all( @@ -3250,8 +3247,8 @@ def rename_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - "cannot rename %r because it is not a " - "dimension in this dataset" % k + f"cannot rename {k!r} because it is not a " + "dimension in this dataset" ) if v in self.dims or v in self: raise ValueError( @@ -3294,8 +3291,8 @@ def rename_vars( for k in name_dict: if k not in self: raise ValueError( - "cannot rename %r because it is not a " - "variable or coordinate in this dataset" % k + f"cannot rename {k!r} because it is not a " + "variable or coordinate in this dataset" ) variables, coord_names, dims, indexes = self._rename_all( name_dict=name_dict, dims_dict={} @@ -3370,13 +3367,13 @@ def swap_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - "cannot swap from dimension %r because it is " - "not an existing dimension" % k + f"cannot swap from dimension {k!r} because it is " + "not an existing dimension" ) if v in self.variables and self.variables[v].dims != (k,): raise ValueError( - "replacement dimension %r is not a 1D " - "variable along the old dimension %r" % (v, k) + f"replacement dimension {v!r} is not a 1D " + f"variable along the old dimension {k!r}" ) result_dims = {dims_dict.get(dim, dim) for dim in self.dims} @@ -3980,7 +3977,7 @@ def unstack( missing_dims = [d for d in dims if d not in self.dims] if missing_dims: raise ValueError( - "Dataset does not contain the dimensions: %s" % missing_dims + f"Dataset does not contain the dimensions: {missing_dims}" ) non_multi_dims = [ @@ -3989,7 +3986,7 @@ def unstack( if non_multi_dims: raise ValueError( "cannot unstack dimensions that do not " - "have a MultiIndex: %s" % non_multi_dims + f"have a MultiIndex: {non_multi_dims}" ) result = self.copy(deep=False) @@ -4306,7 +4303,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): try: index = self.get_index(dim) except KeyError: - raise ValueError("dimension %r does not have coordinate labels" % dim) + raise ValueError(f"dimension {dim!r} does not have coordinate labels") new_index = index.drop(labels_for_dim, errors=errors) ds = ds.loc[{dim: new_index}] return ds @@ -4413,7 +4410,7 @@ def drop_dims( missing_dims = drop_dims - set(self.dims) if missing_dims: raise ValueError( - "Dataset does not contain the dimensions: %s" % missing_dims + f"Dataset does not contain the dimensions: {missing_dims}" ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} @@ -4451,8 +4448,8 @@ def transpose(self, *dims: Hashable) -> "Dataset": if dims: if set(dims) ^ set(self.dims) and ... 
not in dims: raise ValueError( - "arguments to transpose (%s) must be " - "permuted dataset dimensions (%s)" % (dims, tuple(self.dims)) + f"arguments to transpose ({dims}) must be " + f"permuted dataset dimensions ({tuple(self.dims)})" ) ds = self.copy() for name, var in self._variables.items(): @@ -4493,7 +4490,7 @@ def dropna( # depending on the order of the supplied axes. if dim not in self.dims: - raise ValueError("%s must be a single dataset dimension" % dim) + raise ValueError(f"{dim} must be a single dataset dimension") if subset is None: subset = iter(self.data_vars) @@ -4515,7 +4512,7 @@ def dropna( elif how == "all": mask = count > 0 elif how is not None: - raise ValueError("invalid how option: %s" % how) + raise ValueError(f"invalid how option: {how}") else: raise TypeError("must specify how or thresh") @@ -4862,7 +4859,7 @@ def reduce( missing_dimensions = [d for d in dims if d not in self.dims] if missing_dimensions: raise ValueError( - "Dataset does not contain the dimensions: %s" % missing_dimensions + f"Dataset does not contain the dimensions: {missing_dimensions}" ) if keep_attrs is None: @@ -5570,8 +5567,7 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): if inplace and set(lhs_data_vars) != set(rhs_data_vars): raise ValueError( "datasets must have the same data variables " - "for in-place arithmetic operations: %s, %s" - % (list(lhs_data_vars), list(rhs_data_vars)) + f"for in-place arithmetic operations: {list(lhs_data_vars)}, {list(rhs_data_vars)}" ) dest_vars = {} @@ -5743,7 +5739,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + raise ValueError(f"dimensions {invalid!r} do not exist") variables = {} for name, var in self.variables.items(): @@ -5805,7 +5801,7 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + raise ValueError(f"dimensions {invalid!r} do not exist") if roll_coords is None: warnings.warn( @@ -6067,7 +6063,7 @@ def rank(self, dim, pct=False, keep_attrs=None): Variables that do not depend on `dim` are dropped. 
""" if dim not in self.dims: - raise ValueError("Dataset does not contain the dimension: %s" % dim) + raise ValueError(f"Dataset does not contain the dimension: {dim}") variables = {} for name, var in self.variables.items(): diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 51499c3a687..5f9349051b7 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -63,10 +63,7 @@ def maybe_promote(dtype): # Check np.timedelta64 before np.integer fill_value = np.timedelta64("NaT") elif np.issubdtype(dtype, np.integer): - if dtype.itemsize <= 2: - dtype = np.float32 - else: - dtype = np.float64 + dtype = np.float32 if dtype.itemsize <= 2 else np.float64 fill_value = np.nan elif np.issubdtype(dtype, np.complexfloating): fill_value = np.nan + np.nan * 1j diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cb8a5f9946f..e32fd4be376 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -204,7 +204,7 @@ def asarray(data, xp=np): def as_shared_dtype(scalars_or_arrays): """Cast a arrays to a shared dtype using xarray's type promotion rules.""" - if any([isinstance(x, cupy_array_type) for x in scalars_or_arrays]): + if any(isinstance(x, cupy_array_type) for x in scalars_or_arrays): import cupy as cp arrays = [asarray(x, xp=cp) for x in scalars_or_arrays] @@ -427,9 +427,7 @@ def _datetime_nanmin(array): def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): """Convert an array containing datetime-like data to numerical values. - Convert the datetime array to a timedelta relative to an offset. - Parameters ---------- array : array-like @@ -442,12 +440,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): conversions are not allowed due to non-linear relationships between units. dtype : dtype Output dtype. - Returns ------- array Numerical representation of datetime object relative to an offset. - Notes ----- Some datetime unit conversions won't work, for example from days to years, even diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 9b7b060107b..3debefe2e0d 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -38,7 +38,7 @@ def __get__(self, obj, cls): # __getattr__ on data object will swallow any AttributeErrors # raised when initializing the accessor, so we need to raise as # something else (GH933): - raise RuntimeError("error initializing %r accessor." % self._name) + raise RuntimeError(f"error initializing {self._name!r} accessor.") cache[self._name] = accessor_obj return accessor_obj @@ -48,9 +48,8 @@ def _register_accessor(name, cls): def decorator(accessor): if hasattr(cls, name): warnings.warn( - "registration of accessor %r under name %r for type %r is " - "overriding a preexisting attribute with the same name." - % (accessor, name, cls), + f"registration of accessor {accessor!r} under name {name!r} for type {cls!r} is " + "overriding a preexisting attribute with the same name.", AccessorRegistrationWarning, stacklevel=2, ) diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 5c2d2210ebd..2a480427d4e 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -25,9 +25,8 @@ def short_data_repr_html(array): internal_data = getattr(array, "variable", array)._data if hasattr(internal_data, "_repr_html_"): return internal_data._repr_html_() - else: - text = escape(short_data_repr(array)) - return f"
{text}" + text = escape(short_data_repr(array)) + return f"
{text}" def format_dims(dims, coord_names): @@ -77,8 +76,7 @@ def summarize_coord(name, var): if is_index: coord = var.variable.to_index_variable() if coord.level_names is not None: - coords = {} - coords[name] = _summarize_coord_multiindex(name, coord) + coords = {name: _summarize_coord_multiindex(name, coord)} for lname in coord.level_names: var = coord.get_level_variable(lname) coords[lname] = summarize_variable(lname, var) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e2678896c0e..c73ef738a29 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,8 +29,8 @@ def check_reduce_dims(reduce_dims, dimensions): reduce_dims = [reduce_dims] if any(dim not in dimensions for dim in reduce_dims): raise ValueError( - "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (reduce_dims, dimensions) + f"cannot reduce over dimensions {reduce_dims!r}. expected either '...' " + f"to reduce over all dimensions or one or more of {dimensions!r}." ) @@ -105,7 +105,7 @@ def _consolidate_slices(slices): last_slice = slice(None) for slice_ in slices: if not isinstance(slice_, slice): - raise ValueError("list element is not a slice: %r" % slice_) + raise ValueError(f"list element is not a slice: {slice_!r}") if ( result and last_slice.stop == slice_.start @@ -141,8 +141,7 @@ def _inverse_permutation_indices(positions): return None positions = [np.arange(sl.start, sl.stop, sl.step) for sl in positions] - indices = nputils.inverse_permutation(np.concatenate(positions)) - return indices + return nputils.inverse_permutation(np.concatenate(positions)) class _DummyGroup: @@ -200,9 +199,8 @@ def _ensure_1d(group, obj): def _unique_and_monotonic(group): if isinstance(group, _DummyGroup): return True - else: - index = safe_cast_to_index(group) - return index.is_unique and index.is_monotonic + index = safe_cast_to_index(group) + return index.is_unique and index.is_monotonic def _apply_loffset(grouper, result): @@ -380,7 +378,7 @@ def __init__( if len(group_indices) == 0: if bins is not None: raise ValueError( - "None of the data falls within bins with edges %r" % bins + f"None of the data falls within bins with edges {bins!r}" ) else: raise ValueError( @@ -475,8 +473,7 @@ def _infer_concat_args(self, applied_example): def _binary_op(self, other, f, reflexive=False): g = f if not reflexive else lambda x, y: f(y, x) applied = self._yield_binary_applied(g, other) - combined = self._combine(applied) - return combined + return self._combine(applied) def _yield_binary_applied(self, func, other): dummy = None @@ -494,8 +491,8 @@ def _yield_binary_applied(self, func, other): if self._group.name not in other.dims: raise ValueError( "incompatible dimensions for a grouped " - "binary operation: the group variable %r " - "is not a dimension on the other argument" % self._group.name + f"binary operation: the group variable {self._group.name!r} " + "is not a dimension on the other argument" ) if dummy is None: dummy = _dummy_copy(other) @@ -548,8 +545,7 @@ def fillna(self, value): Dataset.fillna DataArray.fillna """ - out = ops.fillna(self, value) - return out + return ops.fillna(self, value) def quantile( self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True @@ -655,7 +651,6 @@ def quantile( keep_attrs=keep_attrs, skipna=skipna, ) - return out def where(self, cond, other=dtypes.NA): @@ -737,8 +732,7 @@ def _concat_shortcut(self, applied, dim, positions=None): # compiled language) stacked = Variable.concat(applied, dim, 
shortcut=True) reordered = _maybe_reorder(stacked, dim, positions) - result = self._obj._replace_maybe_drop_dims(reordered) - return result + return self._obj._replace_maybe_drop_dims(reordered) def _restore_dim_order(self, stacked): def lookup_order(dimension): @@ -795,10 +789,7 @@ def map(self, func, shortcut=False, args=(), **kwargs): applied : DataArray or DataArray The result of splitting, applying and combining this array. """ - if shortcut: - grouped = self._iter_grouped_shortcut() - else: - grouped = self._iter_grouped() + grouped = self._iter_grouped_shortcut() if shortcut else self._iter_grouped() applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped) return self._combine(applied, shortcut=shortcut) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 6747957ca75..06da058eb1f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -66,7 +66,7 @@ def broadcast_dimension_size(variables: List[Variable]) -> Dict[Hashable, int]: for var in variables: for dim, size in zip(var.dims, var.shape): if dim in dims and size != dims[dim]: - raise ValueError("index %r not aligned" % dim) + raise ValueError(f"index {dim!r} not aligned") dims[dim] = size return dims @@ -211,17 +211,15 @@ def merge_collected( for _, other_index in indexed_elements[1:]: if not index.equals(other_index): raise MergeError( - "conflicting values for index %r on objects to be " - "combined:\nfirst value: %r\nsecond value: %r" - % (name, index, other_index) + f"conflicting values for index {name!r} on objects to be " + f"combined:\nfirst value: {index!r}\nsecond value: {other_index!r}" ) if compat == "identical": for other_variable, _ in indexed_elements[1:]: if not dict_equiv(variable.attrs, other_variable.attrs): raise MergeError( "conflicting attribute values on combined " - "variable %r:\nfirst value: %r\nsecond value: %r" - % (name, variable.attrs, other_variable.attrs) + f"variable {name!r}:\nfirst value: {variable.attrs!r}\nsecond value: {other_variable.attrs!r}" ) merged_vars[name] = variable merged_vars[name].attrs = merge_attrs( @@ -497,9 +495,9 @@ def assert_valid_explicit_coords(variables, dims, explicit_coords): for coord_name in explicit_coords: if coord_name in dims and variables[coord_name].dims != (coord_name,): raise MergeError( - "coordinate %s shares a name with a dataset dimension, but is " + f"coordinate {coord_name} shares a name with a dataset dimension, but is " "not a 1D variable along that dimension. This is disallowed " - "by the xarray data model." % coord_name + "by the xarray data model." ) @@ -521,7 +519,7 @@ def merge_attrs(variable_attrs, combine_attrs): except ValueError as e: raise MergeError( "combine_attrs='no_conflicts', but some values are not " - "the same. Merging %s with %s" % (str(result), str(attrs)) + f"the same. Merging {str(result)} with {str(attrs)}" ) from e return result elif combine_attrs == "drop_conflicts": @@ -547,12 +545,12 @@ def merge_attrs(variable_attrs, combine_attrs): for attrs in variable_attrs[1:]: if not dict_equiv(result, attrs): raise MergeError( - "combine_attrs='identical', but attrs differ. First is %s " - ", other is %s." % (str(result), str(attrs)) + f"combine_attrs='identical', but attrs differ. First is {str(result)} " + f", other is {str(attrs)}." 
) return result else: - raise ValueError("Unrecognised value for combine_attrs=%s" % combine_attrs) + raise ValueError(f"Unrecognised value for combine_attrs={combine_attrs}") class _MergeResult(NamedTuple): @@ -642,15 +640,11 @@ def merge_core( if ambiguous_coords: raise MergeError( "unable to determine if these variables should be " - "coordinates or not in the merged result: %s" % ambiguous_coords + f"coordinates or not in the merged result: {ambiguous_coords}" ) attrs = merge_attrs( - [ - var.attrs - for var in coerced - if isinstance(var, Dataset) or isinstance(var, DataArray) - ], + [var.attrs for var in coerced if isinstance(var, (Dataset, DataArray))], combine_attrs, ) @@ -895,8 +889,7 @@ def merge( combine_attrs=combine_attrs, fill_value=fill_value, ) - merged = Dataset._construct_direct(**merge_result._asdict()) - return merged + return Dataset._construct_direct(**merge_result._asdict()) def dataset_merge_method( diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 41205242cce..c576e0718c6 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -93,7 +93,7 @@ def __init__(self, xi, yi, method="linear", fill_value=None, period=None): self._left = fill_value self._right = fill_value else: - raise ValueError("%s is not a valid fill_value" % fill_value) + raise ValueError(f"{fill_value} is not a valid fill_value") def __call__(self, x): return self.f( diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 803c7c3ccfe..35bac982d4c 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -46,9 +46,9 @@ def _validate_axis(axis, ndim, argname): axis = list(axis) axis = [a + ndim if a < 0 else a for a in axis] if not builtins.all(0 <= a < ndim for a in axis): - raise ValueError("invalid axis for this array in `%s` argument" % argname) + raise ValueError(f"invalid axis for this array in {argname} argument") if len(set(axis)) != len(axis): - raise ValueError("repeated axis in `%s` argument" % argname) + raise ValueError(f"repeated axis in {argname} argument") return axis @@ -73,8 +73,7 @@ def moveaxis(a, source, destination): for dest, src in sorted(zip(destination, source)): order.insert(dest, src) - result = transpose(order) - return result + return transpose(order) # Type annotations stubs diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 27740d53d45..8265035a25c 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -236,7 +236,7 @@ def func(self, *args, **kwargs): def inject_reduce_methods(cls): methods = ( [ - (name, getattr(duck_array_ops, "array_%s" % name), False) + (name, getattr(duck_array_ops, f"array_{name}"), False) for name in REDUCE_METHODS ] + [(name, getattr(duck_array_ops, name), True) for name in NAN_REDUCE_METHODS] @@ -275,7 +275,7 @@ def inject_cum_methods(cls): def op_str(name): - return "__%s__" % name + return f"__{name}__" def get_op(name): diff --git a/xarray/core/options.py b/xarray/core/options.py index d53c9d5d7d9..45f45c0dcc5 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -166,8 +166,7 @@ def __init__(self, **kwargs): for k, v in kwargs.items(): if k not in OPTIONS: raise ValueError( - "argument name %r is not in the set of valid options %r" - % (k, set(OPTIONS)) + f"argument name {k!r} is not in the set of valid options {set(OPTIONS)!r}" ) if k in _VALIDATORS and not _VALIDATORS[k](v): if k == ARITHMETIC_JOIN: diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index e1d32b7de43..795d30af28f 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ 
-75,7 +75,7 @@ def check_result_variables( def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): - raise TypeError("Expected Dataset, got %s" % type(obj)) + raise TypeError(f"Expected Dataset, got {type(obj)}") if len(obj.data_vars) > 1: raise TypeError( diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 882664cbb60..4a413902b90 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -146,7 +146,7 @@ def _get_time_bins(index, freq, closed, label, base): if not isinstance(index, CFTimeIndex): raise TypeError( "index must be a CFTimeIndex, but got " - "an instance of %r" % type(index).__name__ + f"an instance of {type(index).__name__!r}" ) if len(index) == 0: datetime_bins = labels = CFTimeIndex(data=[], name=index.name) @@ -163,11 +163,7 @@ def _get_time_bins(index, freq, closed, label, base): datetime_bins, freq, closed, index, labels ) - if label == "right": - labels = labels[1:] - else: - labels = labels[:-1] - + labels = labels[1:] if label == "right" else labels[:-1] # TODO: when CFTimeIndex supports missing values, if the reference index # contains missing values, insert the appropriate NaN value at the # beginning of the datetime_bins and labels indexes. @@ -262,11 +258,7 @@ def _get_range_edges(first, last, offset, closed="left", base=0): first = normalize_date(first) last = normalize_date(last) - if closed == "left": - first = offset.rollback(first) - else: - first = first - offset - + first = offset.rollback(first) if closed == "left" else first - offset last = last + offset return first, last @@ -321,11 +313,7 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): else: lresult = last else: - if foffset.total_seconds() > 0: - fresult = first - foffset - else: - fresult = first - + fresult = first - foffset if foffset.total_seconds() > 0 else first if loffset.total_seconds() > 0: lresult = last + (offset.as_timedelta() - loffset) else: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f99a7568282..870df122aa9 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -6,7 +6,6 @@ from . 
import dtypes, duck_array_ops, utils from .arithmetic import CoarsenArithmetic -from .dask_array_ops import dask_rolling_wrapper from .options import _get_keep_attrs from .pycompat import is_duck_dask_array @@ -173,11 +172,10 @@ def _mapping_to_list( if utils.is_dict_like(arg): if allow_default: return [arg.get(d, default) for d in self.dim] - else: - for d in self.dim: - if d not in arg: - raise KeyError(f"argument has no key {d}.") - return [arg[d] for d in self.dim] + for d in self.dim: + if d not in arg: + raise KeyError(f"argument has no key {d}.") + return [arg[d] for d in self.dim] elif allow_allsame: # for single argument return [arg] * len(self.dim) elif len(self.dim) == 1: @@ -439,7 +437,6 @@ def reduce(self, func, keep_attrs=None, **kwargs): obj = self.obj.fillna(fillna) else: obj = self.obj - windows = self._construct( obj, rolling_dim, keep_attrs=keep_attrs, fill_value=fillna ) @@ -504,9 +501,6 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): if is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") - values = dask_rolling_wrapper( - func, padded.data, window=self.window[0], min_count=min_count, axis=axis - ) else: values = func( padded.data, window=self.window[0], min_count=min_count, axis=axis @@ -549,20 +543,17 @@ def _numpy_or_bottleneck_reduce( return self._bottleneck_reduce( bottleneck_move_func, keep_attrs=keep_attrs, **kwargs ) - else: - if rolling_agg_func: - return rolling_agg_func( - self, keep_attrs=self._get_keep_attrs(keep_attrs) - ) - if fillna is not None: - if fillna is dtypes.INF: - fillna = dtypes.get_pos_infinity(self.obj.dtype, max_for_int=True) - elif fillna is dtypes.NINF: - fillna = dtypes.get_neg_infinity(self.obj.dtype, min_for_int=True) - kwargs.setdefault("skipna", False) - kwargs.setdefault("fillna", fillna) + if rolling_agg_func: + return rolling_agg_func(self, keep_attrs=self._get_keep_attrs(keep_attrs)) + if fillna is not None: + if fillna is dtypes.INF: + fillna = dtypes.get_pos_infinity(self.obj.dtype, max_for_int=True) + elif fillna is dtypes.NINF: + fillna = dtypes.get_neg_infinity(self.obj.dtype, min_for_int=True) + kwargs.setdefault("skipna", False) + kwargs.setdefault("fillna", fillna) - return self.reduce(array_agg_func, keep_attrs=keep_attrs, **kwargs) + return self.reduce(array_agg_func, keep_attrs=keep_attrs, **kwargs) class DatasetRolling(Rolling): @@ -612,7 +603,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None dims.append(d) center[d] = self.center[i] - if len(dims) > 0: + if dims: w = {d: windows[d] for d in dims} self.rollings[key] = DataArrayRolling(da, w, min_periods, center) @@ -735,7 +726,7 @@ def construct( for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on self.dim dims = [d for d in self.dim if d in da.dims] - if len(dims) > 0: + if dims: wi = {d: window_dim[i] for i, d in enumerate(self.dim) if d in da.dims} st = {d: stride[i] for i, d in enumerate(self.dim) if d in da.dims} diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d3b4cd39c53..31ac43ed214 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -218,7 +218,7 @@ def update_safety_check( if k in first_dict and not compat(v, first_dict[k]): raise ValueError( "unsafe to merge dictionaries without " - "overriding values; conflicting key %r" % k + f"overriding values; conflicting key {k!r}" ) @@ -254,7 +254,7 @@ def is_full_slice(value: Any) -> bool: def is_list_like(value: Any) -> bool: - return isinstance(value, list) or 
isinstance(value, tuple) + return isinstance(value, (list, tuple)) def is_duck_array(value: Any) -> bool: @@ -274,22 +274,19 @@ def either_dict_or_kwargs( kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: - if pos_kwargs is not None: - if not is_dict_like(pos_kwargs): - raise ValueError( - "the first argument to .%s must be a dictionary" % func_name - ) - if kw_kwargs: - raise ValueError( - "cannot specify both keyword and positional " - "arguments to .%s" % func_name - ) - return pos_kwargs - else: + if pos_kwargs is None: # Need an explicit cast to appease mypy due to invariance; see # https://github.com/python/mypy/issues/6228 return cast(Mapping[Hashable, T], kw_kwargs) + if not is_dict_like(pos_kwargs): + raise ValueError(f"the first argument to .{func_name} must be a dictionary") + if kw_kwargs: + raise ValueError( + f"cannot specify both keyword and positional arguments to .{func_name}" + ) + return pos_kwargs + def is_scalar(value: Any, include_0d: bool = True) -> bool: """Whether to treat a value as a scalar. @@ -358,10 +355,7 @@ def dict_equiv( for k in first: if k not in second or not compat(first[k], second[k]): return False - for k in second: - if k not in first: - return False - return True + return all(k in first for k in second) def compat_dict_intersection( @@ -730,7 +724,7 @@ def __init__(self, data: MutableMapping[K, V], hidden_keys: Iterable[K]): def _raise_if_hidden(self, key: K) -> None: if key in self._hidden_keys: - raise KeyError("Key `%r` is hidden." % key) + raise KeyError(f"Key `{key!r}` is hidden.") # The next five methods are requirements of the ABC. def __setitem__(self, key: K, value: V) -> None: @@ -863,7 +857,7 @@ def drop_missing_dims( """ if missing_dims == "raise": - supplied_dims_set = set(val for val in supplied_dims if val is not ...) + supplied_dims_set = {val for val in supplied_dims if val is not ...} invalid = supplied_dims_set - set(dims) if invalid: raise ValueError( diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cffaf2c3146..7122346baca 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -145,25 +145,24 @@ def as_variable(obj, name=None) -> "Union[Variable, IndexVariable]": data = as_compatible_data(obj) if data.ndim != 1: raise MissingDimensionsError( - "cannot set variable %r with %r-dimensional data " + f"cannot set variable {name!r} with {data.ndim!r}-dimensional data " "without explicit dimension names. Pass a tuple of " - "(dims, data) instead." % (name, data.ndim) + "(dims, data) instead." ) obj = Variable(name, data, fastpath=True) else: raise TypeError( "unable to convert object into a variable without an " - "explicit list of dimensions: %r" % obj + f"explicit list of dimensions: {obj!r}" ) if name is not None and name in obj.dims: # convert the Variable into an Index if obj.ndim != 1: raise MissingDimensionsError( - "%r has more than 1-dimension and the same name as one of its " - "dimensions %r. xarray disallows such variables because they " - "conflict with the coordinates used to label " - "dimensions." % (name, obj.dims) + f"{name!r} has more than 1-dimension and the same name as one of its " + f"dimensions {obj.dims!r}. xarray disallows such variables because they " + "conflict with the coordinates used to label dimensions." 
) obj = obj.to_index_variable() @@ -236,21 +235,14 @@ def as_compatible_data(data, fastpath=False): else: data = np.asarray(data) - if not isinstance(data, np.ndarray): - if hasattr(data, "__array_function__"): - return data + if not isinstance(data, np.ndarray) and hasattr(data, "__array_function__"): + return data # validate whether the data is valid data types. data = np.asarray(data) - if isinstance(data, np.ndarray): - if data.dtype.kind == "O": - data = _possibly_convert_objects(data) - elif data.dtype.kind == "M": - data = _possibly_convert_objects(data) - elif data.dtype.kind == "m": - data = _possibly_convert_objects(data) - + if isinstance(data, np.ndarray) and data.dtype.kind in "OMm": + data = _possibly_convert_objects(data) return _maybe_wrap_data(data) @@ -268,10 +260,7 @@ def _as_array_or_item(data): TODO: remove this (replace with np.asarray) once these issues are fixed """ - if isinstance(data, cupy_array_type): - data = data.get() - else: - data = np.asarray(data) + data = data.get() if isinstance(data, cupy_array_type) else np.asarray(data) if data.ndim == 0: if data.dtype.kind == "M": data = np.datetime64(data, "ns") @@ -584,8 +573,8 @@ def _parse_dimensions(self, dims): dims = tuple(dims) if len(dims) != self.ndim: raise ValueError( - "dimensions %s must have the same length as the " - "number of data dimensions, ndim=%s" % (dims, self.ndim) + f"dimensions {dims} must have the same length as the " + f"number of data dimensions, ndim={self.ndim}" ) return dims @@ -662,9 +651,7 @@ def _broadcast_indexes_basic(self, key): def _validate_indexers(self, key): """Make sanity checks""" for dim, k in zip(self.dims, key): - if isinstance(k, BASIC_INDEXING_TYPES): - pass - else: + if not isinstance(k, BASIC_INDEXING_TYPES): if not isinstance(k, Variable): k = np.asarray(k) if k.ndim > 1: @@ -852,9 +839,8 @@ def __setitem__(self, key, value): value = as_compatible_data(value) if value.ndim > len(dims): raise ValueError( - "shape mismatch: value array of shape %s could not be " - "broadcast to indexing result with %s dimensions" - % (value.shape, len(dims)) + f"shape mismatch: value array of shape {value.shape} could not be " + f"broadcast to indexing result with {len(dims)} dimensions" ) if value.ndim == 0: value = Variable((), value) @@ -1462,8 +1448,8 @@ def set_dims(self, dims, shape=None): missing_dims = set(self.dims) - set(dims) if missing_dims: raise ValueError( - "new dimensions %r must be a superset of " - "existing dimensions %r" % (dims, self.dims) + f"new dimensions {dims!r} must be a superset of " + f"existing dimensions {self.dims!r}" ) self_dims = set(self.dims) @@ -1487,7 +1473,7 @@ def set_dims(self, dims, shape=None): def _stack_once(self, dims: List[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): - raise ValueError("invalid existing dimensions: %s" % dims) + raise ValueError(f"invalid existing dimensions: {dims}") if new_dim in self.dims: raise ValueError( @@ -1554,7 +1540,7 @@ def _unstack_once_full( new_dim_sizes = tuple(dims.values()) if old_dim not in self.dims: - raise ValueError("invalid existing dimension: %s" % old_dim) + raise ValueError(f"invalid existing dimension: {old_dim}") if set(new_dim_names).intersection(self.dims): raise ValueError( @@ -2550,7 +2536,7 @@ class IndexVariable(Variable): def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): super().__init__(dims, data, attrs, encoding, fastpath) if self.ndim != 1: - raise ValueError("%s objects must be 1-dimensional" % type(self).__name__) + raise 
ValueError(f"{type(self).__name__} objects must be 1-dimensional") # Unlike in Variable, always eagerly load values into memory if not isinstance(self._data, PandasIndex): @@ -2601,7 +2587,7 @@ def _finalize_indexing_result(self, dims, data): return self._replace(dims=dims, data=data) def __setitem__(self, key, value): - raise TypeError("%s values cannot be modified" % type(self).__name__) + raise TypeError(f"{type(self).__name__} values cannot be modified") @classmethod def concat( @@ -2744,7 +2730,7 @@ def level_names(self): def get_level_variable(self, level): """Return a new IndexVariable from a given MultiIndex level.""" if self.level_names is None: - raise ValueError("IndexVariable %r has no MultiIndex" % self.name) + raise ValueError(f"IndexVariable {self.name!r} has no MultiIndex") index = self.to_index() return type(self)(self.dims, index.get_level_values(level)) @@ -2769,7 +2755,7 @@ def _unified_dims(variables): if len(set(var_dims)) < len(var_dims): raise ValueError( "broadcasting cannot handle duplicate " - "dimensions: %r" % list(var_dims) + f"dimensions: {list(var_dims)!r}" ) for d, s in zip(var_dims, var.shape): if d not in all_dims: @@ -2777,8 +2763,7 @@ def _unified_dims(variables): elif all_dims[d] != s: raise ValueError( "operands cannot be broadcast together " - "with mismatched lengths for dimension %r: %s" - % (d, (all_dims[d], s)) + f"with mismatched lengths for dimension {d!r}: {(all_dims[d], s)}" ) return all_dims @@ -2900,12 +2885,12 @@ def assert_unique_multiindex_level_names(variables): for k, v in level_names.items(): if k in variables: - v.append("(%s)" % k) + v.append(f"({k})") duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: conflict_str = "\n".join(", ".join(v) for v in duplicate_names) - raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) + raise ValueError(f"conflicting MultiIndex level name(s):\n{conflict_str}") # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): for d in v.dims: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 325ea799f28..858695ec538 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -44,14 +44,13 @@ def _determine_extend(calc_data, vmin, vmax): extend_min = calc_data.min() < vmin extend_max = calc_data.max() > vmax if extend_min and extend_max: - extend = "both" + return "both" elif extend_min: - extend = "min" + return "min" elif extend_max: - extend = "max" + return "max" else: - extend = "neither" - return extend + return "neither" def _build_discrete_cmap(cmap, levels, extend, filled): @@ -320,7 +319,7 @@ def _infer_xy_labels_3d(darray, x, y, rgb): if len(set(not_none)) < len(not_none): raise ValueError( "Dimension names must be None or unique strings, but imshow was " - "passed x=%r, y=%r, and rgb=%r." % (x, y, rgb) + f"passed x={x!r}, y={y!r}, and rgb={rgb!r}." ) for label in not_none: if label not in darray.dims: @@ -342,8 +341,7 @@ def _infer_xy_labels_3d(darray, x, y, rgb): rgb = could_be_color[0] if rgb is not None and darray[rgb].size not in (3, 4): raise ValueError( - "Cannot interpret dim %r of size %s as RGB or RGBA." - % (rgb, darray[rgb].size) + f"Cannot interpret dim {rgb!r} of size {darray[rgb].size} as RGB or RGBA." ) # If rgb dimension is still unknown, there must be two or three dimensions @@ -353,9 +351,9 @@ def _infer_xy_labels_3d(darray, x, y, rgb): rgb = could_be_color[-1] warnings.warn( "Several dimensions of this array could be colors. 
Xarray " - "will use the last possible dimension (%r) to match " + f"will use the last possible dimension ({rgb!r}) to match " "matplotlib.pyplot.imshow. You can pass names of x, y, " - "and/or rgb dimensions to override this guess." % rgb + "and/or rgb dimensions to override this guess." ) assert rgb is not None @@ -662,15 +660,15 @@ def _rescale_imshow_rgb(darray, vmin, vmax, robust): vmax = 255 if np.issubdtype(darray.dtype, np.integer) else 1 if vmax < vmin: raise ValueError( - "vmin=%r is less than the default vmax (%r) - you must supply " - "a vmax > vmin in this case." % (vmin, vmax) + f"vmin={vmin!r} is less than the default vmax ({vmax!r}) - you must supply " + "a vmax > vmin in this case." ) elif vmin is None: vmin = 0 if vmin > vmax: raise ValueError( - "vmax=%r is less than the default vmin (0) - you must supply " - "a vmin < vmax in this case." % vmax + f"vmax={vmax!r} is less than the default vmin (0) - you must supply " + "a vmin < vmax in this case." ) # Scale interval [vmin .. vmax] to [0 .. 1], with darray as 64-bit float # to avoid precision loss, integer over/underflow, etc with extreme inputs. diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index db102eefdc1..355c5dbed32 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -387,7 +387,7 @@ def test_da_groupby_assign_coords(): @pytest.mark.parametrize("obj", [repr_da, repr_da.to_dataset(name="a")]) def test_groupby_repr(obj, dim): actual = repr(obj.groupby(dim)) - expected = "%sGroupBy" % obj.__class__.__name__ + expected = f"{obj.__class__.__name__}GroupBy" expected += ", grouped over %r" % dim expected += "\n%r groups with labels " % (len(np.unique(obj[dim]))) if dim == "x": @@ -404,7 +404,7 @@ def test_groupby_repr(obj, dim): @pytest.mark.parametrize("obj", [repr_da, repr_da.to_dataset(name="a")]) def test_groupby_repr_datetime(obj): actual = repr(obj.groupby("t.month")) - expected = "%sGroupBy" % obj.__class__.__name__ + expected = f"{obj.__class__.__name__}GroupBy" expected += ", grouped over 'month'" expected += "\n%r groups with labels " % (len(np.unique(obj.t.dt.month))) expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12." diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 80c5e22513d..62762d29216 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -87,10 +87,10 @@ def open_dataset( if name in external_urls: url = external_urls[name] else: - # process the name - default_extension = ".nc" path = pathlib.Path(name) if not path.suffix: + # process the name + default_extension = ".nc" path = path.with_suffix(default_extension) url = f"{base_url}/raw/{version}/{path.name}" diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index ce01936b9dd..bf80dcf68cd 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -77,8 +77,7 @@ def __call__(self, *args, **kwargs): res = f(*new_args, **kwargs) if res is NotImplemented: raise TypeError( - "%r not implemented for types (%r, %r)" - % (self._name, type(args[0]), type(args[1])) + f"{self._name!r} not implemented for types ({type(args[0])!r}, {type(args[1])!r})" ) return res @@ -127,11 +126,11 @@ def _create_op(name): doc = _remove_unused_reference_labels(_skip_signature(_dedent(doc), name)) func.__doc__ = ( - "xarray specific variant of numpy.%s. Handles " + f"xarray specific variant of numpy.{name}. 
Handles " "xarray.Dataset, xarray.DataArray, xarray.Variable, " "numpy.ndarray and dask.array.Array objects with " "automatic dispatching.\n\n" - "Documentation from numpy:\n\n%s" % (name, doc) + f"Documentation from numpy:\n\n{doc}" ) return func diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index d643d768093..cd5d425efe2 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -42,15 +42,15 @@ def get_sys_info(): [ ("python", sys.version), ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - # ("Version", "%s" % (version)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get("LC_ALL", "None")), - ("LANG", "%s" % os.environ.get("LANG", "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), + ("OS", f"{sysname}"), + ("OS-release", f"{release}"), + # ("Version", f"{version}"), + ("machine", f"{machine}"), + ("processor", f"{processor}"), + ("byteorder", f"{sys.byteorder}"), + ("LC_ALL", f'{os.environ.get("LC_ALL", "None")}'), + ("LANG", f'{os.environ.get("LANG", "None")}'), + ("LOCALE", f"{locale.getlocale()}"), ] ) except Exception: