diff --git a/satpy/readers/hdf4_utils.py b/satpy/readers/hdf4_utils.py
index d6258d9d62..10f3b24b66 100644
--- a/satpy/readers/hdf4_utils.py
+++ b/satpy/readers/hdf4_utils.py
@@ -18,10 +18,12 @@
 """Helpers for reading hdf4-based files."""
 
 import logging
+import os
 
 import dask.array as da
 import numpy as np
 import xarray as xr
+from dask.base import tokenize
 from pyhdf.SD import SD, SDC, SDS
 
 from satpy.readers.file_handlers import BaseFileHandler
@@ -45,12 +47,22 @@
 }
 
 
-def from_sds(var, *args, **kwargs):
+def from_sds(var, src_path, **kwargs):
     """Create a dask array from a SD dataset."""
-    var.__dict__["dtype"] = np.dtype(HTYPE_TO_DTYPE[var.info()[3]])
-    shape = var.info()[2]
+    var_info = var.info()
+    var.__dict__["dtype"] = np.dtype(HTYPE_TO_DTYPE[var_info[3]])
+    shape = var_info[2]
     var.__dict__["shape"] = shape if isinstance(shape, (tuple, list)) else tuple(shape)
-    return da.from_array(var, *args, **kwargs)
+
+    name = kwargs.pop("name", None)
+    if name is None:
+        var_name = var_info[0]
+        tokenize_args = (os.fspath(src_path), var_name)
+        if kwargs:
+            tokenize_args += (kwargs,)
+        # put variable name in the front for easier dask debugging
+        name = var_name + "-" + tokenize(*tokenize_args)
+    return da.from_array(var, name=name, **kwargs)
 
 
 class HDF4FileHandler(BaseFileHandler):
@@ -92,7 +104,7 @@ def collect_metadata(self, name, obj):
 
     def _open_xarray_dataset(self, val, chunks=CHUNK_SIZE):
         """Read the band in blocks."""
-        dask_arr = from_sds(val, chunks=chunks)
+        dask_arr = from_sds(val, self.filename, chunks=chunks)
         attrs = val.attributes()
         return xr.DataArray(dask_arr, dims=("y", "x"), attrs=attrs)
 
diff --git a/satpy/readers/hdfeos_base.py b/satpy/readers/hdfeos_base.py
index 3fd920c01f..7c25e1d09a 100644
--- a/satpy/readers/hdfeos_base.py
+++ b/satpy/readers/hdfeos_base.py
@@ -216,7 +216,7 @@ def load_dataset(self, dataset_name, is_category=False):
         dataset = self._read_dataset_in_file(dataset_name)
 
         chunks = self._chunks_for_variable(dataset)
-        dask_arr = from_sds(dataset, chunks=chunks)
+        dask_arr = from_sds(dataset, self.filename, chunks=chunks)
         dims = ("y", "x") if dask_arr.ndim == 2 else None
         data = xr.DataArray(dask_arr, dims=dims, attrs=dataset.attributes())
 
diff --git a/satpy/readers/modis_l1b.py b/satpy/readers/modis_l1b.py
index 8280b30065..17bf5d56ae 100644
--- a/satpy/readers/modis_l1b.py
+++ b/satpy/readers/modis_l1b.py
@@ -117,7 +117,7 @@ def get_dataset(self, key, info):
         var_attrs = subdata.attributes()
         uncertainty = self.sd.select(var_name + "_Uncert_Indexes")
         chunks = self._chunks_for_variable(subdata)
-        array = xr.DataArray(from_sds(subdata, chunks=chunks)[band_index, :, :],
+        array = xr.DataArray(from_sds(subdata, self.filename, chunks=chunks)[band_index, :, :],
                              dims=["y", "x"]).astype(np.float32)
         valid_range = var_attrs["valid_range"]
         valid_min = np.float32(valid_range[0])
@@ -214,7 +214,7 @@ def _mask_uncertain_pixels(self, array, uncertainty, band_index):
         if not self._mask_saturated:
             return array
         uncertainty_chunks = self._chunks_for_variable(uncertainty)
-        band_uncertainty = from_sds(uncertainty, chunks=uncertainty_chunks)[band_index, :, :]
+        band_uncertainty = from_sds(uncertainty, self.filename, chunks=uncertainty_chunks)[band_index, :, :]
         array = array.where(band_uncertainty < 15)
         return array
 
diff --git a/satpy/readers/modis_l2.py b/satpy/readers/modis_l2.py
index 8fdf1c69bb..2f2555692d 100644
--- a/satpy/readers/modis_l2.py
+++ b/satpy/readers/modis_l2.py
@@ -111,7 +111,7 @@ def read_geo_resolution(metadata):
     def _select_hdf_dataset(self, hdf_dataset_name, byte_dimension):
         """Load a dataset from HDF-EOS level 2 file."""
         dataset = self.sd.select(hdf_dataset_name)
-        dask_arr = from_sds(dataset, chunks=CHUNK_SIZE)
+        dask_arr = from_sds(dataset, self.filename, chunks=CHUNK_SIZE)
         attrs = dataset.attributes()
         dims = ["y", "x"]
         if byte_dimension == 0: