Align units of time coordinates and bounds
sfinkens committed Nov 25, 2019
1 parent 2d24cc5 commit e568e68
Showing 2 changed files with 72 additions and 32 deletions.
59 changes: 45 additions & 14 deletions satpy/tests/writer_tests/test_cf.py
@@ -220,14 +220,10 @@ def test_single_time_value(self):
end_time=end_time))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))

# Unlimited time dimension
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf', unlimited_dims=['time'])
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertSetEqual(f.encoding['unlimited_dims'], {'time'})
with xr.open_dataset(filename, decode_cf=True) as f:
np.testing.assert_array_equal(f['time'], scn['test-array']['time'])
bounds_exp = np.array([[start_time, end_time]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

def test_bounds(self):
"""Test setting time bounds."""
@@ -244,8 +240,23 @@ def test_bounds(self):
end_time=end_time))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=True) as f:
bounds_exp = np.array([[start_time, end_time]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

# Check that time coordinates and bounds have the same units
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
self.assertEqual(f['time_bnds'].attrs['units'], f['time'].attrs['units'])
self.assertEqual(f['time_bnds'].attrs['calendar'], f['time'].attrs['calendar'])

# User-specified time encoding should have preference
with TempFile() as filename:
time_units = 'seconds since 2018-01-01'
scn.save_datasets(filename=filename, encoding={'time': {'units': time_units}},
writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertEqual(f['time'].attrs['units'], time_units)
self.assertEqual(f['time_bnds'].attrs['units'], time_units)

def test_bounds_minimum(self):
"""Test minimum bounds."""
@@ -270,8 +281,9 @@ def test_bounds_minimum(self):
end_time=end_timeB))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
with xr.open_dataset(filename, decode_cf=True) as f:
bounds_exp = np.array([[start_timeA, end_timeB]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

def test_bounds_missing_time_info(self):
"""Test time bounds generation in case of missing time."""
@@ -292,11 +304,12 @@ def test_bounds_missing_time_info(self):
coords={'time': [np.datetime64('2018-05-30T10:05:00')]})
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf')
with xr.open_dataset(filename, decode_cf=False) as f:
self.assertTrue(np.all(f['time_bnds'][:] == np.array([-300., 600.])))
with xr.open_dataset(filename, decode_cf=True) as f:
bounds_exp = np.array([[start_timeA, end_timeA]], dtype='datetime64[m]')
np.testing.assert_array_equal(f['time_bnds'], bounds_exp)

def test_encoding_kwarg(self):
"""Test encoding of keyword arguments."""
"""Test 'encoding' keyword argument."""
from satpy import Scene
import xarray as xr
scn = Scene()
Expand All @@ -318,6 +331,24 @@ def test_encoding_kwarg(self):
# check that dtype behave as int8
self.assertTrue(np.iinfo(f['test-array'][:].dtype).max == 127)

def test_unlimited_dims_kwarg(self):
"""Test specification of unlimited dimensions."""
from satpy import Scene
import xarray as xr
scn = Scene()
start_time = datetime(2018, 5, 30, 10, 0)
end_time = datetime(2018, 5, 30, 10, 15)
test_array = np.array([[1, 2], [3, 4]])
scn['test-array'] = xr.DataArray(test_array,
dims=['x', 'y'],
coords={'time': np.datetime64('2018-05-30T10:05:00')},
attrs=dict(start_time=start_time,
end_time=end_time))
with TempFile() as filename:
scn.save_datasets(filename=filename, writer='cf', unlimited_dims=['time'])
with xr.open_dataset(filename) as f:
self.assertSetEqual(f.encoding['unlimited_dims'], {'time'})

def test_header_attrs(self):
"""Check master attributes are set."""
from satpy import Scene
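For reference, a short usage sketch of the behaviour exercised by the new test_unlimited_dims_kwarg (assumptions: scn is a Scene like the one built above; 'out.nc' is a placeholder path):

    import xarray as xr

    scn.save_datasets(filename='out.nc', writer='cf', unlimited_dims=['time'])
    with xr.open_dataset('out.nc') as f:
        # 'time' is written as an unlimited (record) dimension
        print(f.encoding['unlimited_dims'])  # expected: {'time'}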
45 changes: 27 additions & 18 deletions satpy/writers/cf_writer.py
@@ -106,6 +106,7 @@

from dask.base import tokenize
import xarray as xr
from xarray.coding.times import CFDatetimeCoder
import numpy as np

from pyresample.geometry import AreaDefinition, SwathDefinition
@@ -259,22 +260,14 @@ def area2cf(dataarray, strict=False):
return res


def make_time_bounds(dataarray, start_times, end_times):
def make_time_bounds(start_times, end_times):
"""Create time bounds for the current *dataarray*."""
import numpy as np
start_time = min(start_time for start_time in start_times
if start_time is not None)
end_time = min(end_time for end_time in end_times
if end_time is not None)
try:
dtnp64 = dataarray['time'].data[0]
except IndexError:
dtnp64 = dataarray['time'].data
time_bnds = [(np.datetime64(start_time) - dtnp64),
(np.datetime64(end_time) - dtnp64)]
data = xr.DataArray(np.array(time_bnds)[None, :] / np.timedelta64(1, 's'),
data = xr.DataArray([[np.datetime64(start_time), np.datetime64(end_time)]],
dims=['time', 'bnds_1d'])
data.encoding['_FillValue'] = None
return data


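A minimal sketch (not part of the commit) of what the rewritten make_time_bounds produces: the bounds are now plain datetime64 values, and conversion to numeric units is deferred to xarray's CF encoding so that it can match the 'time' coordinate:

    from datetime import datetime
    import numpy as np
    import xarray as xr

    # Hypothetical per-dataset start/end times; None entries are skipped.
    start_times = [datetime(2018, 5, 30, 10, 0), None]
    end_times = [datetime(2018, 5, 30, 10, 15), None]

    start_time = min(t for t in start_times if t is not None)
    end_time = min(t for t in end_times if t is not None)
    time_bnds = xr.DataArray([[np.datetime64(start_time), np.datetime64(end_time)]],
                             dims=['time', 'bnds_1d'])
    print(time_bnds.dtype)  # a datetime64 dtype, not float seconds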
@@ -566,20 +559,37 @@ def _collect_datasets(self, datasets, epoch=EPOCH, flatten_attrs=False, exclude_

return datas, start_times, end_times

def update_encoding(self, datasets, to_netcdf_kwargs):
def update_encoding(self, dataset, to_netcdf_kwargs):
"""Update encoding.
Avoid _FillValue attribute being added to coordinate variables (https://github.com/pydata/xarray/issues/1865).
"""
other_to_netcdf_kwargs = to_netcdf_kwargs.copy()
encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy()
coord_vars = []
for data_array in datasets:
for name, data_array in dataset.items():
coord_vars.extend(set(data_array.dims).intersection(data_array.coords))
for coord_var in coord_vars:
encoding.setdefault(coord_var, {})
encoding[coord_var].update({'_FillValue': None})

# Make sure time coordinates and bounds have the same units. Default is xarray's CF datetime
# encoding, which can be overridden by user-defined encoding.
if 'time' in dataset:
try:
dtnp64 = dataset['time'].data[0]
except IndexError:
dtnp64 = dataset['time'].data

default = CFDatetimeCoder().encode(xr.DataArray(dtnp64))
time_enc = {'units': default.attrs['units'], 'calendar': default.attrs['calendar']}
time_enc.update(encoding.get('time', {}))
bounds_enc = {'units': time_enc['units'],
'calendar': time_enc['calendar'],
'_FillValue': None}
encoding['time'] = time_enc
encoding['time_bnds'] = bounds_enc

return encoding, other_to_netcdf_kwargs

def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=EPOCH,
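A minimal sketch of the mechanism update_encoding relies on: xarray's CFDatetimeCoder attaches default 'units' and 'calendar' attributes when encoding a datetime64 value (the printed values are illustrative; they depend on the timestamp):

    import numpy as np
    import xarray as xr
    from xarray.coding.times import CFDatetimeCoder

    dtnp64 = np.datetime64('2018-05-30T10:05:00')
    default = CFDatetimeCoder().encode(xr.DataArray(dtnp64))
    print(default.attrs['units'])     # e.g. 'seconds since 2018-05-30 10:05:00'
    print(default.attrs['calendar'])  # e.g. 'proleptic_gregorian'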
@@ -591,7 +601,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None,
Args:
datasets (list):
Names of datasets to be saved
Datasets to be saved
filename (str):
Output file
groups (dict):
@@ -666,17 +676,16 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None,
group_datasets, epoch=epoch, flatten_attrs=flatten_attrs, exclude_attrs=exclude_attrs,
include_lonlats=include_lonlats, pretty=pretty, compression=compression)
dataset = xr.Dataset(datas)
try:
dataset['time_bnds'] = make_time_bounds(dataset,
start_times,
if 'time' in dataset:
dataset['time_bnds'] = make_time_bounds(start_times,
end_times)
dataset['time'].attrs['bounds'] = "time_bnds"
dataset['time'].attrs['standard_name'] = "time"
except KeyError:
else:
grp_str = ' of group {}'.format(group_name) if group_name is not None else ''
logger.warning('No time dimension in datasets{}, skipping time bounds creation.'.format(grp_str))

encoding, other_to_netcdf_kwargs = self.update_encoding(datasets, to_netcdf_kwargs)
encoding, other_to_netcdf_kwargs = self.update_encoding(dataset, to_netcdf_kwargs)
res = dataset.to_netcdf(filename, engine=engine, group=group_name, mode='a', encoding=encoding,
**other_to_netcdf_kwargs)
written.append(res)
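End to end, a user-supplied time encoding now propagates to the bounds variable as well. A usage sketch mirroring the new test (assumptions: scn is a Scene with a 'time' coordinate; 'out.nc' is a placeholder path):

    import xarray as xr

    time_units = 'seconds since 2018-01-01'
    scn.save_datasets(filename='out.nc', writer='cf',
                      encoding={'time': {'units': time_units}})
    with xr.open_dataset('out.nc', decode_cf=False) as f:
        # Both the coordinate and its bounds carry the user-specified units.
        assert f['time'].attrs['units'] == time_units
        assert f['time_bnds'].attrs['units'] == time_units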
