From 00a92d348e19936d73de21f7b19bd2c0fb8610b9 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Tue, 26 Nov 2019 16:39:39 +0100 Subject: [PATCH 01/19] Try to cache small data variables In the netcdf utility reader, cache small data variables to prevent needlessly often opening and closing the data files. --- satpy/readers/netcdf_utils.py | 39 ++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 02d00baf5c..fb7a2e8fff 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -51,18 +51,25 @@ class NetCDF4FileHandler(BaseFileHandler): wrapper["/attr/platform_short_name"] - Note that loading datasets requires reopening the original file, but to - get just the shape of the dataset append "/shape" to the item string: + Note that loading uncached datasets requires reopening the original + file, but to get just the shape of the dataset append "/shape" + to the item string: wrapper["group/subgroup/var_name/shape"] + If your file has many small data variables that are frequently accessed, + you may choose to cache some of them. You can do this by passing a number, + any variable smaller than this number in bytes will be read into RAM. + Warning, this part of the API is provisional and subject to change. 
""" def __init__(self, filename, filename_info, filetype_info, - auto_maskandscale=False, xarray_kwargs=None): + auto_maskandscale=False, xarray_kwargs=None, + cache_vars=0): super(NetCDF4FileHandler, self).__init__( filename, filename_info, filetype_info) self.file_content = {} + self.cached_file_content = {} try: file_handle = netCDF4.Dataset(self.filename, 'r') except IOError: @@ -76,6 +83,13 @@ def __init__(self, filename, filename_info, filetype_info, self.collect_metadata("", file_handle) self.collect_dimensions("", file_handle) + if cache_vars > 0: + self.collect_cache_vars( + [varname for (varname, var) + in self.file_content.items() + if isinstance(var, netCDF4.Variable) + and var.size*var.dtype.itemsize Date: Tue, 26 Nov 2019 16:47:36 +0100 Subject: [PATCH 02/19] In FCI reader, use data variable caching In the FCI reader, use the data variable caching implemented in the previous commit. This should address #972. --- satpy/readers/fci_l1c_fdhsi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/satpy/readers/fci_l1c_fdhsi.py b/satpy/readers/fci_l1c_fdhsi.py index 91cd1632c4..724394d7a5 100644 --- a/satpy/readers/fci_l1c_fdhsi.py +++ b/satpy/readers/fci_l1c_fdhsi.py @@ -78,7 +78,10 @@ class using the :mod:`~satpy.Scene.load` method with the reader def __init__(self, filename, filename_info, filetype_info): super(FCIFDHSIFileHandler, self).__init__(filename, filename_info, - filetype_info) + filetype_info, + xarray_kwargs={ + "backend": "h5netcdf"}, + cache_vars=10000) logger.debug('Reading: {}'.format(self.filename)) logger.debug('Start: {}'.format(self.start_time)) logger.debug('End: {}'.format(self.end_time)) From d34af0c68c45d8d77481172df50681dae5d5a740 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Tue, 26 Nov 2019 17:08:41 +0100 Subject: [PATCH 03/19] Don't try to cache strings For strings, I cannot measure their size because their .dtype is a type, not a dtype. 
Therefore I can't get the itemsize so I don't know how large they will be (they're also variable length). Don't cache those for now, I'm not using them anyway. --- satpy/readers/netcdf_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index fb7a2e8fff..3c7b94324b 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -20,6 +20,7 @@ """ import netCDF4 import logging +import numpy as np import xarray as xr from satpy import CHUNK_SIZE @@ -88,6 +89,7 @@ def __init__(self, filename, filename_info, filetype_info, [varname for (varname, var) in self.file_content.items() if isinstance(var, netCDF4.Variable) + and isinstance(var.dtype. np.dtype) # vlen may be str and var.size*var.dtype.itemsize Date: Tue, 26 Nov 2019 17:12:42 +0100 Subject: [PATCH 04/19] Fix typo in previous commit --- satpy/readers/netcdf_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 3c7b94324b..9e76d383be 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -89,7 +89,7 @@ def __init__(self, filename, filename_info, filetype_info, [varname for (varname, var) in self.file_content.items() if isinstance(var, netCDF4.Variable) - and isinstance(var.dtype. np.dtype) # vlen may be str + and isinstance(var.dtype, np.dtype) # vlen may be str and var.size*var.dtype.itemsize Date: Wed, 27 Nov 2019 09:05:37 +0100 Subject: [PATCH 05/19] Caching bugfix Fix a bug in the small variable caching, where I was overwriting rather than adding a key to the cache dictionary. 
--- satpy/readers/netcdf_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 9e76d383be..c2ad33d1c1 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -145,7 +145,7 @@ def collect_cache_vars(self, cache_vars, obj): obj (netCDF4.Dataset): Dataset object from which to read them. """ for var_name in cache_vars: - self.cached_file_content = self.file_content[var_name][:] + self.cached_file_content[var_name] = self.file_content[var_name][:] def __getitem__(self, key): val = self.file_content[key] From da1cdf3c099ec475daacb8f0581d26addca84ca2 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 09:08:58 +0100 Subject: [PATCH 06/19] Bugfix in nc utils small var caching Fix a small bug in the ncutils small var caching, where the wrong variable name was used. --- satpy/readers/netcdf_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index c2ad33d1c1..3ab8bace43 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -151,7 +151,7 @@ def __getitem__(self, key): val = self.file_content[key] if isinstance(val, netCDF4.Variable): if key in self.cached_file_content: - return self.cached_file_content[var_name] + return self.cached_file_content[key] # these datasets are closed and inaccessible when the file is # closed, need to reopen # TODO: Handle HDF4 versus NetCDF3 versus NetCDF4 From f06a6ab0fffa486eaff5e5b586095f95d2c0a968 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 09:32:14 +0100 Subject: [PATCH 07/19] Make xarray objects when caching Downstream, we need at least the attributes for some of the cached variables. Therefore we do need to make them into xarray DataArrays again. 
--- satpy/readers/netcdf_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 3ab8bace43..9ea3587035 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -145,7 +145,9 @@ def collect_cache_vars(self, cache_vars, obj): obj (netCDF4.Dataset): Dataset object from which to read them. """ for var_name in cache_vars: - self.cached_file_content[var_name] = self.file_content[var_name][:] + v = self.file_content[var_name] + self.cached_file_content[var_name] = xarray.DataArray( + v[:], dims=v.dimensions, attrs=v.__dict__, name=v.name) def __getitem__(self, key): val = self.file_content[key] From f3ab50423f4e4960562b26f01104452acdd3ac93 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 09:37:00 +0100 Subject: [PATCH 08/19] bug in small var caching method Fix bug in small var caching method, should be xr not xarray --- satpy/readers/netcdf_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 9ea3587035..a5ae5347ad 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -146,7 +146,7 @@ def collect_cache_vars(self, cache_vars, obj): """ for var_name in cache_vars: v = self.file_content[var_name] - self.cached_file_content[var_name] = xarray.DataArray( + self.cached_file_content[var_name] = xr.DataArray( v[:], dims=v.dimensions, attrs=v.__dict__, name=v.name) def __getitem__(self, key): From bed6967bef93b1287cff30ffdd8dc0201b104ed0 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 14:44:26 +0100 Subject: [PATCH 09/19] Further optional optimisation in nc-utils In netcdf_utils, add an option to avoid the slow xarray.open_dataset completely. Instead, this option allows keeping the file open for as long as the filehandler object exists, and creating xarray.DataArray objects manually. The coordinates are missing for now. 
--- satpy/readers/netcdf_utils.py | 64 ++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index a5ae5347ad..51e056141d 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -62,11 +62,19 @@ class NetCDF4FileHandler(BaseFileHandler): you may choose to cache some of them. You can do this by passing a number, any variable smaller than this number in bytes will be read into RAM. Warning, this part of the API is provisional and subject to change. + + You may get an additional speedup by passing ``cache_handle=True``. This + will keep the netCDF4 dataset handles open throughout the lifetime of the + object, and instead of using `xarray.open_dataset` to open every data + variable, a dask array will be created "manually". This may be useful if + you have a dataset distributed over many files, such as for FCI. Note + that the coordinates will be missing in this case. """ + file_handle = None def __init__(self, filename, filename_info, filetype_info, auto_maskandscale=False, xarray_kwargs=None, - cache_vars=0): + cache_vars=0, cache_handle=False): super(NetCDF4FileHandler, self).__init__( filename, filename_info, filetype_info) self.file_content = {} @@ -92,11 +100,21 @@ def __init__(self, filename, filename_info, filetype_info, and isinstance(var.dtype, np.dtype) # vlen may be str and var.size*var.dtype.itemsize Date: Wed, 27 Nov 2019 14:47:07 +0100 Subject: [PATCH 10/19] FCI reader now uses new nc-utils file handling The FCI reader now uses the new option (introduced in the previous commit) to bypass xarray.open_dataset completely; this should further improve performance. 
--- satpy/readers/fci_l1c_fdhsi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/satpy/readers/fci_l1c_fdhsi.py b/satpy/readers/fci_l1c_fdhsi.py index 724394d7a5..5d94435a62 100644 --- a/satpy/readers/fci_l1c_fdhsi.py +++ b/satpy/readers/fci_l1c_fdhsi.py @@ -81,7 +81,8 @@ def __init__(self, filename, filename_info, filetype_info): filetype_info, xarray_kwargs={ "backend": "h5netcdf"}, - cache_vars=10000) + cache_vars=10000, + cache_handle=True) logger.debug('Reading: {}'.format(self.filename)) logger.debug('Start: {}'.format(self.start_time)) logger.debug('End: {}'.format(self.end_time)) From f668575e336386d206b28795aa06da7c807b1a2d Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 15:01:50 +0100 Subject: [PATCH 11/19] Bugfix missing return in __getitem__ Fix a bug introduced a couple of commits ago, where a return statement went AWOL for cases where __getitem__ on the NetCDF4FileHandler is retrieving an attribute or shape. --- satpy/readers/netcdf_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 51e056141d..9deef987de 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -181,9 +181,10 @@ def __getitem__(self, key): else: group = None if self.file_handle is not None: - return self._get_var_from_filehandle(group, key) + val = self._get_var_from_filehandle(group, key) else: - return self._get_var_from_xr(group, key) + val = self._get_var_from_xr(group, key) + return val def _get_var_from_xr(self, group, key): with xr.open_dataset(self.filename, group=group, From c3c2c80a756856afaec494d1f9603e1aa7052aaa Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 15:09:45 +0100 Subject: [PATCH 12/19] Bugfix: add missing import in netcdf-utils Fix a bug where an import statement for dask was missing in the netcdf-utils. 
--- satpy/readers/netcdf_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 9deef987de..0f43438007 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -22,6 +22,7 @@ import logging import numpy as np import xarray as xr +import dask.array as da from satpy import CHUNK_SIZE from satpy.readers.file_handlers import BaseFileHandler From b747f0f741eccceee697e7c0525b61064bfab415 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Wed, 27 Nov 2019 16:09:25 +0100 Subject: [PATCH 13/19] Fix bad return statement The previous commit cannot possibly have been running at all. --- satpy/readers/netcdf_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 0f43438007..64ff2b07ad 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -201,7 +201,7 @@ def _get_var_from_xr(self, group, key): # https://github.com/pydata/xarray/issues/2954#issuecomment-491221266 if not val.chunks: val.load() - return val + return val def _get_var_from_filehandle(self, group, key): g = self.file_handle[group] From 40d3ee39a4c4ba7e75284dd58bfbad3b2aa0ade2 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Thu, 28 Nov 2019 10:21:32 +0100 Subject: [PATCH 14/19] TST: Add test case for nc utils caching Add a test case to cover the newly implemented caching feature in netcdf-utils --- satpy/tests/reader_tests/test_netcdf_utils.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py index aa16774965..68dc96b51a 100644 --- a/satpy/tests/reader_tests/test_netcdf_utils.py +++ b/satpy/tests/reader_tests/test_netcdf_utils.py @@ -93,6 +93,9 @@ def setUp(self): ds2_i = nc.createVariable('ds2_i', np.int32, dimensions=('rows', 'cols')) ds2_i[:] = np.arange(10 * 100).reshape((10, 100)) + ds2_s = 
nc.createVariable("ds2_s", np.int8, + dimensions=("rows",)) + ds2_s[:] = np.arange(10) # Add attributes nc.test_attr_str = 'test_string' @@ -138,7 +141,20 @@ def test_all_basic(self): self.assertTrue('ds2_f' in file_handler) self.assertFalse('fake_ds' in file_handler) + self.assertIsNone(file_handler.file_handle) + def test_caching(self): + """Test that caching works as intended. + """ + from satpy.readers.netcdf_utils import NetCDF4FileHandler + h = NetCDF4FileHandler("test.nc", {}, {}, cache_vars=1000, + cache_handle=True) + self.assertIsNotNone(h.file_handle) + self.assertTrue(h.file_handle.isopen()) + + self.assertEqual(sorted(h.cached_file_content.keys()), ["ds2_s"]) + h.__del__() + self.assertFalse(h.file_handle.isopen()) def suite(): """The test suite for test_netcdf_utils.""" From d0cc1f1937ca6aae1073c2119ea8dde8ea88782f Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Thu, 28 Nov 2019 10:26:18 +0100 Subject: [PATCH 15/19] PEP8 fixes in netcdf_utils PEP8/flake8 fixes in netcdf_utils and test_netcdf_utils --- satpy/readers/netcdf_utils.py | 7 ++++--- satpy/tests/reader_tests/test_netcdf_utils.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 64ff2b07ad..20373f548f 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -73,6 +73,7 @@ class NetCDF4FileHandler(BaseFileHandler): """ file_handle = None + def __init__(self, filename, filename_info, filetype_info, auto_maskandscale=False, xarray_kwargs=None, cache_vars=0, cache_handle=False): @@ -98,8 +99,8 @@ def __init__(self, filename, filename_info, filetype_info, [varname for (varname, var) in self.file_content.items() if isinstance(var, netCDF4.Variable) - and isinstance(var.dtype, np.dtype) # vlen may be str - and var.size*var.dtype.itemsize Date: Thu, 28 Nov 2019 11:37:07 +0100 Subject: [PATCH 16/19] TST: Improve test coverage for netcdf-utils Improve test coverage for netcdf_utils. 
Test coverage for this module is now 100% according to my local pytest. --- satpy/tests/reader_tests/test_netcdf_utils.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py index cb6c0a5a0a..1b5959b432 100644 --- a/satpy/tests/reader_tests/test_netcdf_utils.py +++ b/satpy/tests/reader_tests/test_netcdf_utils.py @@ -96,6 +96,8 @@ def setUp(self): ds2_s = nc.createVariable("ds2_s", np.int8, dimensions=("rows",)) ds2_s[:] = np.arange(10) + ds2_sc = nc.createVariable("ds2_sc", np.int8, dimensions=()) + ds2_sc[:] = 42 # Add attributes nc.test_attr_str = 'test_string' @@ -142,6 +144,7 @@ def test_all_basic(self): self.assertTrue('ds2_f' in file_handler) self.assertFalse('fake_ds' in file_handler) self.assertIsNone(file_handler.file_handle) + self.assertEqual(file_handler["ds2_sc"], 42) def test_caching(self): """Test that caching works as intended. @@ -152,10 +155,22 @@ def test_caching(self): self.assertIsNotNone(h.file_handle) self.assertTrue(h.file_handle.isopen()) - self.assertEqual(sorted(h.cached_file_content.keys()), ["ds2_s"]) + self.assertEqual(sorted(h.cached_file_content.keys()), + ["ds2_s", "ds2_sc"]) + # with caching, these tests access different lines than without + np.testing.assert_array_equal(h["ds2_s"], np.arange(10)) + np.testing.assert_array_equal(h["test_group/ds1_i"], + np.arange(10 * 100).reshape((10, 100))) h.__del__() self.assertFalse(h.file_handle.isopen()) + def test_filenotfound(self): + """Test that error is raised when file not found + """ + from satpy.readers.netcdf_utils import NetCDF4FileHandler + + with self.assertRaises(IOError): + h = NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {}) def suite(): """The test suite for test_netcdf_utils.""" From a379c16b7cc76fbbf97bc545d75091f2e11e638c Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Thu, 28 Nov 2019 11:45:38 +0100 Subject: [PATCH 17/19] PEP8 / flake8 fixes 
Fix PEP8 / flake8 complaints --- satpy/tests/reader_tests/test_netcdf_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py index 1b5959b432..731ea264f8 100644 --- a/satpy/tests/reader_tests/test_netcdf_utils.py +++ b/satpy/tests/reader_tests/test_netcdf_utils.py @@ -170,7 +170,8 @@ def test_filenotfound(self): from satpy.readers.netcdf_utils import NetCDF4FileHandler with self.assertRaises(IOError): - h = NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {}) + NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {}) + def suite(): """The test suite for test_netcdf_utils.""" From 88d22e6cd2a55889cc3b89588be065f2a62bef56 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Thu, 28 Nov 2019 13:10:01 +0100 Subject: [PATCH 18/19] Cosmetic fixes in netcdf utils caching A few cosmetic changes to the netcdf utils caching. Improve the API documentation, change an argument name to better reflect its role, and point out in additional places that we're not doing coordinates when caching variables. 
--- satpy/readers/fci_l1c_fdhsi.py | 4 +--- satpy/readers/netcdf_utils.py | 20 +++++++++++++++---- satpy/tests/reader_tests/test_netcdf_utils.py | 2 +- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/satpy/readers/fci_l1c_fdhsi.py b/satpy/readers/fci_l1c_fdhsi.py index 5d94435a62..7d4ed5e634 100644 --- a/satpy/readers/fci_l1c_fdhsi.py +++ b/satpy/readers/fci_l1c_fdhsi.py @@ -79,9 +79,7 @@ class using the :mod:`~satpy.Scene.load` method with the reader def __init__(self, filename, filename_info, filetype_info): super(FCIFDHSIFileHandler, self).__init__(filename, filename_info, filetype_info, - xarray_kwargs={ - "backend": "h5netcdf"}, - cache_vars=10000, + cache_var_size=10000, cache_handle=True) logger.debug('Reading: {}'.format(self.filename)) logger.debug('Start: {}'.format(self.start_time)) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index 20373f548f..7d7b821039 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -69,14 +69,24 @@ class NetCDF4FileHandler(BaseFileHandler): object, and instead of using `xarray.open_dataset` to open every data variable, a dask array will be created "manually". This may be useful if you have a dataset distributed over many files, such as for FCI. Note - that the coordinates will be missing in this case. + that the coordinates will be missing in this case. If you use this option, + ``xarray_kwargs`` will have no effect. + + Args: + filename (str): File to read + filename_info (dict): Dictionary with filename information + filetype_info (dict): Dictionary with filetype information + auto_maskandscale (bool): Apply mask and scale factors + xarray_kwargs (dict): Addition arguments to `xarray.open_dataset` + cache_var_size (int): Cache variables smaller than this size. + cache_handle (bool): Keep files open for lifetime of filehandler. 
""" file_handle = None def __init__(self, filename, filename_info, filetype_info, auto_maskandscale=False, xarray_kwargs=None, - cache_vars=0, cache_handle=False): + cache_var_size=0, cache_handle=False): super(NetCDF4FileHandler, self).__init__( filename, filename_info, filetype_info) self.file_content = {} @@ -94,13 +104,13 @@ def __init__(self, filename, filename_info, filetype_info, self.collect_metadata("", file_handle) self.collect_dimensions("", file_handle) - if cache_vars > 0: + if cache_var_size > 0: self.collect_cache_vars( [varname for (varname, var) in self.file_content.items() if isinstance(var, netCDF4.Variable) and isinstance(var.dtype, np.dtype) # vlen may be str - and var.size * var.dtype.itemsize < cache_vars], + and var.size * var.dtype.itemsize < cache_var_size], file_handle) if cache_handle: self.file_handle = file_handle @@ -205,6 +215,8 @@ def _get_var_from_xr(self, group, key): return val def _get_var_from_filehandle(self, group, key): + # Not getting coordinates as this is more work, therefore more + # overhead, and those are not used downstream. g = self.file_handle[group] v = g[key] x = xr.DataArray( diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py index 731ea264f8..0204d88dc9 100644 --- a/satpy/tests/reader_tests/test_netcdf_utils.py +++ b/satpy/tests/reader_tests/test_netcdf_utils.py @@ -150,7 +150,7 @@ def test_caching(self): """Test that caching works as intended. 
""" from satpy.readers.netcdf_utils import NetCDF4FileHandler - h = NetCDF4FileHandler("test.nc", {}, {}, cache_vars=1000, + h = NetCDF4FileHandler("test.nc", {}, {}, cache_var_size=1000, cache_handle=True) self.assertIsNotNone(h.file_handle) self.assertTrue(h.file_handle.isopen()) From 2b75d177fc76b64dcc518d479783993dedc66182 Mon Sep 17 00:00:00 2001 From: Gerrit Holl Date: Mon, 9 Dec 2019 16:11:23 +0100 Subject: [PATCH 19/19] In optimised nc-utils, clarify caching In the docstring for the optimised netcdf_utils, clarify the first reference to caching. --- satpy/readers/netcdf_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py index dcf7b3ebff..6e266b8e85 100644 --- a/satpy/readers/netcdf_utils.py +++ b/satpy/readers/netcdf_utils.py @@ -51,9 +51,9 @@ class NetCDF4FileHandler(BaseFileHandler): wrapper["/attr/platform_short_name"] - Note that loading uncached datasets requires reopening the original - file, but to get just the shape of the dataset append "/shape" - to the item string: + Note that loading datasets requires reopening the original file + (unless those datasets are cached, see below), but to get just the + shape of the dataset append "/shape" to the item string: wrapper["group/subgroup/var_name/shape"]