From 00a92d348e19936d73de21f7b19bd2c0fb8610b9 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Tue, 26 Nov 2019 16:39:39 +0100
Subject: [PATCH 01/19] Try to cache small data variables

In the netcdf utility reader, cache small data variables to avoid
needlessly opening and closing the data files over and over.
---
 satpy/readers/netcdf_utils.py | 39 ++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 02d00baf5c..fb7a2e8fff 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -51,18 +51,25 @@ class NetCDF4FileHandler(BaseFileHandler):
 
         wrapper["/attr/platform_short_name"]
 
-    Note that loading datasets requires reopening the original file, but to
-    get just the shape of the dataset append "/shape" to the item string:
+    Note that loading uncached datasets requires reopening the original
+    file, but to get just the shape of the dataset append "/shape"
+    to the item string:
 
         wrapper["group/subgroup/var_name/shape"]
 
+    If your file has many small data variables that are frequently accessed,
+    you may choose to cache some of them.  You can do this by passing a number,
+    any variable smaller than this number in bytes will be read into RAM.
+    Warning, this part of the API is provisional and subject to change.
     """
 
     def __init__(self, filename, filename_info, filetype_info,
-                 auto_maskandscale=False, xarray_kwargs=None):
+                 auto_maskandscale=False, xarray_kwargs=None,
+                 cache_vars=0):
         super(NetCDF4FileHandler, self).__init__(
             filename, filename_info, filetype_info)
         self.file_content = {}
+        self.cached_file_content = {}
         try:
             file_handle = netCDF4.Dataset(self.filename, 'r')
         except IOError:
@@ -76,6 +83,13 @@ def __init__(self, filename, filename_info, filetype_info,
 
         self.collect_metadata("", file_handle)
         self.collect_dimensions("", file_handle)
+        if cache_vars > 0:
+            self.collect_cache_vars(
+                    [varname for (varname, var)
+                        in self.file_content.items()
+                        if isinstance(var, netCDF4.Variable)
+                        and var.size*var.dtype.itemsize<cache_vars],
+                    file_handle)
         file_handle.close()
         self._xarray_kwargs = xarray_kwargs or {}
         self._xarray_kwargs.setdefault('chunks', CHUNK_SIZE)
@@ -114,9 +128,28 @@ def collect_dimensions(self, name, obj):
             dim_name = "{}/dimension/{}".format(name, dim_name)
             self.file_content[dim_name] = len(dim_obj)
 
+    def collect_cache_vars(self, cache_vars, obj):
+        """Collect data variables for caching.
+
+        This method will collect some data variables and store them in RAM.
+        This may be useful if some small variables are frequently accessed,
+        to prevent needlessly frequently opening and closing the file, which
+        in case of xarray is associated with some overhead.
+
+        Should be called later than `collect_metadata`.
+
+        Args:
+            cache_vars (List[str]): Names of data variables to be cached.
+            obj (netCDF4.Dataset): Dataset object from which to read them.
+        """
+        for var_name in cache_vars:
+            self.cached_file_content = self.file_content[var_name][:]
+
     def __getitem__(self, key):
         val = self.file_content[key]
         if isinstance(val, netCDF4.Variable):
+            if key in self.cached_file_content:
+                return self.cached_file_content[var_name]
             # these datasets are closed and inaccessible when the file is
             # closed, need to reopen
             # TODO: Handle HDF4 versus NetCDF3 versus NetCDF4

From ab136a59f8a19c2c9bc272a84f29717b0d417c0e Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Tue, 26 Nov 2019 16:47:36 +0100
Subject: [PATCH 02/19] In FCI reader, use data variable caching

In the FCI reader, use the data variable caching implemented in the
previous commit.  This should address #972.
---
 satpy/readers/fci_l1c_fdhsi.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/satpy/readers/fci_l1c_fdhsi.py b/satpy/readers/fci_l1c_fdhsi.py
index 91cd1632c4..724394d7a5 100644
--- a/satpy/readers/fci_l1c_fdhsi.py
+++ b/satpy/readers/fci_l1c_fdhsi.py
@@ -78,7 +78,10 @@ class using the :mod:`~satpy.Scene.load` method with the reader
 
     def __init__(self, filename, filename_info, filetype_info):
         super(FCIFDHSIFileHandler, self).__init__(filename, filename_info,
-                                                  filetype_info)
+                                                  filetype_info,
+                                                  xarray_kwargs={
+                                                      "backend": "h5netcdf"},
+                                                  cache_vars=10000)
         logger.debug('Reading: {}'.format(self.filename))
         logger.debug('Start: {}'.format(self.start_time))
         logger.debug('End: {}'.format(self.end_time))

From d34af0c68c45d8d77481172df50681dae5d5a740 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Tue, 26 Nov 2019 17:08:41 +0100
Subject: [PATCH 03/19] Don't try to cache strings

For strings, I cannot measure their size because their .dtype is a type,
not a dtype.  Therefore I can't get the itemsize so I don't know how
large they will be (they're also variable length).  Don't cache those
for now, I'm not using them anyway.
---
 satpy/readers/netcdf_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index fb7a2e8fff..3c7b94324b 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -20,6 +20,7 @@
 """
 import netCDF4
 import logging
+import numpy as np
 import xarray as xr
 
 from satpy import CHUNK_SIZE
@@ -88,6 +89,7 @@ def __init__(self, filename, filename_info, filetype_info,
                     [varname for (varname, var)
                         in self.file_content.items()
                         if isinstance(var, netCDF4.Variable)
+                        and isinstance(var.dtype. np.dtype) #  vlen may be str
                         and var.size*var.dtype.itemsize<cache_vars],
                     file_handle)
         file_handle.close()

From f90d52588588fa7232813c0734f57398e5bb06d1 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Tue, 26 Nov 2019 17:12:42 +0100
Subject: [PATCH 04/19] Fix typo in previous commit

---
 satpy/readers/netcdf_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 3c7b94324b..9e76d383be 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -89,7 +89,7 @@ def __init__(self, filename, filename_info, filetype_info,
                     [varname for (varname, var)
                         in self.file_content.items()
                         if isinstance(var, netCDF4.Variable)
-                        and isinstance(var.dtype. np.dtype) #  vlen may be str
+                        and isinstance(var.dtype, np.dtype) #  vlen may be str
                         and var.size*var.dtype.itemsize<cache_vars],
                     file_handle)
         file_handle.close()

From f6f9f8005ec9015329d8f5c9245dd2d4e01c3322 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 09:05:37 +0100
Subject: [PATCH 05/19] Caching bugfix

Fix a bug in the small variable caching, where I was overwriting rather
than adding a key to the cache dictionary.
---
 satpy/readers/netcdf_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 9e76d383be..c2ad33d1c1 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -145,7 +145,7 @@ def collect_cache_vars(self, cache_vars, obj):
             obj (netCDF4.Dataset): Dataset object from which to read them.
         """
         for var_name in cache_vars:
-            self.cached_file_content = self.file_content[var_name][:]
+            self.cached_file_content[var_name] = self.file_content[var_name][:]
 
     def __getitem__(self, key):
         val = self.file_content[key]

From da1cdf3c099ec475daacb8f0581d26addca84ca2 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 09:08:58 +0100
Subject: [PATCH 06/19] Bugfix in nc utils small var caching

Fix a small bug in the nc-utils small variable caching: the wrong
variable name was used.
---
 satpy/readers/netcdf_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index c2ad33d1c1..3ab8bace43 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -151,7 +151,7 @@ def __getitem__(self, key):
         val = self.file_content[key]
         if isinstance(val, netCDF4.Variable):
             if key in self.cached_file_content:
-                return self.cached_file_content[var_name]
+                return self.cached_file_content[key]
             # these datasets are closed and inaccessible when the file is
             # closed, need to reopen
             # TODO: Handle HDF4 versus NetCDF3 versus NetCDF4

From f06a6ab0fffa486eaff5e5b586095f95d2c0a968 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 09:32:14 +0100
Subject: [PATCH 07/19] Make xarray objects when caching

Downstream, we need at least the attributes for some of the cached
variables.  Therefore we do need to turn them into xarray DataArrays
again.
---
 satpy/readers/netcdf_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 3ab8bace43..9ea3587035 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -145,7 +145,9 @@ def collect_cache_vars(self, cache_vars, obj):
             obj (netCDF4.Dataset): Dataset object from which to read them.
         """
         for var_name in cache_vars:
-            self.cached_file_content[var_name] = self.file_content[var_name][:]
+            v = self.file_content[var_name]
+            self.cached_file_content[var_name] = xarray.DataArray(
+                    v[:], dims=v.dimensions, attrs=v.__dict__, name=v.name)
 
     def __getitem__(self, key):
         val = self.file_content[key]

From f3ab50423f4e4960562b26f01104452acdd3ac93 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 09:37:00 +0100
Subject: [PATCH 08/19] bug in small var caching method

Fix bug in small var caching method, should be xr not xarray
---
 satpy/readers/netcdf_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 9ea3587035..a5ae5347ad 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -146,7 +146,7 @@ def collect_cache_vars(self, cache_vars, obj):
         """
         for var_name in cache_vars:
             v = self.file_content[var_name]
-            self.cached_file_content[var_name] = xarray.DataArray(
+            self.cached_file_content[var_name] = xr.DataArray(
                     v[:], dims=v.dimensions, attrs=v.__dict__, name=v.name)
 
     def __getitem__(self, key):

From bed6967bef93b1287cff30ffdd8dc0201b104ed0 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 14:44:26 +0100
Subject: [PATCH 09/19] Further optional optimisation in nc-utils

In netcdf_utils, add an option to avoid the slow xarray.open_dataset
completely.  Instead, this option keeps the file handle open for as
long as the filehandler object exists, and creates xarray.DataArray
objects manually.  The coordinates are missing for now.
---
 satpy/readers/netcdf_utils.py | 64 ++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 16 deletions(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index a5ae5347ad..51e056141d 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -62,11 +62,19 @@ class NetCDF4FileHandler(BaseFileHandler):
     you may choose to cache some of them.  You can do this by passing a number,
     any variable smaller than this number in bytes will be read into RAM.
     Warning, this part of the API is provisional and subject to change.
+
+    You may get an additional speedup by passing ``cache_handle=True``.  This
+    will keep the netCDF4 dataset handles open throughout the lifetime of the
+    object, and instead of using `xarray.open_dataset` to open every data
+    variable, a dask array will be created "manually".  This may be useful if
+    you have a dataset distributed over many files, such as for FCI.  Note
+    that the coordinates will be missing in this case.
     """
 
+    file_handle = None
     def __init__(self, filename, filename_info, filetype_info,
                  auto_maskandscale=False, xarray_kwargs=None,
-                 cache_vars=0):
+                 cache_vars=0, cache_handle=False):
         super(NetCDF4FileHandler, self).__init__(
             filename, filename_info, filetype_info)
         self.file_content = {}
@@ -92,11 +100,21 @@ def __init__(self, filename, filename_info, filetype_info,
                         and isinstance(var.dtype, np.dtype) #  vlen may be str
                         and var.size*var.dtype.itemsize<cache_vars],
                     file_handle)
-        file_handle.close()
+        if cache_handle:
+            self.file_handle = file_handle
+        else:
+            file_handle.close()
         self._xarray_kwargs = xarray_kwargs or {}
         self._xarray_kwargs.setdefault('chunks', CHUNK_SIZE)
         self._xarray_kwargs.setdefault('mask_and_scale', self.auto_maskandscale)
 
+    def __del__(self):
+        if self.file_handle is not None:
+            try:
+                self.file_handle.close()
+            except RuntimeError: # presumably closed already
+                pass
+
     def _collect_attrs(self, name, obj):
         """Collect all the attributes for the provided file object.
         """
@@ -162,20 +180,34 @@ def __getitem__(self, key):
                 group, key = parts
             else:
                 group = None
-            with xr.open_dataset(self.filename, group=group,
-                                 **self._xarray_kwargs) as nc:
-                val = nc[key]
-                # Even though `chunks` is specified in the kwargs, xarray
-                # uses dask.arrays only for data variables that have at least
-                # one dimension; for zero-dimensional data variables (scalar),
-                # it uses its own lazy loading for scalars.  When those are
-                # accessed after file closure, xarray reopens the file without
-                # closing it again.  This will leave potentially many open file
-                # objects (which may in turn trigger a Segmentation Fault:
-                # https://github.com/pydata/xarray/issues/2954#issuecomment-491221266
-                if not val.chunks:
-                    val.load()
-        return val
+            if self.file_handle is not None:
+                return self._get_var_from_filehandle(group, key)
+            else:
+                return self._get_var_from_xr(group, key)
+
+    def _get_var_from_xr(self, group, key):
+        with xr.open_dataset(self.filename, group=group,
+                             **self._xarray_kwargs) as nc:
+            val = nc[key]
+            # Even though `chunks` is specified in the kwargs, xarray
+            # uses dask.arrays only for data variables that have at least
+            # one dimension; for zero-dimensional data variables (scalar),
+            # it uses its own lazy loading for scalars.  When those are
+            # accessed after file closure, xarray reopens the file without
+            # closing it again.  This will leave potentially many open file
+            # objects (which may in turn trigger a Segmentation Fault:
+            # https://github.com/pydata/xarray/issues/2954#issuecomment-491221266
+            if not val.chunks:
+                val.load()
+    return val
+
+    def _get_var_from_filehandle(self, group, key):
+        g = self.file_handle[group]
+        v = g[key]
+        x = xr.DataArray(
+                da.from_array(v), dims=v.dimensions, attrs=v.__dict__,
+                name=v.name)
+        return x
 
     def __contains__(self, item):
         return item in self.file_content

From 7565be33d3f47dd962c5e833131ebcc5212eb318 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 14:47:07 +0100
Subject: [PATCH 10/19] FCI reader now uses new nc-utils file handling

The FCI reader now uses the new option (introduced in the previous
commit) to bypass xarray.open_dataset completely; this should further
improve performance.
---
 satpy/readers/fci_l1c_fdhsi.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/satpy/readers/fci_l1c_fdhsi.py b/satpy/readers/fci_l1c_fdhsi.py
index 724394d7a5..5d94435a62 100644
--- a/satpy/readers/fci_l1c_fdhsi.py
+++ b/satpy/readers/fci_l1c_fdhsi.py
@@ -81,7 +81,8 @@ def __init__(self, filename, filename_info, filetype_info):
                                                   filetype_info,
                                                   xarray_kwargs={
                                                       "backend": "h5netcdf"},
-                                                  cache_vars=10000)
+                                                  cache_vars=10000,
+                                                  cache_handle=True)
         logger.debug('Reading: {}'.format(self.filename))
         logger.debug('Start: {}'.format(self.start_time))
         logger.debug('End: {}'.format(self.end_time))

From f668575e336386d206b28795aa06da7c807b1a2d Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 15:01:50 +0100
Subject: [PATCH 11/19] Bugfix missing return in __getitem__

Fix a bug introduced a couple of commits ago, where a return statement
went AWOL for cases where __getitem__ on the NetCDF4FileHandler is
retrieving an attribute or shape.
---
 satpy/readers/netcdf_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 51e056141d..9deef987de 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -181,9 +181,10 @@ def __getitem__(self, key):
             else:
                 group = None
             if self.file_handle is not None:
-                return self._get_var_from_filehandle(group, key)
+                val = self._get_var_from_filehandle(group, key)
             else:
-                return self._get_var_from_xr(group, key)
+                val = self._get_var_from_xr(group, key)
+        return val
 
     def _get_var_from_xr(self, group, key):
         with xr.open_dataset(self.filename, group=group,

From c3c2c80a756856afaec494d1f9603e1aa7052aaa Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 15:09:45 +0100
Subject: [PATCH 12/19] Bugfix: add missing import in netcdf-utils

Fix a bug where an import statement for dask was missing in the
netcdf-utils.
---
 satpy/readers/netcdf_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 9deef987de..0f43438007 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -22,6 +22,7 @@
 import logging
 import numpy as np
 import xarray as xr
+import dask.array as da
 
 from satpy import CHUNK_SIZE
 from satpy.readers.file_handlers import BaseFileHandler

From b747f0f741eccceee697e7c0525b61064bfab415 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Wed, 27 Nov 2019 16:09:25 +0100
Subject: [PATCH 13/19] Fix bad return statement

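The `return val` at the end of `_get_var_from_xr` was dedented to class
level, which is a syntax error, so the preceding commits cannot possibly
have been running at all.  Indent it back into the method.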
---
 satpy/readers/netcdf_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 0f43438007..64ff2b07ad 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -201,7 +201,7 @@ def _get_var_from_xr(self, group, key):
             # https://github.com/pydata/xarray/issues/2954#issuecomment-491221266
             if not val.chunks:
                 val.load()
-    return val
+        return val
 
     def _get_var_from_filehandle(self, group, key):
         g = self.file_handle[group]

From 40d3ee39a4c4ba7e75284dd58bfbad3b2aa0ade2 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Thu, 28 Nov 2019 10:21:32 +0100
Subject: [PATCH 14/19] TST: Add test case for nc utils caching

Add a test case to cover the newly implemented caching feature in
netcdf-utils
---
 satpy/tests/reader_tests/test_netcdf_utils.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py
index aa16774965..68dc96b51a 100644
--- a/satpy/tests/reader_tests/test_netcdf_utils.py
+++ b/satpy/tests/reader_tests/test_netcdf_utils.py
@@ -93,6 +93,9 @@ def setUp(self):
             ds2_i = nc.createVariable('ds2_i', np.int32,
                                       dimensions=('rows', 'cols'))
             ds2_i[:] = np.arange(10 * 100).reshape((10, 100))
+            ds2_s = nc.createVariable("ds2_s", np.int8,
+                                      dimensions=("rows",))
+            ds2_s[:] = np.arange(10)
 
             # Add attributes
             nc.test_attr_str = 'test_string'
@@ -138,7 +141,20 @@ def test_all_basic(self):
 
         self.assertTrue('ds2_f' in file_handler)
         self.assertFalse('fake_ds' in file_handler)
+        self.assertIsNone(file_handler.file_handle)
 
+    def test_caching(self):
+        """Test that caching works as intended.
+        """
+        from satpy.readers.netcdf_utils import NetCDF4FileHandler
+        h = NetCDF4FileHandler("test.nc", {}, {}, cache_vars=1000,
+                cache_handle=True)
+        self.assertIsNotNone(h.file_handle)
+        self.assertTrue(h.file_handle.isopen())
+
+        self.assertEqual(sorted(h.cached_file_content.keys()), ["ds2_s"])
+        h.__del__()
+        self.assertFalse(h.file_handle.isopen())
 
 def suite():
     """The test suite for test_netcdf_utils."""

From d0cc1f1937ca6aae1073c2119ea8dde8ea88782f Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Thu, 28 Nov 2019 10:26:18 +0100
Subject: [PATCH 15/19] PEP8 fixes in netcdf_utils

PEP8/flake8 fixes in netcdf_utils and test_netcdf_utils
---
 satpy/readers/netcdf_utils.py                 | 7 ++++---
 satpy/tests/reader_tests/test_netcdf_utils.py | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 64ff2b07ad..20373f548f 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -73,6 +73,7 @@ class NetCDF4FileHandler(BaseFileHandler):
     """
 
     file_handle = None
+
     def __init__(self, filename, filename_info, filetype_info,
                  auto_maskandscale=False, xarray_kwargs=None,
                  cache_vars=0, cache_handle=False):
@@ -98,8 +99,8 @@ def __init__(self, filename, filename_info, filetype_info,
                     [varname for (varname, var)
                         in self.file_content.items()
                         if isinstance(var, netCDF4.Variable)
-                        and isinstance(var.dtype, np.dtype) #  vlen may be str
-                        and var.size*var.dtype.itemsize<cache_vars],
+                        and isinstance(var.dtype, np.dtype)  # vlen may be str
+                        and var.size * var.dtype.itemsize < cache_vars],
                     file_handle)
         if cache_handle:
             self.file_handle = file_handle
@@ -113,7 +114,7 @@ def __del__(self):
         if self.file_handle is not None:
             try:
                 self.file_handle.close()
-            except RuntimeError: # presumably closed already
+            except RuntimeError:  # presumably closed already
                 pass
 
     def _collect_attrs(self, name, obj):
diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py
index 68dc96b51a..cb6c0a5a0a 100644
--- a/satpy/tests/reader_tests/test_netcdf_utils.py
+++ b/satpy/tests/reader_tests/test_netcdf_utils.py
@@ -148,7 +148,7 @@ def test_caching(self):
         """
         from satpy.readers.netcdf_utils import NetCDF4FileHandler
         h = NetCDF4FileHandler("test.nc", {}, {}, cache_vars=1000,
-                cache_handle=True)
+                               cache_handle=True)
         self.assertIsNotNone(h.file_handle)
         self.assertTrue(h.file_handle.isopen())
 
@@ -156,6 +156,7 @@ def test_caching(self):
         h.__del__()
         self.assertFalse(h.file_handle.isopen())
 
+
 def suite():
     """The test suite for test_netcdf_utils."""
     loader = unittest.TestLoader()

From 8f2442daaec54eeed3e6f6071a27a3fd9f821cc5 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Thu, 28 Nov 2019 11:37:07 +0100
Subject: [PATCH 16/19] TST: Improve test coverage for netcdf-utils

Improve test coverage for netcdf_utils.  Test coverage for this module
is now 100% according to my local pytest.
---
 satpy/tests/reader_tests/test_netcdf_utils.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py
index cb6c0a5a0a..1b5959b432 100644
--- a/satpy/tests/reader_tests/test_netcdf_utils.py
+++ b/satpy/tests/reader_tests/test_netcdf_utils.py
@@ -96,6 +96,8 @@ def setUp(self):
             ds2_s = nc.createVariable("ds2_s", np.int8,
                                       dimensions=("rows",))
             ds2_s[:] = np.arange(10)
+            ds2_sc = nc.createVariable("ds2_sc", np.int8, dimensions=())
+            ds2_sc[:] = 42
 
             # Add attributes
             nc.test_attr_str = 'test_string'
@@ -142,6 +144,7 @@ def test_all_basic(self):
         self.assertTrue('ds2_f' in file_handler)
         self.assertFalse('fake_ds' in file_handler)
         self.assertIsNone(file_handler.file_handle)
+        self.assertEqual(file_handler["ds2_sc"], 42)
 
     def test_caching(self):
         """Test that caching works as intended.
@@ -152,10 +155,22 @@ def test_caching(self):
         self.assertIsNotNone(h.file_handle)
         self.assertTrue(h.file_handle.isopen())
 
-        self.assertEqual(sorted(h.cached_file_content.keys()), ["ds2_s"])
+        self.assertEqual(sorted(h.cached_file_content.keys()),
+                         ["ds2_s", "ds2_sc"])
+        # with caching, these tests access different lines than without
+        np.testing.assert_array_equal(h["ds2_s"], np.arange(10))
+        np.testing.assert_array_equal(h["test_group/ds1_i"],
+                                      np.arange(10 * 100).reshape((10, 100)))
         h.__del__()
         self.assertFalse(h.file_handle.isopen())
 
+    def test_filenotfound(self):
+        """Test that error is raised when file not found
+        """
+        from satpy.readers.netcdf_utils import NetCDF4FileHandler
+
+        with self.assertRaises(IOError):
+            h = NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {})
 
 def suite():
     """The test suite for test_netcdf_utils."""

From a379c16b7cc76fbbf97bc545d75091f2e11e638c Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Thu, 28 Nov 2019 11:45:38 +0100
Subject: [PATCH 17/19] PEP8 / flake8 fixes

Fix PEP8 / flake8 complaints
---
 satpy/tests/reader_tests/test_netcdf_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py
index 1b5959b432..731ea264f8 100644
--- a/satpy/tests/reader_tests/test_netcdf_utils.py
+++ b/satpy/tests/reader_tests/test_netcdf_utils.py
@@ -170,7 +170,8 @@ def test_filenotfound(self):
         from satpy.readers.netcdf_utils import NetCDF4FileHandler
 
         with self.assertRaises(IOError):
-            h = NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {})
+            NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {})
+
 
 def suite():
     """The test suite for test_netcdf_utils."""

From 88d22e6cd2a55889cc3b89588be065f2a62bef56 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Thu, 28 Nov 2019 13:10:01 +0100
Subject: [PATCH 18/19] Cosmetic fixes in netcdf utils caching

A few cosmetic changes to the netcdf utils caching.  Improve the API
documentation, change an argument name to better reflect its role, and
point out in additional places that we're not doing coordinates when
caching variables.
---
 satpy/readers/fci_l1c_fdhsi.py                |  4 +---
 satpy/readers/netcdf_utils.py                 | 20 +++++++++++++++----
 satpy/tests/reader_tests/test_netcdf_utils.py |  2 +-
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/satpy/readers/fci_l1c_fdhsi.py b/satpy/readers/fci_l1c_fdhsi.py
index 5d94435a62..7d4ed5e634 100644
--- a/satpy/readers/fci_l1c_fdhsi.py
+++ b/satpy/readers/fci_l1c_fdhsi.py
@@ -79,9 +79,7 @@ class using the :mod:`~satpy.Scene.load` method with the reader
     def __init__(self, filename, filename_info, filetype_info):
         super(FCIFDHSIFileHandler, self).__init__(filename, filename_info,
                                                   filetype_info,
-                                                  xarray_kwargs={
-                                                      "backend": "h5netcdf"},
-                                                  cache_vars=10000,
+                                                  cache_var_size=10000,
                                                   cache_handle=True)
         logger.debug('Reading: {}'.format(self.filename))
         logger.debug('Start: {}'.format(self.start_time))
diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index 20373f548f..7d7b821039 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -69,14 +69,24 @@ class NetCDF4FileHandler(BaseFileHandler):
     object, and instead of using `xarray.open_dataset` to open every data
     variable, a dask array will be created "manually".  This may be useful if
     you have a dataset distributed over many files, such as for FCI.  Note
-    that the coordinates will be missing in this case.
+    that the coordinates will be missing in this case.  If you use this option,
+    ``xarray_kwargs`` will have no effect.
+
+    Args:
+        filename (str): File to read
+        filename_info (dict): Dictionary with filename information
+        filetype_info (dict): Dictionary with filetype information
+        auto_maskandscale (bool): Apply mask and scale factors
+        xarray_kwargs (dict): Addition arguments to `xarray.open_dataset`
+        cache_var_size (int): Cache variables smaller than this size.
+        cache_handle (bool): Keep files open for lifetime of filehandler.
     """
 
     file_handle = None
 
     def __init__(self, filename, filename_info, filetype_info,
                  auto_maskandscale=False, xarray_kwargs=None,
-                 cache_vars=0, cache_handle=False):
+                 cache_var_size=0, cache_handle=False):
         super(NetCDF4FileHandler, self).__init__(
             filename, filename_info, filetype_info)
         self.file_content = {}
@@ -94,13 +104,13 @@ def __init__(self, filename, filename_info, filetype_info,
 
         self.collect_metadata("", file_handle)
         self.collect_dimensions("", file_handle)
-        if cache_vars > 0:
+        if cache_var_size > 0:
             self.collect_cache_vars(
                     [varname for (varname, var)
                         in self.file_content.items()
                         if isinstance(var, netCDF4.Variable)
                         and isinstance(var.dtype, np.dtype)  # vlen may be str
-                        and var.size * var.dtype.itemsize < cache_vars],
+                        and var.size * var.dtype.itemsize < cache_var_size],
                     file_handle)
         if cache_handle:
             self.file_handle = file_handle
@@ -205,6 +215,8 @@ def _get_var_from_xr(self, group, key):
         return val
 
     def _get_var_from_filehandle(self, group, key):
+        # Not getting coordinates as this is more work, therefore more
+        # overhead, and those are not used downstream.
         g = self.file_handle[group]
         v = g[key]
         x = xr.DataArray(
diff --git a/satpy/tests/reader_tests/test_netcdf_utils.py b/satpy/tests/reader_tests/test_netcdf_utils.py
index 731ea264f8..0204d88dc9 100644
--- a/satpy/tests/reader_tests/test_netcdf_utils.py
+++ b/satpy/tests/reader_tests/test_netcdf_utils.py
@@ -150,7 +150,7 @@ def test_caching(self):
         """Test that caching works as intended.
         """
         from satpy.readers.netcdf_utils import NetCDF4FileHandler
-        h = NetCDF4FileHandler("test.nc", {}, {}, cache_vars=1000,
+        h = NetCDF4FileHandler("test.nc", {}, {}, cache_var_size=1000,
                                cache_handle=True)
         self.assertIsNotNone(h.file_handle)
         self.assertTrue(h.file_handle.isopen())

From 2b75d177fc76b64dcc518d479783993dedc66182 Mon Sep 17 00:00:00 2001
From: Gerrit Holl <gerrit.holl@dwd.de>
Date: Mon, 9 Dec 2019 16:11:23 +0100
Subject: [PATCH 19/19] In optimised nc-utils, clarify caching

In the docstring for the optimised netcdf_utils, clarify the first
reference to caching.
---
 satpy/readers/netcdf_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/satpy/readers/netcdf_utils.py b/satpy/readers/netcdf_utils.py
index dcf7b3ebff..6e266b8e85 100644
--- a/satpy/readers/netcdf_utils.py
+++ b/satpy/readers/netcdf_utils.py
@@ -51,9 +51,9 @@ class NetCDF4FileHandler(BaseFileHandler):
 
         wrapper["/attr/platform_short_name"]
 
-    Note that loading uncached datasets requires reopening the original
-    file, but to get just the shape of the dataset append "/shape"
-    to the item string:
+    Note that loading datasets requires reopening the original file
+    (unless those datasets are cached, see below), but to get just the
+    shape of the dataset append "/shape" to the item string:
 
         wrapper["group/subgroup/var_name/shape"]