Change order of series vs. data check (#167)

* Change order of series vs. data check (fixes #166)

Co-authored-by: Oliver Beckstein <orbeckst@gmail.com>
Co-authored-by: Zhiyi Wu <zhiyi.wu@bioch.ox.ac.uk>
alchemistry · Sep 23, 2021 · 093ad87 · 093ad87
1 parent 8c24a3a
commit 093ad87
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 4 deletions.
diff --git a/AUTHORS b/AUTHORS
@@ -34,5 +34,9 @@ Chronological list of authors
   - Victoria Lim (@vlim)
   - Hyungro Lee (@lee212)
   - Mohammad S. Barhaghi (@msoroush)
+
 2020
-  - Zhiyi Wu (@xiki-tempula)
+  - Zhiyi Wu (@xiki-tempula)
+
+2021
+  - Alexander Schlaich (@schlaicha)
diff --git a/CHANGES b/CHANGES
@@ -13,7 +13,7 @@ The rules for this file:
   * release numbers follow "Semantic Versioning" https://semver.org
 
 ------------------------------------------------------------------------------
-??/??/2021 
+??/??/2021 schlaicha
 
   * 0.6.0
 
@@ -22,6 +22,7 @@ Changes
 Enhancements
 
 Fixes
+  - Subsampling now works with bounds and step (PR #167, issue #166).
 
 
 09/17/2021 xiki-tempula, orbeckst

diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py
@@ -170,11 +170,12 @@ def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None,
                            "values are sorted by time, increasing.")
 
     if series is not None:
-        series = slicing(series, lower=lower, upper=upper, step=step)
-
+
         if (len(series) != len(df) or
             not all(series.reset_index()['time'] == df.reset_index()['time'])):
             raise ValueError("series and data must be sampled at the same times")
+
+        series = slicing(series, lower=lower, upper=upper, step=step)
 
         # calculate statistical inefficiency of series (could use fft=True but needs test)
         statinef  = statisticalInefficiency(series, fast=False)

diff --git a/src/alchemlyb/tests/test_preprocessing.py b/src/alchemlyb/tests/test_preprocessing.py
@@ -75,6 +75,16 @@ def test_duplicated_exception(self, data):
         with pytest.raises(KeyError):
             self.slicer(data.sort_index(0), lower=200)
 
+    def test_subsample_bounds_and_step(self, gmx_ABFE):
+        """Make sure that slicing the series also works
+        """
+        subsample = statistical_inefficiency(gmx_ABFE,
+                                             gmx_ABFE.sum(axis=1),
+                                             lower=100,
+                                             upper=400,
+                                             step=2)
+        assert len(subsample) == 76
+
     def test_multiindex_duplicated(self, gmx_ABFE):
         subsample = statistical_inefficiency(gmx_ABFE,
                                              gmx_ABFE.sum(axis=1))