Converted assert of phys_min and phys_max into warnings (#266)

* Update highlevel.py: converted asserts into warnings. Reason: in some cases, storing an EDF file fails because of round-off errors, e.g. `phys_min -3565.82 vs. signal_min -3565.8200000000000006` will fail in write_edf (but shouldn't).
* Removed references to distutils. Distutils is deprecated, as mentioned in #267.
* Updated tests for warnings in case phys_min > sig.min().
* Added a threshold for the warning in case of small differences between sig_min and physical_min. Updated tests as well.
* Fix unit test.
holgern · Jan 22, 2025 · 9f8dde4 · 9f8dde4
1 parent b5df218
commit 9f8dde4
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 13 deletions.
diff --git a/pyedflib/highlevel.py b/pyedflib/highlevel.py
@@ -517,13 +517,23 @@ def write_edf(
             'for channel {}'.format(dmax, sig.max(), label)
             assert pmin != pmax, \
             f'physical_min {pmin} should be different from physical_max {pmax}'
-        else: # only warning, as this will not lead to clipping
-            assert pmin<=sig.min(), \
-            'phys_min is {}, but signal_min is {} ' \
-            'for channel {}'.format(pmin, sig.min(), label)
-            assert pmax>=sig.max(), \
-            'phys_max is {}, but signal_max is {} ' \
-            'for channel {}'.format(pmax, sig.max(), label)
+        else: # only warning if difference is larger than the rounding error (which is quite large as edf scales data between phys_min and phys_max using -dig_min and +dig_max)
+            edf_accuracy = min([sig.max()/dmax, sig.min()/dmin])
+            if abs(pmin - sig.min()) < edf_accuracy:
+                warnings.warn(f'phys_min is {pmin}, but signal_min is {sig.min()} ' \
+                'for channel {label}', category=UserWarning)
+            else: # difference is > edf_accuracy
+                assert pmin<=sig.min(), \
+                'phys_min is {}, but signal_min is {} ' \
+                'for channel {}'.format(pmin, sig.min(), label)
+            if abs(sig.max() - pmax) < edf_accuracy:
+                warnings.warn(f'phys_max is {pmax}, but signal_max is {sig.max()} ' \
+                'for channel {label}', category=UserWarning)
+            else:
+                assert pmax>=sig.max(), \
+                'phys_max is {}, but signal_max is {} ' \
+                'for channel {}'.format(pmax, sig.max(), label)
+
 
     # get annotations, in format [[timepoint, duration, description], [...]]
     annotations = header.get('annotations', [])

diff --git a/pyedflib/tests/test_highlevel.py b/pyedflib/tests/test_highlevel.py
@@ -215,20 +215,36 @@ def test_read_write_diff_sfreq(self):
     def test_assertion_dmindmax(self):
 
         # test digital and dmin wrong
-        signals =[np.random.randint(-2048, 2048, 256*60).astype(np.int32)]
+        signals =[np.random.randint(-2048, 2048, 256*60).astype(np.int16)]
         sheaders = [highlevel.make_signal_header('ch1', sample_frequency=256)]
         sheaders[0]['digital_min'] = -128
         sheaders[0]['digital_max'] = 128
         with self.assertRaises(AssertionError):
             highlevel.write_edf(self.edfplus_data_file, signals, sheaders, digital=True)
 
-        # test pmin wrong
-        signals = [np.random.randint(-2048, 2048, 256*60)]
+        # test physical min and max wrong
+        signals = [np.random.randint(-2048, 2048, 256*60).astype(np.int16)]
         sheaders = [highlevel.make_signal_header('ch1', sample_frequency=256)]
         sheaders[0]['physical_min'] = -200
         sheaders[0]['physical_max'] = 200
+
+        # a large difference between phys min and values should result in error
         with self.assertRaises(AssertionError):
             highlevel.write_edf(self.edfplus_data_file, signals, sheaders, digital=False)
+
+        # A small roundoff difference between phys min and values should result in warning
+        # edf_accuracy is calculated as: max_signals / digital_max or min_signals / digital_min
+        #   (whichever is smallest). digital_max = 2^16 / 2
+        edf_accuracy = min([max(signals[0])/sheaders[0]['digital_max'], min(signals[0])/sheaders[0]['digital_min']]).astype(np.float16)
+
+        sheaders[0]['physical_min'] = min(signals[0]) + 0.999 * edf_accuracy
+        sheaders[0]['physical_max'] = max(signals[0]) - 0.999 * edf_accuracy
+
+        with self.assertWarnsRegex(expected_warning=UserWarning, expected_regex="phys_min is.*"):
+            highlevel.write_edf(self.edfplus_data_file, signals, sheaders, digital=False)
+
+        # It would be nice to doublecheck the written data in the files here.
+        # However, the (rather inaccurate) data rescaling of EDF files makes this tricky.
 
 
     def test_read_write_accented(self):

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 import os
 import subprocess
 import sys
-from distutils.sysconfig import get_python_inc
+import sysconfig
 from functools import partial
 
 import setuptools
@@ -219,7 +219,7 @@ def write_version_py(filename='pyedflib/version.py'):
 # C files must be built once only for coverage to work
 c_lib = ('c_edf',{'sources': sources,
                  'depends': headers,
-                 'include_dirs': [make_ext_path("c"), get_python_inc()],
+                 'include_dirs': [make_ext_path("c"), sysconfig.get_path('include')],
                  'macros': c_macros,})
 
 ext_modules = [
@@ -270,7 +270,7 @@ def install_for_development(self):
             setuptools.bootstrap_install_from = None
 
         # create an .egg-link in the installation dir, pointing to our egg
-        from distutils import log
+        from setuptools import log
         log.info("Creating %s (link to %s)", self.egg_link, self.egg_base)
         if not self.dry_run:
             with open(self.egg_link, "w") as f: