
Commit

Merge branch 'main' into change_roiresponseseries_schema_from_array_to_object_type
CodyCBakerPhD authored Nov 11, 2023
2 parents b3f8fc6 + 1bb6e14 commit cf84ab7
Showing 10 changed files with 60 additions and 27 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -5,7 +5,9 @@
* Changed the metadata schema for `Fluorescence` and `DfOverF` where the traces metadata can be provided as a dict instead of a list of dicts.
The name of the plane segmentation is used to determine which traces to add to the `Fluorescence` and `DfOverF` containers. [PR #632](https://github.com/catalystneuro/neuroconv/pull/632)


### Fixes
* Fixed GenericDataChunkIterator (in hdmf.py) in the case where the number of dimensions is 1 and the size in bytes is greater than the threshold of 1 GB. [PR #638](https://github.com/catalystneuro/neuroconv/pull/638)
* Changed `np.floor` and `np.prod` usage to `math.floor` and `math.prod` in various files. [PR #638](https://github.com/catalystneuro/neuroconv/pull/638)

# v0.4.5

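For context on the second `Fixes` bullet above: `np.floor` returns a NumPy float (so callers needed an explicit cast back to `int`) and `np.prod` returns a fixed-width NumPy integer, whereas the `math` equivalents return plain Python ints. A minimal sketch of the difference; the shape below is purely illustrative:

```python
import math

import numpy as np

shape = (1, 512, 512)  # an illustrative frame shape

# np.floor yields a np.float64, so downstream code needed .astype(int) casts
print(type(np.floor(3.7)))    # <class 'numpy.float64'>
print(type(math.floor(3.7)))  # <class 'int'>

# np.prod returns a fixed-width NumPy integer; math.prod returns a Python int
print(type(np.prod(shape)))   # <class 'numpy.int64'> (platform dependent)
print(type(math.prod(shape))) # <class 'int'>
```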
5 changes: 3 additions & 2 deletions src/neuroconv/datainterfaces/behavior/video/video_utils.py
@@ -1,3 +1,4 @@
import math
from typing import Optional, Tuple

import numpy as np
@@ -206,13 +207,13 @@ def _fit_frames_to_size(self, size_mb):
@staticmethod
def _scale_shape_to_size(size_mb, shape, size, max_shape):
"""Given the shape and size of array, return shape that will fit size_mb."""
k = np.floor((size_mb / size) ** (1 / len(shape)))
k = math.floor((size_mb / size) ** (1 / len(shape)))
return tuple([min(max(int(x), shape[j]), max_shape[j]) for j, x in enumerate(k * np.array(shape))])

def _get_frame_details(self):
"""Get frame shape and size in MB"""
frame_shape = (1, *self.video_capture_ob.get_frame_shape())
min_frame_size_mb = (np.prod(frame_shape) * self._get_dtype().itemsize) / 1e6
min_frame_size_mb = (math.prod(frame_shape) * self._get_dtype().itemsize) / 1e6
return min_frame_size_mb, frame_shape

def _get_data(self, selection: Tuple[slice]) -> np.ndarray:
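The `_scale_shape_to_size` helper in the hunk above derives a uniform per-axis factor `k` such that `k**ndim ≈ size_mb / size`, then clips each scaled axis between the original shape and the full extent. A standalone sketch of the same arithmetic, with illustrative example numbers:

```python
import math

import numpy as np

def scale_shape_to_size(size_mb, shape, size, max_shape):
    """Scale `shape` by a uniform per-axis factor so the result fits within `size_mb`."""
    # k**len(shape) ~= size_mb / size, so the scaled block stays under the target size
    k = math.floor((size_mb / size) ** (1 / len(shape)))
    scaled = k * np.array(shape)
    # Never go below the original shape or above the full extent of each axis
    return tuple(min(max(int(x), shape[j]), max_shape[j]) for j, x in enumerate(scaled))

# e.g. grow a single-frame block (~0.3 MB) toward a 10 MB buffer
print(scale_shape_to_size(size_mb=10.0, shape=(1, 480, 640), size=0.3, max_shape=(1000, 480, 640)))
# -> (3, 480, 640): only the frame axis can grow, the image plane is already at full extent
```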
43 changes: 28 additions & 15 deletions src/neuroconv/tools/hdmf.py
@@ -1,4 +1,5 @@
"""Collection of modifications of HDMF functions that are to be tested/used on this repo until propagation upstream."""
import math
from typing import Tuple

import numpy as np
@@ -8,7 +9,7 @@
class GenericDataChunkIterator(HDMFGenericDataChunkIterator):
def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]:
num_axes = len(self.maxshape)
chunk_bytes = np.prod(self.chunk_shape) * self.dtype.itemsize
chunk_bytes = math.prod(self.chunk_shape) * self.dtype.itemsize
assert buffer_gb > 0, f"buffer_gb ({buffer_gb}) must be greater than zero!"
assert (
buffer_gb >= chunk_bytes / 1e9
@@ -20,31 +21,43 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]:
maxshape = np.array(self.maxshape)

# Early termination condition
if np.prod(maxshape) * self.dtype.itemsize / 1e9 < buffer_gb:
if math.prod(maxshape) * self.dtype.itemsize / 1e9 < buffer_gb:
return tuple(self.maxshape)

buffer_bytes = chunk_bytes
axis_sizes_bytes = maxshape * self.dtype.itemsize
smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape)
target_buffer_bytes = buffer_gb * 1e9

# If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes
sub_square_buffer_shape = np.array(self.chunk_shape)
if min(axis_sizes_bytes) > target_buffer_bytes:
k1 = np.floor((target_buffer_bytes / chunk_bytes) ** 0.5)
for axis in [smallest_chunk_axis, second_smallest_chunk_axis]:
sub_square_buffer_shape[axis] = k1 * sub_square_buffer_shape[axis]
return tuple(sub_square_buffer_shape)
if num_axes > 1:
smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape)
# If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes
sub_square_buffer_shape = np.array(self.chunk_shape)
if min(axis_sizes_bytes) > target_buffer_bytes:
k1 = math.floor((target_buffer_bytes / chunk_bytes) ** 0.5)
for axis in [smallest_chunk_axis, second_smallest_chunk_axis]:
sub_square_buffer_shape[axis] = k1 * sub_square_buffer_shape[axis]
return tuple(sub_square_buffer_shape)
elif num_axes == 1:
smallest_chunk_axis = 0
# Handle the case where the single axis is too large to fit in the buffer
if axis_sizes_bytes[0] > target_buffer_bytes:
k1 = math.floor(target_buffer_bytes / chunk_bytes)
return tuple(
[
k1 * self.chunk_shape[0],
]
)
else:
raise ValueError(f"num_axes ({num_axes}) is less than one!")

# Original one-shot estimation has good performance for certain shapes
chunk_to_buffer_ratio = buffer_gb * 1e9 / chunk_bytes
chunk_scaling_factor = np.floor(chunk_to_buffer_ratio ** (1 / num_axes))
chunk_scaling_factor = math.floor(chunk_to_buffer_ratio ** (1 / num_axes))
unpadded_buffer_shape = [
np.clip(a=int(x), a_min=self.chunk_shape[j], a_max=self.maxshape[j])
for j, x in enumerate(chunk_scaling_factor * np.array(self.chunk_shape))
]

unpadded_buffer_bytes = np.prod(unpadded_buffer_shape) * self.dtype.itemsize
unpadded_buffer_bytes = math.prod(unpadded_buffer_shape) * self.dtype.itemsize

# Method that starts by filling the smallest axis completely or calculates best partial fill
padded_buffer_shape = np.array(self.chunk_shape)
@@ -62,10 +75,10 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]:
buffer_bytes *= chunks_on_axis
padded_buffer_shape[axis] = self.maxshape[axis]
else: # Found an axis that is too large to use with the rest of the buffer; calculate how much can be used
k3 = np.floor(target_buffer_bytes / buffer_bytes)
k3 = math.floor(target_buffer_bytes / buffer_bytes)
padded_buffer_shape[axis] *= k3
break
padded_buffer_bytes = np.prod(padded_buffer_shape) * self.dtype.itemsize
padded_buffer_bytes = math.prod(padded_buffer_shape) * self.dtype.itemsize

if padded_buffer_bytes >= unpadded_buffer_bytes:
return tuple(padded_buffer_shape)
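This hunk is the core of the fix. For a one-dimensional dataset larger than `buffer_gb`, the old code tried to unpack two smallest axes from `np.argsort(self.chunk_shape)` and fill a square along them, which cannot work with a single axis. The new `num_axes == 1` branch simply stacks as many whole chunks as fit in the buffer. A standalone sketch of that arithmetic, with illustrative numbers:

```python
import math

# Illustrative numbers: a 1-D int16 dataset larger than the 1 GB default buffer
itemsize = 2                     # bytes per element (int16)
maxshape = (1_500_000_000,)      # ~3 GB of data on a single axis
chunk_shape = (1_000_000,)       # ~2 MB per chunk
target_buffer_bytes = 1.0 * 1e9  # buffer_gb * 1e9

chunk_bytes = math.prod(chunk_shape) * itemsize
axis_bytes = maxshape[0] * itemsize

if axis_bytes > target_buffer_bytes:
    # Stack as many whole chunks along the single axis as fit in the buffer
    k1 = math.floor(target_buffer_bytes / chunk_bytes)
    buffer_shape = (k1 * chunk_shape[0],)
else:
    # Small enough to hold everything; the real iterator already returns maxshape
    # via its early-termination check in this case
    buffer_shape = maxshape

print(buffer_shape)  # (500000000,) -> 500 chunks of 1e6 samples, exactly 1 GB of int16
```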
src/neuroconv/tools/roiextractors/imagingextractordatachunkiterator.py
@@ -1,4 +1,5 @@
"""General purpose iterator for all ImagingExtractor data."""
import math
from typing import Optional, Tuple

import numpy as np
@@ -88,7 +89,7 @@ def _get_scaled_buffer_shape(self, buffer_gb: float, chunk_shape: tuple) -> tupl

image_size = self._get_maxshape()[1:]
min_buffer_shape = tuple([chunk_shape[0]]) + image_size
scaling_factor = np.floor((buffer_gb * 1e9 / (np.prod(min_buffer_shape) * self._get_dtype().itemsize)))
scaling_factor = math.floor((buffer_gb * 1e9 / (math.prod(min_buffer_shape) * self._get_dtype().itemsize)))
max_buffer_shape = tuple([int(scaling_factor * min_buffer_shape[0])]) + image_size
scaled_buffer_shape = tuple(
[
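Here the buffer grows only along the frame axis while each buffer slice keeps the full image plane. A sketch of that scaling, with illustrative dimensions:

```python
import math

# Illustrative imaging dimensions
image_size = (512, 512)  # rows, columns of one frame
chunk_frames = 16        # frames per chunk along axis 0
itemsize = 2             # e.g. uint16
buffer_gb = 1.0

# Smallest buffer that still holds whole image planes
min_buffer_shape = (chunk_frames, *image_size)
min_buffer_bytes = math.prod(min_buffer_shape) * itemsize

# Grow only the frame axis until the buffer approaches buffer_gb
scaling_factor = math.floor(buffer_gb * 1e9 / min_buffer_bytes)
max_buffer_shape = (int(scaling_factor * chunk_frames), *image_size)
print(max_buffer_shape)  # (1904, 512, 512) with these numbers, ~0.998 GB
```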
3 changes: 2 additions & 1 deletion src/neuroconv/tools/roiextractors/roiextractors.py
@@ -1,3 +1,4 @@
import math
from collections import defaultdict
from copy import deepcopy
from typing import Literal, Optional
@@ -467,7 +468,7 @@ def check_if_imaging_fits_into_memory(imaging: ImagingExtractor) -> None:
image_size = imaging.get_image_size()
num_frames = imaging.get_num_frames()

traces_size_in_bytes = num_frames * np.prod(image_size) * element_size_in_bytes
traces_size_in_bytes = num_frames * math.prod(image_size) * element_size_in_bytes
available_memory_in_bytes = psutil.virtual_memory().available

if traces_size_in_bytes > available_memory_in_bytes:
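`check_if_imaging_fits_into_memory` compares the size of the full movie against currently available RAM. A sketch of the same estimate; the numbers and the raised message are illustrative, not the function's actual wording:

```python
import math

import psutil

# Illustrative numbers for a two-photon movie
image_size = (512, 512)    # pixels per frame
num_frames = 30_000
element_size_in_bytes = 2  # e.g. uint16 samples

traces_size_in_bytes = num_frames * math.prod(image_size) * element_size_in_bytes
available_memory_in_bytes = psutil.virtual_memory().available

if traces_size_in_bytes > available_memory_in_bytes:
    raise MemoryError(
        f"Movie ({traces_size_in_bytes / 1e9:.2f} GB) does not fit in available memory "
        f"({available_memory_in_bytes / 1e9:.2f} GB); write it iteratively instead."
    )
```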
5 changes: 3 additions & 2 deletions src/neuroconv/tools/testing/mock_ttl_signals.py
@@ -1,3 +1,4 @@
import math
from pathlib import Path
from typing import Optional, Union

@@ -84,7 +85,7 @@ def generate_mock_ttl_signal(

if np.issubdtype(dtype, np.unsignedinteger):
# If data type is an unsigned integer, increment the signed default values by the midpoint of the unsigned range
shift = np.floor(np.iinfo(dtype).max / 2).astype(int)
shift = math.floor(np.iinfo(dtype).max / 2)
baseline_mean_int16_default += shift
signal_mean_int16_default += shift

@@ -271,7 +272,7 @@ def regenerate_test_cases(folder_path: FolderPathType, regenerate_reference_imag
if regenerate_reference_images:
fig.add_trace(
go.Scatter(y=time_series_data, text=time_series_name),
row=np.floor(plot_index / num_cols).astype(int) + 1,
row=math.floor(plot_index / num_cols) + 1,
col=int(plot_index % num_cols) + 1,
)
plot_index += 1
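The unsigned-dtype shift above is now a plain Python int rather than a NumPy scalar that needed an `.astype(int)` cast. A quick sketch of the shift for, e.g., `uint16`:

```python
import math

import numpy as np

dtype = np.uint16  # illustrative unsigned dtype

# Midpoint of the unsigned range, used to lift the signed int16 defaults into unsigned space
shift = math.floor(np.iinfo(dtype).max / 2)
print(shift, type(shift))  # 32767 <class 'int'>

# The old form returned a NumPy scalar and required an explicit cast
old_shift = np.floor(np.iinfo(dtype).max / 2).astype(int)
print(int(old_shift) == shift)  # True
```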
14 changes: 12 additions & 2 deletions tests/test_behavior/test_audio_interface.py
@@ -42,7 +42,7 @@ class TestAudioInterface(AudioInterfaceTestMixin, TestCase):
@classmethod
def setUpClass(cls):
cls.session_start_time = datetime.now(tz=gettz(name="US/Pacific"))
cls.num_frames = 10000
cls.num_frames = int(1e7)
cls.num_audio_files = 3
cls.sampling_rate = 500
cls.aligned_segment_starting_times = [0.0, 20.0, 40.0]
@@ -199,7 +199,17 @@ def test_run_conversion(self):
audio_test_data = [read(filename=file_path, mmap=True)[1] for file_path in file_paths]

nwbfile_path = str(self.test_dir / "audio_test_data.nwb")
self.nwb_converter.run_conversion(nwbfile_path=nwbfile_path, metadata=self.metadata)
self.nwb_converter.run_conversion(
nwbfile_path=nwbfile_path,
metadata=self.metadata,
conversion_options=dict(
Audio=dict(
iterator_options=dict(
buffer_gb=1e7 / 1e9,
)
)
), # use a low buffer_gb so we can test the full GenericDataChunkIterator
)

with NWBHDF5IO(path=nwbfile_path, mode="r") as io:
nwbfile = io.read()
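For context on the chosen test values: with 1e7 frames per mock audio file and a `buffer_gb` of `1e7 / 1e9` (10 MB), each one-dimensional audio array is larger than the buffer, so the new single-axis branch in `GenericDataChunkIterator` is actually exercised. A quick sanity check of that arithmetic; the sample dtype is an assumption for illustration:

```python
import math

num_frames = int(1e7)  # frames per mock audio file in the test
itemsize = 2           # assuming int16 samples, purely for illustration
buffer_gb = 1e7 / 1e9  # the test's deliberately small buffer (10 MB)

data_bytes = num_frames * itemsize
buffer_bytes = buffer_gb * 1e9

print(data_bytes / 1e6, "MB of samples vs", buffer_bytes / 1e6, "MB buffer")
print("needs buffered iteration:", data_bytes > buffer_bytes)  # True for any itemsize >= 2
```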
3 changes: 2 additions & 1 deletion tests/test_behavior/test_video_utils.py
@@ -1,3 +1,4 @@
import math
import os
import tempfile
import unittest
@@ -204,7 +205,7 @@ def test_custom_chunk_shape(self):
)

def test_small_buffer_size(self):
frame_size_mb = np.prod(self.frame_shape) / 1e6
frame_size_mb = math.prod(self.frame_shape) / 1e6
buffer_size = frame_size_mb / 1e3 / 2
video_file = self.create_video(self.fps, self.frame_shape, self.number_of_frames)
with self.assertRaises(AssertionError):
4 changes: 3 additions & 1 deletion tests/test_ophys/test_imagingextractordatachunkiterator.py
@@ -1,3 +1,5 @@
import math

import numpy as np
from hdmf.testing import TestCase
from numpy.testing import assert_array_equal
@@ -157,7 +159,7 @@ def test_data_validity(
)

if buffer_gb is not None:
assert ((np.prod(dci.buffer_shape) * self.imaging_extractor.get_dtype().itemsize) / 1e9) <= buffer_gb
assert ((math.prod(dci.buffer_shape) * self.imaging_extractor.get_dtype().itemsize) / 1e9) <= buffer_gb

data_chunks = np.zeros(dci.maxshape)
for data_chunk in dci:
3 changes: 2 additions & 1 deletion tests/test_ophys/test_tools_roiextractors.py
@@ -1,3 +1,4 @@
import math
import unittest
from copy import deepcopy
from datetime import datetime
@@ -1497,7 +1498,7 @@ def test_non_iterative_write_assertion(self):
available_memory_in_bytes = psutil.virtual_memory().available

excess = 1.5 # Of what is available in memory
num_frames_to_overflow = (available_memory_in_bytes * excess) / (element_size_in_bytes * np.prod(image_size))
num_frames_to_overflow = (available_memory_in_bytes * excess) / (element_size_in_bytes * math.prod(image_size))

# Mock imaging extractor with as many frames as necessary to overflow memory
mock_imaging = Mock()
