
Commit

Merge branch 'main' into change_roiresponseseries_schema_from_array_to_object_type
CodyCBakerPhD authored Nov 11, 2023
2 parents b3f8fc6 + 1bb6e14 commit cf84ab7
Showing 10 changed files with 60 additions and 27 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -5,7 +5,9 @@
* Changed the metadata schema for `Fluorescence` and `DfOverF` where the traces metadata can be provided as a dict instead of a list of dicts.
The name of the plane segmentation is used to determine which traces to add to the `Fluorescence` and `DfOverF` containers. [PR #632](https://github.com/catalystneuro/neuroconv/pull/632)


### Fixes
* Fixed GenericDataChunkIterator (in hdmf.py) in the case where the number of dimensions is 1 and the size in bytes is greater than the threshold of 1 GB. [PR #638](https://github.com/catalystneuro/neuroconv/pull/638)
* Changed `np.floor` and `np.prod` usage to `math.floor` and `math.prod` in various files. [PR #638](https://github.com/catalystneuro/neuroconv/pull/638)

# v0.4.5

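For context on the second `Fixes` bullet above: `np.floor` returns a NumPy float (so callers needed an explicit cast back to `int`) and `np.prod` returns a fixed-width NumPy integer, whereas the `math` equivalents return plain Python ints. A minimal sketch of the difference; the shape below is purely illustrative:

```python
import math

import numpy as np

shape = (1, 512, 512)  # an illustrative frame shape

# np.floor yields a np.float64, so downstream code needed .astype(int) casts
print(type(np.floor(3.7)))    # <class 'numpy.float64'>
print(type(math.floor(3.7)))  # <class 'int'>

# np.prod returns a fixed-width NumPy integer; math.prod returns a Python int
print(type(np.prod(shape)))   # <class 'numpy.int64'> (platform dependent)
print(type(math.prod(shape))) # <class 'int'>
```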
5 changes: 3 additions & 2 deletions src/neuroconv/datainterfaces/behavior/video/video_utils.py
@@ -1,3 +1,4 @@
import math
from typing import Optional, Tuple

import numpy as np
@@ -206,13 +207,13 @@ def _fit_frames_to_size(self, size_mb):
@staticmethod
def _scale_shape_to_size(size_mb, shape, size, max_shape):
"""Given the shape and size of array, return shape that will fit size_mb."""
k = np.floor((size_mb / size) ** (1 / len(shape)))
k = math.floor((size_mb / size) ** (1 / len(shape)))
return tuple([min(max(int(x), shape[j]), max_shape[j]) for j, x in enumerate(k * np.array(shape))])

def _get_frame_details(self):
"""Get frame shape and size in MB"""
frame_shape = (1, *self.video_capture_ob.get_frame_shape())
min_frame_size_mb = (np.prod(frame_shape) * self._get_dtype().itemsize) / 1e6
min_frame_size_mb = (math.prod(frame_shape) * self._get_dtype().itemsize) / 1e6
return min_frame_size_mb, frame_shape

def _get_data(self, selection: Tuple[slice]) -> np.ndarray:
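The `_scale_shape_to_size` helper in the hunk above derives a uniform per-axis factor `k` such that `k**ndim ≈ size_mb / size`, then clips each scaled axis between the original shape and the full extent. A standalone sketch of the same arithmetic, with illustrative example numbers:

```python
import math

import numpy as np

def scale_shape_to_size(size_mb, shape, size, max_shape):
    """Scale `shape` by a uniform per-axis factor so the result fits within `size_mb`."""
    # k**len(shape) ~= size_mb / size, so the scaled block stays under the target size
    k = math.floor((size_mb / size) ** (1 / len(shape)))
    scaled = k * np.array(shape)
    # Never go below the original shape or above the full extent of each axis
    return tuple(min(max(int(x), shape[j]), max_shape[j]) for j, x in enumerate(scaled))

# e.g. grow a single-frame block (~0.3 MB) toward a 10 MB buffer
print(scale_shape_to_size(size_mb=10.0, shape=(1, 480, 640), size=0.3, max_shape=(1000, 480, 640)))
# -> (3, 480, 640): only the frame axis can grow, the image plane is already at full extent
```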
43 changes: 28 additions & 15 deletions src/neuroconv/tools/hdmf.py
@@ -1,4 +1,5 @@
"""Collection of modifications of HDMF functions that are to be tested/used on this repo until propagation upstream."""
import math
from typing import Tuple

import numpy as np
@@ -8,7 +9,7 @@
class GenericDataChunkIterator(HDMFGenericDataChunkIterator):
def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]:
num_axes = len(self.maxshape)
chunk_bytes = np.prod(self.chunk_shape) * self.dtype.itemsize
chunk_bytes = math.prod(self.chunk_shape) * self.dtype.itemsize
assert buffer_gb > 0, f"buffer_gb ({buffer_gb}) must be greater than zero!"
assert (
buffer_gb >= chunk_bytes / 1e9
@@ -20,31 +21,43 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]:
maxshape = np.array(self.maxshape)

# Early termination condition
if np.prod(maxshape) * self.dtype.itemsize / 1e9 < buffer_gb:
if math.prod(maxshape) * self.dtype.itemsize / 1e9 < buffer_gb:
return tuple(self.maxshape)

buffer_bytes = chunk_bytes
axis_sizes_bytes = maxshape * self.dtype.itemsize
smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape)
target_buffer_bytes = buffer_gb * 1e9

# If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes
sub_square_buffer_shape = np.array(self.chunk_shape)
if min(axis_sizes_bytes) > target_buffer_bytes:
k1 = np.floor((target_buffer_bytes / chunk_bytes) ** 0.5)
for axis in [smallest_chunk_axis, second_smallest_chunk_axis]:
sub_square_buffer_shape[axis] = k1 * sub_square_buffer_shape[axis]
return tuple(sub_square_buffer_shape)
if num_axes > 1:
smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape)
# If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes
sub_square_buffer_shape = np.array(self.chunk_shape)
if min(axis_sizes_bytes) > target_buffer_bytes:
k1 = math.floor((target_buffer_bytes / chunk_bytes) ** 0.5)
for axis in [smallest_chunk_axis, second_smallest_chunk_axis]:
sub_square_buffer_shape[axis] = k1 * sub_square_buffer_shape[axis]
return tuple(sub_square_buffer_shape)
elif num_axes == 1:
smallest_chunk_axis = 0
# Handle the case where the single axis is too large to fit in the buffer
if axis_sizes_bytes[0] > target_buffer_bytes:
k1 = math.floor(target_buffer_bytes / chunk_bytes)
return tuple(
[
k1 * self.chunk_shape[0],
]
)
else:
raise ValueError(f"num_axes ({num_axes}) is less than one!")

# Original one-shot estimation has good performance for certain shapes
chunk_to_buffer_ratio = buffer_gb * 1e9 / chunk_bytes
chunk_scaling_factor = np.floor(chunk_to_buffer_ratio ** (1 / num_axes))
chunk_scaling_factor = math.floor(chunk_to_buffer_ratio ** (1 / num_axes))
unpadded_buffer_shape = [
np.clip(a=int(x), a_min=self.chunk_shape[j], a_max=self.maxshape[j])
for j, x in enumerate(chunk_scaling_factor * np.array(self.chunk_shape))
]

unpadded_buffer_bytes = np.prod(unpadded_buffer_shape) * self.dtype.itemsize
unpadded_buffer_bytes = math.prod(unpadded_buffer_shape) * self.dtype.itemsize

# Method that starts by filling the smallest axis completely or calculates best partial fill
padded_buffer_shape = np.array(self.chunk_shape)
@@ -62,10 +75,10 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]:
buffer_bytes *= chunks_on_axis
padded_buffer_shape[axis] = self.maxshape[axis]
else: # Found an axis that is too large to use with the rest of the buffer; calculate how much can be used
k3 = np.floor(target_buffer_bytes / buffer_bytes)
k3 = math.floor(target_buffer_bytes / buffer_bytes)
padded_buffer_shape[axis] *= k3
break
padded_buffer_bytes = np.prod(padded_buffer_shape) * self.dtype.itemsize
padded_buffer_bytes = math.prod(padded_buffer_shape) * self.dtype.itemsize

if padded_buffer_bytes >= unpadded_buffer_bytes:
return tuple(padded_buffer_shape)
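This hunk is the core of the fix. For a one-dimensional dataset larger than `buffer_gb`, the old code tried to unpack two smallest axes from `np.argsort(self.chunk_shape)` and fill a square along them, which cannot work with a single axis. The new `num_axes == 1` branch simply stacks as many whole chunks as fit in the buffer. A standalone sketch of that arithmetic, with illustrative numbers:

```python
import math

# Illustrative numbers: a 1-D int16 dataset larger than the 1 GB default buffer
itemsize = 2                     # bytes per element (int16)
maxshape = (1_500_000_000,)      # ~3 GB of data on a single axis
chunk_shape = (1_000_000,)       # ~2 MB per chunk
target_buffer_bytes = 1.0 * 1e9  # buffer_gb * 1e9

chunk_bytes = math.prod(chunk_shape) * itemsize
axis_bytes = maxshape[0] * itemsize

if axis_bytes > target_buffer_bytes:
    # Stack as many whole chunks along the single axis as fit in the buffer
    k1 = math.floor(target_buffer_bytes / chunk_bytes)
    buffer_shape = (k1 * chunk_shape[0],)
else:
    # Small enough to hold everything; the real iterator already returns maxshape
    # via its early-termination check in this case
    buffer_shape = maxshape

print(buffer_shape)  # (500000000,) -> 500 chunks of 1e6 samples, exactly 1 GB of int16
```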
src/neuroconv/tools/roiextractors/imagingextractordatachunkiterator.py
@@ -1,4 +1,5 @@
"""General purpose iterator for all ImagingExtractor data."""
import math
from typing import Optional, Tuple

import numpy as np
@@ -88,7 +89,7 @@ def _get_scaled_buffer_shape(self, buffer_gb: float, chunk_shape: tuple) -> tupl

image_size = self._get_maxshape()[1:]
min_buffer_shape = tuple([chunk_shape[0]]) + image_size
scaling_factor = np.floor((buffer_gb * 1e9 / (np.prod(min_buffer_shape) * self._get_dtype().itemsize)))
scaling_factor = math.floor((buffer_gb * 1e9 / (math.prod(min_buffer_shape) * self._get_dtype().itemsize)))
max_buffer_shape = tuple([int(scaling_factor * min_buffer_shape[0])]) + image_size
scaled_buffer_shape = tuple(
[
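Here the buffer grows only along the frame axis while each buffer slice keeps the full image plane. A sketch of that scaling, with illustrative dimensions:

```python
import math

# Illustrative imaging dimensions
image_size = (512, 512)  # rows, columns of one frame
chunk_frames = 16        # frames per chunk along axis 0
itemsize = 2             # e.g. uint16
buffer_gb = 1.0

# Smallest buffer that still holds whole image planes
min_buffer_shape = (chunk_frames, *image_size)
min_buffer_bytes = math.prod(min_buffer_shape) * itemsize

# Grow only the frame axis until the buffer approaches buffer_gb
scaling_factor = math.floor(buffer_gb * 1e9 / min_buffer_bytes)
max_buffer_shape = (int(scaling_factor * chunk_frames), *image_size)
print(max_buffer_shape)  # (1904, 512, 512) with these numbers, ~0.998 GB
```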
3 changes: 2 additions & 1 deletion src/neuroconv/tools/roiextractors/roiextractors.py
@@ -1,3 +1,4 @@
import math
from collections import defaultdict
from copy import deepcopy
from typing import Literal, Optional
@@ -467,7 +468,7 @@ def check_if_imaging_fits_into_memory(imaging: ImagingExtractor) -> None:
image_size = imaging.get_image_size()
num_frames = imaging.get_num_frames()

traces_size_in_bytes = num_frames * np.prod(image_size) * element_size_in_bytes
traces_size_in_bytes = num_frames * math.prod(image_size) * element_size_in_bytes
available_memory_in_bytes = psutil.virtual_memory().available

if traces_size_in_bytes > available_memory_in_bytes:
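`check_if_imaging_fits_into_memory` compares the size of the full movie against currently available RAM. A sketch of the same estimate; the numbers and the raised message are illustrative, not the function's actual wording:

```python
import math

import psutil

# Illustrative numbers for a two-photon movie
image_size = (512, 512)    # pixels per frame
num_frames = 30_000
element_size_in_bytes = 2  # e.g. uint16 samples

traces_size_in_bytes = num_frames * math.prod(image_size) * element_size_in_bytes
available_memory_in_bytes = psutil.virtual_memory().available

if traces_size_in_bytes > available_memory_in_bytes:
    raise MemoryError(
        f"Movie ({traces_size_in_bytes / 1e9:.2f} GB) does not fit in available memory "
        f"({available_memory_in_bytes / 1e9:.2f} GB); write it iteratively instead."
    )
```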
5 changes: 3 additions & 2 deletions src/neuroconv/tools/testing/mock_ttl_signals.py
@@ -1,3 +1,4 @@
import math
from pathlib import Path
from typing import Optional, Union

@@ -84,7 +85,7 @@ def generate_mock_ttl_signal(

if np.issubdtype(dtype, np.unsignedinteger):
# If data type is an unsigned integer, increment the signed default values by the midpoint of the unsigned range
shift = np.floor(np.iinfo(dtype).max / 2).astype(int)
shift = math.floor(np.iinfo(dtype).max / 2)
baseline_mean_int16_default += shift
signal_mean_int16_default += shift

@@ -271,7 +272,7 @@ def regenerate_test_cases(folder_path: FolderPathType, regenerate_reference_imag
if regenerate_reference_images:
fig.add_trace(
go.Scatter(y=time_series_data, text=time_series_name),
row=np.floor(plot_index / num_cols).astype(int) + 1,
row=math.floor(plot_index / num_cols) + 1,
col=int(plot_index % num_cols) + 1,
)
plot_index += 1
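The unsigned-dtype shift above is now a plain Python int rather than a NumPy scalar that needed an `.astype(int)` cast. A quick sketch of the shift for, e.g., `uint16`:

```python
import math

import numpy as np

dtype = np.uint16  # illustrative unsigned dtype

# Midpoint of the unsigned range, used to lift the signed int16 defaults into unsigned space
shift = math.floor(np.iinfo(dtype).max / 2)
print(shift, type(shift))  # 32767 <class 'int'>

# The old form returned a NumPy scalar and required an explicit cast
old_shift = np.floor(np.iinfo(dtype).max / 2).astype(int)
print(int(old_shift) == shift)  # True
```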
14 changes: 12 additions & 2 deletions tests/test_behavior/test_audio_interface.py
@@ -42,7 +42,7 @@ class TestAudioInterface(AudioInterfaceTestMixin, TestCase):
@classmethod
def setUpClass(cls):
cls.session_start_time = datetime.now(tz=gettz(name="US/Pacific"))
cls.num_frames = 10000
cls.num_frames = int(1e7)
cls.num_audio_files = 3
cls.sampling_rate = 500
cls.aligned_segment_starting_times = [0.0, 20.0, 40.0]
@@ -199,7 +199,17 @@ def test_run_conversion(self):
audio_test_data = [read(filename=file_path, mmap=True)[1] for file_path in file_paths]

nwbfile_path = str(self.test_dir / "audio_test_data.nwb")
self.nwb_converter.run_conversion(nwbfile_path=nwbfile_path, metadata=self.metadata)
self.nwb_converter.run_conversion(
nwbfile_path=nwbfile_path,
metadata=self.metadata,
conversion_options=dict(
Audio=dict(
iterator_options=dict(
buffer_gb=1e7 / 1e9,
)
)
), # use a low buffer_gb so we can test the full GenericDataChunkIterator
)

with NWBHDF5IO(path=nwbfile_path, mode="r") as io:
nwbfile = io.read()
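For context on the chosen test values: with 1e7 frames per mock audio file and a `buffer_gb` of `1e7 / 1e9` (10 MB), each one-dimensional audio array is larger than the buffer, so the new single-axis branch in `GenericDataChunkIterator` is actually exercised. A quick sanity check of that arithmetic; the sample dtype is an assumption for illustration:

```python
import math

num_frames = int(1e7)  # frames per mock audio file in the test
itemsize = 2           # assuming int16 samples, purely for illustration
buffer_gb = 1e7 / 1e9  # the test's deliberately small buffer (10 MB)

data_bytes = num_frames * itemsize
buffer_bytes = buffer_gb * 1e9

print(data_bytes / 1e6, "MB of samples vs", buffer_bytes / 1e6, "MB buffer")
print("needs buffered iteration:", data_bytes > buffer_bytes)  # True for any itemsize >= 2
```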
3 changes: 2 additions & 1 deletion tests/test_behavior/test_video_utils.py
@@ -1,3 +1,4 @@
import math
import os
import tempfile
import unittest
@@ -204,7 +205,7 @@ def test_custom_chunk_shape(self):
)

def test_small_buffer_size(self):
frame_size_mb = np.prod(self.frame_shape) / 1e6
frame_size_mb = math.prod(self.frame_shape) / 1e6
buffer_size = frame_size_mb / 1e3 / 2
video_file = self.create_video(self.fps, self.frame_shape, self.number_of_frames)
with self.assertRaises(AssertionError):
4 changes: 3 additions & 1 deletion tests/test_ophys/test_imagingextractordatachunkiterator.py
@@ -1,3 +1,5 @@
import math

import numpy as np
from hdmf.testing import TestCase
from numpy.testing import assert_array_equal
@@ -157,7 +159,7 @@ def test_data_validity(
)

if buffer_gb is not None:
assert ((np.prod(dci.buffer_shape) * self.imaging_extractor.get_dtype().itemsize) / 1e9) <= buffer_gb
assert ((math.prod(dci.buffer_shape) * self.imaging_extractor.get_dtype().itemsize) / 1e9) <= buffer_gb

data_chunks = np.zeros(dci.maxshape)
for data_chunk in dci:
3 changes: 2 additions & 1 deletion tests/test_ophys/test_tools_roiextractors.py
@@ -1,3 +1,4 @@
import math
import unittest
from copy import deepcopy
from datetime import datetime
@@ -1497,7 +1498,7 @@ def test_non_iterative_write_assertion(self):
available_memory_in_bytes = psutil.virtual_memory().available

excess = 1.5 # Of what is available in memory
num_frames_to_overflow = (available_memory_in_bytes * excess) / (element_size_in_bytes * np.prod(image_size))
num_frames_to_overflow = (available_memory_in_bytes * excess) / (element_size_in_bytes * math.prod(image_size))

# Mock imaging extractor with as many frames as necessary to overflow memory
mock_imaging = Mock()
