Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BinnedSpikeTrain optional CSC format #402

Merged
merged 3 commits into from
Feb 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 75 additions & 49 deletions elephant/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ class BinnedSpikeTrain(object):
Tolerance for rounding errors in the binning process and in the input
data
Default: 1e-8
sparse_format : {'csr', 'csc'}, optional
The sparse matrix format. By default, CSR format is used to perform
slicing and computations efficiently.
Default: 'csr'

Raises
------
Expand Down Expand Up @@ -323,7 +327,11 @@ class BinnedSpikeTrain(object):

@deprecated_alias(binsize='bin_size', num_bins='n_bins')
def __init__(self, spiketrains, bin_size=None, n_bins=None, t_start=None,
t_stop=None, tolerance=1e-8):
t_stop=None, tolerance=1e-8, sparse_format="csr"):
if sparse_format not in ("csr", "csc"):
raise ValueError(f"Invalid 'sparse_format': {sparse_format}. "
"Available: 'csr' and 'csc'")

# Converting spiketrains to a list, if spiketrains is one
# SpikeTrain object
if isinstance(spiketrains, neo.SpikeTrain):
Expand All @@ -339,7 +347,8 @@ def __init__(self, spiketrains, bin_size=None, n_bins=None, t_start=None,
# Check all parameter, set also missing values
self._resolve_input_parameters(spiketrains)
# Now create the sparse matrix
self.sparse_matrix = self._create_sparse_matrix(spiketrains)
self.sparse_matrix = self._create_sparse_matrix(
spiketrains, sparse_format=sparse_format)

@property
def shape(self):
Expand Down Expand Up @@ -369,13 +378,10 @@ def num_bins(self):
return self.n_bins

def __repr__(self):
return "{klass}(t_start={t_start}, t_stop={t_stop}, " \
"bin_size={bin_size}; shape={shape})".format(
klass=type(self).__name__,
t_start=self.t_start,
t_stop=self.t_stop,
bin_size=self.bin_size,
shape=self.shape)
return f"{type(self).__name__}(t_start={self.t_start}, " \
f"t_stop={self.t_stop}, bin_size={self.bin_size}; " \
f"shape={self.shape}, " \
f"format={self.sparse_matrix.__class__.__name__})"

def rescale(self, units):
"""
Expand Down Expand Up @@ -590,7 +596,7 @@ def to_sparse_array(self):

Returns
-------
scipy.sparse.csr_matrix
scipy.sparse.csr_matrix or scipy.sparse.csc_matrix
Sparse matrix, version with spike counts.

See also
Expand All @@ -611,7 +617,7 @@ def to_sparse_bool_array(self):

Returns
-------
scipy.sparse.csr_matrix
scipy.sparse.csr_matrix or scipy.sparse.csc_matrix
Sparse matrix, binary, boolean version.

See also
Expand All @@ -638,7 +644,8 @@ def __eq__(self, other):
return False
sp1 = self.sparse_matrix
sp2 = other.sparse_matrix
if sp1.shape != sp2.shape or sp1.data.shape != sp2.data.shape:
if sp1.__class__ is not sp2.__class__ or sp1.shape != sp2.shape \
or sp1.data.shape != sp2.data.shape:
return False
return (sp1.data == sp2.data).all() and \
(sp1.indptr == sp2.indptr).all() and \
Expand All @@ -662,11 +669,18 @@ def copy(self):
tolerance=self.tolerance)

def __iter_sparse_matrix(self):
spmat = self.sparse_matrix
if isinstance(spmat, sps.csc_matrix):
warnings.warn("The sparse matrix format is CSC. For better "
"performance, specify the CSR format while "
"constructing a "
"BinnedSpikeTrain(sparse_format='csr')")
spmat = spmat.tocsr()
# taken from csr_matrix.__iter__()
i0 = 0
for i1 in self.sparse_matrix.indptr[1:]:
indices = self.sparse_matrix.indices[i0:i1]
data = self.sparse_matrix.data[i0:i1]
for i1 in spmat.indptr[1:]:
indices = spmat.indices[i0:i1]
data = spmat.data[i0:i1]
yield indices, data
i0 = i1

Expand Down Expand Up @@ -1000,45 +1014,51 @@ def to_array(self, dtype=None):
scipy.sparse.csr_matrix.toarray

"""
spmat = self.sparse_matrix
if dtype is not None and dtype != spmat.data.dtype:
# avoid a copy
spmat = sps.csr_matrix(
(spmat.data.astype(dtype), spmat.indices, spmat.indptr),
shape=spmat.shape)
return spmat.toarray()

def binarize(self, copy=None):
array = self.sparse_matrix.toarray()
if dtype is not None:
array = array.astype(dtype)
return array

def binarize(self, copy=True):
"""
Clip the internal array (no. of spikes in a bin) to `0` (no spikes) or
`1` (at least one spike) values only.

Parameters
----------
copy : bool, optional
Deprecated parameter. It has no effect.
If True, a **shallow** copy - a view of `BinnedSpikeTrain` - is
returned with the data array filled with zeros and ones. Otherwise,
the binarization (clipping) is done in-place. A shallow copy
means that :attr:`indices` and :attr:`indptr` of a sparse matrix
is shared with the original sparse matrix. Only the data is copied.
If you want to perform a deep copy, call
:func:`BinnedSpikeTrain.copy` prior to binarizing.
Default: True

Returns
-------
bst : BinnedSpikeTrainView
A view of `BinnedSpikeTrain` with a sparse matrix containing
data clipped to `0`s and `1`s.
bst : BinnedSpikeTrain or BinnedSpikeTrainView
A (view of) `BinnedSpikeTrain` with the sparse matrix data clipped
to zeros and ones.

"""
if copy is not None:
warnings.warn("'copy' parameter is deprecated - a view is always "
"returned; set this parameter to None.",
DeprecationWarning)
spmat = self.sparse_matrix
spmat = sps.csr_matrix(
(spmat.data.clip(max=1), spmat.indices, spmat.indptr),
shape=spmat.shape, copy=False)
bst = BinnedSpikeTrainView(t_start=self._t_start,
t_stop=self._t_stop,
bin_size=self._bin_size,
units=self.units,
sparse_matrix=spmat,
tolerance=self.tolerance)
if copy:
data = np.ones(len(spmat.data), dtype=spmat.data.dtype)
spmat = spmat.__class__(
(data, spmat.indices, spmat.indptr),
shape=spmat.shape, copy=False)
bst = BinnedSpikeTrainView(t_start=self._t_start,
t_stop=self._t_stop,
bin_size=self._bin_size,
units=self.units,
sparse_matrix=spmat,
tolerance=self.tolerance)
else:
spmat.data[:] = 1
bst = self

return bst

@property
Expand All @@ -1053,11 +1073,11 @@ def sparsity(self):
num_nonzero = self.sparse_matrix.data.shape[0]
return num_nonzero / np.prod(self.sparse_matrix.shape)

def _create_sparse_matrix(self, spiketrains):
def _create_sparse_matrix(self, spiketrains, sparse_format):
"""
Converts `neo.SpikeTrain` objects to a sparse matrix
(`scipy.sparse.csr_matrix`), which contains the binned spike times, and
stores it in :attr:`_sparse_mat_u`.
Converts `neo.SpikeTrain` objects to a scipy sparse matrix, which
contains the binned spike times, and
stores it in :attr:`sparse_matrix`.

Parameters
----------
Expand All @@ -1069,9 +1089,15 @@ def _create_sparse_matrix(self, spiketrains):
# The data type for numeric values
data_dtype = np.int32

if sparse_format == 'csr':
sparse_format = sps.csr_matrix
else:
# csc
sparse_format = sps.csc_matrix

if not _check_neo_spiketrain(spiketrains):
# a binned numpy array
sparse_matrix = sps.csr_matrix(spiketrains, dtype=data_dtype)
sparse_matrix = sparse_format(spiketrains, dtype=data_dtype)
return sparse_matrix

# Get index dtype that can accommodate the largest index
Expand Down Expand Up @@ -1120,9 +1146,9 @@ def _create_sparse_matrix(self, spiketrains):
column_ids = np.hstack(column_ids)
row_ids = np.hstack(row_ids)

sparse_matrix = sps.csr_matrix((counts, (row_ids, column_ids)),
shape=shape, dtype=data_dtype,
copy=False)
sparse_matrix = sparse_format((counts, (row_ids, column_ids)),
shape=shape, dtype=data_dtype,
copy=False)

return sparse_matrix

Expand Down
4 changes: 2 additions & 2 deletions elephant/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,7 +918,7 @@ def time_histogram(spiketrains, bin_size, t_start=None, t_stop=None,
bin_size=bin_size)

if binary:
bs = bs.binarize()
bs = bs.binarize(copy=False)
bin_hist = bs.get_num_of_spikes(axis=0)
# Flatten array
bin_hist = np.ravel(bin_hist)
Expand Down Expand Up @@ -1309,7 +1309,7 @@ def _epoch_with_spread(self):
tolerance=self.tolerance)

if self.binary:
bst = bst.binarize()
bst = bst.binarize(copy=False)
bincount = bst.get_num_of_spikes(axis=0)

nonzero_indices = np.nonzero(bincount)[0]
Expand Down
85 changes: 53 additions & 32 deletions elephant/test/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,19 @@ def setUp(self):
self.bin_size = 1 * pq.s
self.tolerance = 1e-8

def test_binarize(self):
spiketrains = [self.spiketrain_a, self.spiketrain_b,
self.spiketrain_a, self.spiketrain_b]
for sparse_format in ("csr", "csc"):
bst = cv.BinnedSpikeTrain(spiketrains=spiketrains,
bin_size=self.bin_size,
sparse_format=sparse_format)
bst_bin = bst.binarize(copy=True)
bst_copy = bst.copy()
assert_array_equal(bst_bin.to_array(), bst.to_bool_array())
bst_copy.sparse_matrix.data[:] = 1
self.assertEqual(bst_bin, bst_copy)

def test_slice(self):
spiketrains = [self.spiketrain_a, self.spiketrain_b,
self.spiketrain_a, self.spiketrain_b]
Expand Down Expand Up @@ -254,32 +267,38 @@ def test_time_slice(self):

def test_to_spike_trains(self):
np.random.seed(1)
bst1 = cv.BinnedSpikeTrain(
spiketrains=[self.spiketrain_a, self.spiketrain_b],
bin_size=self.bin_size
)
spiketrains = [homogeneous_poisson_process(rate=10 * pq.Hz,
t_start=-1 * pq.s,
t_stop=10 * pq.s)]
bst2 = cv.BinnedSpikeTrain(spiketrains=spiketrains,
bin_size=300 * pq.ms)
for bst in (bst1, bst2):
for spikes in ("random", "left", "center"):
spiketrains_gen = bst.to_spike_trains(spikes=spikes,
annotate_bins=True)
for st, indices in zip(spiketrains_gen, bst.spike_indices):
# check sorted
self.assertTrue((np.diff(st.magnitude) > 0).all())
assert_array_equal(st.array_annotations['bins'], indices)
self.assertEqual(st.annotations['bin_size'], bst.bin_size)
self.assertEqual(st.t_start, bst.t_start)
self.assertEqual(st.t_stop, bst.t_stop)
bst_same = cv.BinnedSpikeTrain(spiketrains_gen,
bin_size=bst.bin_size)
self.assertEqual(bst_same, bst)

# invalid mode
self.assertRaises(ValueError, bst.to_spike_trains, spikes='right')
for sparse_format in ("csr", "csc"):
bst1 = cv.BinnedSpikeTrain(
spiketrains=[self.spiketrain_a, self.spiketrain_b],
bin_size=self.bin_size, sparse_format=sparse_format
)
bst2 = cv.BinnedSpikeTrain(spiketrains=spiketrains,
bin_size=300 * pq.ms,
sparse_format=sparse_format)
for bst in (bst1, bst2):
for spikes in ("random", "left", "center"):
spiketrains_gen = bst.to_spike_trains(spikes=spikes,
annotate_bins=True)
for st, indices in zip(spiketrains_gen, bst.spike_indices):
# check sorted
self.assertTrue((np.diff(st.magnitude) > 0).all())
assert_array_equal(st.array_annotations['bins'],
indices)
self.assertEqual(st.annotations['bin_size'],
bst.bin_size)
self.assertEqual(st.t_start, bst.t_start)
self.assertEqual(st.t_stop, bst.t_stop)
bst_same = cv.BinnedSpikeTrain(spiketrains_gen,
bin_size=bst.bin_size,
sparse_format=sparse_format)
self.assertEqual(bst_same, bst)

# invalid mode
self.assertRaises(ValueError, bst.to_spike_trains,
spikes='right')

def test_get_num_of_spikes(self):
spiketrains = [self.spiketrain_a, self.spiketrain_b]
Expand All @@ -288,14 +307,16 @@ def test_get_num_of_spikes(self):
bin_size=1 * pq.s, t_start=0 * pq.s)
self.assertEqual(binned.get_num_of_spikes(),
len(binned.spike_indices[0]))
binned_matrix = cv.BinnedSpikeTrain(spiketrains, n_bins=10,
bin_size=1 * pq.s)
n_spikes_per_row = binned_matrix.get_num_of_spikes(axis=1)
n_spikes_per_row_from_indices = list(map(len,
binned_matrix.spike_indices))
assert_array_equal(n_spikes_per_row, n_spikes_per_row_from_indices)
self.assertEqual(binned_matrix.get_num_of_spikes(),
sum(n_spikes_per_row_from_indices))
for sparse_format in ("csr", "csc"):
binned_matrix = cv.BinnedSpikeTrain(spiketrains, n_bins=10,
bin_size=1 * pq.s,
sparse_format=sparse_format)
n_spikes_per_row = binned_matrix.get_num_of_spikes(axis=1)
n_spikes_per_row_from_indices = list(
map(len, binned_matrix.spike_indices))
assert_array_equal(n_spikes_per_row, n_spikes_per_row_from_indices)
self.assertEqual(binned_matrix.get_num_of_spikes(),
sum(n_spikes_per_row_from_indices))

def test_binned_spiketrain_sparse(self):
a = neo.SpikeTrain([1.7, 1.8, 4.3] * pq.s, t_stop=10.0 * pq.s)
Expand Down Expand Up @@ -662,7 +683,7 @@ def test_repr(self):
bin_size=1 * pq.ms)
self.assertEqual(repr(bst), "BinnedSpikeTrain(t_start=1.0 s, "
"t_stop=1.01 s, bin_size=0.001 s; "
"shape=(1, 10))")
"shape=(1, 10), format=csr_matrix)")

def test_binned_sparsity(self):
train = neo.SpikeTrain(np.arange(10), t_stop=10 * pq.s, units=pq.s)
Expand Down