Skip to content

Commit

Permalink
Reuse the astype(np.int64) array, instead of throwing it away
Browse files Browse the repository at this point in the history
  • Loading branch information
sfiligoi committed Nov 8, 2023
1 parent 9df9cc8 commit 2fd4948
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions biom/_subsample.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data,
cdef:
cnp.int64_t counts_sum, count_el, perm_count_el
cnp.int64_t count_rem
cnp.ndarray[cnp.int64_t, ndim=1] permuted
cnp.ndarray[cnp.int64_t, ndim=1] permuted, intdata
Py_ssize_t i, idx
cnp.int32_t length,el,start,end
cnp.int64_t el_cnt
Expand All @@ -94,7 +94,8 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data,
# We are relying on data being integers
# If there are rounding errors, fp64 sums can lead to
# big errors in the sum, so convert to int64 first
counts_sum = data[start:end].astype(np.int64).sum()
intdata = data[start:end].astype(np.int64)
counts_sum = intdata.sum()

if counts_sum < n:
data[start:end] = 0
Expand All @@ -119,7 +120,7 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data,

el = 0 # index in result/data
count_el = 0 # index in permuted
count_rem = long(data[start]) # since each data has multiple els, keep track how many are left
count_rem = intdata[0] # since each data has multiple els, keep track how many are left
el_cnt = 0
for idx in range(n):
perm_count_el = permuted[idx]
Expand All @@ -133,7 +134,7 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data,
# move to the beginning of next element
count_el += count_rem
# Load how much we have available
count_rem = long(data[start+el])
count_rem = intdata[el]
#re-start the el counter
el_cnt = 0
# increment the el counter
Expand Down

0 comments on commit 2fd4948

Please sign in to comment.