Skip to content

Commit

Permalink
Merge pull request #103 from Kudostoy0u/main
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesli124 authored Aug 27, 2024
2 parents 03f369c + b8be567 commit ade54a1
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 10 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ The format is based on
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.7.4] - 2024-08-24

### Changed
- Added many tests for util functions

### Fixed
- Changed `frags_in_region`, a numba `nopython` function, to use numba-compatible field indexing

## [0.7.3] - 2024-08-20

### Changed
Expand Down
16 changes: 7 additions & 9 deletions src/finaletoolkit/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ def frag_bam_to_bed(input_file: Union[str, pysam.AlignmentFile],

@jit(nopython=True)
def frags_in_region(frag_array: NDArray[np.int64],
minimum: int,
maximum: int) -> NDArray[np.int64]:
minimum: int,
maximum: int) -> NDArray[np.int64]:
"""
Takes an array of coordinates for ends of fragments and returns an
array of fragments with coverage in the specified region. That is, a
Expand All @@ -187,14 +187,12 @@ def frags_in_region(frag_array: NDArray[np.int64],
-------
filtered_frags : ndarray
"""
in_region = np.logical_and(
np.less(frag_array[:, 0], maximum),
np.greater_equal(frag_array[:, 1], minimum)
)
# Changed the code a bit to make it compatible with numba and not raise an error
starts = frag_array['start']
stops = frag_array['stop']
in_region = np.logical_and(np.less(starts,maximum), np.greater_equal(stops,minimum))
filtered_frags = frag_array[in_region]
return filtered_frags


def frag_generator(
input_file: Union[str, pysam.AlignmentFile, pysam.TabixFile, Path],
contig: str,
Expand Down Expand Up @@ -565,4 +563,4 @@ def overlaps(
in_same_contig = contigs_1 == contigs_2
raw_overlaps = np.logical_and(contig_blind_overlaps, in_same_contig)
any_overlaps = np.any(raw_overlaps, axis=1)
return any_overlaps
return any_overlaps
2 changes: 1 addition & 1 deletion src/finaletoolkit/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Single-source module for the package version number.
"""

__version__ = "0.7.3"
__version__ = "0.7.4"
5 changes: 5 additions & 0 deletions tests/data/intervals_overlapped.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
12 34443118 34443538
12 34444968 34442115
12 34445723 34445893
12 34446126 34446261
12 34446186 34446653
69 changes: 69 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Tests for finaletoolkit.utils
"""

import os
import filecmp
import difflib
from numpy import array
from numpy.testing import assert_array_equal

import pytest


from finaletoolkit.utils import *

class TestUtils:
    """
    Tests for the helper functions exposed by ``finaletoolkit.utils``.

    Every test reads a fixture from the ``data`` directory that sits next
    to this test module.
    """

    # dtype of the structured arrays the fragment tests compare against.
    _FRAG_DTYPE = [('start', '<i8'), ('stop', '<i8'), ('strand', '?')]

    # Expected (contig, size) pairs for data/b37.chrom.sizes, in the order
    # asserted by the list test. Kept in one place so the dict and list
    # tests cannot drift apart.
    _B37_CHROM_SIZES = (
        ('1', 249250621), ('2', 243199373), ('3', 198022430),
        ('4', 191154276), ('5', 180915260), ('6', 171115067),
        ('7', 159138663), ('8', 146364022), ('9', 141213431),
        ('10', 135534747), ('11', 135006516), ('12', 133851895),
        ('13', 115169878), ('14', 107349540), ('15', 102531392),
        ('16', 90354753), ('17', 81195210), ('18', 78077248),
        ('19', 59128983), ('20', 63025520), ('21', 48129895),
        ('22', 51304566), ('X', 155270560), ('Y', 59373566),
        ('MT', 16569),
        ('GL000207.1', 4262), ('GL000226.1', 15008), ('GL000229.1', 19913),
        ('GL000231.1', 27386), ('GL000210.1', 27682), ('GL000239.1', 33824),
        ('GL000235.1', 34474), ('GL000201.1', 36148), ('GL000247.1', 36422),
        ('GL000245.1', 36651), ('GL000197.1', 37175), ('GL000203.1', 37498),
        ('GL000246.1', 38154), ('GL000249.1', 38502), ('GL000196.1', 38914),
        ('GL000248.1', 39786), ('GL000244.1', 39929), ('GL000238.1', 39939),
        ('GL000202.1', 40103), ('GL000234.1', 40531), ('GL000232.1', 40652),
        ('GL000206.1', 41001), ('GL000240.1', 41933), ('GL000236.1', 41934),
        ('GL000241.1', 42152), ('GL000243.1', 43341), ('GL000242.1', 43523),
        ('GL000230.1', 43691), ('GL000237.1', 45867), ('GL000233.1', 45941),
        ('GL000204.1', 81310), ('GL000198.1', 90085), ('GL000208.1', 92689),
        ('GL000191.1', 106433), ('GL000227.1', 128374),
        ('GL000228.1', 129120), ('GL000214.1', 137718),
        ('GL000221.1', 155397), ('GL000209.1', 159169),
        ('GL000218.1', 161147), ('GL000220.1', 161802),
        ('GL000213.1', 164239), ('GL000211.1', 166566),
        ('GL000199.1', 169874), ('GL000217.1', 172149),
        ('GL000216.1', 172294), ('GL000215.1', 172545),
        ('GL000205.1', 174588), ('GL000219.1', 179198),
        ('GL000224.1', 179693), ('GL000223.1', 180455),
        ('GL000195.1', 182896), ('GL000212.1', 186858),
        ('GL000222.1', 186861), ('GL000200.1', 187035),
        ('GL000193.1', 189789), ('GL000194.1', 191469),
        ('GL000225.1', 211173), ('GL000192.1', 547496),
        ('NC_007605', 171823),
    )

    @staticmethod
    def _data_file(request, name):
        """Return the path of fixture ``name`` in the tests' data directory."""
        return request.path.parent / 'data' / name

    def test_chrom_sizes_to_dict(self, request):
        """chrom_sizes_to_dict maps every contig name to its size."""
        chrom_sizes = self._data_file(request, 'b37.chrom.sizes')
        expected = dict(self._B37_CHROM_SIZES)
        assert chrom_sizes_to_dict(chrom_sizes) == expected

    def test_chrom_sizes_to_list(self, request):
        """chrom_sizes_to_list returns (contig, size) tuples in order."""
        chrom_sizes = self._data_file(request, 'b37.chrom.sizes')
        expected = list(self._B37_CHROM_SIZES)
        assert chrom_sizes_to_list(chrom_sizes) == expected

    def test_frag_array(self, request):
        """frag_array returns every fragment on contig 12 of the fixture."""
        interval_file = self._data_file(request, '12.3444.b37.frag.gz')
        contig = "12"
        arr = frag_array(interval_file, contig)
        expected = array(
            [
                (34443118, 34443284, True), (34443139, 34443300, True),
                (34443358, 34443538, False), (34443483, 34443660, True),
                (34444089, 34444252, True), (34444696, 34444863, True),
                (34444954, 34445075, True), (34444968, 34445105, True),
                (34445136, 34445288, True), (34445511, 34445672, False),
                (34445705, 34445852, True), (34445723, 34445893, True),
                (34446126, 34446261, False), (34446486, 34446653, True),
            ],
            dtype=self._FRAG_DTYPE,
        )
        assert_array_equal(arr, expected)

    def test_frags_in_region(self, request):
        """frags_in_region keeps only the fragments expected for the window."""
        # Original author's note: numba can't work with PosixPath, hence
        # the explicit str() conversion of the fixture path.
        interval_file = str(self._data_file(request, '12.3444.b37.frag.gz'))
        start = 34443119
        stop = 34445075
        contig = "12"
        arr = frag_array(interval_file, contig)
        frags = frags_in_region(arr, start, stop)
        expected = array(
            [
                (34443118, 34443284, True), (34443139, 34443300, True),
                (34443358, 34443538, False), (34443483, 34443660, True),
                (34444089, 34444252, True), (34444696, 34444863, True),
                (34444954, 34445075, True), (34444968, 34445105, True),
            ],
            dtype=self._FRAG_DTYPE,
        )
        assert_array_equal(frags, expected)

    def test_frag_generator(self, request):
        """frag_generator yields exactly the four expected fragments."""
        interval_file = self._data_file(request, '12.3444.b37.frag.gz')
        contig = "12"
        start = 34443119
        stop = 34443538
        frags = frag_generator(
            interval_file, contig=contig, start=start, stop=stop)
        # Comparing the fully drained generator checks both the yielded
        # values and that nothing extra follows them; on failure pytest
        # shows the unexpected items instead of a bare "assert False".
        assert list(frags) == [
            ('12', 34443118, 34443284, 60, True),
            ('12', 34443139, 34443300, 60, True),
            ('12', 34443358, 34443538, 60, False),
            ('12', 34443483, 34443660, 54, True),
        ]

    def test_low_quality_read_pairs(self, request):
        """The first three reads are low quality and the fourth is not."""
        bam_path = self._data_file(request, '12.3444.b37.bam')
        # NOTE(review): this module never imports pysam explicitly;
        # presumably the name arrives through
        # `from finaletoolkit.utils import *` - confirm.
        # The context manager closes the BAM handle even when an
        # assertion fails.
        with pysam.AlignmentFile(bam_path) as bam:
            assert low_quality_read_pairs(next(bam))
            assert low_quality_read_pairs(next(bam))
            assert low_quality_read_pairs(next(bam))
            assert not low_quality_read_pairs(next(bam))

0 comments on commit ade54a1

Please sign in to comment.