Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Corrections to fusion_finder.py and collapse_isoform_by_sam.py #171

Merged
merged 1 commit into from
Sep 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cupcake/tofu/collapse_isoforms_by_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def can_merge(m, r1, r2):
r1.ref_exons[-n2].start <= r2.ref_exons[0].start < r1.ref_exons[-n2].end
else:
return abs(r1.ref_exons[0].end - r2.ref_exons[0].end) <= internal_fuzzy_max_dist and \
r1.ref_exons[n2-1].start <= r2.ref_exons[-1].end < r1.ref_exons[n2].end
r1.ref_exons[n2-1].start <= r2.ref_exons[-1].end < r1.ref_exons[n2-1].end
return False

d = {}
Expand Down
12 changes: 7 additions & 5 deletions cupcake/tofu/fusion_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from Bio.Seq import Seq

from bx.intervals.cluster import ClusterTree

from cupcake.io import BioReaders
from cupcake.tofu.utils import check_ids_unique
from cupcake.io.SeqReaders import LazyFastaReader, LazyFastqReader
Expand Down Expand Up @@ -144,18 +143,22 @@ def is_fusion_compatible(r1, r2, max_fusion_point_dist, max_exon_end_dist, allow
"single-exon transcript to be exact!")
else: # multi-exon case, must be OK
return True
elif type == 'super' or type == 'subset':
if type == 'subset':
r1, r2 = r2, r1 # rotate so r1 is always the longer one
if type == 'super' or type == 'subset':
n2 = len(r2.segments)
if allow_extra_5_exons:
# check that the 3' junction is identical
# also check that the 3' end is relatively close
# AND the 5' start of r2 is sandwiched between the matching r1 exon coordinates
if in_5_portion and plus_is_5end:
if abs(r1.segments[-1].start - r2.segments[-1].start) > max_exon_end_dist: return False
if abs(r1.segments[-1].end - r2.segments[-1].end) > max_fusion_point_dist: return False
return True
return r1.segments[-n2].start <= r2.segments[0].start < r1.segments[-n2].end
elif in_5_portion and (not plus_is_5end):
if abs(r1.segments[0].end - r2.segments[0].end) > max_exon_end_dist: return False
if abs(r1.segments[0].start - r2.segments[0].start) > max_fusion_point_dist: return False
return True
return r1.segments[n2-1].start <= r2.segments[-1].end < r1.segments[n2-1].end
else:
return False
else: # not OK because number of exons must be the same
Expand Down Expand Up @@ -398,4 +401,3 @@ def fusion_main(fa_or_fq_filename, sam_filename, output_prefix, cluster_report_c
min_identity=args.min_identity,
is_flnc=args.is_flnc)