Skip to content

Commit

Permalink
fix (moPepGen): --include-coding and --find-ass added to `callNon…
Browse files Browse the repository at this point in the history
…coding` and `callVariant` to call novel ORF peptides from coding transcripts. #659
  • Loading branch information
zhuchcn committed Mar 21, 2024
1 parent ae63236 commit 7b51502
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 284 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]

## [1.3.2]

- `--include-coding` and `--find-ass` added to `callNoncoding` and `callVariant` to call novel ORF peptides from coding transcripts. #659

## [1.3.1]

### Added:
Expand Down
2 changes: 1 addition & 1 deletion moPepGen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from . import constant


__version__ = '1.3.1'
__version__ = '1.3.2'

## Error messages
ERROR_INDEX_IN_INTRON = 'The genomic index seems to be in an intron'
Expand Down
1 change: 0 additions & 1 deletion moPepGen/aa/VariantPeptideIdentifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

if TYPE_CHECKING:
from moPepGen.seqvar import VariantRecord
from moPepGen.gtf import GenomicAnnotation

def create_variant_peptide_id(transcript_id:str, variants:List[VariantRecord],
orf_id:str=None, index:int=None, gene_id:str=None) -> str:
Expand Down
1 change: 0 additions & 1 deletion moPepGen/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,3 @@
from .decoy_fasta import add_subparser_decoy_fasta, decoy_fasta
from .summarize_fasta import add_subparser_summarize_fasta, summarize_fasta
from .update_index import add_subparser_update_index, update_index
from .call_alt_start_site import add_subparser_call_alt_start, call_alt_start
2 changes: 0 additions & 2 deletions moPepGen/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
-- Calling
callVariant Call non-canonical peptides from genomic variants.
callNoncoding Call non-canonical peptides from noncoding transcripts.
callAltStart Call non-canonical peptides from alternative start sites.
callAltTranslation Call non-canonical peptides with alternative translation
from coding transcripts.
Expand Down Expand Up @@ -68,7 +67,6 @@ def main():
cli.add_subparser_parse_circexplorer(subparsers)
cli.add_subparser_call_variant(subparsers)
cli.add_subparser_call_noncoding(subparsers)
cli.add_subparser_call_alt_start(subparsers)
cli.add_subparser_call_alt_translation(subparsers)
cli.add_subparser_split_fasta(subparsers)
cli.add_subparser_filter_fasta(subparsers)
Expand Down
201 changes: 0 additions & 201 deletions moPepGen/cli/call_alt_start_site.py

This file was deleted.

35 changes: 22 additions & 13 deletions moPepGen/cli/call_noncoding_peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ def add_subparser_call_noncoding(subparsers:argparse._SubParsersAction):
default='max',
metavar='<choice>'
)
p.add_argument(
'--include-coding',
action='store_true',
help='Include coding transcripts to find alternative ORFs.'
)
p.add_argument(
'--w2f-reassignment',
action='store_true',
Expand Down Expand Up @@ -117,22 +122,26 @@ def call_noncoding_peptide(args:argparse.Namespace) -> None:

inclusion_biotypes, exclusion_biotypes = common.load_inclusion_exclusion_biotypes(args)

noncanonical_pool = aa.VariantPeptidePool()
noval_orf_peptide_pool = aa.VariantPeptidePool()
orf_pool = []

i = 0
for tx_id in anno.transcripts:
tx_model = anno.transcripts[tx_id]
if inclusion_biotypes and \
tx_model.transcript.biotype not in inclusion_biotypes:
continue
if exclusion_biotypes and \
tx_model.transcript.biotype in exclusion_biotypes:
continue
if tx_id in proteome:
continue
if tx_model.transcript_len() < args.min_tx_length:
continue
if tx_model.is_protein_coding:
if not args.include_coding:
pass
else:
if inclusion_biotypes and \
tx_model.transcript.biotype not in inclusion_biotypes:
continue
if exclusion_biotypes and \
tx_model.transcript.biotype in exclusion_biotypes:
continue
if tx_id in proteome:
continue
if tx_model.transcript_len() < args.min_tx_length:
continue

try:
peptides, orfs = call_noncoding_peptide_main(
Expand All @@ -149,7 +158,7 @@ def call_noncoding_peptide(args:argparse.Namespace) -> None:
orf_pool.extend(orfs)

for peptide in peptides:
noncanonical_pool.add_peptide(peptide, canonical_peptides,
noval_orf_peptide_pool.add_peptide(peptide, canonical_peptides,
cleavage_params)
except ReferenceSeqnameNotFoundError as e:
if not ReferenceSeqnameNotFoundError.raised:
Expand All @@ -163,7 +172,7 @@ def call_noncoding_peptide(args:argparse.Namespace) -> None:
if i % 5000 == 0:
logger.info('%i transcripts processed.', i)

noncanonical_pool.write(args.output_path)
noval_orf_peptide_pool.write(args.output_path)
if args.output_orf:
with open(args.output_orf, 'w') as handle:
write_orf(orf_pool, handle)
Expand Down
6 changes: 6 additions & 0 deletions moPepGen/svgraph/VariantPeptideDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ def __init__(self, query:FeatureLocation, ref:FeatureLocation, feature_type:str,
self.feature_id = feature_id
self.variant_id = variant_id

def __repr__(self):
""" Return a debugging representation of this PeptideSegment, showing its
query, ref, feature_type, feature_id and variant_id attributes. """
return f"<PeptideSegment query={self.query} ref={self.ref}" +\
f" feature_type={self.feature_type} feature_id={self.feature_id}" +\
f" variant_id={self.variant_id}>"

def merge(self, other:PeptideSegment) -> PeptideSegment:
""" merge """
query = FeatureLocation(
Expand Down
64 changes: 0 additions & 64 deletions test/integration/test_call_alt_start.py

This file was deleted.

Loading

0 comments on commit 7b51502

Please sign in to comment.