Skip to content

Commit

Permalink
removed > from id in output txt file
Browse files Browse the repository at this point in the history
  • Loading branch information
signalbash committed May 5, 2020
1 parent 22ab4de commit 929850d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
9 changes: 6 additions & 3 deletions borf/borf.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,11 @@ def main():
orf_data_strand_bias = orf_data_strand_bias.drop_duplicates('id', keep='first')

if len(orf_data_strand_bias) >= 10:
pos_neg_bias = orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"].value_counts()
positive_strand_bias = pos_neg_bias[0] / (pos_neg_bias[0]+pos_neg_bias[1])

pos_bias = (orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"] == "+").sum()
neg_bias = (orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"] == "-").sum()
positive_strand_bias = pos_bias / (pos_bias+neg_bias)

if positive_strand_bias > 0.7 and args.strand == True:
#data is likely from a stranded assembly.
print("Are you sure your input .fasta file isn't stranded?")
Expand All @@ -118,8 +121,8 @@ def main():
min_upstream_length=args.upstream_incomplete_length,
genetic_code=args.genetic_code)

write_orf_fasta(orf_data, output_path_pep)
write_orf_data(orf_data, output_path_txt)
write_orf_fasta(orf_data, output_path_pep)

start_seq_n = (i*batch_size) + 1
end_seq_n = min(start_seq_n + (batch_size - 1), n_seqs)
Expand Down
4 changes: 2 additions & 2 deletions borf/get_orfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def get_orfs(all_sequences, both_strands=False, min_orf_length=100,
# add ORF classification
orf_df['orf_class'] = add_orf_classification(orf_df)
# Generate ids for writing to fasta
orf_df['fasta_id'] = ('>' + orf_df.id + '.orf' + orf_df.isoform_number.map(str) + ' ' + orf_df.orf_class + ':' + orf_df.start_site_nt.map(str) + '-' + orf_df.stop_site_nt.map(str) + ' strand:' + orf_df.strand.map(str))
orf_df['fasta_id'] = (orf_df.id + '.orf' + orf_df.isoform_number.map(str) + ' ' + orf_df.orf_class + ':' + orf_df.start_site_nt.map(str) + '-' + orf_df.stop_site_nt.map(str) + ' strand:' + orf_df.strand.map(str))

return orf_df

Expand Down Expand Up @@ -735,7 +735,7 @@ def write_orf_fasta(orf_df, file_out):
path to file to write fasta sequences
"""

orf_df['fasta_id'] = '>' + orf_df.fasta_id
orf_df.to_csv(file_out, mode = 'a', index=False, sep='\n', header=False, columns=['fasta_id', 'orf_sequence'])

def batch_iterator(iterator, batch_size):
Expand Down

0 comments on commit 929850d

Please sign in to comment.