-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnucleotide_composition.py
35 lines (27 loc) · 1.4 KB
/
nucleotide_composition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Report the nucleotide composition of a DNA sequence stored in a text file:
# the sequence length, the frequency of each of A, C, G and T, and the
# combined G+C content.

# set the name of the input sequence file
filename = 'dna.txt'

# the only characters accepted as nucleotides (upper case, as in the input check)
NUCLEOTIDES = "ACGT"


def read_sequence(path: str) -> str:
    """Return the text stored in *path* with trailing whitespace removed.

    The trailing newline that editors put at the end of a file is stripped so
    it is not counted as part of the sequence. Whitespace elsewhere in the
    file is kept as-is.
    """
    # the "with" block closes the file even if reading fails
    with open(path, 'r') as infile:
        return infile.read().rstrip()


def count_nucleotides(sequence: str) -> dict:
    """Return a dict mapping each of "A", "C", "G", "T" to its count in *sequence*."""
    return {base: sequence.count(base) for base in NUCLEOTIDES}


def composition_report(sequence: str) -> list:
    """Build the report for *sequence* as a list of output lines.

    The first line is always the sequence length. If the sequence is empty,
    or contains any character other than A, C, G or T, the second (and last)
    line is an explanatory message. Otherwise the frequencies of A, C, G, T
    and the G+C content follow, each rounded to 3 decimal places.
    """
    seqlen = len(sequence)
    lines = [f"Sequence length: {seqlen} bp"]

    # an empty sequence has no frequencies (and would divide by zero below)
    if seqlen == 0:
        lines.append("Oh no! The input sequence is empty, so there is nothing to count.")
        return lines

    counts = count_nucleotides(sequence)

    # Every character must be one of the four nucleotides. Compare integer
    # counts rather than checking that the float frequencies sum to exactly 1:
    # rounding in the divisions can make a valid sequence miss 1.0.
    if sum(counts.values()) != seqlen:
        lines.append(
            "Oh no! It appears this sequence is not composed of only A,G,T,C "
            "characters. Is the input only a nucleotide sequence (without a header)?"
        )
        return lines

    # frequency = count of the nucleotide divided by the total number of characters
    for base in NUCLEOTIDES:
        lines.append(f"Freq of {base}: {round(counts[base] / seqlen, 3)}")
    gc_fraction = (counts["G"] + counts["C"]) / seqlen
    lines.append(f"G+C content: {round(gc_fraction, 3)}")
    return lines


def main() -> None:
    """Read the sequence from ``filename`` and print its composition report."""
    for line in composition_report(read_sequence(filename)):
        print(line)


if __name__ == "__main__":
    main()