-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneuronal_helper.py
64 lines (54 loc) · 1.93 KB
/
neuronal_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
@author__ = "Juan Francisco Illan"
@license__ = "GPL"
@version__ = "1.0.1"
@email__ = "juanfrancisco.illan@gmail.com"
"""
import numpy as np
def parse_alpha_to_seq(sequence):
    """Translate a sequence of nucleotide letters into integer codes.

    The mapping is A -> 0, C -> 1, T -> 2, G -> 3 and N -> -1. Matching is
    case-sensitive.

    Args:
        sequence: Indexable collection of single-letter symbols (for
            example a string).

    Returns:
        A 1-D NumPy integer array with one code per input symbol.

    Raises:
        AssertionError: If a symbol is not one of A, C, T, G or N.
    """
    base_codes = {'A': 0, 'C': 1, 'T': 2, 'G': 3, 'N': -1}
    # Every slot is overwritten below, so the initial contents are irrelevant.
    codes = np.empty(len(sequence), dtype=int)
    for position, base in enumerate(sequence):
        if base not in base_codes:
            raise AssertionError("Cannot handle snippet: " + base)
        codes[position] = base_codes[base]
    return codes
def to_categorical(y, nb_classes=None):
    """Build a binary class matrix from a vector of integer class labels.

    Row ``i`` of the result has a 1 in column ``y[i]`` and zeros elsewhere.
    A label of -1 leaves its row entirely zero.

    Args:
        y: Class labels; converted to an int32 NumPy array.
        nb_classes: Number of columns. When falsy (None or 0) it is taken
            as ``max(y) + 1``.

    Returns:
        A float NumPy array of shape ``(len(y), nb_classes)``.
    """
    labels = np.asarray(y, dtype='int32')
    width = nb_classes or np.max(labels) + 1
    one_hot = np.zeros((len(labels), width))
    for row, label in enumerate(labels):
        if label == -1:
            # -1 marks "no class": keep the all-zero row.
            continue
        one_hot[row, label] = 1.
    return one_hot
def do_one_hot_encoding(sequence, seq_length, f=parse_alpha_to_seq):
    """One-hot encode a batch of sequences into a single 3-D array.

    Each entry ``sequence[i]`` is converted to integer codes by ``f`` and
    then expanded to a 4-column binary matrix with ``to_categorical``.

    Args:
        sequence: Indexable batch exposing ``shape``; ``shape[0]`` is used
            as the number of rows of the result.
        seq_length: Second dimension of the result; each encoded entry must
            fit a ``(seq_length, 4)`` slot.
        f: Callable turning one entry into integer class codes.

    Returns:
        A float NumPy array of shape ``(sequence.shape[0], seq_length, 4)``.
    """
    n_rows = sequence.shape[0]
    encoded = np.zeros((n_rows, seq_length, 4))
    for row in range(len(sequence)):
        codes = f(sequence[row])
        encoded[row] = to_categorical(codes, 4)
    return encoded
def parse_protein_to_array(output, value, class_length):
    """Set the slot of ``output`` whose index equals ``value`` to 1.

    Only indices in ``range(class_length)`` are considered; when ``value``
    matches none of them, ``output`` is left untouched.

    Args:
        output: Mutable, indexable buffer (e.g. a NumPy row or a list) that
            is modified in place.
        value: Class identifier compared against each candidate index.
        class_length: Number of candidate indices to check.

    Returns:
        The same ``output`` object that was passed in.
    """
    for class_index in range(class_length):
        if value == class_index:
            # Store a numeric 1 rather than the string '1' so that list or
            # object buffers are not polluted with text.
            output[class_index] = 1
            # At most one index can match, so stop scanning.
            break
    return output
def do_one_hot_encoding_protein_clasif(values, class_length, f=parse_protein_to_array):
    """Encode a batch of class values as rows of a 2-D array.

    A zero matrix is allocated and, for every entry ``values[i]``, row ``i``
    is replaced by the result of ``f(row, values[i], class_length)``.

    Args:
        values: Indexable batch exposing ``shape``; ``shape[0]`` is used as
            the number of rows of the result.
        class_length: Number of columns of the result, forwarded to ``f``.
        f: Callable receiving the current row, the class value and
            ``class_length``, and returning the encoded row.

    Returns:
        A float NumPy array of shape ``(values.shape[0], class_length)``.
    """
    n_rows = values.shape[0]
    encoded = np.zeros((n_rows, class_length))
    for row in range(len(values)):
        current = encoded[row]
        encoded[row] = f(current, values[row], class_length)
    return encoded
def getKmers(sequence, size=6):
    """Split a sequence string into overlapping, lower-cased k-mer words.

    Consecutive words start one character apart, so a sequence of length
    ``n`` yields ``n - size + 1`` words; a sequence shorter than ``size``
    yields an empty list.

    Args:
        sequence: The sequence string to split.
        size: Length of each k-mer word. Defaults to 6 (hexamer words).

    Returns:
        A list of lower-cased substrings of length ``size``.
    """
    return [sequence[start:start + size].lower()
            for start in range(len(sequence) - size + 1)]