forked from yoann-dufresne/alignmentfreeTP1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloading.py
42 lines (32 loc) · 1.1 KB
/
loading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from os import listdir, path
def load_fna(filename):
    """Load a FASTA-formatted file into a list of sequences.

    Lines starting with '>' are treated as record headers and discarded.
    Every other line is stripped of surrounding whitespace and the pieces
    belonging to one record are concatenated into a single string.  Blank
    lines are ignored, so a record without any sequence characters is
    skipped and no empty string is ever returned.

    :param str filename: The file to load
    :return list: A list of strings where each string is a sequence from the fasta
    """
    sequences = []
    chunks = []

    with open(filename) as fp:
        for line in fp:
            if line.startswith('>'):
                # A new header closes the record accumulated so far.
                if chunks:
                    sequences.append("".join(chunks))
                chunks = []
            else:
                fragment = line.strip()
                # Skip blank lines: otherwise a record made only of blank
                # lines (or a blank line before the first header) would be
                # emitted as a spurious empty sequence.
                if fragment:
                    chunks.append(fragment)

    # Flush the last record, which has no following header to close it.
    if chunks:
        sequences.append("".join(chunks))

    return sequences
def load_directory(directory):
    """Load all the FASTA files from a directory.

    A directory entry is loaded when it is a regular file whose name
    contains a dot followed by one of the extensions "fa", "fasta" or
    "fna" (case-sensitive).  Entries without a dot and entries that are not
    regular files (e.g. sub-directories) are skipped.

    :param str directory: Path to the directory to load.
    :return dict: A dict containing pairs filename: sequence array.
    """
    fasta_extensions = ("fa", "fasta", "fna")
    files = {}

    for filename in listdir(directory):
        # rpartition returns an empty separator when the name has no dot,
        # so a dot-less file literally named "fa"/"fasta"/"fna" is not
        # mistaken for a FASTA file.
        _, dot, extension = filename.rpartition('.')
        if not dot or extension not in fasta_extensions:
            continue

        filepath = path.join(directory, filename)
        # A sub-directory with a FASTA-looking name cannot be opened as a
        # file; skip it instead of crashing the whole load.
        if not path.isfile(filepath):
            continue

        files[filename] = load_fna(filepath)

    return files
if __name__ == "__main__":
    # Manual check when run as a script: load the "data" directory and
    # print how many files were loaded.
    print(len(load_directory("data")))