-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunctionCalls.py
149 lines (131 loc) · 4.99 KB
/
functionCalls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import csv
from prettytable import PrettyTable
# Column indices into each parsed row (a list of strings). Every row is laid
# out as [filename, rsid, chromosome, position, genotype].
(
    FILE_LOC,  # name of the file the row was read from
    RSID_LOC,  # rsid
    CHRO_LOC,  # chromosome
    POS_LOC,   # position of the SNP
    GENO_LOC,  # genotype
) = range(5)
class Genotype:
    """Tally of two-letter genotype codes plus two summary fields."""

    def __init__(self):
        # Text label of the most frequent genotype(s); empty until filled in.
        self.most_occur = ''
        # Starts at 0; FunctionCalls.count_geno_by_rsid stores the highest
        # single-genotype count here (despite the name, it is not a mean).
        self.average = 0
        # One zeroed counter per genotype code. The order is significant:
        # it is the order in which counts are iterated and displayed.
        codes = (
            'TT', 'AA', 'CC', 'GG',
            'AT', 'TA', 'AC', 'CA',
            'AG', 'GA', 'GT', 'TG',
            'CT', 'TC', 'CG', 'GC',
        )
        self.geno = dict.fromkeys(codes, 0)
# holds all the actions we are going to be doing
class FunctionCalls:
    """Parse SNP data files, tally genotypes for a fixed set of rsids, and print reports.

    Typical order of use: ``parse_file`` -> ``count_genotypes`` /
    ``count_geno_by_rsid`` -> ``get_most_occur`` -> one of the ``display_*``
    or ``compare_*`` methods.
    """

    def __init__(self):
        # Parsed rows, each [filename, rsid, chromosome, position, genotype].
        self.data = []
        # Tally over every parsed row, regardless of rsid.
        self.geno = Genotype()
        # One tally per tracked rsid; also reachable by name through self.rsids.
        self.rs12913832 = Genotype()
        self.rs1393350 = Genotype()
        self.rs1800407 = Genotype()
        self.rs1805008 = Genotype()
        self.rs7495174 = Genotype()
        self.rs16891982 = Genotype()
        self.rsids = {
            'rs12913832': self.rs12913832,
            'rs1393350': self.rs1393350,
            'rs1800407': self.rs1800407,
            'rs1805008': self.rs1805008,
            'rs7495174': self.rs7495174,
            'rs16891982': self.rs16891982,
        }

    def parse_file(self, directory):
        """Read every file in *directory* and collect the rows for tracked rsids.

        Each line is split on whitespace. Lines whose first field is a key of
        ``self.rsids`` are stored in ``self.data`` with the file's name
        prepended, giving [filename, rsid, chromosome, position, genotype].
        Blank lines and directory entries that are not regular files are
        skipped.

        Args:
            directory: path of the directory to scan (not recursive).

        Raises:
            OSError: if the directory or one of its files cannot be read.
        """
        for name in os.listdir(directory):
            path = os.path.join(directory, name)
            # Sub-directories and other non-files cannot be opened as text.
            if not os.path.isfile(path):
                continue
            # The context manager closes the handle even if reading fails.
            with open(path, "r") as handle:
                for line in handle:
                    words = line.split()
                    # An empty list means a blank line; words[0] would raise.
                    if words and words[0] in self.rsids:
                        words.insert(0, name)
                        self.data.append(words)

    def display_table(self):
        """Print every parsed row as a table."""
        table = PrettyTable()
        table.field_names = ["filename", "rsid", "chromosome", "position", "genotype"]
        for row in self.data:
            table.add_row(row)
        print(table)

    def compare_results(self, hairColor, other, otherHairColor):
        """Print the most common genotype per rsid for this object and *other*.

        Args:
            hairColor: label shown on this object's rows.
            other: another FunctionCalls whose results are shown alongside.
            otherHairColor: label shown on *other*'s rows.
        """
        table = PrettyTable()
        table.field_names = ["Hair color", "RSID", "Genotype"]
        for rsid in self.rsids:
            table.add_row([hairColor, rsid, self.rsids[rsid].most_occur])
            table.add_row([otherHairColor, rsid, other.rsids[rsid].most_occur])
        print(table)

    def compare_genos_by_rsid(self, rsid, self_color, other, other_color):
        """Print the per-genotype counts of one rsid for this object and *other*.

        Args:
            rsid: key of ``self.rsids`` to compare.
            self_color: column heading for this object's counts.
            other: another FunctionCalls to compare against.
            other_color: column heading for *other*'s counts.

        Raises:
            KeyError: if *rsid* is not tracked by either object.
        """
        table = PrettyTable()
        table.field_names = ["Genotype", self_color, other_color]
        mine = self.rsids[rsid].geno
        theirs = other.rsids[rsid].geno
        for code in mine:
            table.add_row([code, mine[code], theirs[code]])
        print(table)

    def count_genotypes(self):
        """Add the genotype of every parsed row to the overall tally.

        Rows too short to have a genotype column, and genotypes that are not
        keys of the tally, are ignored. Counts accumulate across calls.
        """
        counts = self.geno.geno
        for row in self.data:
            if len(row) > GENO_LOC and row[GENO_LOC] in counts:
                counts[row[GENO_LOC]] += 1

    def display_total_genos(self):
        """Print each (genotype, count) pair of the overall tally."""
        for pair in self.geno.geno.items():
            print(pair)

    def count_geno_by_rsid(self):
        """Tally genotypes per rsid and record each rsid's highest count.

        Rows too short to have a genotype column, rows for untracked rsids,
        and unrecognised genotypes are ignored. Counts accumulate across
        calls. The highest single-genotype count is stored in ``average``.
        """
        for row in self.data:
            if len(row) <= GENO_LOC:
                continue
            tracked = self.rsids.get(row[RSID_LOC])
            code = row[GENO_LOC]
            if tracked is not None and code in tracked.geno:
                tracked.geno[code] += 1
        for tracked in self.rsids.values():
            # Counts start at 0 and only grow, so the floor is 0.
            tracked.average = max(tracked.geno.values(), default=0)

    def display_geno_by_rsid(self):
        """Print each rsid followed by its (genotype, count) pairs."""
        for rsid, tracked in self.rsids.items():
            print(rsid)
            for pair in tracked.geno.items():
                print(pair)

    def get_most_occur(self):
        """Store, per rsid, the genotype(s) whose count reaches the recorded maximum.

        Ties are joined with ' or ' in tally order. The label is rebuilt from
        scratch on every call, so calling this repeatedly does not duplicate
        entries. NOTE: when nothing has been counted for an rsid its maximum
        is 0, every genotype ties, and all of them are listed.
        """
        for tracked in self.rsids.values():
            tracked.most_occur = ' or '.join(
                code
                for code, count in tracked.geno.items()
                if count >= tracked.average
            )

    def display_most_occur(self):
        """Print each rsid with its most common genotype label."""
        for rsid, tracked in self.rsids.items():
            print(rsid + " " + tracked.most_occur)