-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtesting_data_compiler.py
123 lines (91 loc) · 2.85 KB
/
testing_data_compiler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# %%
import os
import pandas as pd
import re
# Directory tree that py_initializeExpInfo() walks; every file found in it is
# read as "key,value" lines describing one experiment.
experimentInfoDir = r"C:\Users\dfoss\Documents\Projects\RaderLab\RaderLabCode\ExperimentInfo"
# Experiment name -> {variable: value}; filled in by py_initializeExpInfo().
all_experiments = {}
# Variable name -> set of every value seen for it; filled in by py_initializeExpInfo().
all_variables = {}
# A `_Replicate` suffix is necessary for numbering the replicates; other than
# that, the column data can be whatever you want.
# The first CSV column is used as the row index. The column names are what
# py_getColumnsOfExp() matches experiment names against.
data = pd.read_csv(r"C:\Users\dfoss\Documents\Projects\RaderLab\RNASeqData\AllCombinedRNASeqData.csv", index_col=0)
# %%
def py_initializeExpInfo():
    """Load every experiment-info file found under ``experimentInfoDir``.

    Walks the directory tree and reads each file as ``key,value`` lines.
    Two module-level dicts are populated in place:

    * ``all_experiments[name][key] = value``, where ``name`` is the file
      name without its extension;
    * ``all_variables[key]`` is the set of every value seen for ``key``
      across all files.

    Blank lines are skipped, and only the first comma on a line separates
    the key from the value, so a value may itself contain commas.

    Raises:
        ValueError: if a non-blank line contains no comma.
    """
    for root, _dirs, files in os.walk(experimentInfoDir):
        for file_name in files:
            # splitext rather than [:-4]: identical for ".csv", but does not
            # mangle names whose extension is not exactly three characters.
            exp_name = os.path.splitext(file_name)[0]
            exp_info = all_experiments[exp_name] = {}
            path = os.path.join(root, file_name)
            with open(path, "r") as csv_file:
                for line_no, raw_line in enumerate(csv_file, start=1):
                    line = raw_line.rstrip("\r\n")
                    if not line.strip():
                        # A stray blank line used to crash the unpacking below.
                        continue
                    key, sep, val = line.partition(",")
                    if not sep:
                        raise ValueError(
                            f"{path}:{line_no}: expected 'key,value', got {line!r}"
                        )
                    exp_info[key] = val
                    all_variables.setdefault(key, set()).add(val)
# %%
def py_getExp(filter=None):
    """Return the sorted names of the loaded experiments.

    Args:
        filter: optional iterable of substrings (a single string is treated
            as one substring). An experiment is kept when its name contains
            ANY of them (logical OR). ``None`` or an empty filter returns
            every experiment. The parameter name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        Sorted list of experiment names taken from ``all_experiments``.
    """
    if not filter:
        return sorted(all_experiments)
    if isinstance(filter, str):
        # Iterating a bare string would test each character separately.
        filter = [filter]
    # Collect the matching experiment NAMES; the previous code collected the
    # filter strings themselves, so callers got their own input back.
    keep = {
        name
        for name in all_experiments
        if any(fragment in name for fragment in filter)
    }
    return sorted(keep)
# %%
def py_getExpInfo(key):
    """Look up the stored info for one experiment.

    ``key`` is an experiment name as held in ``all_experiments``. The
    associated entry is returned as-is (not copied); an unknown name
    raises ``KeyError``.
    """
    experiment_info = all_experiments[key]
    return experiment_info
# %%
def py_getExpVariables():
    """Return the variable names held in ``all_variables`` as a sorted list."""
    variable_names = [name for name in all_variables]
    variable_names.sort()
    return variable_names
# %%
def py_getExpVariableOptions(variable):
    """Return, as a sorted list, every value recorded for ``variable``.

    The values come from ``all_variables[variable]``; an unknown variable
    name raises ``KeyError``.
    """
    options = [*all_variables[variable]]
    options.sort()
    return options
# %%
def py_getColumnsOfExp(experiments):
    """Return the sorted column names of ``data`` belonging to ``experiments``.

    A column belongs to an experiment when its name starts with that
    experiment's name.

    Args:
        experiments: iterable of experiment-name prefixes.

    Returns:
        Sorted list of matching column names; empty when ``experiments``
        is empty.
    """
    prefixes = tuple(experiments)
    if not prefixes:
        # The old regex built from an empty list was "", which matched
        # every column instead of none.
        return []
    # Literal prefix test: unlike the old unescaped "a|b" regex, characters
    # such as "." or "+" in an experiment name are not treated as regex syntax.
    return sorted(col for col in data.columns if col.startswith(prefixes))
# %%
def py_getExperimentData(experiments):
    """Return the slice of ``data`` holding only the given experiments' columns.

    Column selection is delegated to ``py_getColumnsOfExp``.
    """
    return data[py_getColumnsOfExp(experiments)]
# %%
def py_getAnnotation(experiments):
    """Build a per-column annotation table for the given experiments.

    For every data column selected by ``py_getColumnsOfExp`` whose name
    contains an experiment name, one row is emitted: the row label is the
    column name and the values are that experiment's entries from
    ``all_experiments``.

    Args:
        experiments: iterable of experiment names (keys of ``all_experiments``).

    Returns:
        DataFrame with one row per matched data column. Variables that are
        missing for any row are dropped, so only variables shared by all
        selected experiments remain.

    Raises:
        KeyError: if a matched experiment name is not in ``all_experiments``.
    """
    col_keep = py_getColumnsOfExp(experiments)
    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for exp in experiments:
        for col in col_keep:
            if exp in col:
                rows.append(pd.Series(all_experiments[exp], name=col))
    annotation = pd.DataFrame(rows)
    # Drop variables that are not defined for every selected column.
    return annotation.dropna(axis=1)
# %%
def py_combineVariables(annotation, variables):
    """Add a column that combines several annotation variables.

    The variables are sorted by name; the new column is named after them
    joined with ``_`` and holds, per row, their values joined with ``_``.

    Args:
        annotation: DataFrame of annotations; modified in place.
        variables: names of the columns of ``annotation`` to combine.

    Returns:
        The same ``annotation`` object, with the combined column added.
    """
    variables = sorted(variables)
    # Name the column after the chosen variables. Joining ``annotation``
    # itself iterated over ALL of its column names, which gave the wrong name.
    combined_name = "_".join(variables)
    # astype(str) keeps string data unchanged and lets non-string values be joined.
    annotation[combined_name] = annotation[variables].astype(str).agg("_".join, axis=1)
    return annotation
# %%
# Populate all_experiments / all_variables from the files under
# experimentInfoDir. This runs at module level, so it also runs on import.
py_initializeExpInfo()
# %%
# List every loaded experiment name.
print(py_getExp())
# %%
# Show the key/value info stored for one experiment.
print(py_getExpInfo('LP_49h'))
#%%
# List every variable name seen across the experiment-info files.
print(py_getExpVariables())
#%%
# List every value recorded for the "time" variable.
print(py_getExpVariableOptions("time"))
# %%
# Select the data columns for two experiments (the result is only displayed
# by the interactive cell; it is not stored).
py_getExperimentData(["LP_49h", "RM_49h"])
# %%
# Build the annotation table for the same two experiments.
df = py_getAnnotation(["LP_49h", "RM_49h"])
# Bare expression: displays df when run as an interactive cell.
df
# %%
# Add a combined time/condition column to df (df is modified in place).
py_combineVariables(df, ["time","condition"])
# %%