-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDistance_Projection_spearman_Part3.R
executable file
·126 lines (100 loc) · 3.08 KB
/
Distance_Projection_spearman_Part3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
### Session reset: empty the global environment, close the current graphics
### device when at least one device is open, emit a form-feed character
### (presumably to clear an interactive console) and fix the RNG seed.
### NOTE(review): rm(list = ls()) also wipes the caller's workspace when this
### file is source()d from an interactive session.
rm(list = ls())
if (!is.null(dev.list())) {
  dev.off()
}
cat("\014")
set.seed(1)
### Preview helper: the first rows (as chosen by head()) and at most the first
### five columns of a two-dimensional object.
###   x: a data frame or matrix.
### Returns an object of the same kind as head(x); the column count is clamped
### to ncol(x) and drop = FALSE keeps a single-column result two-dimensional.
head2 <- function(x) {
  n_cols <- min(5L, ncol(x))
  head(x)[, seq_len(n_cols), drop = FALSE]
}
### Infix complement of %in%: TRUE for every element of `x` that does not
### occur in `table`.
`%!in%` <- function(x, table) {
  !(x %in% table)
}
library(readr)
library(dplyr)
library(tidyr)
#library(edgeR)
library(preprocessCore)
print("step 3")

### Gene list used to filter the expression table below. Per the original
### author's note, it was provided by Merve and holds genes that are never
### expressed and are therefore of no interest here.
never_exp <- read.table(
  "./never_exp_GMM_dream_comp.txt",
  quote = "\"",
  comment.char = ""
)

### Cell-line identifiers, spelled the way they appear as CCLE column names.
cell_id <- c(
  "ASPC1",
  "DU145",
  "EFO21",
  "NCIH1793",
  "HCC1143",
  "LNCAPCLONEFGC",
  "U87MG"
)

### CCLE expression table. Rows whose first column (read in as `X`) is listed
### in never_exp$V1 are dropped, then that column is renamed to "gene".
ccle_dat <- read.csv("./data_ccle_RNAseq_DREAMv2_FIXED.csv")
ccle_dat <- dplyr::filter(ccle_dat, !(X %in% never_exp$V1))
names(ccle_dat)[1] <- "gene"

### Every remaining column name is treated as a cell line.
ccle_cells <- names(ccle_dat)[-1]

### ccle_dat now has the layout spearman_pipe() expects: a `gene` column
### followed by one expression column per cell line. (The earlier note at this
### point referred to `mean_exp_known` and UMAP, neither of which appears in
### this script.)
### Quantile-normalise an expression table and return the pairwise Spearman
### correlation between its sample columns.
###   rna_seq_dat: data frame with one expression column per sample and,
###                optionally, a `gene` identifier column, which is ignored.
### Returns a square matrix whose row and column names are the sample column
### names of `rna_seq_dat`.
spearman_pipe <- function(rna_seq_dat) {
  is_sample <- colnames(rna_seq_dat) != "gene"
  sample_names <- colnames(rna_seq_dat)[is_sample]
  expr_mat <- as.matrix(rna_seq_dat[, is_sample, drop = FALSE])

  # normalize.quantiles() (preprocessCore) returns a bare matrix, so the
  # sample names are re-attached afterwards. Gene identifiers are deliberately
  # not used as row names: they have no effect on the correlation, and
  # duplicated or missing identifiers are not allowed as data-frame row names.
  normalized <- normalize.quantiles(expr_mat)
  colnames(normalized) <- sample_names

  cor(normalized, method = "spearman")
}
### Pairwise Spearman correlation between all cell-line columns of ccle_dat.
ccle_dist <- spearman_pipe(ccle_dat)

### Drug-sensitivity table (original note: sensitivity data of 11 cell lines
### against 30 drugs). Four cell lines are removed before modelling.
sensitivity_pred <- read.csv("./sens_v2.txt", sep = "") %>%
  filter(X %!in% c("PANC1", "HSTS", "KRJ1", "HF2597"))

### Row names are assigned by position from cell_id.
### NOTE(review): this presumes the remaining rows of sens_v2.txt are already
### in cell_id order - confirm against the file.
stopifnot(nrow(sensitivity_pred) == length(cell_id))
rownames(sensitivity_pred) <- cell_id

### The first 30 column names are used as the drug (response) columns.
### NOTE(review): the identifier column `X` is still part of the table at this
### point; confirm it does not fall inside the first 30 columns.
drugs <- colnames(sensitivity_pred)[1:30]

ccle_dist_df <- as.data.frame(as.matrix(ccle_dist))

### Predictor table for every CCLE cell line: its correlation with each of the
### cell_id lines. It is the same for all drugs, so it is built once, outside
### the loop.
temp <- ccle_dist_df[, cell_id, drop = FALSE]
temp$cell <- rownames(temp)

### Training predictors: only the rows whose cell name is also a column name.
ccle_dist_dat <- temp[temp$cell %in% colnames(temp), , drop = FALSE]

sensitivity_pred$cell <- rownames(sensitivity_pred)
lm_perturb_dat <- merge(ccle_dist_dat, sensitivity_pred, by = "cell")

### One linear model per drug (response = drug column, predictors = the
### cell_id correlation columns), then a prediction for every CCLE cell line.
### reformulate() builds the formula from the column names, so no code string
### has to be parsed and evaluated.
results <- data.frame(row.names = rownames(ccle_dist_df))
for (drug in drugs) {
  sensitivity_pred_models <- lm(
    reformulate(cell_id, response = drug),
    data = lm_perturb_dat
  )
  lm_estimates <- predict(sensitivity_pred_models, newdata = temp)
  results[[drug]] <- lm_estimates
}
head(results)
### Min-max rescaling of `x` onto [0, 1], using the overall minimum and maximum
### of `x` (for a data frame: across all of its columns together).
###   x:     numeric vector, matrix or all-numeric data frame.
###   na.rm: if TRUE, missing values are ignored when the range is computed
###          (they stay NA in the result). The default FALSE keeps the
###          original behaviour, where any NA makes the whole result NA.
### Returns an object shaped like `x`. When all values are equal the range is
### zero and the result is NaN; a warning is raised in that case.
range01 <- function(x, na.rm = FALSE) {
  limits <- range(x, na.rm = na.rm)
  span <- limits[2] - limits[1]
  if (isTRUE(span == 0)) {
    warning("range01(): all values are equal, result is NaN", call. = FALSE)
  }
  (x - limits[1]) / span
}
### Rescale all predictions together onto [0, 1].
res <- range01(results)
head(res)

### Bring the predictions into the column and row layout of the template.
template_final <- read.csv("./template_final.csv")

### Move the cell-line names from the row names into a regular column.
res[["cell_line"]] <- rownames(res)
rownames(res) <- NULL

### Columns follow the template header; rows follow the template's cell_line
### order.
submission_cols <- res[colnames(template_final)]
row_order <- match(template_final$cell_line, submission_cols$cell_line)
try_res <- submission_cols[row_order, ]

write.csv(try_res, "/output/submission_final.csv", row.names = FALSE)