-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathQoala_T_A_model_based_github.R
126 lines (109 loc) · 6.11 KB
/
Qoala_T_A_model_based_github.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
## Qoala-T: Estimations of MRI Qoala-T using BrainTime model
# Code to reproduce step 4 of our Qoala-T Tool
# Copyright (C) 2017-2019 Lara Wierenga - Leiden University, Brain and Development Research Center
#
# This package contains data and R code for use of the Qoala-T tool based on the BrainTime model:
#
# title: Qoala-T: A supervised-learning tool for quality control of FreeSurfer segmented MRI data
# author:
# - name: Klapwijk, E.T., van de Kamp, F., Meulen, M., Peters, S. and Wierenga, L.M.
# https://doi.org/10.1016/j.neuroimage.2019.01.014
#
# If you have any question or suggestion, dont hesitate to get in touch:
# https://github.com/Qoala-T/QC/issues
## ============================
# dependencies: the following packages are used in this code
packages <- c("caret", "corrplot", "gbm", "plyr", "randomForest", "e1071",
"pROC", "DMwR","dplyr","pbkrtest","car","pbkrtest","doParallel","ROSE","repmis")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
install.packages(setdiff(packages, rownames(installed.packages())))
}
lapply(packages, library, character.only = TRUE)
## ============================
# EDIT THIS PART
# -----------------------------------------------------------------
# set inputFolder and outputFolder
# -----------------------------------------------------------------
# Input directory to your data file
inputFolder <- "~/Desktop/input_datafiles/"
# Create output directory if it doesnt exist
outputFolder <- "~/Desktop/Output_Qoala_T/"
ifelse(dir.exists(outputFolder),FALSE,dir.create(outputFolder))
# -----------------------------------------------------------------
# EDIT THIS PART
# -----------------------------------------------------------------
# Load your dataset
# -----------------------------------------------------------------
# set working directory and dataset name (here the output of Stats2Table is expected,
# see https://github.com/Qoala-T/QC/blob/master/Scripts/Stats2Table/Stats2Table.R
setwd(inputFolder)
dataset_name <- "Dataset_Name" # Provide name of your study and/or dataset (same as dataset name in Stats2Table script)
stats2Table <- read.csv(paste("FreeSurfer_Output_", dataset_name, ".csv", sep=""), header=T, row.names=1)
test_data <- stats2Table
dataset_name <- "your_dataset_name"
# -----------------------------------------------------------------
# Or Load example with simulated data
# -----------------------------------------------------------------
# This is an example file
# githubURL <- "https://github.com/Qoala-T/QC/blob/master/ExampleData/simulated_data_A_model.Rdata?raw=true","Qoala_T_model"
# test_data <- get(load(url(githubURL)))
# -----------------------------------------------------------------
# Load model Qoala-T model based on BrainTime data
# -----------------------------------------------------------------
githubURL <- "https://github.com/Qoala-T/QC/blob/master/Qoala_T_model.Rdata?raw=true"
rf.tune <- get(load(url(githubURL)))
# -----------------------------------------------------------------
#reorder colnames of dataset to match trainingset
# -----------------------------------------------------------------
dataset_colnames <- names(rf.tune$trainingData)[-ncol(rf.tune$trainingData)]
testing <- test_data[,dataset_colnames]
testing <- testing[complete.cases(testing),]
# -----------------------------------------------------------------
## External validation of unseen data on Qoala-T model
# -----------------------------------------------------------------
rf.pred <- predict(rf.tune,testing)
rf.probs <- predict(rf.tune,testing,type="prob") # probability of belonging in either category (certainty..)
head(rf.probs)
# -----------------------------------------------------------------
# Saving output
# ----------------------------------------------------------------
# create empty data frame
Qoala_T_predictions <- data.frame(matrix(ncol = 4, nrow = nrow(rf.probs)))
colnames(Qoala_T_predictions) = c('participant_id','Scan_QoalaT', 'Recommendation', 'manual_QC_adviced')
# fill data frame
Qoala_T_predictions$participant_id <- row.names(rf.probs)
Qoala_T_predictions$Scan_QoalaT <- rf.probs$Include*100
Qoala_T_predictions$Recommendation <- rf.pred
Qoala_T_predictions$manual_QC_adviced <- ifelse(Qoala_T_predictions$Scan_QoalaT<70&Qoala_T_predictions$Scan_QoalaT>30,"yes","no")
Qoala_T_predictions <- Qoala_T_predictions[order(Qoala_T_predictions$Scan_QoalaT, Qoala_T_predictions$participant_id),]
csv_Qoala_T_predictions = paste(outputFolder,'Qoala_T_predictions_model_based_',dataset_name,'.csv', sep = '')
write.csv(Qoala_T_predictions, file = csv_Qoala_T_predictions, row.names=F)
# -----------------------------------------------------------------
# PLOT results
# -----------------------------------------------------------------
excl_rate <- table(Qoala_T_predictions$Recommendation)
fill_colour <- rev(c("#88A825","#CF4A30"))
font_size <- 12
text_col <- "Black"
p <- ggplot(Qoala_T_predictions, aes(x=Scan_QoalaT,y=1,col=Recommendation)) +
annotate("rect", xmin=30, xmax=70, ymin=1.12, ymax=.88, alpha=0.2, fill="#777777") +
geom_jitter(alpha=.8,height=.1,size=6) +
ggtitle(paste("Qoala-T estimation model based ",dataset_name,"\nMean Qoala-T Score = ",round(mean(Qoala_T_predictions$Scan_QoalaT),1),sep="")) +
annotate("text", x=20, y=1.15, label=paste("Excluded = ",as.character(round(excl_rate[1]))," scans",sep="")) +
annotate("text", x=80, y=1.15, label=paste("Included = ",as.character(round(excl_rate[2]))," scans",sep="")) +
scale_colour_manual(values=fill_colour) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.text.x = element_text (size = font_size,color=text_col),
axis.text.y = element_blank(),
axis.title.x = element_text (size = font_size,color=text_col),
axis.title.y = element_blank(),
axis.ticks=element_blank(),
plot.title=element_text (size =16,color=text_col,hjust=.5)
)
print(p)
filename<- paste(outputFolder,"Figure_Rating_model_based_",dataset_name,".pdf",sep="")
dev.copy(pdf,filename,width=30/2.54, height=20/2.54)
dev.off()