-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathICGC_API_pull.R
117 lines (93 loc) · 4.12 KB
/
ICGC_API_pull.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
require(jsonlite)
require(curl)
require(plyr)
# Example input for icgcData(): one request specification per ICGC project.
# Every specification names the project, the fields to fetch, and the number
# of entries (size) to pull for that project.
data <- local({
  fields <- c("id", "mutation", "chromosome", "start", "end")
  sizes <- c(
    "THCA-US" = 6659,
    "ORCA-IN" = 13626,
    "THCA-SA" = 45126,
    "LUSC-US" = 65063,
    "LUSC-KR" = 64671,
    "LUSC-CN" = 419,
    "KIRC-US" = 26371,
    "LAML-KR" = 42977
  )
  # Expand the project -> size table into the list-of-lists shape.
  lapply(seq_along(sizes), function(i) {
    list(project = names(sizes)[i], fields = fields, size = sizes[[i]])
  })
})
#' Pull mutation data from the ICGC REST API and shape it for UpSetR
#'
#' Each requested project is downloaded in pages of at most 100 entries. The
#' pages are stacked, de-duplicated by mutation `id`, and extended with one
#' 0/1 membership column per project.
#'
#' @param data A list of request specifications. Each element is a list with
#'   `project` (a single project code string), `fields` (character vector of
#'   fields to request) and `size` (number of entries to pull). Entries that
#'   name the same project are merged into a single set column.
#' @param fetch Function taking a URL and returning the parsed response as a
#'   list with a `hits` element. Defaults to `jsonlite::fromJSON`; supply a
#'   stub to run the function without network access.
#' @return A data frame with one row per unique mutation `id`, containing the
#'   columns returned by the API, a `project` column listing every project
#'   the mutation was seen in (joined with " + "), and one numeric 0/1 column
#'   per project. An empty data frame is returned when nothing was fetched.
icgcData <- function(data, fetch = jsonlite::fromJSON) {
  page_size <- 100L
  ids_by_project <- list()
  pages <- list()

  for (entry in data) {
    project <- entry$project
    if (!is.character(project) || length(project) != 1L || is.na(project)) {
      stop("each entry needs a single character `project`", call. = FALSE)
    }
    # Integer arithmetic keeps the numbers pasted into the URL free of
    # scientific notation.
    total <- as.integer(entry$size)
    if (length(total) != 1L || is.na(total) || total < 0L) {
      stop("`size` must be a single non-negative number for project ",
           project, call. = FALSE)
    }

    field_query <- paste(entry$fields, collapse = "%2C")
    n_pages <- (total + page_size - 1L) %/% page_size
    page_ids <- vector("list", n_pages)

    # seq_len() makes a zero-size request issue no calls at all.
    for (j in seq_len(n_pages)) {
      from <- (j - 1L) * page_size + 1L
      # The final page only asks for what is left of `size`.
      count <- min(page_size, total - from + 1L)
      url <- paste0(
        "https://dcc.icgc.org:443/api/v1/projects/",
        project, "/mutations?field=",
        field_query, "&&&from=", from,
        "&size=", count, "&&order=desc"
      )
      hits <- fetch(url)$hits
      if (is.data.frame(hits) && nrow(hits) > 0L) {
        page_ids[[j]] <- as.vector(hits$id)
        # Tag rows with their project here, so a response shorter than the
        # requested size cannot cause a row-count mismatch later.
        hits$project <- project
        pages[[length(pages) + 1L]] <- hits
      }
      print(paste("On page", as.character(j), "of project", project))
    }

    # `[<-` with list(...) keeps the entry even when no ids were returned.
    seen <- ids_by_project[[project]]
    ids_by_project[project] <- list(c(seen, unlist(page_ids)))
  }

  if (length(pages) == 0L) {
    return(data.frame())
  }

  # Bind once instead of growing a data frame inside the loop.
  combined <- do.call(rbind, pages)
  # A logical mask keeps every row when there are no duplicates; a negative
  # `which()` index would select zero rows in that case.
  combined <- combined[!duplicated(combined$id), , drop = FALSE]

  set_names <- names(ids_by_project)
  for (name in set_names) {
    combined[[name]] <- as.numeric(combined$id %in% ids_by_project[[name]])
  }

  # Build the label from the set columns by name, so it does not depend on
  # how many fields were requested.
  membership <- as.matrix(combined[set_names]) == 1
  combined$project <- vapply(
    seq_len(nrow(membership)),
    function(r) paste(set_names[membership[r, ]], collapse = " + "),
    character(1)
  )
  combined
}
#Run the function on the example request list to generate a data frame
#compatible with UpSetR. icgcData() fetches every page over HTTP, so this
#line needs network access.
myData <- icgcData(data)
# Per-project metadata table with columns projects, Donors, Site and Country.
setdata <- data.frame(
  projects = c(
    "THCA-US", "THCA-SA", "ORCA-IN",
    "BLCA-US", "BLCA-CN",
    "LUSC-CN", "LUSC-US", "LUSC-KR",
    "KIRC-US", "KIRP-US",
    "LAML-KR"
  ),
  Donors = c(507, 15, 119, 412, 103, 10, 504, 36, 537, 291, 171),
  # Sites come in consecutive runs that follow the project order above.
  Site = rep(
    c("Head & Neck", "Bladder", "Lung", "Kidney", "Blood"),
    times = c(3, 2, 3, 2, 1)
  ),
  Country = c(
    "United States", "Saudi Arabia", "India",
    "United States", "China",
    "China", "United States", "South Korea",
    "United States", "United States",
    "South Korea"
  )
)
# Store the descriptive columns as plain character vectors.
setdata[c("Country", "Site")] <- lapply(
  setdata[c("Country", "Site")],
  as.character
)