forked from hassanfa/autoseq-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqdnaseq.R
executable file
·99 lines (86 loc) · 3.51 KB
/
qdnaseq.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env Rscript
###########################################
# check command line
library(getopt)
#long, short, argmask, datatype, desk
#argmask 0=no arg, 1=req, 2=optional
args <- rbind(c("bam", "b", 1, "character", "Tumor low-pass WGS bam file"),
c("output", "o", 1, "character", "Output tsv with bed file with segmented copy numbers"),
c("background", "x", 2, "character", "Background set to use, as a RData file"))
# opts <- getopt(args,opt = c("--bam", "~/L11466T_wgs.bam",
# "--output", "~/tmp/L11466T_wgs.qdnaseq.txt",
# "--background", "~/bin/autoseqer/resources/qdnaseq_background.Rdata") )
opts <- getopt(args)
if(is.null(opts$bam)){
stop("Must specify input bam file --bam/-b.")
}
if(is.null(opts$output)){
stop("Must specify output tsv file name --segments/-s.")
}
# if(is.null(opts$background)){
# stop("Must specify background file --background/-x.")
# }
###########################################
# load packages
library(data.table)
library(QDNAseq)
library(CGHcall)
library(Biobase)
###########################################
# run qdnaseq
if(! is.null(opts$background)){
bgReadCounts <- readRDS(opts$background)
}
bins <- getBinAnnotations(binSize=15)
readCounts <- binReadCounts(bins, bamfiles=opts$bam)
sampleName <- readCounts@phenoData@data$name
if(! is.null(opts$background)){
if( sampleName %in% bgReadCounts@phenoData@data$name ){
allReadCounts <- bgReadCounts
} else {
allReadCounts <- combine(readCounts, bgReadCounts)
}
}else{
allReadCounts <- readCounts
}
readCountsFiltered <- applyFilters(allReadCounts,residual=TRUE, blacklist=TRUE)
readCountsFiltered <- estimateCorrection(readCountsFiltered)
copyNumbers <- correctBins(readCountsFiltered)
copyNumbersNormalized <- normalizeBins(copyNumbers)
copyNumbersSmooth <- smoothOutlierBins(copyNumbersNormalized)
copyNumbersSegmented <- segmentBins(copyNumbersSmooth)
copyNumbersSegmentedNormalized <- normalizeSegmentedBins(copyNumbersSegmented)
if(! is.null(opts$background) ){
copyNumbersCalled <- callBins(copyNumbersSegmentedNormalized, cellularity=.75)
}
##################################################################
## Make final data matrix
dat <- data.table( copyNumbersSegmentedNormalized@featureData@data )
dat[, readcount := readCounts@assayData$counts]
dat[, copynumber := copynumber(copyNumbersSegmentedNormalized)[,sampleName] ]
dat[, segmented := segmented(copyNumbersSegmentedNormalized)[,sampleName] ]
if(! is.null(opts$background) ){
dat[, call := calls(copyNumbersCalled)[,sampleName] ]
dat[, probdloss := probdloss(copyNumbersCalled)[,sampleName] ]
dat[, probloss := probloss(copyNumbersCalled)[,sampleName] ]
dat[, probnorm := probnorm(copyNumbersCalled)[,sampleName] ]
dat[, probgain := probgain(copyNumbersCalled)[,sampleName] ]
dat[, probamp := probamp(copyNumbersCalled)[,sampleName] ]
dat[, probdloss := probdloss(copyNumbersCalled)[,sampleName] ]
dat[, probdloss := probdloss(copyNumbersCalled)[,sampleName] ]
}
##################################################################
## Supress scientific notation in output files
options(scipen=999)
##################################################################
## Write to outfile, gzip if outfile end with gz.
cat("Writing outfile...\n")
ofile <- opts$output
if( grepl("gz$", opts$output) ){
ofile <- gzfile( opts$output, 'w' )
}
write.table(dat, ofile, col.names=TRUE, dec=".", quote=FALSE, sep="\t", row.names=FALSE)
if( grepl("gz$", opts$output) ){
close(ofile)
}
cat("Done.\n")