Skip to content

Commit

Permalink
Merge branch 'devel' into issue_342
Browse files Browse the repository at this point in the history
  • Loading branch information
lima1 committed Sep 13, 2024
2 parents f1530d5 + 59dad0e commit f14ac86
Show file tree
Hide file tree
Showing 14 changed files with 247 additions and 22 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Package: PureCN
Type: Package
Title: Copy number calling and SNV classification using
targeted short read sequencing
Version: 2.9.4
Date: 2024-01-12
Version: 2.11.2
Date: 2024-09-13
Authors@R: c(person("Markus", "Riester",
role = c("aut", "cre"),
email = "markus.riester@novartis.com",
Expand Down Expand Up @@ -69,4 +69,4 @@ biocViews: CopyNumberVariation, Software, Sequencing,
VariantAnnotation, VariantDetection, Coverage, ImmunoOncology
NeedsCompilation: no
ByteCompile: yes
RoxygenNote: 7.2.3.9000
RoxygenNote: 7.3.1
14 changes: 6 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM bioconductor/bioconductor_docker:RELEASE_3_18
FROM bioconductor/bioconductor_docker:RELEASE_3_19
#FROM bioconductor/bioconductor_docker:devel

# install base packages
Expand Down Expand Up @@ -37,11 +37,9 @@ ENV GENOMICSDB_BRANCH=master
RUN mkdir $GENOMICSDB_PATH
ENV INSTALL_PREFIX=$GENOMICSDB_PATH
ENV PREREQS_ENV=$GENOMICSDB_PATH/genomicsdb_prereqs.sh
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
#ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-arm64
ENV MAVEN_VERSION=3.9.5

RUN ls $JAVA_HOME
#ARG TARGETPLATFORM
#RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64"; else JAVA_HOME=/usr/lib/jvm/java-17-openjdk-arm64; fi
#ENV MAVEN_VERSION=3.9.5

WORKDIR /tmp

Expand All @@ -63,7 +61,7 @@ remotes::install_github("nalinigans/GenomicsDB-R", ref="master", configure.args=

# install PureCN
RUN Rscript -e 'BiocManager::install("PureCN", dependencies = TRUE)'
#RUN Rscript -e 'BiocManager::install("lima1/PureCN", ref = "RELEASE_3_18", dependencies = TRUE)'
#RUN Rscript -e 'BiocManager::install("lima1/PureCN", ref = "RELEASE_3_19", dependencies = TRUE)'
ENV PURECN=/usr/local/lib/R/site-library/PureCN/extdata

# add symbolic link and paths
Expand All @@ -72,7 +70,7 @@ WORKDIR /opt
RUN ln -s $PURECN /opt/PureCN

# install GATK4
ENV GATK_VERSION="4.4.0.0"
ENV GATK_VERSION="4.5.0.0"
RUN wget --no-verbose https://github.com/broadinstitute/gatk/releases/download/${GATK_VERSION}/gatk-${GATK_VERSION}.zip && \
unzip gatk-${GATK_VERSION}.zip -d /opt && \
rm gatk-${GATK_VERSION}.zip
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(adjustLogRatio)
export(annotateTargets)
export(bootstrapResults)
export(calculateBamCoverageByInterval)
Expand All @@ -22,6 +23,7 @@ export(filterVcfBasic)
export(filterVcfMuTect)
export(filterVcfMuTect2)
export(findFocal)
export(findHighQualitySNPs)
export(getSexFromCoverage)
export(getSexFromVcf)
export(plotAbs)
Expand Down
22 changes: 22 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
Changes in version 2.12.0
-------------------------

NEW FEATURES

o Added --num-eigen-vectors to PureCN.R to set the number of eigen vectors.
Tuning parameter for coverage normalization. Default should in most cases
be a good compromise between removing most normal noise and not
overfitting to pool of normal samples.
o Added findHighQualitySNPs function to extract good SNPs from mapping bias
database.


Changes in version 2.10.0
-------------------------

NEW FEATURES
o adjustLogRatio function for adjusting a tumor vs normal coverage
ratio for purity and ploidy. Useful for downstream tools that
expect ratios instead of absolute copy numbers such as GISTIC.
Thanks @tedtoal (#40).

SIGNIFICANT USER-VISIBLE CHANGES

o Provide interval-level likelihood scores in runAbsoluteCN return
Expand All @@ -11,6 +30,9 @@ BUGFIXES

o Bugfix #296 was not merged into the developer branch and did not make
it into 2.8.0.
o Log ratios not shiften to median of sample medians as intended (#356).
Thanks @sleyn.
o Fixed crash with small toy examples (fewer than 2000 baits, #363)


Changes in version 2.8.0
Expand Down
40 changes: 40 additions & 0 deletions R/adjustLogRatio.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Adjust tumor vs. normal coverage log ratio for tumor purity and ploidy
#'
#' This function can be used to adjust the log ratio for tumor purity and
#' ploidy for downstream tools that expect a log2 ratio (for example GISTIC).
#'
#'
#' @param ratio Vector of log2 tumor vs normal coverage ratios.
#' @param purity Purity of sample.
#' @param ploidy Ploidy of sample.
#' @param is.log2 \code{log.ratio} is \code{log2} transformed.
#' @param min.ratio Minimum (non-log2-transformed) ratio. Set to approx -8
#' \code{log2} adjusted.
#' @return \code{numeric(length(log.ratio))}, \code{log.ratio} adjusted
#' for \code{purity} and \code{ploidy}
#' @author Markus Riester
#' @references
# * Zack et al. (2012), Pan-cancer patterns of somatic copy number alteration
#' Nature Biotechnology.
#' * Toal (2018), https://github.com/lima1/PureCN/issues/40
#'
#' @examples
#'
#' normal.coverage.file <- system.file("extdata", "example_normal.txt.gz",
#' package = "PureCN")
#' tumor.coverage.file <- system.file("extdata", "example_tumor.txt.gz",
#' package = "PureCN")
#' normal <- readCoverageFile(normal.coverage.file)
#' tumor <- readCoverageFile(tumor.coverage.file)
#' log.ratio <- calculateLogRatio(normal, tumor)
#' log.ratio.adjusted <- adjustLogRatio(log.ratio, 0.65, 1.73)
#'
#' @export adjustLogRatio
adjustLogRatio <- function(ratio, purity, ploidy, is.log2 = TRUE, min.ratio = 2^-8) {
if (is.log2) ratio <- 2^ratio
adjusted <- (purity * ploidy * ratio + 2 * (1 - purity) * ratio - 2 * (1 - purity)) / (purity * ploidy)
adjusted <- pmax(min.ratio, adjusted)
if (is.log2) adjusted <- log2(adjusted)
return(adjusted)
}

47 changes: 47 additions & 0 deletions R/calculateMappingBiasVcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,50 @@ calculateMappingBiasGatk4 <- function(workspace, reference.genome,
gr$clustered[idxa] <- TRUE
return(gr)
}

#' Find High Quality SNPs
#'
#' Function to extract high quality SNPs from the mapping bias database.
#' Useful for generating fingerprinting panels etc.
#'
#'
#' @param mapping.bias.file Generated by \code{\link{calculateMappingBiasVcf}}.
#' @param max.bias Maximum mapping bias
#' @param min.pon Minimum number of normal samples, useful to get reliable
#' mapping bias.
#' @param triallelic By default, ignore positions with multiple alt alleles.
#' @param vcf.file Optional VCF file (for example dbSNP). Needs to be
#' bgzip and tabix processed.
#' @param genome See \code{readVcf}
#' @return A \code{GRanges} object with mapping bias passing filters.
#' If \code{vcf.file} is provided, it will be the variants in the
#' corresponding file overlapping with the passed variants.
#' @author Markus Riester
#' @examples
#'
#' normal.panel.vcf <- system.file("extdata", "normalpanel.vcf.gz",
#' package = "PureCN")
#' bias <- calculateMappingBiasVcf(normal.panel.vcf, genome = "h19")
#'
#' @export findHighQualitySNPs
findHighQualitySNPs <- function(mapping.bias.file, max.bias = 0.2, min.pon = 2,
triallelic = FALSE, vcf.file = NULL, genome) {
if (is(mapping.bias.file, "GRanges")) {
bias <- mapping.bias.file
} else {
bias <- readRDS(mapping.bias.file)
}
bias <- bias[which(abs(bias$bias - 1) <= max.bias & (triallelic | !bias$triallelic) & bias$pon.count >= min.pon), ]
if (is.null(vcf.file)) return(bias)
vcfSeqStyle <- .getSeqlevelsStyle(headerTabix(
TabixFile(vcf.file))$seqnames)

biasRenamedSL <- bias
if (!length(intersect(seqlevelsStyle(bias), vcfSeqStyle))) {
seqlevelsStyle(biasRenamedSL) <- vcfSeqStyle[1]
}
flog.info("Reading VCF...")
vcf <- readVcf(vcf.file, genome = genome, ScanVcfParam(which = reduce(biasRenamedSL)))
ov <- findOverlaps(biasRenamedSL, vcf, type = "equal")
return(vcf[subjectHits(ov)])
}
12 changes: 6 additions & 6 deletions R/createNormalDatabase.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,17 +204,17 @@ optimal.off.target.counts = 120, plot = FALSE, ...) {
fcnts_std[, i] <- fcnts_std[,i] / iv
}
} else {
fcnts_std <- apply(fcnts,2,function(x) x/fcnts_interval_medians)
fcnts_std <- apply(fcnts, 2, function(x) x / fcnts_interval_medians)
}
fcnts_interval_non_zero_medians <- apply(fcnts_std, 1, function(x) median(x[x>0]))
fcnts_std_imp <- apply(fcnts_std, 2, function(x) { x[x<=0] <- fcnts_interval_non_zero_medians[x<=0]; x})
p=0.001
li <- quantile(as.vector(fcnts_std_imp), probs= c(p, 1-p))
fcnts_interval_non_zero_medians <- apply(fcnts_std, 1, function(x) median(x[x > 0]))
fcnts_std_imp <- apply(fcnts_std, 2, function(x) { x[x <= 0] <- fcnts_interval_non_zero_medians[x <= 0]; x})
p <- 0.001
li <- quantile(as.vector(fcnts_std_imp), probs = c(p, 1 - p))
fcnts_std_trunc <- fcnts_std_imp
fcnts_std_trunc[fcnts_std_imp < li[1]] <- li[1]
fcnts_std_trunc[fcnts_std_imp > li[2]] <- li[2]
fcnts_std_final <- apply(fcnts_std_trunc, 2, function(x) log2(x / median(x)))
fcnts_std_final - median(apply(fcnts_std_final,2,median))
fcnts_std_final <- fcnts_std_final - median(apply(fcnts_std_final, 2, median))
s <- svd(fcnts_std_final)

ret <- list(
Expand Down
1 change: 1 addition & 0 deletions R/segmentationCBS.R
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ plot.cnv = TRUE, max.segments = NULL, min.logr.sdev = 0.15, chr.hash = chr.hash)
}

.getAverageWeightPV <- function(seg, weights, perm = 2000) {
perm <- min(length(weights), perm)
num_marks <- sort(unique(seg$num.mark))
.do_permutation <- function(i, l) {
if (l > 25) return(0)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ For outdated R/Bioconductor versions, you can try backporting the latest stable
version (this should work fine for Bioconductor 3.3 and later):

```
BiocManager::install("lima1/PureCN", ref = "RELEASE_3_18")
BiocManager::install("lima1/PureCN", ref = "RELEASE_3_19")
```

If you want the latest and greatest from the developer branch:
Expand Down
5 changes: 4 additions & 1 deletion inst/extdata/PureCN.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ option_list <- list(
make_option(c("--min-fraction-offtarget"), action = "store", type = "double",
default = formals(PureCN::filterIntervals)$min.fraction.offtarget,
help = "Interval Filter: Ignore off-target internals when only the specified fraction of all intervals are off-target intervals [default %default]"),
make_option(c("--num-eigen-vectors"), action = "store", type = "integer",
default = formals(PureCN::calculateTangentNormal)$num.eigen,
help = "Coverage Normalization: Number of eigen vectors when --normaldb is provided [default %default]"),
make_option(c("--fun-segmentation"), action = "store", type = "character", default = "CBS",
help = "Segmentation: Algorithm. CBS, PSCBS, GATK4, Hclust, or none [default %default]"),
make_option(c("--alpha"), action = "store", type = "double",
Expand Down Expand Up @@ -283,7 +286,7 @@ if (file.exists(file.rds) && !opt$force) {
if (!is.null(normalDB)) {
if (is.null(normal.coverage.file)) {
normal.coverage.file <- calculateTangentNormal(tumor.coverage.file,
normalDB)
normalDB, num.eigen = opt[["num_eigen_vectors"]])
}
} else if (is.null(normal.coverage.file) && is.null(seg.file) &&
is.null(log.ratio)) {
Expand Down
47 changes: 47 additions & 0 deletions man/adjustLogRatio.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/filterVcfMuTect2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions man/findHighQualitySNPs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test_adjustLogRatio.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
context("adjustLogRatio")

test_that("Function returns expected values for example coverage", {
data(purecn.example.output)
log.ratio <- purecn.example.output$results[[1]]$seg$seg.mean
purity <- purecn.example.output$results[[1]]$purity
ploidy <- purecn.example.output$results[[1]]$ploidy
log.ratio.adjusted <- adjustLogRatio(log.ratio, purity, ploidy)
total.ploidy <- 1.73
p <- 1
log.ratio.offset <- 0
opt.C <- (2^(log.ratio.adjusted + log.ratio.offset) * total.ploidy)/p - ((2 * (1 - p))/p)
expect_lt(abs(min(log.ratio.adjusted, na.rm=TRUE) + 8), 0.001)
expect_lt(median(abs(opt.C - purecn.example.output$results[[1]]$seg$C)), 0.1)
})

0 comments on commit f14ac86

Please sign in to comment.