Skip to content

Commit

Permalink
Merge pull request #106 from azodichr/splatPop
Browse files Browse the repository at this point in the history
Add the splatPop simulation, an extension to splat that includes eQTLs
  • Loading branch information
lazappi authored Oct 22, 2020
2 parents 5671553 + 0b4b165 commit 9606839
Show file tree
Hide file tree
Showing 52 changed files with 2,824 additions and 33 deletions.
18 changes: 14 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 1.13.0
Date: 2020-04-29
Version: 1.13.1
Date: 2020-10-22
Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"),
email = "luke@lazappi.id.au",
comment = c(ORCID = "0000-0001-7744-8565")),
person("Belinda", "Phipson", role = c("aut"),
email = "belinda.phipson@petermac.org",
comment = c(ORCID = "0000-0002-1711-7454")),
person("Christina", "Azodi", role = c("ctb"),
email = "cazodi@svi.edu.au",
comment = c(ORCID = "0000-0002-6097-606X")),
person("Alicia", "Oshlack", role = c("aut"),
email = "alicia.oshlack@petermac.org",
comment = c(ORCID = "0000-0001-9788-5690")))
Expand Down Expand Up @@ -45,12 +48,14 @@ Suggests:
BiocStyle,
covr,
cowplot,
magick,
knitr,
limSolve,
lme4,
progress,
pscl,
testthat,
preprocessCore,
rmarkdown,
scDD,
scran,
Expand All @@ -63,12 +68,17 @@ Suggests:
spelling,
igraph,
DropletUtils,
BiocSingular
BiocSingular,
VariantAnnotation,
Biostrings,
GenomeInfoDb,
GenomicRanges,
IRanges
biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing,
Software, ImmunoOncology
URL: https://github.com/Oshlack/splatter
BugReports: https://github.com/Oshlack/splatter/issues
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
Encoding: UTF-8
VignetteBuilder: knitr
Language: en-GB
20 changes: 20 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ export(makeDiffPanel)
export(makeOverallPanel)
export(mfaEstimate)
export(mfaSimulate)
export(mockBulkMatrix)
export(mockBulkeQTL)
export(mockGFF)
export(mockVCF)
export(newBASiCSParams)
export(newKersplatParams)
export(newLun2Params)
Expand All @@ -54,6 +58,7 @@ export(newSCDDParams)
export(newSimpleParams)
export(newSparseDCParams)
export(newSplatParams)
export(newSplatPopParams)
export(newZINBParams)
export(phenoEstimate)
export(phenoSimulate)
Expand All @@ -66,6 +71,11 @@ export(simpleSimulate)
export(sparseDCEstimate)
export(sparseDCSimulate)
export(splatEstimate)
export(splatPopEstimate)
export(splatPopQuantNorm)
export(splatPopSimulate)
export(splatPopSimulateMeans)
export(splatPopSimulateSC)
export(splatSimulate)
export(splatSimulateGroups)
export(splatSimulatePaths)
Expand All @@ -83,13 +93,15 @@ exportClasses(SCDDParams)
exportClasses(SimpleParams)
exportClasses(SparseDCParams)
exportClasses(SplatParams)
exportClasses(SplatPopParams)
exportClasses(ZINBParams)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply)
importFrom(S4Vectors,"metadata<-")
importFrom(S4Vectors,metadata)
importFrom(SingleCellExperiment,"cpm<-")
importFrom(SingleCellExperiment,SingleCellExperiment)
importFrom(SingleCellExperiment,cbind)
importFrom(SingleCellExperiment,cpm)
importFrom(SummarizedExperiment,"assays<-")
importFrom(SummarizedExperiment,"colData<-")
Expand Down Expand Up @@ -129,7 +141,9 @@ importFrom(ggplot2,theme)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(grDevices,boxplot.stats)
importFrom(locfit,locfit)
importFrom(matrixStats,rowMedians)
importFrom(methods,"slot<-")
importFrom(methods,as)
importFrom(methods,callNextMethod)
Expand All @@ -141,14 +155,17 @@ importFrom(methods,slotNames)
importFrom(methods,validObject)
importFrom(stats,aggregate)
importFrom(stats,approxfun)
importFrom(stats,complete.cases)
importFrom(stats,cor)
importFrom(stats,dbeta)
importFrom(stats,density)
importFrom(stats,dnbinom)
importFrom(stats,ks.test)
importFrom(stats,median)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,nls)
importFrom(stats,quantile)
importFrom(stats,rbinom)
importFrom(stats,rchisq)
importFrom(stats,rgamma)
Expand All @@ -157,5 +174,8 @@ importFrom(stats,rnbinom)
importFrom(stats,rnorm)
importFrom(stats,rpois)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,shapiro.test)
importFrom(utils,data)
importFrom(utils,head)
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# DEVELOPMENT VERSION

## Version 1.13.1 (2020-10-22)

* Add the splatPop simulation (PR #106)

## Version 1.13.0 (2020-04-29)

* Bioconductor 3.12 devel
Expand Down
98 changes: 98 additions & 0 deletions R/AllClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ setClass("Params",
prototype = prototype(nGenes = 10000, nCells = 100,
seed = sample(seq_len(1e6), 1)))


#' The SimpleParams class
#'
#' S4 class that holds parameters for the simple simulation.
Expand Down Expand Up @@ -417,6 +418,103 @@ setClass("KersplatParams",
ambient.scale = 0.05,
ambient.nEmpty = 0))

#' The SplatPopParams class
#'
#' S4 class that holds parameters for the splatPop simulation. It extends
#' \code{\link{SplatParams}}, so every Splat parameter is also available.
#'
#' @section Parameters:
#'
#' In addition to the \code{\link{SplatParams}} parameters, splatPop simulation
#' requires the following parameters:
#'
#' \describe{
#'   \item{\code{[similarity.scale]}}{Scaling factor for pop.cv.param.rate,
#'   where values larger than 1 increase the similarity between individuals in
#'   the population and values less than one make the individuals less
#'   similar.}
#'   \item{\code{[eqtl.n]}}{The number (>1) or percent (<=1) of genes to
#'   assign eQTL effects.}
#'   \item{\code{[eqtl.dist]}}{Maximum distance between eSNP and eGene.}
#'   \item{\code{[eqtl.maf.min]}}{Minimum Minor Allele Frequency of eSNPs.}
#'   \item{\code{[eqtl.maf.max]}}{Maximum Minor Allele Frequency of eSNPs.}
#'   \item{\code{[eqtl.group.specific]}}{Percent of eQTL effects to simulate
#'   as group specific.}
#'   \item{\emph{eQTL Effect size distribution parameters. Defaults estimated
#'   from GTEx eQTL mapping results, see vignette for more information.}}{
#'     \describe{
#'       \item{\code{eqtl.ES.shape}}{Shape parameter for the effect size
#'       gamma distribution.}
#'       \item{\code{eqtl.ES.rate}}{Rate parameter for the effect size
#'       gamma distribution.}
#'     }
#'   }
#'   \item{\emph{Bulk Mean Expression distribution parameters. Defaults
#'   estimated from GTEx data, see vignette for more information.}}{
#'     \describe{
#'       \item{\code{pop.mean.shape}}{Shape parameter for the mean (i.e.
#'       bulk) expression gamma distribution.}
#'       \item{\code{pop.mean.rate}}{Rate parameter for the mean (i.e.
#'       bulk) expression gamma distribution.}
#'     }
#'   }
#'   \item{\emph{Bulk Expression Coefficient of Variation distribution
#'   parameters binned. Defaults estimated from GTEx data, see vignette for
#'   more information.}}{
#'     \describe{
#'       \item{\code{[pop.cv.bins]}}{The number of gene mean bins. It must
#'       equal the number of rows of \code{pop.cv.param}, so set it before
#'       replacing \code{pop.cv.param} with a table of a different size.}
#'       \item{\code{pop.cv.param}}{Dataframe containing gene
#'       mean bin range, and the CV shape, and CV rate parameters for
#'       each of those bins (columns \code{start}, \code{end},
#'       \code{shape} and \code{rate}).}
#'     }
#'   }
#' }
#'
#' The parameters not shown in brackets can be estimated from real data using
#' \code{\link{splatPopEstimate}}. For details of the eQTL simulation
#' see \code{\link{splatPopSimulate}}.
#'
#' @name SplatPopParams
#' @rdname SplatPopParams
#' @aliases SplatPopParams-class
#' @exportClass SplatPopParams
setClass("SplatPopParams",
contains = "SplatParams",
slots = c(similarity.scale = "numeric",
pop.mean.shape = "numeric",
pop.mean.rate = "numeric",
pop.cv.bins = "numeric",
pop.cv.param = "data.frame",
eqtl.n = "numeric",
eqtl.dist = "numeric",
eqtl.maf.min = "numeric",
eqtl.maf.max = "numeric",
eqtl.ES.shape = "numeric",
eqtl.ES.rate = "numeric",
eqtl.group.specific = "numeric"),
# Default values for every slot declared above.
prototype = prototype(similarity.scale = 1.0,
pop.mean.shape = 0.3395709,
pop.mean.rate = 0.008309486,
pop.cv.bins = 10,
# One row per bin (10 rows, matching pop.cv.bins above).
pop.cv.param =
data.frame(
start = c(0, 0.476, 0.955, 1.86, 3.49,
6.33, 10.4, 16.3, 26.5,49.9),
end = c(0.476 ,0.955, 1.86, 3.49, 6.33,
10.4, 16.3, 26.5, 49.9, 1e+10),
shape = c(11.636709, 5.084263, 3.161149,
2.603407, 2.174618, 2.472718,
2.911565, 3.754947, 3.623545,
2.540001),
rate = c(8.229737, 3.236401, 1.901426,
1.615142, 1.467896, 2.141105,
3.005807, 4.440894, 4.458207,
2.702462)),
eqtl.n = 1,
eqtl.dist = 1000000,
eqtl.maf.min = 0.05,
eqtl.maf.max = 0.5,
eqtl.ES.shape = 2.538049,
eqtl.ES.rate = 5.962323,
eqtl.group.specific = 0.2))

#' The LunParams class
#'
#' S4 class that holds parameters for the Lun simulation.
Expand Down
94 changes: 94 additions & 0 deletions R/SplatPopParams-methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#' @rdname newParams
#' @importFrom methods new
#' @export
newSplatPopParams <- function(...) {

    # splatPop relies on packages that are only suggested by splatter, so
    # fail early (on the first one that is missing) with a clear message.
    for (dependency in c("VariantAnnotation", "preprocessCore")) {
        if (requireNamespace(dependency, quietly = TRUE)) {
            next
        }
        stop("The splatPop simulation requires the ", dependency, " package.")
    }

    # Start from the class defaults, then apply any user supplied overrides.
    defaults <- new("SplatPopParams")
    updated <- setParams(defaults, ...)

    return(updated)
}


#' @importFrom checkmate checkInt checkIntegerish checkNumber checkNumeric
#' checkFlag checkDataFrame
setValidity("SplatPopParams", function(object) {

    # Validity method for SplatPopParams objects.
    #
    # Runs one checkmate check per splatPop-specific parameter. Each check
    # yields TRUE on success or a message string on failure. The method
    # returns TRUE when every check passes, otherwise a character vector of
    # "<parameter>: <message>" entries for the failed checks.

    # Fetch every slot value as a named list.
    v <- getParams(object, c(slotNames(object)))

    checks <- c(eqtl.n = checkNumber(v$eqtl.n, lower = 0),
                eqtl.dist = checkInt(v$eqtl.dist, lower = 1),
                eqtl.maf.min = checkNumber(v$eqtl.maf.min, lower = 0,
                                           upper = 0.5),
                eqtl.maf.max = checkNumber(v$eqtl.maf.max, lower = 0,
                                           upper = 0.5),
                eqtl.ES.shape = checkNumber(v$eqtl.ES.shape, lower = 0),
                eqtl.ES.rate = checkNumber(v$eqtl.ES.rate, lower = 0),
                eqtl.group.specific = checkNumber(v$eqtl.group.specific,
                                                  lower = 0, upper = 1),
                pop.mean.shape = checkNumber(v$pop.mean.shape, lower = 0),
                pop.mean.rate = checkNumber(v$pop.mean.rate, lower = 0),
                pop.cv.bins = checkInt(v$pop.cv.bins, lower = 1),
                pop.cv.param = checkDataFrame(v$pop.cv.param),
                similarity.scale = checkNumber(v$similarity.scale, lower = 0))

    # c() coerces to character as soon as one check fails, so the comparison
    # with TRUE works both for an all-logical and for a mixed vector.
    if (all(checks == TRUE)) {
        valid <- TRUE
    } else {
        valid <- checks[checks != TRUE]
        valid <- paste(names(valid), valid, sep = ": ")
    }

    return(valid)
})


#' @importFrom methods callNextMethod
setMethod("show", "SplatPopParams", function(object) {

    # Display labels (names) mapped to the slots they describe (values),
    # grouped into the two splatPop-specific sections.
    population <- c("(mean.shape)" = "pop.mean.shape",
                    "(mean.rate)" = "pop.mean.rate",
                    "[similarity.scale]" = "similarity.scale",
                    "[cv.bins]" = "pop.cv.bins",
                    "(cv.params)" = "pop.cv.param")

    eqtl <- c("[eqtl.n]" = "eqtl.n",
              "[eqtl.dist]" = "eqtl.dist",
              "[eqtl.maf.min]" = "eqtl.maf.min",
              "[eqtl.maf.max]" = "eqtl.maf.max",
              "[eqtl.group.specific]" = "eqtl.group.specific",
              "(eqtl.ES.shape)" = "eqtl.ES.shape",
              "(eqtl.ES.rate)" = "eqtl.ES.rate")

    sections <- list("Population params:" = population,
                     "eQTL params:" = eqtl)

    # Let the parent class print its own parameters first, then append the
    # splatPop sections.
    callNextMethod()
    showPP(object, sections)
})


#' @rdname setParam
setMethod("setParam", "SplatPopParams", function(object, name, value) {

    # Set a single SplatPopParams parameter after splatPop-specific checks.
    #
    # object: the SplatPopParams object to update.
    # name:   name of the parameter to set (a single string).
    # value:  the new value for that parameter.
    #
    # Returns the updated object produced by the next (inherited) setParam
    # method. Stops with an error when the new value is inconsistent with a
    # related parameter already stored in the object.
    checkmate::assertString(name)

    # pop.cv.param must have as many rows as pop.cv.bins. Values that are not
    # data frames skip this check and are left for the inherited method to
    # reject.
    if (name == "pop.cv.param" && is.data.frame(value)) {
        if (getParam(object, "pop.cv.bins") != nrow(value)) {
            stop("Need to set pop.cv.bins to length of pop.cv.param")
        }
    }

    # Compare the *incoming* minimum with the stored maximum. Comparing the
    # stored minimum instead would ignore the new value, and would block any
    # attempt to repair a range that is already inverted. Values that are not
    # a single number are left for the regular validity checks.
    if (name == "eqtl.maf.min" && checkmate::testNumber(value)) {
        if (value >= getParam(object, "eqtl.maf.max")) {
            stop("Range of acceptable Minor Allele Frequencies is too small...
Be sure eqtl.maf.min < eqtl.maf.max.")
        }
    }

    # name and value are unchanged, so the inherited method sees the
    # original arguments.
    object <- callNextMethod()

    return(object)
})
6 changes: 6 additions & 0 deletions R/listSims.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ listSims <- function(print = TRUE) {
"The Kersplat simulation extends the Splat model by adding a
gene network, more complex cell structure, doublets and
empty cells (Experimental)."),
c("splatPop", "splatPop", "",
"Oshlack/splatter",
"The splatPop simulation enables splat simulations to be
generated for multiple individuals in a population,
accounting for correlation structure by simulating
expression quantitative trait loci (eQTL)."),
c("Simple", "simple", "10.1186/s13059-017-1305-0",
"Oshlack/splatter",
"A simple simulation with gamma means and negative binomial
Expand Down
Loading

0 comments on commit 9606839

Please sign in to comment.