diff --git a/DESCRIPTION b/DESCRIPTION
index d3bd9814..74659a01 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.13.0
-Date: 2020-04-29
+Version: 1.13.1
+Date: 2020-10-22
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
       email = "luke@lazappi.id.au",
@@ -10,6 +10,9 @@ Authors@R:
       person("Belinda", "Phipson", role = c("aut"),
       email = "belinda.phipson@petermac.org",
       comment = c(ORCID = "0000-0002-1711-7454")),
+      person("Christina", "Azodi", role = c("ctb"),
+      email = "cazodi@svi.edu.au",
+      comment = c(ORCID = "0000-0002-6097-606X")),
       person("Alicia", "Oshlack", role = c("aut"),
       email = "alicia.oshlack@petermac.org",
       comment = c(ORCID = "0000-0001-9788-5690")))
@@ -45,12 +48,14 @@ Suggests:
     BiocStyle,
     covr,
     cowplot,
+    magick,
     knitr,
     limSolve,
     lme4,
     progress,
     pscl,
     testthat,
+    preprocessCore,
     rmarkdown,
     scDD,
     scran,
@@ -63,12 +68,17 @@ Suggests:
     spelling,
     igraph,
     DropletUtils,
-    BiocSingular
+    BiocSingular,
+    VariantAnnotation,
+    Biostrings,
+    GenomeInfoDb,
+    GenomicRanges,
+    IRanges
 biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing,
     Software, ImmunoOncology
 URL: https://github.com/Oshlack/splatter
 BugReports: https://github.com/Oshlack/splatter/issues
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
 Encoding: UTF-8
 VignetteBuilder: knitr
 Language: en-GB
diff --git a/NAMESPACE b/NAMESPACE
index b598fdb6..09d64700 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -44,6 +44,10 @@ export(makeDiffPanel)
 export(makeOverallPanel)
 export(mfaEstimate)
 export(mfaSimulate)
+export(mockBulkMatrix)
+export(mockBulkeQTL)
+export(mockGFF)
+export(mockVCF)
 export(newBASiCSParams)
 export(newKersplatParams)
 export(newLun2Params)
@@ -54,6 +58,7 @@ export(newSCDDParams)
 export(newSimpleParams)
 export(newSparseDCParams)
 export(newSplatParams)
+export(newSplatPopParams)
 export(newZINBParams)
 export(phenoEstimate)
 export(phenoSimulate)
@@ -66,6 +71,11 @@ export(simpleSimulate)
 export(sparseDCEstimate)
 export(sparseDCSimulate)
 export(splatEstimate)
+export(splatPopEstimate)
+export(splatPopQuantNorm)
+export(splatPopSimulate)
+export(splatPopSimulateMeans)
+export(splatPopSimulateSC)
 export(splatSimulate)
 export(splatSimulateGroups)
 export(splatSimulatePaths)
@@ -83,6 +93,7 @@ exportClasses(SCDDParams)
 exportClasses(SimpleParams)
 exportClasses(SparseDCParams)
 exportClasses(SplatParams)
+exportClasses(SplatPopParams)
 exportClasses(ZINBParams)
 importFrom(BiocParallel,SerialParam)
 importFrom(BiocParallel,bplapply)
@@ -90,6 +101,7 @@ importFrom(S4Vectors,"metadata<-")
 importFrom(S4Vectors,metadata)
 importFrom(SingleCellExperiment,"cpm<-")
 importFrom(SingleCellExperiment,SingleCellExperiment)
+importFrom(SingleCellExperiment,cbind)
 importFrom(SingleCellExperiment,cpm)
 importFrom(SummarizedExperiment,"assays<-")
 importFrom(SummarizedExperiment,"colData<-")
@@ -129,7 +141,9 @@ importFrom(ggplot2,theme)
 importFrom(ggplot2,theme_minimal)
 importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
+importFrom(grDevices,boxplot.stats)
 importFrom(locfit,locfit)
+importFrom(matrixStats,rowMedians)
 importFrom(methods,"slot<-")
 importFrom(methods,as)
 importFrom(methods,callNextMethod)
@@ -141,6 +155,7 @@ importFrom(methods,slotNames)
 importFrom(methods,validObject)
 importFrom(stats,aggregate)
 importFrom(stats,approxfun)
+importFrom(stats,complete.cases)
 importFrom(stats,cor)
 importFrom(stats,dbeta)
 importFrom(stats,density)
@@ -148,7 +163,9 @@ importFrom(stats,dnbinom)
 importFrom(stats,ks.test)
 importFrom(stats,median)
 importFrom(stats,model.matrix)
+importFrom(stats,na.omit)
 importFrom(stats,nls)
+importFrom(stats,quantile)
 importFrom(stats,rbinom)
 importFrom(stats,rchisq)
 importFrom(stats,rgamma)
@@ -157,5 +174,8 @@ importFrom(stats,rnbinom)
 importFrom(stats,rnorm)
 importFrom(stats,rpois)
 importFrom(stats,runif)
+importFrom(stats,sd)
+importFrom(stats,setNames)
 importFrom(stats,shapiro.test)
+importFrom(utils,data)
 importFrom(utils,head)
diff --git a/NEWS.md b/NEWS.md
index 5c469027..cee24790 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # DEVELOPMENT VERSION
 
+## Version 1.13.1 (2020-10-22)
+
+* Add the splatPop simulation (PR #106)
+
 ## Version 1.13.0 (2020-04-29)
 
 * Bioconductor 3.12 devel
diff --git a/R/AllClasses.R b/R/AllClasses.R
index bc1ce616..cc9dc41d 100644
--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -25,6 +25,7 @@ setClass("Params",
          prototype = prototype(nGenes = 10000, nCells = 100,
                                seed = sample(seq_len(1e6), 1)))
 
+
 #' The SimpleParams class
 #'
 #' S4 class that holds parameters for the simple simulation.
@@ -417,6 +418,103 @@ setClass("KersplatParams",
                                ambient.scale = 0.05,
                                ambient.nEmpty = 0))
 
+#' The SplatPopParams class
+#'
+#' S4 class that holds parameters for the splatPop simulation.
+#'
+#' @section Parameters:
+#'
+#' In addition to the \code{\link{SplatParams}} parameters, splatPop simulation
+#' requires the following parameters:
+#'
+#' \describe{
+#'     \item{\code{[similarity.scale]}}{Scaling factor for pop.cv.param.rate,
+#'     where values larger than 1 increase the similarity between individuals in
+#'     the population and values less than one make the individuals less
+#'     similar.}
+#'     \item{\code{[eqtl.n]}}{The number (>1) or percent (<=1) of genes to
+#'     assign eQTL effects.}
+#'     \item{\code{[eqtl.dist]}}{Maximum distance between eSNP and eGene}
+#'     \item{\code{[eqtl.maf.min]}}{Minimum Minor Allele Frequency of eSNPs.}
+#'     \item{\code{[eqtl.maf.max]}}{Maximum Minor Allele Frequency of eSNPs.}
+#'     \item{\code{[eqtl.group.specific]}}{Percent of eQTL effects to simulate
+#'     as group specific.}
+#'     \item{\emph{eQTL Effect size distribution parameters. Defaults estimated
+#'     from GTEx eQTL mapping results, see vignette for more information.}}{
+#'         \describe{
+#'             \item{\code{eqtl.ES.shape}}{Shape parameter for the effect size
+#'             gamma distribution.}
+#'             \item{\code{eqtl.ES.rate}}{Rate parameter for the effect size
+#'             gamma distribution.}
+#'         }
+#'     }
+#'     \item{\emph{Bulk Mean Expression distribution parameters. Defaults
+#'     estimated from GTEx data, see vignette for more information.}}{
+#'         \describe{
+#'             \item{\code{pop.mean.shape}}{Shape parameter for the mean (i.e.
+#'             bulk) expression gamma distribution}
+#'             \item{\code{pop.mean.rate}}{Rate parameter for the mean (i.e.
+#'             bulk) expression gamma distribution}
+#'         }
+#'     }
+#'     \item{\emph{Bulk Expression Coefficient of Variation distribution
+#'     parameters binned. Defaults estimated from GTEx data, see vignette for
+#'     more information.}}{
+#'         \describe{
+#'             \item{\code{pop.cv.param}}{Dataframe containing gene
+#'             mean bin range, and the CV shape, and CV rate parameters for
+#'             each of those bins.}
+#'         }
+#'     }
+#'}
+#' The parameters not shown in brackets can be estimated from real data using
+#' \code{\link{splatPopEstimate}}. For details of the eQTL simulation
+#' see \code{\link{splatPopSimulate}}.
+#'
+#' @name SplatPopParams
+#' @rdname SplatPopParams
+#' @aliases SplatPopParams-class
+#' @exportClass SplatPopParams
+setClass("SplatPopParams",
+         contains = "SplatParams",
+         slots = c(similarity.scale = "numeric",
+                   pop.mean.shape = "numeric",
+                   pop.mean.rate = "numeric",
+                   pop.cv.bins = "numeric",
+                   pop.cv.param = "data.frame",
+                   eqtl.n = "numeric",
+                   eqtl.dist = "numeric",
+                   eqtl.maf.min = "numeric",
+                   eqtl.maf.max = "numeric",
+                   eqtl.ES.shape = "numeric",
+                   eqtl.ES.rate = "numeric",
+                   eqtl.group.specific = "numeric"),
+         prototype = prototype(similarity.scale = 1.0,
+                               pop.mean.shape = 0.3395709,
+                               pop.mean.rate = 0.008309486,
+                               pop.cv.bins = 10,
+                               pop.cv.param =
+                                   data.frame(
+                                       start = c(0, 0.476, 0.955, 1.86, 3.49,
+                                                 6.33, 10.4, 16.3, 26.5,49.9),
+                                       end = c(0.476 ,0.955, 1.86, 3.49, 6.33,
+                                               10.4, 16.3, 26.5, 49.9, 1e+10),
+                                       shape = c(11.636709, 5.084263, 3.161149,
+                                                 2.603407, 2.174618, 2.472718,
+                                                 2.911565, 3.754947, 3.623545,
+                                                 2.540001),
+                                       rate = c(8.229737, 3.236401, 1.901426,
+                                                1.615142, 1.467896, 2.141105,
+                                                3.005807, 4.440894, 4.458207,
+                                                2.702462)),
+                               eqtl.n = 1,
+                               eqtl.dist = 1000000,
+                               eqtl.maf.min = 0.05,
+                               eqtl.maf.max = 0.5,
+                               eqtl.ES.shape = 2.538049,
+                               eqtl.ES.rate = 5.962323,
+                               eqtl.group.specific = 0.2))
+
 #' The LunParams class
 #'
 #' S4 class that holds parameters for the Lun simulation.
diff --git a/R/SplatPopParams-methods.R b/R/SplatPopParams-methods.R
new file mode 100644
index 00000000..71a96c79
--- /dev/null
+++ b/R/SplatPopParams-methods.R
@@ -0,0 +1,94 @@
+#' @rdname newParams
+#' @importFrom methods new
+#' @export
+newSplatPopParams <- function(...) {
+
+    for (pkg in c("VariantAnnotation", "preprocessCore")) {
+        if (!requireNamespace(pkg, quietly = TRUE)) {
+            stop("The splatPop simulation requires the ", pkg, " package.")
+        }
+    }
+
+    params <- new("SplatPopParams")
+    params <- setParams(params, ...)
+
+    return(params)
+}
+
+
+#' @importFrom checkmate checkInt checkIntegerish checkNumber checkNumeric
+#' checkFlag
+setValidity("SplatPopParams", function(object) {
+
+    v <- getParams(object, c(slotNames(object)))
+
+    checks <- c(eqtl.n = checkNumber(v$eqtl.n, lower = 0),
+                eqtl.dist = checkInt(v$eqtl.dist, lower = 1),
+                eqtl.maf.min = checkNumber(v$eqtl.maf.min, lower = 0,
+                                           upper = 0.5),
+                eqtl.maf.max = checkNumber(v$eqtl.maf.max, lower = 0,
+                                           upper = 0.5),
+                eqtl.ES.shape = checkNumber(v$eqtl.ES.shape, lower = 0),
+                eqtl.ES.rate = checkNumber(v$eqtl.ES.rate, lower = 0),
+                eqtl.group.specific = checkNumber(v$eqtl.group.specific,
+                                                 lower = 0, upper = 1),
+                pop.mean.shape = checkNumber(v$pop.mean.shape, lower = 0),
+                pop.mean.rate = checkNumber(v$pop.mean.rate, lower = 0),
+                pop.cv.bins = checkInt(v$pop.cv.bins, lower = 1),
+                pop.cv.param = checkDataFrame(v$pop.cv.param),
+                similarity.scale = checkNumber(v$similarity.scale, lower = 0))
+
+    if (all(checks == TRUE)) {
+        valid <- TRUE
+    } else {
+        valid <- checks[checks != TRUE]
+        valid <- paste(names(valid), valid, sep = ": ")
+    }
+
+    return(valid)
+})
+
+
+#' @importFrom methods callNextMethod
+setMethod("show", "SplatPopParams", function(object) {
+
+    pp <- list("Population params:" = c("(mean.shape)" = "pop.mean.shape",
+                                        "(mean.rate)" = "pop.mean.rate",
+                                        "[similarity.scale]" = "similarity.scale",
+                                        "[cv.bins]" = "pop.cv.bins",
+                                        "(cv.params)" = "pop.cv.param"),
+               "eQTL params:" = c("[eqtl.n]"    = "eqtl.n",
+                                  "[eqtl.dist]" = "eqtl.dist",
+                                  "[eqtl.maf.min]" = "eqtl.maf.min",
+                                  "[eqtl.maf.max]" = "eqtl.maf.max",
+                                  "[eqtl.group.specific]" = "eqtl.group.specific",
+                                  "(eqtl.ES.shape)" = "eqtl.ES.shape",
+                                  "(eqtl.ES.rate)" = "eqtl.ES.rate"))
+
+    callNextMethod()
+    showPP(object, pp)
+})
+
+
+#' @rdname setParam
+setMethod("setParam", "SplatPopParams", function(object, name, value) {
+    checkmate::assertString(name)
+
+    # splatPopParam checks
+    if (name == "pop.cv.param") {
+        if (getParam(object, "pop.cv.bins") != nrow(value)) {
+            stop("Need to set pop.cv.bins to length of pop.cv.param")
+        }
+    }
+
+    if (name == "eqtl.maf.min") {
+        if (getParam(object, "eqtl.maf.min") >= getParam(object, "eqtl.maf.max")) {
+            stop("Range of acceptable Minor Allele Frequencies is too small...
+                 Be sure eqtl.maf.min < eqtl.maf.max.")
+        }
+    }
+
+    object <- callNextMethod()
+
+    return(object)
+})
diff --git a/R/listSims.R b/R/listSims.R
index 891bb408..48f67a0c 100644
--- a/R/listSims.R
+++ b/R/listSims.R
@@ -36,6 +36,12 @@ listSims <- function(print = TRUE) {
                    "The Kersplat simulation extends the Splat model by adding a
                    gene network, more complex cell structure, doublets and
                    empty cells (Experimental)."),
+                 c("splatPop", "splatPop", "",
+                   "Oshlack/splatter",
+                   "The splatPop simulation enables splat simulations to be 
+                   generated for multiple individuals in a population, 
+                   accounting for correlation structure by simulating 
+                   expression quantitative trait loci (eQTL)."),
                  c("Simple", "simple", "10.1186/s13059-017-1305-0",
                    "Oshlack/splatter",
                    "A simple simulation with gamma means and negative binomial
diff --git a/R/mock-data.R b/R/mock-data.R
new file mode 100644
index 00000000..48899cc6
--- /dev/null
+++ b/R/mock-data.R
@@ -0,0 +1,156 @@
+#' Generate mock gff
+#'
+#' Quick function to generate a mock gff.
+#'
+#' @param n.genes Number of genes in mock gff file
+#' @param chromosome Chromosome name
+#'
+#' @return data.frame containing mock gff data.
+#'
+#' @export
+#'
+mockGFF <- function(n.genes = 500, chromosome = 22){
+
+    mock.gff <- data.frame(list(V1 = chromosome,
+                                V2 = "source",
+                                V3 = "gene",
+                                V4 = sort(sample(1e4:2e8, n.genes))))
+    mock.gff$V5 <- mock.gff$V4 + floor(rnorm(n.genes, 1500, 1000))
+    mock.gff[, c("V6", "V7", "V8", "V9")] <- "."
+
+    return(mock.gff)
+}
+
+
+#' Generate mock vcf
+#'
+#' Quick function to generate mock vcf file. Note this data has unrealistic
+#' population structure.
+#'
+#' @param n.snps Number of SNPs in mock vcf file.
+#' @param n.samples Number of samples in mock bulk data.
+#' @param chromosome Chromosome name
+#'
+#' @return data.frame containing mock gff data.
+#'
+#' @importFrom stats setNames
+#'
+#' @export
+#'
+mockVCF <- function(n.snps = 1e4, n.samples = 10, chromosome = 22){
+
+
+    if (!requireNamespace("VariantAnnotation", quietly = TRUE)) {
+        stop("Creating a mock VCF requires the 'VariantAnnotation' package.")
+    }
+
+    sample_names <- paste0("sample_", formatC(seq_len(n.samples),
+                                              width = nchar(n.samples),
+                                              format = "d",
+                                              flag = "0"))
+    snp_names <- paste0("snp_", formatC(seq_len(n.snps),
+                                        width = nchar(n.snps),
+                                        format = "d", flag = "0"))
+    # rowRanges
+    vcf.rowRanges <- GenomicRanges::GRanges(
+        seqnames = S4Vectors::Rle(rep(chromosome, n.snps)),
+        ranges = IRanges::IRanges(sample(1:2e8, n.snps, replace = FALSE),
+                                  names = snp_names),
+        strand = S4Vectors::Rle(BiocGenerics::strand(rep("*", n.snps))),
+        paramRangeID = S4Vectors::Rle(rep(NA, n.snps))
+    )
+
+    # Geno
+    genotypes <- c(rep("0|0", 100), rep("1|0", 10), rep("1|1", 10))
+    geno <- setNames(data.frame(matrix(ncol = n.samples, nrow = n.snps)),
+                     sample_names)
+    geno[,  sample_names] <- sample(genotypes, n.samples * n.snps,
+                                    replace = TRUE)
+    row.names(geno) <- snp_names
+    geno <- as.matrix(geno)
+
+    # colData, info, and fixed
+    vcf.col.data <- S4Vectors::DataFrame(list(Samples = seq_len(n.samples)))
+    row.names(vcf.col.data) <- sample_names
+
+    vcf.info <- S4Vectors::DataFrame(list(VT = rep("SNP", n.snps)))
+    row.names(vcf.info) <- snp_names
+
+    vcf.fixed <- S4Vectors::DataFrame(
+        REF = Biostrings::DNAStringSet(rep("A", n.snps)),
+        ALT = rep(Biostrings::DNAStringSetList("T"), n.snps),
+        QUAL = rep(100, n.snps),
+        FILTER = rep("PASS", n.snps)
+    )
+
+    # Merge into mock VCF object
+    mock.vcf <- VariantAnnotation::VCF(rowRanges = vcf.rowRanges,
+                colData = vcf.col.data,
+                info = vcf.info,
+                fixed = vcf.fixed,
+                geno = list(GT=geno),
+                collapsed = TRUE)
+
+    return(mock.vcf)
+}
+
+
+#' Generate mock bulk population scale expression data
+#'
+#' Quick function to generate mock bulk expression data for a population, with
+#' parameters estimated using real thyroid tissue data from GTEx.
+#'
+#' @param n.genes Number of genes in mock bulk data.
+#' @param n.samples Number of samples in mock bulk data.
+#'
+#' @return matrix containing mock bulk expression data.
+#'
+#' @export
+
+mockBulkMatrix <- function(n.genes = 1000, n.samples = 100){
+
+    tmp.params <- newSplatPopParams()
+    mean.shape <- getParam(tmp.params, "pop.mean.shape")
+    mean.rate <- getParam(tmp.params, "pop.mean.rate")
+    cv.df <- getParam(tmp.params, "pop.cv.param")
+    cv.shape <- cv.df[5, "shape"]
+    cv.rate <- cv.df[5, "rate"]
+
+    key <- data.frame(list(id = seq_len(n.genes),
+                           mean = rgamma(n.genes, mean.shape, mean.rate),
+                           cv = rgamma(n.genes, cv.shape, cv.rate)))
+
+    mock.means <- lapply(key$id, function(g) rnorm(n.samples,
+                                              mean = key[key$id == g,]$mean,
+                                              sd = key[key$id == g,]$mean *
+                                                  key[key$id == g,]$cv))
+
+    mock.means <- do.call(rbind, mock.means)
+    mock.means[mock.means < 0] <- 0
+
+    return(mock.means)
+}
+
+#' Generate mock eQTL mapping results
+#'
+#' Quick function to generate mock eQTL mapping results, with parameters
+#' estimated using real eQTL mapping results from GTEX using thyroid tissue.
+#'
+#' @param n.genes Number of genes in mock eQTL data.
+#'
+#' @return data.frame containing mock bulk eQTL mapping results.
+#'
+#' @export
+#'
+mockBulkeQTL <- function(n.genes = 1000){
+
+    tmp.params <- newSplatPopParams()
+    eqtl.shape <- getParam(tmp.params, "eqtl.ES.shape")
+    eqtl.rate <- getParam(tmp.params, "eqtl.ES.rate")
+
+    mock.eq <- data.frame(list(gene_id = seq_len(n.genes),
+                               pval_nominal = 0.01,
+                               slope = rgamma(n.genes, eqtl.shape, eqtl.rate)))
+
+    return(mock.eq)
+}
diff --git a/R/splat-estimate.R b/R/splat-estimate.R
index d2fe3d50..6d69eae2 100644
--- a/R/splat-estimate.R
+++ b/R/splat-estimate.R
@@ -13,7 +13,7 @@
 #' \code{\link{splatEstOutlier}}, \code{\link{splatEstBCV}},
 #' \code{\link{splatEstDropout}}
 #'
-#' @return SplatParams object containing the estimated parameters.
+#' @return SplatParams object with estimated values.
 #'
 #' @examples
 #' # Load example data
@@ -78,7 +78,7 @@ splatEstimate.matrix <- function(counts, params = newSplatParams()) {
 #' winsorized by setting the top and bottom 10 percent of values to the 10th
 #' and 90th percentiles.
 #'
-#' @return SplatParams object with estimated values.
+#' @return SplatParams object containing the estimated parameters.
 splatEstMean <- function(norm.counts, params) {
 
     means <- rowMeans(norm.counts)
diff --git a/R/splat-simulate.R b/R/splat-simulate.R
index 927e0c69..471016e0 100644
--- a/R/splat-simulate.R
+++ b/R/splat-simulate.R
@@ -7,7 +7,7 @@
 #'        See \code{\link{SplatParams}} for details.
 #' @param method which simulation method to use. Options are "single" which
 #'        produces a single population, "groups" which produces distinct groups
-#'        (eg. cell types) or "paths" which selects cells from continuous
+#'        (eg. cell types), or "paths" which selects cells from continuous
 #'        trajectories (eg. differentiation processes).
 #' @param verbose logical. Whether to print progress messages.
 #' @param ... any additional parameter settings to override what is provided in
@@ -156,8 +156,8 @@ splatSimulate <- function(params = newSplatParams(),
         method <- "single"
     }
 
-    if (verbose) {message("Creating simulation object...")}
     # Set up name vectors
+    if (verbose) {message("Creating simulation object...")}
     cell.names <- paste0("Cell", seq_len(nCells))
     gene.names <- paste0("Gene", seq_len(nGenes))
     batch.names <- paste0("Batch", seq_len(nBatches))
@@ -219,6 +219,7 @@ splatSimulate <- function(params = newSplatParams(),
 
     if (verbose) {message("Done!")}
     return(sim)
+
 }
 
 #' @rdname splatSimulate
@@ -295,6 +296,10 @@ splatSimLibSizes <- function(sim, params) {
 #' @importFrom stats rgamma median
 splatSimGeneMeans <- function(sim, params) {
 
+    # Note: splatPopSimGeneMeans in splatPop-simulate.R mirrors this function.
+    # If changes are made to the "add expression outliers" method here, please
+    # make the same changes in splatPopSimGeneMeans.
+
     nGenes <- getParam(params, "nGenes")
     mean.shape <- getParam(params, "mean.shape")
     mean.rate <- getParam(params, "mean.rate")
diff --git a/R/splatPop-estimate.R b/R/splatPop-estimate.R
new file mode 100644
index 00000000..57232fc1
--- /dev/null
+++ b/R/splatPop-estimate.R
@@ -0,0 +1,172 @@
+#' Estimate population/eQTL simulation parameters
+#'
+#' Estimate simulation parameters for the eQTL population simulation from
+#' real data. See the individual estimation functions for more details on
+#' how this is done.
+#'
+#' @param params SplatPopParams object containing parameters for the
+#'        simulation of the mean expression levels for the population.
+#'        See \code{\link{SplatPopParams}} for details.
+#' @param counts either a counts matrix or a SingleCellExperiment object
+#'        containing count data to estimate parameters from.
+#' @param means Matrix of real gene means across a population, where
+#'        each row is a gene and each column is an individual in the population.
+#' @param eqtl data.frame with all or top eQTL pairs from a real eQTL analysis.
+#'         Must include columns: 'gene_id', 'pval_nominal', and 'slope'.
+#'
+#' @seealso
+#' \code{\link{splatPopEstimateEffectSize}},
+#' \code{\link{splatPopEstimateMeanCV}}
+#'
+#' @return SplatPopParams object containing the estimated parameters.
+#'
+#' @export
+splatPopEstimate <- function(params = newSplatPopParams(), counts = NULL,
+                             means = NULL, eqtl = NULL) {
+
+    checkmate::assertClass(params, "SplatPopParams")
+
+    # Estimate single-cell parameters using base splatEstimate function
+    if (!is.null(counts)) {
+        params <- splatEstimate(counts, params)
+    }
+
+    # Get parameters for eQTL Effect Size distribution
+    if (!is.null(eqtl)) {
+        params <- splatPopEstimateEffectSize(params, eqtl)
+    }
+
+    # Get parameters for population wide gene mean and variance distributions
+    if (!is.null(means)) {
+        params <- splatPopEstimateMeanCV(params, means)
+    }
+
+    return(params)
+
+}
+
+#' Estimate eQTL Effect Size parameters
+#'
+#' Estimate rate and shape parameters for the gamma distribution used to
+#' simulate eQTL (eSNP-eGene) effect sizes.
+#'
+#' @param eqtl data.frame with all or top eQTL pairs from a real eQTL analysis.
+#'        Must include columns: gene_id, pval_nominal, and slope.
+#' @param params SplatPopParams object containing parameters for the
+#'        simulation of the mean expression levels for the population.
+#'        See \code{\link{SplatPopParams}} for details.
+#'
+#' @details
+#' Parameters for the gamma distribution are estimated by fitting the top eSNP-
+#' eGene pair effect sizes using \code{\link[fitdistrplus]{fitdist}}. The
+#' maximum goodness-of-fit estimation method is used to minimise the
+#' Cramer-von Mises distance. This can fail in some situations, in which case
+#' the method of moments estimation method is used instead.
+#'
+#' @return params object with estimated values.
+#'
+splatPopEstimateEffectSize <- function(params, eqtl) {
+
+    # Test input eSNP-eGene pairs
+    if (!("gene_id" %in% names(eqtl) &
+          "pval_nominal" %in% names(eqtl) &
+          "slope" %in% names(eqtl))) {
+        stop("Incorrect format for eqtl data.")}
+
+    # Select top eSNP for each gene (i.e. lowest p.value)
+    eqtl.top <- eqtl[order(eqtl$gene_id, eqtl$pval_nominal), ]
+    eqtl.top <- eqtl.top[!duplicated(eqtl.top$gene_id), ]
+
+    # Fit absolute value of effect sizes to gamma distribution
+    e.sizes <- abs(eqtl.top$slope)
+    fit <- fitdistrplus::fitdist(e.sizes, "gamma", method = "mge", gof = "CvM")
+    if (fit$convergence > 0) {
+        warning("Fitting effect sizes using the Goodness of Fit method failed,",
+                " using the Method of Moments instead")
+        fit <- fitdistrplus::fitdist(e.sizes, "gamma", method = "mme")
+    }
+
+    params <- setParams(params, eqtl.ES.shape = unname(fit$estimate["shape"]),
+                        eqtl.ES.rate = unname(fit$estimate["rate"]))
+
+    return(params)
+}
+
+#' Estimate gene mean and gene mean variance parameters
+#'
+#' @param params SplatPopParams object containing parameters for the
+#'        simulation of the mean expression levels for the population.
+#'        See \code{\link{SplatPopParams}} for details.
+#' @param means data.frame of real gene means across a population, where
+#'        each row is a gene and each column is an individual in the population.
+#'
+#' @details
+#' Parameters for the mean gamma distribution are estimated by fitting the mean
+#' (across the population) expression of genes that meet the criteria (<50% of
+#' samples have exp <0.1) and parameters for the cv gamma distribution are
+#' estimated for each bin of mean expression using the cv of expression across
+#' the population for genes in that bin. Both are fit using
+#' \code{\link[fitdistrplus]{fitdist}}. The "Nelder-Mead" method is used to fit
+#' the mean gamma distribution and the maximum goodness-of-fit estimation
+#' method is used to minimise the Cramer-von Mises distance for the CV
+#' distribution.
+#'
+#' @return params object with estimated values.
+#' @importFrom stats quantile
+#' @importFrom grDevices boxplot.stats
+#' @importFrom matrixStats rowMedians
+#'
+splatPopEstimateMeanCV <- function(params, means) {
+
+    # Test input gene means
+    if ((anyNA(means) | !(validObject(rowSums(means))))) {
+        stop("Incorrect format or NAs present in gene.means. See example data.")
+    }
+
+    # Remove genes with low variance/low means
+    abv.thr <- data.frame(perc = (rowSums(means >= 0.1)/ncol(means)))
+
+    means.use <- means[abv.thr$perc > 0.5, ]
+
+    # Calculate mean expression parameters
+    row.means <- rowMedians(means.use)
+    names(row.means) <- row.names(means.use)
+    mfit <- fitdistrplus::fitdist(row.means, "gamma",
+                                  optim.method = "Nelder-Mead")
+
+    # Calculate CV parameters for genes based on 10 mean expression bins
+    nbins <- getParam(params, "pop.cv.bins")
+    bins <- split(row.means, cut(row.means, quantile(row.means,(0:nbins)/nbins),
+                             include.lowest = TRUE))
+    cvparams <- data.frame(start = character(), end = character(),
+                           shape = character(), rate = character(),
+                           stringsAsFactors = FALSE)
+
+    for(b in names(bins)){
+        re.brack.paren <- "\\[|\\]|\\)|\\("
+        min.max <- strsplit(gsub(re.brack.paren, "", unlist(b)), split = ",")
+
+        b.gene.means <- means.use[row.means > as.numeric(min.max[[1]][1]) &
+                                      row.means < as.numeric(min.max[[1]][2]), ]
+
+        cv <- apply(b.gene.means, 1, co.var)
+        cv[is.na(cv)] <- 0
+        cv <- cv[!cv %in% boxplot.stats(cv)$out]
+        cvfit <- fitdistrplus::fitdist(cv, "gamma", method = "mge", gof = "CvM")
+        cvparams <- rbind(cvparams,
+                          list(start = as.numeric(as.numeric(min.max[[1]][1])),
+                               end = as.numeric(as.numeric(min.max[[1]][2])),
+                               shape = cvfit$estimate["shape"],
+                               rate = cvfit$estimate["rate"]),
+                          stringsAsFactors = FALSE)
+    }
+
+    cvparams[1, "start"] <- 0
+    cvparams[nrow(cvparams), "end"] <- 1e100
+    params <- setParams(params,
+                        pop.mean.shape = unname(mfit$estimate["shape"]),
+                        pop.mean.rate = unname(mfit$estimate["rate"]),
+                        pop.cv.param = cvparams)
+
+    return(params)
+}
diff --git a/R/splatPop-simulate.R b/R/splatPop-simulate.R
new file mode 100644
index 00000000..c2172802
--- /dev/null
+++ b/R/splatPop-simulate.R
@@ -0,0 +1,843 @@
+#' splatPop simulation
+#'
+#' Simulate scRNA-seq count data using the splat model for a population of
+#' individuals with correlation structure.
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param vcf VariantAnnotation object containing genotypes of samples.
+#' @param method which simulation method to use. Options are "single" which
+#'        produces a single population, "groups" which produces distinct groups
+#'        (eg. cell types), "paths" which selects cells from continuous
+#'        trajectories (eg. differentiation processes).
+#' @param gff Either NULL or a data.frame object containing a GFF/GTF file.
+#' @param key Either NULL or a data.frame object containing a full or partial
+#'        splatPop key.
+#' @param counts.only logical. Whether to save only counts in sce object.
+#' @param verbose logical. Whether to print progress messages.
+#' @param ... any additional parameter settings to override what is provided in
+#'        \code{params}.
+#'
+#' @details
+#'
+#' This functions is for simulating data in a single step. It consists of a
+#' call to \code{\link{splatPopSimulateMeans}}, which simulates a mean
+#' expression level per gene per sample, followed by a call to
+#' \code{\link{splatPopSimulateSC}}, which uses the splat model to simulate
+#' single-cell counts per individual. Please see the documentation for those
+#' functions for more details.
+#'
+#' @seealso
+#' \code{\link{splatPopSimulateMeans}}, \code{\link{splatPopSimulateSC}}
+#'
+#' @return SingleCellExperiment object containing simulated counts,
+#' intermediate values like the gene means simulated in `splatPopSimulateMeans`,
+#' and information about the differential expression and eQTL effects assigned
+#' to each gene.
+#'
+#' @export
+splatPopSimulate <- function(params = newSplatPopParams(nGenes = 1000),
+                             vcf = mockVCF(),
+                             method = c("single", "groups", "paths"),
+                             gff = NULL,
+                             key = NULL,
+                             counts.only = FALSE,
+                             verbose = TRUE, ...) {
+
+    if (verbose) {message("Getting parameters...")}
+    params <- setParams(params, ...)
+    params <- expandParams(params)
+    validObject(params)
+
+
+    sim.means <- splatPopSimulateMeans(vcf = vcf,
+                                       params = params,
+                                       gff = gff,
+                                       key = key,
+                                       verbose = verbose)
+
+    sim.sc <- splatPopSimulateSC(sim.means = sim.means$means,
+                                 params = params,
+                                 key = sim.means$key,
+                                 method = method,
+                                 counts.only = counts.only,
+                                 verbose = verbose)
+
+    return(sim.sc)
+}
+
+
+#' splatPopSimulateMeans
+#'
+#' Simulate mean expression levels for all genes for all samples, with between
+#' sample correlation structure simulated with eQTL effects and with the option
+#' to simulate multiple groups (i.e. cell-types).
+#'
+#' @param vcf VariantAnnotation object containing genotypes of samples.
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param gff Either NULL or a data.frame object containing a GFF/GTF file.
+#' @param key Either FALSE or a data.frame object containing a full or partial
+#'        splatPop key.
+#' @param verbose logical. Whether to print progress messages.
+#' @param ... any additional parameter settings to override what is provided in
+#'        \code{params}.
+#'
+#' @details SplatPopParams can be set in a variety of ways. 1. If
+#' not provided, default parameters are used. 2. Default parameters can be
+#' overridden by supplying desired parameters using \code{\link{setParams}}.
+#' 3. Parameters can be estimated from real data of your choice using
+#' \code{\link{splatPopEstimate}}.
+#'
+#' `splatPopSimulateMeans` involves the following steps:
+#' \enumerate{
+#'     \item Load population key or generate random or GFF/GTF based key.
+#'     \item Format and subset genotype data from the VCF file.
+#'     \item If not in key, assign expression mean and variance to each gene.
+#'     \item If not in key, assign eGenes-eSNPs pairs and effect sizes.
+#'     \item If not in key and groups >1, assign subset of eQTL associations as
+#'     group-specific and assign DEG group effects.
+#'     \item Simulate mean gene expression matrix without eQTL effects
+#'     \item Quantile normalize by sample to fit single-cell expression
+#'     distribution as defined in `splatEstimate`.
+#'     \item Add quantile normalized gene mean and cv info the eQTL key.
+#'     \item Add eQTL effects to means matrix.
+#' }
+#'
+#' @return A list containing: `means` a matrix (or list of matrices if
+#' n.groups > 1) with the simulated mean gene expression value for each gene
+#' (row) and each sample (column) and `key` a data.frame with population
+#' information including eQTL and group effects.
+#'
+#' @seealso
+#' \code{\link{splatPopParseVCF}}, \code{\link{splatPopParseGenes}},
+#' \code{\link{splatPopAssignMeans}},
+#' \code{\link{splatPopQuantNorm}}, \code{\link{splatPopQuantNormKey}}
+#' \code{\link{splatPopeQTLEffects}}, \code{\link{splatPopGroupEffects}},
+#' \code{\link{splatPopSimMeans}}, \code{\link{splatPopSimEffects}},
+#'
+#' @export
+splatPopSimulateMeans <- function(vcf = mockVCF(),
+                                  params = newSplatPopParams(nGenes = 1000),
+                                  verbose = TRUE, key = NULL, gff = NULL, ...){
+
+    set.seed(getParam(params, "seed"))
+
+    nGroups <- getParam(params, "nGroups")
+
+    vcf <- splatPopParseVCF(vcf, params)
+    group.names <- paste0("Group", seq_len(nGroups))
+
+    # Genes from key or gff or mock (in that order)
+    if (is.null(key)) {
+        key <- splatPopParseGenes(params, gff)
+    }
+
+    if (!all(c("meanSampled", "cvSampled") %in% names(key))) {
+        key <- splatPopAssignMeans(params, key)
+    }
+
+    if (!all(c("eQTL.type", "eSNP.ID", "eQTL.EffectSize") %in% names(key))) {
+        key <- splatPopeQTLEffects(params, key, vcf)
+
+        if(length(group.names) > 1){
+            key <- splatPopGroupEffects(params, key, group.names)
+        }
+    }
+
+    if (verbose) {message("Simulating gene means for population...")}
+
+    means.pop <- splatPopSimMeans(vcf, key)
+
+    means.pop <- splatPopQuantNorm(params, means.pop)
+    key <- splatPopQuantNormKey(key, means.pop)
+
+    eMeansPop <- splatPopSimEffects("global", key, vcf, means.pop)
+
+    if (length(group.names) > 1) {
+        eMeansPopq.groups <- list()
+
+        for (id in group.names) {
+            eMeansPop.g <- splatPopSimEffects(id, key, vcf, eMeansPop)
+            eMeansPop.g[eMeansPop.g <= 0] <- 1e-5
+            eMeansPopq.groups[[id]] <- eMeansPop.g
+        }
+
+        return(list(means = eMeansPopq.groups, key = key))
+
+    } else {
+        eMeansPop[eMeansPop <= 0] <- 1e-5
+        return(list(means = eMeansPop, key = key))
+    }
+}
+
+
+#' splatPopSimulateSC
+#'
+#' Simulate count data for a population from a fictional single-cell
+#' RNA-seq experiment using the Splat method.
+#'
+#' @param sim.means Matrix or list of matrices of gene means for the population.
+#'        Output from `splatPopSimulateMeans()`.
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param key data.frame object containing a full or partial splatPop key.
+#'        Output from `splatPopSimulateMeans()`.
+#' @param method which simulation method to use. Options are "single" which
+#'        produces a single cell population for each sample, "groups" which
+#'        produces distinct groups (eg. cell types) for each sample (note, this
+#'        creates separate groups from those created in `popSimulate` with only
+#'        DE effects), and "paths" which selects cells from continuous
+#'        trajectories (eg. differentiation processes).
+#' @param counts.only logical. Whether to return only the counts.
+#' @param verbose logical. Whether to print progress messages.
+#' @param ... any additional parameter settings to override what is provided in
+#'        \code{params}.
+#'
+#' @return SingleCellExperiment object containing simulated counts,
+#' intermediate values like the gene means simulated in `splatPopSimulateMeans`,
+#' and information about the differential expression and eQTL effects assigned
+#' to each gene.
+#'
+#' @importFrom SingleCellExperiment SingleCellExperiment cbind
+#' @importFrom SummarizedExperiment rowData rowData<-
+#' @export
+splatPopSimulateSC <- function(sim.means,
+                               params,
+                               key,
+                               method = c("single", "groups", "paths"),
+                               counts.only = FALSE,
+                               verbose = TRUE, ...){
+
+    set.seed(getParam(params, "seed"))
+    method <- match.arg(method)
+
+    params <- setParams(params, ...)
+    params <- expandParams(params)
+    validObject(params)
+
+    seed <- getParam(params, "seed")
+    set.seed(seed)
+
+    nGroups <- getParam(params, "nGroups")
+    group.names <- paste0("Group", seq_len(nGroups))
+    group.prob <- getParam(params, "group.prob")
+    batchCells <- getParam(params, "batchCells")
+
+    # Simulate single-cell counts with group-specific effects
+    if (is.list(sim.means)){
+        if (length(group.prob) != length(sim.means)) {
+            group.prob <- rep(1 / length(sim.means), length(sim.means))
+        }
+
+        group.n <- lapply(group.prob, function(x) {ceiling(x * batchCells)})
+        names(group.n) <- group.names
+        samples <- colnames((sim.means[[1]]))
+        group.sims <- list()
+
+        for (g in group.names) {
+            if (verbose) {message(paste0("Simulating sc counts for ", g, "..."))}
+            paramsG <- setParams(params, batchCells = unlist(group.n[g]))
+
+            sims <- lapply(samples, function(x) {
+                splatPopSimulateSample(params = paramsG,
+                                       method = method,
+                                       sample.means = sim.means[[g]][, x],
+                                       counts.only = counts.only,
+                                       verbose = verbose)
+            })
+
+            for (i in seq(1, length(sims))) {
+                s <- samples[i]
+                sims[i][[1]]$Sample <- s
+                sims[i][[1]]$Group <- g
+                names(rowData(sims[i][[1]])) <- paste(s, g,
+                                                names(rowData(sims[i][[1]])),
+                                                sep = "_")}
+
+            group.sims[[g]] <- do.call(SingleCellExperiment::cbind, sims)
+        }
+
+        sim.all <- do.call(SingleCellExperiment::cbind, group.sims)
+
+    }else{
+        if (verbose) {message("Simulating population single cell counts...")}
+        samples <- colnames(sim.means)
+        sims <- lapply(samples, function(x) {
+            splatPopSimulateSample(params = params,
+                                   method = method,
+                                   sample.means = sim.means[, x],
+                                   counts.only = counts.only,
+                                   verbose = verbose)
+        })
+
+        for (i in seq(1, length(sims))) {
+            s <- samples[i]
+            sims[i][[1]]$Sample <- s
+            names(rowData(sims[i][[1]])) <- paste(s,
+                                                  names(rowData(sims[i][[1]])),
+                                                  sep = "_")
+        }
+
+        sim.all <- do.call(SingleCellExperiment::cbind, sims)
+    }
+
+    sim.all <- splatPopCleanSCE(sim.all)
+
+    metadata(sim.all)$Simulated_Means <- sim.means
+    rowData(sim.all) <- merge(rowData(sim.all), key,
+                              by.x = "row.names", by.y = "geneID")
+
+    if (verbose) {message("Done!")}
+    return (sim.all)
+}
+
+#' splatPopSimulateSample simulation
+#'
+#' Simulate count data for one sample from a fictional single-cell RNA-seq
+#' experiment using the Splat method.
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param method which simulation method to use. Options are "single" which
+#'        produces a single population, "groups" which produces distinct groups
+#'        (eg. cell types), "paths" which selects cells from continuous
+#'        trajectories (eg. differentiation processes).
+#' @param sample.means Gene means to use if running splatSimulatePop().
+#' @param counts.only logical. Whether to return only the counts.
+#' @param verbose logical. Whether to print progress messages.
+#' @param ... any additional parameter settings to override what is provided in
+#'        \code{params}.
+#'
+#' @details
+#' This function closely mirrors \code{\link{splatSimulate}}. The main
+#' difference is that it takes the means simulated by splatPopSimulateMeans
+#' instead of randomly sampling a mean for each gene. For details about this
+#' function see the documentation for \code{\link{splatSimulate}}.
+#'
+#' @return SingleCellExperiment object containing the simulated counts and
+#' intermediate values for one sample.
+#'
+#' @seealso
+#' \code{\link{splatSimLibSizes}}, \code{\link{splatPopSimGeneMeans}},
+#' \code{\link{splatSimBatchEffects}}, \code{\link{splatSimBatchCellMeans}},
+#' \code{\link{splatSimDE}}, \code{\link{splatSimCellMeans}},
+#' \code{\link{splatSimBCVMeans}}, \code{\link{splatSimTrueCounts}},
+#' \code{\link{splatSimDropout}}, \code{\link{splatPopSimulateSC}}
+#'
+#' @importFrom SummarizedExperiment rowData colData colData<- assays
+#' @importFrom SingleCellExperiment SingleCellExperiment
+splatPopSimulateSample <- function(params = newSplatPopParams(),
+                                   method = c("single", "groups", "paths"),
+                                   counts.only = FALSE,
+                                   verbose = TRUE,
+                                   sample.means, ...) {
+
+    method <- match.arg(method)
+    set.seed(getParam(params, "seed"))
+
+    # Get the parameters we are going to use
+    nCells <- getParam(params, "nCells")
+    nGenes <- length(sample.means)
+    params <- setParams(params, nGenes = nGenes)
+    nBatches <- getParam(params, "nBatches")
+    batch.cells <- getParam(params, "batchCells")
+    nGroups <- getParam(params, "nGroups")
+    group.prob <- getParam(params, "group.prob")
+
+    # Run sanity checks
+    if (nGroups == 1 && method == "groups") {
+        warning("nGroups is 1, switching to single mode")
+        method <- "single"
+    }
+
+    # Set up name vectors
+    cell.names <- paste0("Cell", seq_len(nCells))
+    gene.names <- names(sample.means)
+    batch.names <- paste0("Batch", seq_len(nBatches))
+    if (method == "groups") {
+        group.names <- paste0("Group", seq_len(nGroups))
+    } else if (method == "paths") {
+        group.names <- paste0("Path", seq_len(nGroups))
+    }
+
+    # Create SingleCellExperiment to store simulation
+    cells <-  data.frame(Cell = cell.names)
+    rownames(cells) <- cell.names
+    features <- data.frame(Gene = gene.names)
+    rownames(features) <- gene.names
+
+    sim <- SingleCellExperiment(rowData = features, colData = cells,
+                                metadata = list(Params = params))
+
+    # Make batches vector which is the index of param$batchCells repeated
+    # params$batchCells[index] times
+    batches <- lapply(seq_len(nBatches), function(i, b) {rep(i, b[i])},
+                      b = batch.cells)
+    batches <- unlist(batches)
+    colData(sim)$Batch <- batch.names[batches]
+
+    if (method != "single") {
+        groups <- sample(seq_len(nGroups), nCells, prob = group.prob,
+                         replace = TRUE)
+        colData(sim)$Group <- factor(group.names[groups], levels = group.names)
+    }
+
+    sim <- splatSimLibSizes(sim, params)
+    sim <- splatPopSimGeneMeans(sim, params, base.means.gene = sample.means)
+
+    if (nBatches > 1) {
+        sim <- splatSimBatchEffects(sim, params)
+    }
+    sim <- splatSimBatchCellMeans(sim, params)
+
+    if (method == "single") {
+        sim <- splatSimSingleCellMeans(sim, params)
+    } else if (method == "groups") {
+        sim <- splatSimGroupDE(sim, params)
+        sim <- splatSimGroupCellMeans(sim, params)
+    } else {
+        sim <- splatSimPathDE(sim, params)
+        sim <- splatSimPathCellMeans(sim, params)
+    }
+
+    sim <- splatSimBCVMeans(sim, params)
+    sim <- splatSimTrueCounts(sim, params)
+    sim <- splatSimDropout(sim, params)
+
+    if (counts.only) {
+        assays(sim)[!grepl('counts', names(assays(sim)))] <- NULL
+    }
+
+    return(sim)
+
+}
+
+
+#' Format and subset genotype data from a VCF file.
+#'
+#' Extract numeric alleles from vcf object and filter out SNPs missing genotype
+#' data or outside the Minor Allele Frequency range in `SplatPopParams`.
+#'
+#' @param vcf VariantAnnotation object containing genotypes of samples.
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#'
+#' @return Genotype data.frame
+#'
+#' @importFrom stats complete.cases na.omit
+#' @importFrom utils data
+splatPopParseVCF <- function(vcf, params){
+
+    # Filter SNPs with NAs and outside MAF range
+    eqtl.maf.min <- getParam(params, "eqtl.maf.min")
+    eqtl.maf.max <- getParam(params, "eqtl.maf.max")
+
+    SummarizedExperiment::rowRanges(vcf)$MAF <-
+        VariantAnnotation::snpSummary(vcf)$a1Freq
+
+    vcf <- vcf[SummarizedExperiment::rowRanges(vcf)$MAF >= eqtl.maf.min &
+                   SummarizedExperiment::rowRanges(vcf)$MAF <= eqtl.maf.max]
+
+    return(vcf)
+}
+
+#' Generate population key matrix from random or gff provided gene information
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param gff Either NULL or a data.frame object containing a GFF/GTF file.
+#'
+#' @return The Partial splatPop key data.frame.
+splatPopParseGenes <- function(params, gff){
+
+    nGenes <- getParam(params, "nGenes")
+
+    if(is.null(gff)){
+        gff <- mockGFF(nGenes)
+    }else{
+        gff <- as.data.frame(gff)
+        if ((length(names(gff)) < 8 | nrow(gff[gff[,3] == "gene",]) < 1)) {
+            stop("GFF file did not contain gene features or other issue with
+            file format. See example data.")}
+        nGenes <- nrow(gff)
+    }
+
+    key <- gff[gff[,3] %in% c("gene", "Gene"),]
+    key$geneID <- paste0("gene_", formatC(seq_len(nGenes),
+                                          width = nchar(nrow(key)),
+                                          format = "d", flag = "0"))
+
+    key[['chromosome']] <- key[, 1]
+    key[['geneStart']] <- key[, 4]
+    key[['geneEnd']] <- key[, 5]
+    key[['geneMiddle']] <- floor(abs((key$geneStart - key$geneEnd)/2)) +
+        key$geneStart
+    key <- key[,c("geneID", "chromosome", "geneStart", "geneEnd", "geneMiddle")]
+
+    return(key)
+}
+
+#' Sample expression mean and variance for each gene
+#'
+#' A mean and coefficient of variation is assigned to each gene by sampling from
+#' gamma distributions parameterized from real data in `splatPopEstimate`.
+#' The cv gamma distributions are binned by gene mean because the distribution
+#' of variance in real data is not independent from the mean. The degree of
+#' similarity between individuals can be further tuned using the
+#' similarity.scale parameter in `SplatPopParams`.
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param key Partial splatPop key data.frame.
+#'
+#' @return The key updated with assigned means and variances.
+splatPopAssignMeans <- function(params, key){
+
+    mean.shape <- getParam(params, "pop.mean.shape")
+    mean.rate <- getParam(params, "pop.mean.rate")
+    cv.param <- getParam(params, "pop.cv.param")
+    similarity.scale <- getParam(params, "similarity.scale")
+
+    # Scale the CV rate parameters
+    cv.param$rate <- cv.param$rate * similarity.scale
+
+    # Sample gene means
+    key[["meanSampled"]] <- rgamma(nrow(key), shape = mean.shape, rate = mean.rate)
+    key[["cvSampled"]] <- NULL
+
+    # Sample coefficient of variation for each gene
+    for (g in seq_len(nrow(key))) {
+        exp.mean <- key[g, "meanSampled"]
+        bin <- cv.param[(cv.param$start < exp.mean) &
+                            (cv.param$end >= exp.mean), ]
+        key[g,"cvSampled"] <- rgamma(1, shape = bin$shape, rate = bin$rate)
+    }
+
+    return(key)
+}
+
+
+#' Assign eGenes-eSNPs pairs and effect sizes.
+#'
+#' Randomly pairs N genes (eGene) a SNP (eSNP) within the window size
+#' (eqtl.dist) and assigns each pair an effect size sampled from a gamma
+#' distribution parameterized using the effect sizes from a real eQTL study.
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param key Partial splatPop key data.frame.
+#' @param vcf VariantAnnotation object containing genotypes of samples.
+#'
+#' @return The key updated with assigned eQTL effects.
+splatPopeQTLEffects <- function(params, key, vcf){
+
+    eqtl.n <- getParam(params, "eqtl.n")
+    if (eqtl.n > nrow(key)){eqtl.n <- nrow(key)} # Can't be greater than nGenes
+    if (eqtl.n <= 1){eqtl.n <- nrow(key) * eqtl.n} # If <= 1 it is a percent
+
+    eqtl.dist <- getParam(params, "eqtl.dist")
+    eqtlES.shape <- getParam(params, "eqtl.ES.shape")
+    eqtlES.rate <- getParam(params, "eqtl.ES.rate")
+
+    # Set up data.frame to save info about selected eSNP-eGENE pairs
+    snps.list <- row.names(vcf)
+    key2 <- key
+    key[["eQTL.type"]] <- NA
+    key[["eSNP.ID"]] <- NA
+    key[["eSNP.chromosome"]] <- NA
+    key[["eSNP.loc"]] <- NA
+    key[["eSNP.MAF"]] <- NA
+    key[["eQTL.EffectSize"]] <- 0
+
+    for(i in seq_len(eqtl.n)){
+        again <- TRUE
+        while (again == TRUE){
+            if(length(snps.list) == 0) {
+                stop("Not enough SNPs in MAF range.")
+            }
+            s <- sample(snps.list, 1)
+            snps.list <- snps.list[!snps.list == s]
+            s.chr <- as.character(GenomeInfoDb::seqnames(vcf[s])@values)
+            s.loc <- BiocGenerics::start(vcf[s])
+
+            matches <- subset(key2, (as.character(key2$chromosome) == s.chr &
+                                         key2$geneMiddle > s.loc - eqtl.dist &
+                                         key2$geneMiddle < s.loc + eqtl.dist))
+            if(nrow(matches) > 0){
+                match <- sample(matches$geneID, 1)
+                again <- FALSE
+            }
+        }
+
+        key2 <- key2[!(key2$geneID == match),]
+        ES <- rgamma(1, shape = eqtlES.shape, rate = eqtlES.rate)
+
+        key[key$geneID == match, ]$eSNP.ID <- s
+        key[key$geneID == match, ]$eSNP.chromosome <-
+            as.character(GenomeInfoDb::seqnames(vcf[s]))
+        key[key$geneID == match, ]$eSNP.loc <- BiocGenerics::start(vcf[s])
+        key[key$geneID == match, ]$eQTL.EffectSize <- ES
+        key[key$geneID == match, ]$eSNP.MAF <-
+            SummarizedExperiment::rowRanges(vcf[s])$MAF
+        key[key$geneID == match, ]$eQTL.type <- "global"
+
+        # Randomly make some effects negative
+        key$eQTL.EffectSize <- key$eQTL.EffectSize * sample(c(1, -1),
+                                                  length(key$eQTL.EffectSize),
+                                                  replace = TRUE)
+    }
+
+    return(key)
+}
+
+
+#' Assign group-specific eQTL and DEGs.
+#'
+#' If groups > 1, n eSNP-eGene pairs (n = 'eqtl.group.specific') are randomly
+#' assigned as group specific.
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param key Partial splatPop key data.frame.
+#' @param groups array of group names
+#'
+#' @return The key updated with group eQTL and non-eQTL effects.
+splatPopGroupEffects <- function(params, key, groups){
+
+    # Assign group-specific eQTL
+    eqtl.n <- getParam(params, "eqtl.n")
+    if (eqtl.n > nrow(key)){eqtl.n <- nrow(key)} # Can't be greater than nGenes
+    if (eqtl.n <= 1){eqtl.n <- nrow(key) * eqtl.n} # If <= 1 it is a percent
+
+    g.specific.perc <- getParam(params, "eqtl.group.specific")
+    n.groups <- length(groups)
+    n.specific.each <- ceiling(eqtl.n * g.specific.perc / n.groups)
+
+    for(g in groups){
+        glob.genes <- subset(key, key$eQTL.type == "global")$geneID
+        g.specific <- sample(glob.genes, size = n.specific.each)
+        key[["eQTL.type"]][key$geneID %in% g.specific] <- g
+    }
+
+    # Assign group-specific effects (differential expression, not eQTL)
+    nGenes <- nrow(key)
+    de.prob <- getParam(params, "de.prob")
+    de.downProb <- getParam(params, "de.downProb")
+    de.facLoc <- getParam(params, "de.facLoc")
+    de.facScale <- getParam(params, "de.facScale")
+
+    for (idx in seq_len(n.groups)) {
+        de.facs <- getLNormFactors(nGenes, de.prob, de.downProb,
+                                   de.facLoc, de.facScale)
+        key[, paste0(groups[idx], ".GroupEffect")] <- de.facs
+    }
+    return(key)
+}
+
+#' Simulate mean gene expression matrix without eQTL effects
+#'
+#' Gene mean expression levels are assigned to each gene for each pair randomly
+#' from a normal distribution parameterized using the mean and cv assigned to
+#' each gene in the key.
+#'
+#' @param vcf VariantAnnotation object containing genotypes of samples.
+#' @param key Partial splatPop key data.frame.
+#'
+#' @return matrix of gene mean expression levels WITHOUT eQTL effects.
+#'
+#' @importFrom stats rnorm
+splatPopSimMeans <- function(vcf, key){
+
+    means <- matrix(rnorm(ncol(vcf) * nrow(key), mean = key$meanSampled,
+                          sd = key$meanSampled * key$cvSampled),
+                    nrow = nrow(key), ncol = ncol(vcf))
+
+    rownames(means) <- key$geneID
+    colnames(means) <- colnames(VariantAnnotation::geno(vcf)$GT)
+
+    return(means)
+}
+
+
+#' Add eQTL effects to means matrix
+#'
+#' Add eQTL effects and non-eQTL group effects to simulated means matrix.
+#' The eQTL effects are incorporated using the following equation:
+#' \deqn{Ygs = (ESg x Mgs x Gs) + Mgs }
+#' Where Ygs is the mean for gene g and sample s, ESg is the effect size
+#' assigned to g, Mgs is the mean expression assigned to g for s, and Gs
+#' is the genotype (number of minor alleles) for s. Non-eQTL group effects are
+#' incorporated as:
+#' \deqn{Ygs = Mgs x GEg}
+#' Where GEg is the group effect (i.e. differential expression) assigned to g.
+#' To simulate multiple gene mean matrices with different group effects, this
+#' function can be run with `id` designating the group id.
+#'
+#' @param id The group ID (e.g. "global" or "g1")
+#' @param key Partial splatPop key data.frame.
+#' @param vcf VariantAnnotation object containing genotypes of samples.
+#' @param means.pop Population mean gene expression matrix
+#'
+#' @return data.frame of gene mean expression levels WITH eQTL effects.
+#'
+splatPopSimEffects <- function(id, key, vcf, means.pop){
+
+    # Add group-specific eQTL effects
+    genes.use <- subset(key, key$eQTL.type == id)$geneID
+    samples <- colnames(means.pop)
+
+    for (g in genes.use) {
+        without.eqtl <- as.numeric(means.pop[g, ])
+        ES <- key[key$geneID == g, "eQTL.EffectSize"]
+        eSNPsample <- key[key$geneID == g, "eSNP.ID"]
+        genotype_code <- as.array(VariantAnnotation::geno(
+            vcf[eSNPsample, samples])$GT
+        )
+        genotype <- lengths(regmatches(genotype_code,
+                                       gregexpr("1", genotype_code)))
+
+        means.pop[g, ] <- without.eqtl + (ES * genotype * means.pop[g, ])
+    }
+
+    # Add group-specific non-eQTL effects
+    if (id != "global") {
+        means.pop <- means.pop * key[, paste0(id, ".GroupEffect")]
+    }
+
+    means.pop[means.pop < 0] <- 0
+
+    return(means.pop)
+}
+
+
+#' Quantile normalize by sample to fit sc expression distribution.
+#'
+#' For each sample, expression values are quantile normalized (qgamma)
+#' using the gamma distribution parameterized from splatEstimate(). This ensures
+#' the simulated gene means reflect the distribution expected from a sc dataset
+#' and not a bulk dataset.
+#'
+#' @param params SplatPopParams object containing parameters for population
+#'        scale simulations. See \code{\link{SplatPopParams}} for details.
+#' @param means Mean gene expression matrix with eQTL effects.
+#'
+#' @return matrix of quantile normalized gene mean expression levels.
+#'
+#' @export
+
+splatPopQuantNorm <- function(params, means){
+
+    # Generate sample target distribution from sc parameters
+    mean.shape <- getParam(params, "mean.shape")
+    mean.rate <- getParam(params, "mean.rate")
+    target <- rgamma(10000, shape = mean.shape, rate = mean.rate)
+
+    means.norm <- preprocessCore::normalize.quantiles.use.target(means, target)
+    means.norm[means.norm < 0] <- 0
+
+    rownames(means.norm) <- rownames(means)
+    colnames(means.norm) <- colnames(means)
+
+    return(means.norm)
+}
+
+#' Add quantile normalized gene mean and cv info the eQTL key.
+#'
+#' @param key Partial splatPop key data.frame.
+#' @param means matrix or list of matrices containing means from
+#'        `splatPopQuantNorm`
+#'
+#' @return Final eQTL key.
+splatPopQuantNormKey <- function(key, means){
+
+    if (is.list(means)){
+        qn.means <- list()
+        qn.cvs <- list()
+
+        for(group in names(means)){
+            qn.mean.gr <- rowMeans(means[[group]])
+            qn.cv.gr <- apply(means[[group]], 1, FUN = co.var)
+            qn.means[[group]] <- qn.mean.gr
+            qn.cvs[[group]] <- qn.cv.gr
+        }
+        qn.mean <- rowMeans(as.data.frame(qn.means))
+        qn.cv <- rowMeans(as.data.frame(qn.cvs))
+
+    }else{
+        qn.mean <- rowMeans(means)
+        qn.cv <- apply(means, 1, FUN = co.var)
+    }
+
+    qn.df <- data.frame(list(meanQuantileNorm = qn.mean,
+                             cvQuantileNorm = qn.cv))
+    qn.df$geneID <- row.names(qn.df)
+    key <- merge(key, qn.df, by = "geneID")
+
+    return(key)
+}
+
+
+#' Simulate gene means for splatPop
+#'
+#' Simulate outlier expression factors for splatPop. Genes with an outlier
+#' factor not equal to 1 are replaced with the median mean expression
+#' multiplied by the outlier factor.
+#'
+#' @param sim SingleCellExperiment to add gene means to.
+#' @param params SplatParams object with simulation parameters.
+#' @param base.means.gene List of gene means for sample from matrix
+#'        generated by `splatPopSimulateMeans` and with the sample specified
+#'        in `splatPopSimulateSC`.
+#'
+#' @return SingleCellExperiment with simulated gene means.
+#'
+#' @importFrom SummarizedExperiment rowData rowData<-
+#' @importFrom stats rgamma median
+splatPopSimGeneMeans <- function(sim, params, base.means.gene) {
+
+    # Note: This function is similar to splatSimGeneMeans, except it uses the
+    # simulated gene mean instead of sampling one randomly. If changes are made
+    # to the outlier method for splat, they should also be made here.
+
+    nGenes <- getParam(params, "nGenes")
+    out.prob <- getParam(params, "out.prob")
+    out.facLoc <- getParam(params, "out.facLoc")
+    out.facScale <- getParam(params, "out.facScale")
+
+    # Add expression outliers
+    outlier.facs <- getLNormFactors(nGenes, out.prob, 0, out.facLoc,
+                                    out.facScale)
+    median.means.gene <- median(base.means.gene)
+    outlier.means <- median.means.gene * outlier.facs
+    is.outlier <- outlier.facs != 1
+    means.gene <- base.means.gene
+    means.gene[is.outlier] <- outlier.means[is.outlier]
+
+    rowData(sim)$BaseGeneMean <- base.means.gene
+    rowData(sim)$OutlierFactor <- outlier.facs
+    rowData(sim)$GeneMean <- means.gene
+
+    return(sim)
+}
+
+#' Clean up the population-scale SCE to remove redundant information
+#'
+#' @param sim.all SingleCellExperiment object with counts for all samples
+#'
+#' @return SingleCellExperiment with simulated sc counts.
+#'
+splatPopCleanSCE <- function(sim.all){
+
+    # Remove redundant sce info
+    keep.id <-  gsub("_Gene", "", names(rowData(sim.all))[1])
+
+    shared.out.factor <- rowData(sim.all)[[paste0(keep.id, "_OutlierFactor")]]
+    rowData(sim.all)[grepl("_OutlierFactor", names(rowData(sim.all)))] <- NULL
+    rowData(sim.all)$Shared_OutlierFactor <- shared.out.factor
+
+    rowData(sim.all)[grepl("_Gene", names(rowData(sim.all)))] <- NULL
+    metadata(sim.all)[2:length(names(metadata(sim.all)))] <- NULL
+
+    return(sim.all)
+}
diff --git a/R/utils.R b/R/utils.R
index 96fec570..983b1ad1 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -80,3 +80,15 @@ winsorize <- function(x, q) {
 
     return(x)
 }
+
+#' Calculate coefficient of variation
+#'
+#' Implementation of the coefficient of variation
+#'
+#' @param x vector of values.
+#'
+#' @return Value of coefficient of variation for vector
+#' @importFrom stats sd
+co.var <- function(x) {
+    sd(x) / mean(x)
+}
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index 5e2ff91e..19ad0862 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -1,6 +1,15 @@
 \name{NEWS}
 \title{News for Package \pkg{splatter}}
 
+\section{Version 1.14.0, Bioconductor 3.12 Release (2020-10-28)}{
+  \itemize{
+    \item{Add the splatPop simulation. This is a extension to the splat
+    simulation contributed by Christina Azodi and Davis McCarthy that adds
+    population effects. It allows you to specify relatedness between individuals
+    and generate cell-type specific eQTL effects.}
+  }
+}
+
 \section{Version 1.12.0, Bioconductor 3.11 Release (2020-04-20)}{
   \itemize{
     \item{Add checks for cycles in the Splat path.from parameter.}
diff --git a/inst/WORDLIST b/inst/WORDLIST
index e1b228cf..b6a06265 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -12,30 +12,56 @@ Bioc
 Biocondutor
 biocViews
 bioRxiv
+bp
 BPPARAM
+center
 Chu
 CMD
 compareSCEs
 compareSCESets
 cpm
 cutoff
+cv
+cvSampled
+DEGs
 df
 diffSCEs
 diffSCESets
+dist
 DM
 doi
+downProb
 DP
 Dudoit
 EE
+EffectSize
 eg
+eGene
+eGenes
+eqtl
+eQTL
+ESg
+eSNP
+eSNPs
 etc
+facLoc
+facScale
 fpkm
 FPKM
 gam
+GEg
+geneID
+geneMiddle
 getCounts
 getLNormFactors
+gff
+GFF
 Github
 Gribkova
+Gs
+GTEx
+GTEX
+GTF
 ImmunoOncology
 ingroup
 JC
@@ -54,18 +80,23 @@ LunParams
 lunSimulate
 MADs
 MAEs
+MAF
 Marioni
 meanlog
+meanSampled
 MeanZeros
 mfa
 MFA
 MFAParams
 mfaSimulate
+Mgs
 mockSCE
 modeling
+Nelder
 nGenes
 nGroups
 NOTEs
+param
 params
 Params
 Parra
@@ -73,11 +104,17 @@ Perraudeau
 PhenoParams
 PhenoPath
 PLoS
+plotPCA
 poisson
 Poisson
+popSimulate
+prob
 PROSSTT
 pseudotime
+pval
+qgamma
 Quickstart
+rd
 RG
 Risso
 RMSEs
@@ -113,8 +150,16 @@ splatEstimate
 splatEstLib
 splatParams
 SplatParams
+splatPop
+splatPopEstimate
+splatPopParams
+splatPopQuantNorm
+splatPopSimulateMeans
+splatPopSimulateSample
+splatPopSimulateSC
 splatSimulate
 splatSimulatePaths
+splatSimulatePop
 Splatter's
 splotchEstimate
 SplotchParams
@@ -128,6 +173,9 @@ TPM
 UMAP
 UpperCamelCase
 Vallejos
+VariantAnnotation
+vcf
+VCF
 WaVE
 Wellcome
 Wilks
@@ -137,6 +185,7 @@ winsorized
 Winsorized
 WithSpikes
 Yau
+Ygs
 ZINB
 ZINBParams
 zinbwave
diff --git a/man/co.var.Rd b/man/co.var.Rd
new file mode 100644
index 00000000..3349e074
--- /dev/null
+++ b/man/co.var.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{co.var}
+\alias{co.var}
+\title{Calculate coefficient of variation}
+\usage{
+co.var(x)
+}
+\arguments{
+\item{x}{vector of values.}
+}
+\value{
+Value of coefficient of variation for vector
+}
+\description{
+Implementation of the coefficient of variation
+}
diff --git a/man/mockBulkMatrix.Rd b/man/mockBulkMatrix.Rd
new file mode 100644
index 00000000..3e697292
--- /dev/null
+++ b/man/mockBulkMatrix.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mock-data.R
+\name{mockBulkMatrix}
+\alias{mockBulkMatrix}
+\title{Generate mock bulk population scale expression data}
+\usage{
+mockBulkMatrix(n.genes = 1000, n.samples = 100)
+}
+\arguments{
+\item{n.genes}{Number of genes in mock bulk data.}
+
+\item{n.samples}{Number of samples in mock bulk data.}
+}
+\value{
+matrix containing mock bulk expression data.
+}
+\description{
+Quick function to generate mock bulk expression data for a population, with
+parameters estimated using real thyroid tissue data from GTEx.
+}
diff --git a/man/mockBulkeQTL.Rd b/man/mockBulkeQTL.Rd
new file mode 100644
index 00000000..5abb8dcb
--- /dev/null
+++ b/man/mockBulkeQTL.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mock-data.R
+\name{mockBulkeQTL}
+\alias{mockBulkeQTL}
+\title{Generate mock eQTL mapping results}
+\usage{
+mockBulkeQTL(n.genes = 1000)
+}
+\arguments{
+\item{n.genes}{Number of genes in mock eQTL data.}
+}
+\value{
+data.frame containing mock bulk eQTL mapping results.
+}
+\description{
+Quick function to generate mock eQTL mapping results, with parameters
+estimated using real eQTL mapping results from GTEX using thyroid tissue.
+}
diff --git a/man/mockGFF.Rd b/man/mockGFF.Rd
new file mode 100644
index 00000000..ed64223e
--- /dev/null
+++ b/man/mockGFF.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mock-data.R
+\name{mockGFF}
+\alias{mockGFF}
+\title{Generate mock gff}
+\usage{
+mockGFF(n.genes = 500, chromosome = 22)
+}
+\arguments{
+\item{n.genes}{Number of genes in mock gff file}
+
+\item{chromosome}{Chromosome name}
+}
+\value{
+data.frame containing mock gff data.
+}
+\description{
+Quick function to generate a mock gff.
+}
diff --git a/man/mockVCF.Rd b/man/mockVCF.Rd
new file mode 100644
index 00000000..8889d664
--- /dev/null
+++ b/man/mockVCF.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mock-data.R
+\name{mockVCF}
+\alias{mockVCF}
+\title{Generate mock vcf}
+\usage{
+mockVCF(n.snps = 10000, n.samples = 10, chromosome = 22)
+}
+\arguments{
+\item{n.snps}{Number of SNPs in mock vcf file.}
+
+\item{n.samples}{Number of samples in mock bulk data.}
+
+\item{chromosome}{Chromosome name}
+}
+\value{
+data.frame containing mock gff data.
+}
+\description{
+Quick function to generate mock vcf file. Note this data has unrealistic
+population structure.
+}
diff --git a/man/newParams.Rd b/man/newParams.Rd
index 71d00e6b..795b6ab8 100644
--- a/man/newParams.Rd
+++ b/man/newParams.Rd
@@ -3,7 +3,7 @@
 %   R/KersplatParams-methods.R, R/Lun2Params-methods.R, R/LunParams-methods.R,
 %   R/MFAParams-methods.R, R/PhenoParams-methods.R, R/SCDDParams-methods.R,
 %   R/SimpleParams-methods.R, R/SparseDCParams-methods.R,
-%   R/SplatParams-methods.R, R/ZINBParams-methods.R
+%   R/SplatParams-methods.R, R/SplatPopParams-methods.R, R/ZINBParams-methods.R
 \name{newParams}
 \alias{newParams}
 \alias{newBASiCSParams}
@@ -16,6 +16,7 @@
 \alias{newSimpleParams}
 \alias{newSparseDCParams}
 \alias{newSplatParams}
+\alias{newSplatPopParams}
 \alias{newZINBParams}
 \title{New Params}
 \usage{
@@ -39,6 +40,8 @@ newSparseDCParams(...)
 
 newSplatParams(...)
 
+newSplatPopParams(...)
+
 newZINBParams(...)
 }
 \arguments{
diff --git a/man/setParam.Rd b/man/setParam.Rd
index e7dca4cc..5d8d28ec 100644
--- a/man/setParam.Rd
+++ b/man/setParam.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/AllGenerics.R, R/BASiCSParams-methods.R,
 %   R/KersplatParams-methods.R, R/Lun2Params-methods.R, R/LunParams-methods.R,
 %   R/Params-methods.R, R/PhenoParams-methods.R, R/SCDDParams-methods.R,
-%   R/SplatParams-methods.R, R/ZINBParams-methods.R
+%   R/SplatParams-methods.R, R/SplatPopParams-methods.R, R/ZINBParams-methods.R
 \name{setParam}
 \alias{setParam}
 \alias{setParam,BASiCSParams-method}
@@ -13,6 +13,7 @@
 \alias{setParam,PhenoParams-method}
 \alias{setParam,SCDDParams-method}
 \alias{setParam,SplatParams-method}
+\alias{setParam,SplatPopParams-method}
 \alias{setParam,ZINBParams-method}
 \title{Set a parameter}
 \usage{
@@ -34,6 +35,8 @@ setParam(object, name, value)
 
 \S4method{setParam}{SplatParams}(object, name, value)
 
+\S4method{setParam}{SplatPopParams}(object, name, value)
+
 \S4method{setParam}{ZINBParams}(object, name, value)
 }
 \arguments{
diff --git a/man/splatEstMean.Rd b/man/splatEstMean.Rd
index a9192dff..01b46bae 100644
--- a/man/splatEstMean.Rd
+++ b/man/splatEstMean.Rd
@@ -12,7 +12,7 @@ splatEstMean(norm.counts, params)
 \item{params}{SplatParams object to store estimated values in.}
 }
 \value{
-SplatParams object with estimated values.
+SplatParams object containing the estimated parameters.
 }
 \description{
 Estimate rate and shape parameters for the gamma distribution used to
diff --git a/man/splatEstimate.Rd b/man/splatEstimate.Rd
index 7139933e..9b1edc91 100644
--- a/man/splatEstimate.Rd
+++ b/man/splatEstimate.Rd
@@ -19,7 +19,7 @@ containing count data to estimate parameters from.}
 \item{params}{SplatParams object to store estimated values in.}
 }
 \value{
-SplatParams object containing the estimated parameters.
+SplatParams object with estimated values.
 }
 \description{
 Estimate simulation parameters for the Splat simulation from a real
diff --git a/man/splatPopAssignMeans.Rd b/man/splatPopAssignMeans.Rd
new file mode 100644
index 00000000..0a327fd0
--- /dev/null
+++ b/man/splatPopAssignMeans.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopAssignMeans}
+\alias{splatPopAssignMeans}
+\title{Sample expression mean and variance for each gene}
+\usage{
+splatPopAssignMeans(params, key)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{key}{Partial splatPop key data.frame.}
+}
+\value{
+The key updated with assigned means and variances.
+}
+\description{
+A mean and coefficient of variation is assigned to each gene by sampling from
+gamma distributions parameterized from real data in `splatPopEstimate`.
+The cv gamma distributions are binned by gene mean because the distribution
+of variance in real data is not independent from the mean. The degree of
+similarity between individuals can be further tuned using the
+similarity.scale parameter in `SplatPopParams`.
+}
diff --git a/man/splatPopCleanSCE.Rd b/man/splatPopCleanSCE.Rd
new file mode 100644
index 00000000..afdb359d
--- /dev/null
+++ b/man/splatPopCleanSCE.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopCleanSCE}
+\alias{splatPopCleanSCE}
+\title{Clean up the population-scale SCE to remove redundant information}
+\usage{
+splatPopCleanSCE(sim.all)
+}
+\arguments{
+\item{sim.all}{SingleCellExperiment object with counts for all samples}
+}
+\value{
+SingleCellExperiment with simulated sc counts.
+}
+\description{
+Clean up the population-scale SCE to remove redundant information
+}
diff --git a/man/splatPopEstimate.Rd b/man/splatPopEstimate.Rd
new file mode 100644
index 00000000..849b4a7a
--- /dev/null
+++ b/man/splatPopEstimate.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-estimate.R
+\name{splatPopEstimate}
+\alias{splatPopEstimate}
+\title{Estimate population/eQTL simulation parameters}
+\usage{
+splatPopEstimate(
+  params = newSplatPopParams(),
+  counts = NULL,
+  means = NULL,
+  eqtl = NULL
+)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for the
+simulation of the mean expression levels for the population.
+See \code{\link{SplatPopParams}} for details.}
+
+\item{counts}{either a counts matrix or a SingleCellExperiment object
+containing count data to estimate parameters from.}
+
+\item{means}{Matrix of real gene means across a population, where
+each row is a gene and each column is an individual in the population.}
+
+\item{eqtl}{data.frame with all or top eQTL pairs from a real eQTL analysis.
+Must include columns: 'gene_id', 'pval_nominal', and 'slope'.}
+}
+\value{
+SplatPopParams object containing the estimated parameters.
+}
+\description{
+Estimate simulation parameters for the eQTL population simulation from
+real data. See the individual estimation functions for more details on
+how this is done.
+}
+\seealso{
+\code{\link{splatPopEstimateEffectSize}},
+\code{\link{splatPopEstimateMeanCV}}
+}
diff --git a/man/splatPopEstimateEffectSize.Rd b/man/splatPopEstimateEffectSize.Rd
new file mode 100644
index 00000000..9824402b
--- /dev/null
+++ b/man/splatPopEstimateEffectSize.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-estimate.R
+\name{splatPopEstimateEffectSize}
+\alias{splatPopEstimateEffectSize}
+\title{Estimate eQTL Effect Size parameters}
+\usage{
+splatPopEstimateEffectSize(params, eqtl)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for the
+simulation of the mean expression levels for the population.
+See \code{\link{SplatPopParams}} for details.}
+
+\item{eqtl}{data.frame with all or top eQTL pairs from a real eQTL analysis.
+Must include columns: gene_id, pval_nominal, and slope.}
+}
+\value{
+params object with estimated values.
+}
+\description{
+Estimate rate and shape parameters for the gamma distribution used to
+simulate eQTL (eSNP-eGene) effect sizes.
+}
+\details{
+Parameters for the gamma distribution are estimated by fitting the top eSNP-
+eGene pair effect sizes using \code{\link[fitdistrplus]{fitdist}}. The
+maximum goodness-of-fit estimation method is used to minimise the
+Cramer-von Mises distance. This can fail in some situations, in which case
+the method of moments estimation method is used instead.
+}
diff --git a/man/splatPopEstimateMeanCV.Rd b/man/splatPopEstimateMeanCV.Rd
new file mode 100644
index 00000000..abe8b957
--- /dev/null
+++ b/man/splatPopEstimateMeanCV.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-estimate.R
+\name{splatPopEstimateMeanCV}
+\alias{splatPopEstimateMeanCV}
+\title{Estimate gene mean and gene mean variance parameters}
+\usage{
+splatPopEstimateMeanCV(params, means)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for the
+simulation of the mean expression levels for the population.
+See \code{\link{SplatPopParams}} for details.}
+
+\item{means}{data.frame of real gene means across a population, where
+each row is a gene and each column is an individual in the population.}
+}
+\value{
+params object with estimated values.
+}
+\description{
+Estimate gene mean and gene mean variance parameters
+}
+\details{
+Parameters for the mean gamma distribution are estimated by fitting the mean
+(across the population) expression of genes that meet the criteria (<50% of
+samples have exp <0.1) and parameters for the cv gamma distribution are
+estimated for each bin of mean expression using the cv of expression across
+the population for genes in that bin. Both are fit using
+\code{\link[fitdistrplus]{fitdist}}. The "Nelder-Mead" method is used to fit
+the mean gamma distribution and the maximum goodness-of-fit estimation
+method is used to minimise the Cramer-von Mises distance for the CV
+distribution.
+}
diff --git a/man/splatPopGroupEffects.Rd b/man/splatPopGroupEffects.Rd
new file mode 100644
index 00000000..f9039a8c
--- /dev/null
+++ b/man/splatPopGroupEffects.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopGroupEffects}
+\alias{splatPopGroupEffects}
+\title{Assign group-specific eQTL and DEGs.}
+\usage{
+splatPopGroupEffects(params, key, groups)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{key}{Partial splatPop key data.frame.}
+
+\item{groups}{array of group names}
+}
+\value{
+The key updated with group eQTL and non-eQTL effects.
+}
+\description{
+If groups > 1, n eSNP-eGene pairs (n = 'eqtl.group.specific') are randomly
+assigned as group specific.
+}
diff --git a/man/splatPopParams.Rd b/man/splatPopParams.Rd
new file mode 100644
index 00000000..a2291786
--- /dev/null
+++ b/man/splatPopParams.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AllClasses.R
+\docType{class}
+\name{SplatPopParams}
+\alias{SplatPopParams}
+\alias{SplatPopParams-class}
+\title{The SplatPopParams class}
+\description{
+S4 class that holds parameters for the splatPop simulation.
+}
+\section{Parameters}{
+
+
+In addition to the \code{\link{SplatParams}} parameters, splatPop simulation
+requires the following parameters:
+
+\describe{
+    \item{\code{[similarity.scale]}}{Scaling factor for pop.cv.param.rate,
+    where values larger than 1 increase the similarity between individuals in
+    the population and values less than one make the individuals less
+    similar.}
+    \item{\code{[eqtl.n]}}{The number (>1) or percent (<=1) of genes to
+    assign eQTL effects.}
+    \item{\code{[eqtl.dist]}}{Maximum distance between eSNP and eGene}
+    \item{\code{[eqtl.maf.min]}}{Minimum Minor Allele Frequency of eSNPs.}
+    \item{\code{[eqtl.maf.max]}}{Maximum Minor Allele Frequency of eSNPs.}
+    \item{\code{[eqtl.group.specific]}}{Percent of eQTL effects to simulate
+    as group specific.}
+    \item{\emph{eQTL Effect size distribution parameters. Defaults estimated
+    from GTEx eQTL mapping results, see vignette for more information.}}{
+        \describe{
+            \item{\code{eqtl.ES.shape}}{Shape parameter for the effect size
+            gamma distribution.}
+            \item{\code{eqtl.ES.rate}}{Rate parameter for the effect size
+            gamma distribution.}
+        }
+    }
+    \item{\emph{Bulk Mean Expression distribution parameters. Defaults
+    estimated from GTEx data, see vignette for more information.}}{
+        \describe{
+            \item{\code{pop.mean.shape}}{Shape parameter for the mean (i.e.
+            bulk) expression gamma distribution}
+            \item{\code{pop.mean.rate}}{Rate parameter for the mean (i.e.
+            bulk) expression gamma distribution}
+        }
+    }
+    \item{\emph{Bulk Expression Coefficient of Variation distribution
+    parameters binned. Defaults estimated from GTEx data, see vignette for
+    more information.}}{
+        \describe{
+            \item{\code{pop.cv.param}}{Dataframe containing gene
+            mean bin range, and the CV shape, and CV rate parameters for
+            each of those bins.}
+        }
+    }
+}
+The parameters not shown in brackets can be estimated from real data using
+\code{\link{splatPopEstimate}}. For details of the eQTL simulation
+see \code{\link{splatPopSimulate}}.
+}
+
diff --git a/man/splatPopParseGenes.Rd b/man/splatPopParseGenes.Rd
new file mode 100644
index 00000000..0a15c1d4
--- /dev/null
+++ b/man/splatPopParseGenes.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopParseGenes}
+\alias{splatPopParseGenes}
+\title{Generate population key matrix from random or gff provided gene information}
+\usage{
+splatPopParseGenes(params, gff)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{gff}{Either NULL or a data.frame object containing a GFF/GTF file.}
+}
+\value{
+The Partial splatPop key data.frame.
+}
+\description{
+Generate population key matrix from random or gff provided gene information
+}
diff --git a/man/splatPopParseVCF.Rd b/man/splatPopParseVCF.Rd
new file mode 100644
index 00000000..0ed80f0e
--- /dev/null
+++ b/man/splatPopParseVCF.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopParseVCF}
+\alias{splatPopParseVCF}
+\title{Format and subset genotype data from a VCF file.}
+\usage{
+splatPopParseVCF(vcf, params)
+}
+\arguments{
+\item{vcf}{VariantAnnotation object containing genotypes of samples.}
+
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+}
+\value{
+Genotype data.frame
+}
+\description{
+Extract numeric alleles from vcf object and filter out SNPs missing genotype
+data or outside the Minor Allele Frequency range in `SplatPopParams`.
+}
diff --git a/man/splatPopQuantNorm.Rd b/man/splatPopQuantNorm.Rd
new file mode 100644
index 00000000..e25c81ea
--- /dev/null
+++ b/man/splatPopQuantNorm.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopQuantNorm}
+\alias{splatPopQuantNorm}
+\title{Quantile normalize by sample to fit sc expression distribution.}
+\usage{
+splatPopQuantNorm(params, means)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{means}{Mean gene expression matrix with eQTL effects.}
+}
+\value{
+matrix of quantile normalized gene mean expression levels.
+}
+\description{
+For each sample, expression values are quantile normalized (qgamma)
+using the gamma distribution parameterized from splatEstimate(). This ensures
+the simulated gene means reflect the distribution expected from a sc dataset
+and not a bulk dataset.
+}
diff --git a/man/splatPopQuantNormKey.Rd b/man/splatPopQuantNormKey.Rd
new file mode 100644
index 00000000..63bf6aaf
--- /dev/null
+++ b/man/splatPopQuantNormKey.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopQuantNormKey}
+\alias{splatPopQuantNormKey}
+\title{Add quantile normalized gene mean and cv info the eQTL key.}
+\usage{
+splatPopQuantNormKey(key, means)
+}
+\arguments{
+\item{key}{Partial splatPop key data.frame.}
+
+\item{means}{matrix or list of matrices containing means from
+`splatPopQuantNorm`}
+}
+\value{
+Final eQTL key.
+}
+\description{
+Add quantile normalized gene mean and cv info the eQTL key.
+}
diff --git a/man/splatPopSimEffects.Rd b/man/splatPopSimEffects.Rd
new file mode 100644
index 00000000..f3b19926
--- /dev/null
+++ b/man/splatPopSimEffects.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimEffects}
+\alias{splatPopSimEffects}
+\title{Add eQTL effects to means matrix}
+\usage{
+splatPopSimEffects(id, key, vcf, means.pop)
+}
+\arguments{
+\item{id}{The group ID (e.g. "global" or "g1")}
+
+\item{key}{Partial splatPop key data.frame.}
+
+\item{vcf}{VariantAnnotation object containing genotypes of samples.}
+
+\item{means.pop}{Population mean gene expression matrix}
+}
+\value{
+data.frame of gene mean expression levels WITH eQTL effects.
+}
+\description{
+Add eQTL effects and non-eQTL group effects to simulated means matrix.
+The eQTL effects are incorporated using the following equation:
+\deqn{Ygs = (ESg x Mgs x Gs) + Mgs }
+Where Ygs is the mean for gene g and sample s, ESg is the effect size
+assigned to g, Mgs is the mean expression assigned to g for s, and Gs
+is the genotype (number of minor alleles) for s. Non-eQTL group effects are
+incorporated as:
+\deqn{Ygs = Mgs x GEg}
+Where GEg is the group effect (i.e. differential expression) assigned to g.
+To simulate multiple gene mean matrices with different group effects, this
+function can be run with `id` designating the group id.
+}
diff --git a/man/splatPopSimGeneMeans.Rd b/man/splatPopSimGeneMeans.Rd
new file mode 100644
index 00000000..5325662c
--- /dev/null
+++ b/man/splatPopSimGeneMeans.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimGeneMeans}
+\alias{splatPopSimGeneMeans}
+\title{Simulate gene means for splatPop}
+\usage{
+splatPopSimGeneMeans(sim, params, base.means.gene)
+}
+\arguments{
+\item{sim}{SingleCellExperiment to add gene means to.}
+
+\item{params}{SplatParams object with simulation parameters.}
+
+\item{base.means.gene}{List of gene means for sample from matrix
+generated by `splatPopSimulateMeans` and with the sample specified
+in `splatPopSimulateSC`.}
+}
+\value{
+SingleCellExperiment with simulated gene means.
+}
+\description{
+Simulate outlier expression factors for splatPop. Genes with an outlier
+factor not equal to 1 are replaced with the median mean expression
+multiplied by the outlier factor.
+}
diff --git a/man/splatPopSimMeans.Rd b/man/splatPopSimMeans.Rd
new file mode 100644
index 00000000..7d8971a6
--- /dev/null
+++ b/man/splatPopSimMeans.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimMeans}
+\alias{splatPopSimMeans}
+\title{Simulate mean gene expression matrix without eQTL effects}
+\usage{
+splatPopSimMeans(vcf, key)
+}
+\arguments{
+\item{vcf}{VariantAnnotation object containing genotypes of samples.}
+
+\item{key}{Partial splatPop key data.frame.}
+}
+\value{
+matrix of gene mean expression levels WITHOUT eQTL effects.
+}
+\description{
+Gene mean expression levels are assigned to each gene for each pair randomly
+from a normal distribution parameterized using the mean and cv assigned to
+each gene in the key.
+}
diff --git a/man/splatPopSimulate.Rd b/man/splatPopSimulate.Rd
new file mode 100644
index 00000000..eb5a7d4b
--- /dev/null
+++ b/man/splatPopSimulate.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimulate}
+\alias{splatPopSimulate}
+\title{splatPop simulation}
+\usage{
+splatPopSimulate(
+  params = newSplatPopParams(nGenes = 1000),
+  vcf = mockVCF(),
+  method = c("single", "groups", "paths"),
+  gff = NULL,
+  key = NULL,
+  counts.only = FALSE,
+  verbose = TRUE,
+  ...
+)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{vcf}{VariantAnnotation object containing genotypes of samples.}
+
+\item{method}{which simulation method to use. Options are "single" which
+produces a single population, "groups" which produces distinct groups
+(eg. cell types), "paths" which selects cells from continuous
+trajectories (eg. differentiation processes).}
+
+\item{gff}{Either NULL or a data.frame object containing a GFF/GTF file.}
+
+\item{key}{Either NULL or a data.frame object containing a full or partial
+splatPop key.}
+
+\item{counts.only}{logical. Whether to save only counts in sce object.}
+
+\item{verbose}{logical. Whether to print progress messages.}
+
+\item{...}{any additional parameter settings to override what is provided in
+\code{params}.}
+}
+\value{
+SingleCellExperiment object containing simulated counts,
+intermediate values like the gene means simulated in `splatPopSimulateMeans`,
+and information about the differential expression and eQTL effects assigned
+to each gene.
+}
+\description{
+Simulate scRNA-seq count data using the splat model for a population of
+individuals with correlation structure.
+}
+\details{
+This functions is for simulating data in a single step. It consists of a
+call to \code{\link{splatPopSimulateMeans}}, which simulates a mean
+expression level per gene per sample, followed by a call to
+\code{\link{splatPopSimulateSC}}, which uses the splat model to simulate
+single-cell counts per individual. Please see the documentation for those
+functions for more details.
+}
+\seealso{
+\code{\link{splatPopSimulateMeans}}, \code{\link{splatPopSimulateSC}}
+}
diff --git a/man/splatPopSimulateMeans.Rd b/man/splatPopSimulateMeans.Rd
new file mode 100644
index 00000000..d584bace
--- /dev/null
+++ b/man/splatPopSimulateMeans.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimulateMeans}
+\alias{splatPopSimulateMeans}
+\title{splatPopSimulateMeans}
+\usage{
+splatPopSimulateMeans(
+  vcf = mockVCF(),
+  params = newSplatPopParams(nGenes = 1000),
+  verbose = TRUE,
+  key = NULL,
+  gff = NULL,
+  ...
+)
+}
+\arguments{
+\item{vcf}{VariantAnnotation object containing genotypes of samples.}
+
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{verbose}{logical. Whether to print progress messages.}
+
+\item{key}{Either FALSE or a data.frame object containing a full or partial
+splatPop key.}
+
+\item{gff}{Either NULL or a data.frame object containing a GFF/GTF file.}
+
+\item{...}{any additional parameter settings to override what is provided in
+\code{params}.}
+}
+\value{
+A list containing: `means` a matrix (or list of matrices if
+n.groups > 1) with the simulated mean gene expression value for each gene
+(row) and each sample (column) and `key` a data.frame with population
+information including eQTL and group effects.
+}
+\description{
+Simulate mean expression levels for all genes for all samples, with between
+sample correlation structure simulated with eQTL effects and with the option
+to simulate multiple groups (i.e. cell-types).
+}
+\details{
+SplatPopParams can be set in a variety of ways. 1. If
+not provided, default parameters are used. 2. Default parameters can be
+overridden by supplying desired parameters using \code{\link{setParams}}.
+3. Parameters can be estimated from real data of your choice using
+\code{\link{splatPopEstimate}}.
+
+`splatPopSimulateMeans` involves the following steps:
+\enumerate{
+    \item Load population key or generate random or GFF/GTF based key.
+    \item Format and subset genotype data from the VCF file.
+    \item If not in key, assign expression mean and variance to each gene.
+    \item If not in key, assign eGenes-eSNPs pairs and effect sizes.
+    \item If not in key and groups >1, assign subset of eQTL associations as
+    group-specific and assign DEG group effects.
+    \item Simulate mean gene expression matrix without eQTL effects
+    \item Quantile normalize by sample to fit single-cell expression
+    distribution as defined in `splatEstimate`.
+    \item Add quantile normalized gene mean and cv info the eQTL key.
+    \item Add eQTL effects to means matrix.
+}
+}
+\seealso{
+\code{\link{splatPopParseVCF}}, \code{\link{splatPopParseGenes}},
+\code{\link{splatPopAssignMeans}},
+\code{\link{splatPopQuantNorm}}, \code{\link{splatPopQuantNormKey}}
+\code{\link{splatPopeQTLEffects}}, \code{\link{splatPopGroupEffects}},
+\code{\link{splatPopSimMeans}}, \code{\link{splatPopSimEffects}},
+}
diff --git a/man/splatPopSimulateSC.Rd b/man/splatPopSimulateSC.Rd
new file mode 100644
index 00000000..1c706793
--- /dev/null
+++ b/man/splatPopSimulateSC.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimulateSC}
+\alias{splatPopSimulateSC}
+\title{splatPopSimulateSC}
+\usage{
+splatPopSimulateSC(
+  sim.means,
+  params,
+  key,
+  method = c("single", "groups", "paths"),
+  counts.only = FALSE,
+  verbose = TRUE,
+  ...
+)
+}
+\arguments{
+\item{sim.means}{Matrix or list of matrices of gene means for the population.
+Output from `splatPopSimulateMeans()`.}
+
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{key}{data.frame object containing a full or partial splatPop key.
+Output from `splatPopSimulateMeans()`.}
+
+\item{method}{which simulation method to use. Options are "single" which
+produces a single cell population for each sample, "groups" which
+produces distinct groups (eg. cell types) for each sample (note, this
+creates separate groups from those created in `popSimulate` with only
+DE effects), and "paths" which selects cells from continuous
+trajectories (eg. differentiation processes).}
+
+\item{counts.only}{logical. Whether to return only the counts.}
+
+\item{verbose}{logical. Whether to print progress messages.}
+
+\item{...}{any additional parameter settings to override what is provided in
+\code{params}.}
+}
+\value{
+SingleCellExperiment object containing simulated counts,
+intermediate values like the gene means simulated in `splatPopSimulateMeans`,
+and information about the differential expression and eQTL effects assigned
+to each gene.
+}
+\description{
+Simulate count data for a population from a fictional single-cell
+RNA-seq experiment using the Splat method.
+}
diff --git a/man/splatPopSimulateSample.Rd b/man/splatPopSimulateSample.Rd
new file mode 100644
index 00000000..ce1d64bc
--- /dev/null
+++ b/man/splatPopSimulateSample.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopSimulateSample}
+\alias{splatPopSimulateSample}
+\title{splatPopSimulateSample simulation}
+\usage{
+splatPopSimulateSample(
+  params = newSplatPopParams(),
+  method = c("single", "groups", "paths"),
+  counts.only = FALSE,
+  verbose = TRUE,
+  sample.means,
+  ...
+)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{method}{which simulation method to use. Options are "single" which
+produces a single population, "groups" which produces distinct groups
+(eg. cell types), "paths" which selects cells from continuous
+trajectories (eg. differentiation processes).}
+
+\item{counts.only}{logical. Whether to return only the counts.}
+
+\item{verbose}{logical. Whether to print progress messages.}
+
+\item{sample.means}{Gene means to use if running splatSimulatePop().}
+
+\item{...}{any additional parameter settings to override what is provided in
+\code{params}.}
+}
+\value{
+SingleCellExperiment object containing the simulated counts and
+intermediate values for one sample.
+}
+\description{
+Simulate count data for one sample from a fictional single-cell RNA-seq
+experiment using the Splat method.
+}
+\details{
+This function closely mirrors \code{\link{splatSimulate}}. The main
+difference is that it takes the means simulated by splatPopSimulateMeans
+instead of randomly sampling a mean for each gene. For details about this
+function see the documentation for \code{\link{splatSimulate}}.
+}
+\seealso{
+\code{\link{splatSimLibSizes}}, \code{\link{splatPopSimGeneMeans}},
+\code{\link{splatSimBatchEffects}}, \code{\link{splatSimBatchCellMeans}},
+\code{\link{splatSimDE}}, \code{\link{splatSimCellMeans}},
+\code{\link{splatSimBCVMeans}}, \code{\link{splatSimTrueCounts}},
+\code{\link{splatSimDropout}}, \code{\link{splatPopSimulateSC}}
+}
diff --git a/man/splatPopeQTLEffects.Rd b/man/splatPopeQTLEffects.Rd
new file mode 100644
index 00000000..bab995b5
--- /dev/null
+++ b/man/splatPopeQTLEffects.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatPop-simulate.R
+\name{splatPopeQTLEffects}
+\alias{splatPopeQTLEffects}
+\title{Assign eGenes-eSNPs pairs and effect sizes.}
+\usage{
+splatPopeQTLEffects(params, key, vcf)
+}
+\arguments{
+\item{params}{SplatPopParams object containing parameters for population
+scale simulations. See \code{\link{SplatPopParams}} for details.}
+
+\item{key}{Partial splatPop key data.frame.}
+
+\item{vcf}{VariantAnnotation object containing genotypes of samples.}
+}
+\value{
+The key updated with assigned eQTL effects.
+}
+\description{
+Randomly pairs N genes (eGene) a SNP (eSNP) within the window size
+(eqtl.dist) and assigns each pair an effect size sampled from a gamma
+distribution parameterized using the effect sizes from a real eQTL study.
+}
diff --git a/man/splatSimulate.Rd b/man/splatSimulate.Rd
index e74df3d2..e0c465e6 100644
--- a/man/splatSimulate.Rd
+++ b/man/splatSimulate.Rd
@@ -26,7 +26,7 @@ See \code{\link{SplatParams}} for details.}
 
 \item{method}{which simulation method to use. Options are "single" which
 produces a single population, "groups" which produces distinct groups
-(eg. cell types) or "paths" which selects cells from continuous
+(eg. cell types), or "paths" which selects cells from continuous
 trajectories (eg. differentiation processes).}
 
 \item{verbose}{logical. Whether to print progress messages.}
diff --git a/tests/testthat/test-SplatParams.R b/tests/testthat/test-SplatParams.R
index 371cb113..45ca9f0a 100644
--- a/tests/testthat/test-SplatParams.R
+++ b/tests/testthat/test-SplatParams.R
@@ -18,6 +18,8 @@ test_that("nGroups checks work", {
                  "nGroups cannot be set directly, set group.prob instead")
 })
 
+
+### These tests are also run in test-SplatPopParams.R, please update both
 test_that("path.from checks work", {
     pp <- setParams(params, group.prob = c(0.5, 0.5))
     pp <- setParamUnchecked(pp, "path.from", c(0, 1))
@@ -39,6 +41,7 @@ test_that("path.from checks work", {
     expect_error(validObject(pp), "path.from cannot contain cycles")
 })
 
+### These tests are also run in test-SplatPopParams.R, please update both
 test_that("dropout.type checks work", {
     expect_error(setParam(params, "dropout.type", "cell"),
                  "dropout.type cannot be set to 'cell'")
diff --git a/tests/testthat/test-SplatPopParams.R b/tests/testthat/test-SplatPopParams.R
new file mode 100644
index 00000000..f28cb510
--- /dev/null
+++ b/tests/testthat/test-SplatPopParams.R
@@ -0,0 +1,35 @@
+context("splatPopParams")
+
+if (requireNamespace("VariantAnnotation", quietly = TRUE) &&
+    requireNamespace("preprocessCore", quietly = TRUE)) {
+    params <- newSplatPopParams()
+}
+
+test_that("printing works", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    expect_output(show(params), "A Params object of class SplatPopParams")
+})
+
+test_that("nCells checks work", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    expect_error(setParam(params, "nCells", 1),
+                   "nCells cannot be set directly, set batchCells instead")
+})
+
+test_that("CV params checks work", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    expect_error(setParam(params, "pop.cv.param", data.frame(
+        start = c(0, 10), shape = c(10, 2), rate = c(7, 3))),
+                 "Need to set pop.cv.bins to length of pop.cv.param")
+
+})
+
+test_that("setParams order doesn't matter", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    expect_silent(setParams(params, eqtl.n = 10, eqtl.maf.max = 0.4))
+    expect_silent(setParams(params, eqtl.maf.max = 0.4, eqtl.n = 10))
+})
diff --git a/tests/testthat/test-splatPop-simulate.R b/tests/testthat/test-splatPop-simulate.R
new file mode 100644
index 00000000..17ea16b4
--- /dev/null
+++ b/tests/testthat/test-splatPop-simulate.R
@@ -0,0 +1,45 @@
+context("splatPopSimulate")
+
+if (requireNamespace("VariantAnnotation", quietly = TRUE) &&
+    requireNamespace("preprocessCore", quietly = TRUE)) {
+
+    set.seed(1)
+
+    n.samples <- 6
+    n.genes <- 10
+    vcf <- mockVCF(n.samples = n.samples, n.snps = 1000)
+
+    params <- setParams(newSplatPopParams(), eqtl.n = 10, nGenes = n.genes)
+}
+
+test_that("splatPopSimulate output is valid and works", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    expect_true(validObject(splatPopSimulate(params = params, vcf = vcf)))
+    pop <- splatPopSimulateMeans(params = params, vcf = vcf)
+    expect_false(any(is.na(pop$means)))
+    expect_false(any(sapply(pop$means, is.infinite)))
+    expect_equal(ncol(pop$means), n.samples)
+    expect_length(pop$key, 15) # Number of columns expected in splatPop key
+    expect_true(validObject(splatPopSimulateSC(sim.means = pop$means,
+                                               key = pop$key,
+                                               params = params)))
+})
+
+
+test_that("splatPopSimulate on multiple groups output is valid and works", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    params.g2 <- setParams(params, group.prob = c(0.5, 0.5), nGenes = n.genes)
+    eqtl <- splatPopSimulate(params = params.g2, vcf = vcf)
+    expect_true(validObject(eqtl))
+
+})
+
+test_that("splatPopSimulate can read genes from gff data.frame", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    gff <- mockGFF(n.genes = n.genes)
+    sim <- splatPopSimulateMeans(vcf = vcf, params = params, gff = gff)
+    expect_true(validObject(sim))
+})
diff --git a/tests/testthat/test-splatPopEstimate.R b/tests/testthat/test-splatPopEstimate.R
new file mode 100644
index 00000000..93f46bb4
--- /dev/null
+++ b/tests/testthat/test-splatPopEstimate.R
@@ -0,0 +1,47 @@
+context("splatPopEstimate")
+
+if (requireNamespace("VariantAnnotation", quietly = TRUE) &&
+    requireNamespace("preprocessCore", quietly = TRUE)) {
+
+    set.seed(1)
+
+    # Mock data
+    bulk.means <- mockBulkMatrix(n.genes=500, n.samples=100)
+    bulk.eqtl <- mockBulkeQTL(n.genes=500)
+
+    counts <- scater::mockSCE()
+}
+
+test_that("splatPopEstimate works", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    # Check separate functions
+    params <- splatPopEstimateMeanCV(newSplatPopParams(), bulk.means)
+    expect_true(validObject(params))
+    params <- splatPopEstimateEffectSize(newSplatPopParams(), bulk.eqtl)
+    expect_true(validObject(params))
+
+    # Check full function
+    params <- splatPopEstimate(means=bulk.means,
+                               eqtl=bulk.eqtl,
+                               counts=counts)
+    expect_true(validObject(params))
+})
+
+test_that("splatPopEstimate checks on input data", {
+    skip_if_not_installed("VariantAnnotation")
+    skip_if_not_installed("preprocessCore")
+    bulk.eqtl.bad <- bulk.eqtl
+    bulk.eqtl.bad$gene_id <- NULL
+    expect_error(splatPopEstimate(means=bulk.means,
+                                  eqtl=bulk.eqtl.bad,
+                                  counts=counts),
+        "Incorrect format for eqtl data.")
+
+    bulk.means.bad <- bulk.means
+    bulk.means.bad[, 1] <- NA
+    expect_error(splatPopEstimate(means=bulk.means.bad,
+                                  eqtl=bulk.eqtl,
+                                  counts=counts),
+        "Incorrect format or NAs present in gene.means.")
+})
diff --git a/vignettes/splatPop-logo-small.png b/vignettes/splatPop-logo-small.png
new file mode 100644
index 00000000..fb2bb404
Binary files /dev/null and b/vignettes/splatPop-logo-small.png differ
diff --git a/vignettes/splatPop-model.png b/vignettes/splatPop-model.png
new file mode 100644
index 00000000..4358117c
Binary files /dev/null and b/vignettes/splatPop-model.png differ
diff --git a/vignettes/splatPop.Rmd b/vignettes/splatPop.Rmd
new file mode 100644
index 00000000..04e6c493
--- /dev/null
+++ b/vignettes/splatPop.Rmd
@@ -0,0 +1,416 @@
+---
+title: "splatPop: simulating single-cell data for populations"
+author: "Christina Azodi"
+package: splatter
+date: "Last updated: 21 October 2020"
+output:
+    BiocStyle::html_document:
+        toc: true
+        toc_float: true
+vignette: >
+  %\VignetteIndexEntry{splatPop simulation}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r knitr-options, include = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+```
+
+```{r setup}
+suppressPackageStartupMessages({
+  library("splatter")
+  library("scater")
+  library("VariantAnnotation")
+  library("ggplot2")
+})
+```
+
+![splatPop logo](splatPop-logo-small.png)
+
+# Introduction
+
+splatPop is an extension of the splat model that allows you to simulate single
+cell count data for an entire population of individuals. Like with splat, these 
+simulations resemble real single-cell data because they use parameters estimated
+from empirical data. Provided with genotype information (VCF) for a population 
+as input, splatPop simulates gene counts for multiple cells for all individuals 
+in the population. Realistic population structure (the pattern of genetic 
+relatedness between individuals in the population) in the simulations is achieved 
+by modelling expression Quantitative Trait Loci (eQTL) effects, where the 
+expression of a gene is associated with the genotype of the individual at a 
+specific loci. Finally, splatPop allows for the simulation of complex datasets 
+with cells from multiple groups (e.g. cell types), cells along differentiation 
+trajectories, and cells from different batches. 
+
+# The splatPop model 
+
+The primary simulation function is `splatPopSimulate`, which runs through the 
+two main phases:
+
+1. `splatPopSimulateMeans`: the simulation of means for all genes for all
+individuals in the population.
+2. `splatPopSimulateSC`: the simulation of single-cell counts for all cells for 
+all genes for all individuals. 
+
+The second phase is essentially a wrapper around the original `splatSimulate()` 
+function, which is described in detail [here](splatter.html). The figure below
+describes the first phase. Input parameters that can be estimated from real data
+have double borders and are shaded by the type of data used (blue = single-cell
+counts, yellow = population scale bulk/sc-aggregated RNA-seq data, and green =
+eQTL mapping results). The final output (red) is a matrix of means for each
+gene and each individual that is used as input to the second phase. 
+
+![The splatPop model for estimating gene means.](splatPop-model.png)
+
+To get started with splatPop, you need genotype information for the population 
+you want to simulate (i.e. a VCF). Genotype information should be provided as a
+[VariantAnnotation object](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html). 
+A mock VariantAnnotation object can be produced using the `mockVCF()` function. 
+Here we simulate single-cell RNA-sequencing counts for 100 random genes for 6
+random samples:
+
+```{r quick-start}
+vcf <- mockVCF(n.samples = 6)
+
+sim <- splatPopSimulate(vcf = vcf, "nGenes" = 100)
+
+sim <- logNormCounts(sim)
+sim <- runPCA(sim, ncomponents = 10)
+plotPCA(sim, colour_by = "Sample")
+```
+
+# Detailed look into splatPop
+
+## Step 1: Parameter Estimation 
+
+The parameters used in splatPop have sensible default values, but can also be
+estimated from real data provided by the user. For example, gene mean and 
+variance levels are sampled from gamma distributions derived from real
+population scale RNA-seq data and eQTL effect sizes from a gamma distribution 
+derived from real eQTL mapping results. The default parameters were derived 
+from GTEx data (v7, thyroid tissue). However, they can 
+also be estimated from user provided data using `splatPopEstimate()`. You can
+also provide `splatPopEstimate()` with real single-cell RNA-sequencing data to 
+estimate single-cell parameters as in `splatEstimate()`.
+
+All parameters needed for splatPop simulations are stored in a 
+`SplatPopParams` object. In addition to the compartments in `SplatParams` 
+described in detail in the [Splat parameters vignette](splat_params.html) and
+the parameters that are set manually (described below), 
+`SplatPopParams` also contains the following parameters that can be estimated
+from real data: 
+
+* **Population parameters**
+    * `pop.mean.shape` - Shape parameter for mean expression from population
+    scale data.
+    * `pop.mean.rate` - Rate parameter for mean expression from population
+    scale data.
+    * `pop.cv.param` - Shape and rate parameters for the coefficient of
+    variation (cv) across individuals from the population scale data, binned by
+    mean expression.
+* **eQTL effect size parameters**
+    * `eqtl.ES.shape` - Shape parameter for eQTL effect sizes.
+    * `eqtl.ES.rate` - Rate parameter for eQTL effect sizes.
+
+Let's take a look at the default parameters...
+
+```{r default-SplatPopParams}
+params <- newSplatPopParams()
+params
+```
+
+This tells us we have "a `Params` object of class `SplatPopParams`" and shows
+the values of these parameters. As with `SplatParams`, the parameters that can 
+be estimated by `splatPopEstimate` are in parentheses, those that can't be 
+estimated are in brackets, and those that have been changed from their default
+are in ALL CAPS.
+
+For example, we can estimate new parameter values from user provided data...
+
+```{r eqtlEstimate}
+bulk.means <- mockBulkMatrix(n.genes=100, n.samples=100)
+bulk.eqtl <- mockBulkeQTL(n.genes=100)
+counts <- mockSCE()
+
+params.est <- splatPopEstimate(means = bulk.means,
+                               eqtl = bulk.eqtl,
+                               counts = counts)
+params.est
+```
+
+Note that `splatPopEstimate()` will only estimate new parameters if the data 
+required is provided. For example, if you want to simulate data using default 
+gene means and eQTL parameters, but from single-cell parameters estimated from
+your own real single-cell counts data, you could run `splatPopEstimate()` with 
+only the `counts` argument provided.
+
+## Step 2: Simulate gene means 
+
+The `splatPopSimulate()` function runs both phases of splatPop, however we can 
+run these two phases separately to highlight their unique functions. The 
+first phase is run using `splatPopSimulateMeans()`. 
+
+### Input data
+
+This function requires two pieces of input data: genotypes and genes. Mock
+genotype and gene data can  be provided using `mockVCF()` and `mockGFF()`,
+respectively. These mock functions generate random SNP and gene annotation data
+for chromosome 22. To simulate populations with realistic population structure,
+the user should provide real (or simulated) genotypes as a VCF file read in as a 
+`VariantAnnotation` object. 
+
+splatPop takes in information about what genes to simulate in three ways: 
+
+1. **GFF/GTF (-gff data.frame):** Provide a GFF/GTF file as a `data.frame`
+   object. splatPop will filter out all non-gene features (3rd column != gene).
+   This  method uses real gene names and locations, but will randomly assign
+   expression values and eQTL effects to these genes. 
+2. **Key (-key data.frame):** Provide a `data.frame` object including
+   information  about genes you want to simulate. This object must include the
+   gene's name (*geneID*), chromosome (*chromosome*), and location
+   (*geneMiddle*). With just  those columns, splatPop will function the same as
+   if a GFF was provided.  However, you can also use this object to specify
+   other information. For example, if you provide a desired mean (*meanSampled*)
+   and variance (*cvSampled*) for  each gene, splatPop will use these instead of 
+   randomly sampled values. Finally, if you provide the type (*eQTL.type*, e.g.
+   NA or global), SNP identifier (*eSNP.ID*), and effect size
+   (*eQTL.EffectSize*), splatPop will  simulate gene means with these eQTL
+   associations instead of generating eQTL  associations randomly. 
+3. **Randomly (-gff NULL -key NULL):** This option will call `mockGFF()` to 
+   generate a random GFF file for a specified chromosome. This is the default
+   option if neither `gff` or `key` is provided.
+
+### Control parameters
+
+In addition to the parameters estimated from real data, the `SplatPopParams` 
+object also includes control parameters that must be set by the user. The 
+following `SplatPopParams` control parameters can be changed using
+`setParams()`:
+
+* **Population parameters** 
+    * `similarity.scale` - Scaling factor for the population variance (cv) rate
+      parameter. Increasing this scaling factor increases the similarity between
+      individuals.
+* **eQTL Parameters**
+    * `eqtl.n` - Number (>1) or percent (<=1) of genes to assign with eQTL 
+      effects.
+    * `eqtl.dist` - Maximum distance (bp) between the center of a gene and 
+      possible eSNPs for that gene.
+    * `eqtl.maf.min` - Minimum Minor Allele Frequency (MAF) of eSNPs.
+    * `eqtl.maf.max` - Maximum MAF of eSNPs.
+    * `eqtl.group.specific` - Percent of eQTL effects to make group specific.
+      The number of groups is specified using the "group.prob" parameter.
+ * **Group specific parameters**   
+    * `nGroups` - Number of groups to simulate for each individual. 
+    * `group.prob` - Array of the proportion of cells that should be simulated
+      in each group. 
+
+In addition to the group specific eQTL effects, each group will have group 
+specific differential expression effects, which are not associated with a 
+genetic variant). These parameters are estimated from real single-cell data as
+described in [splatter](splatter.html).
+
+### Output
+
+The output of `splatPopSimulateMeans()` is a list containing:
+
+* `means` - a data.frame (or list of data.frames if `nGroups` > 1) with
+  simulated mean gene expression value for each gene (row) and each sample
+  (column).
+* `key` - a data.frame listing for all simulated genes: the assigned mean
+  and variance (before and after quantile normalization), the assigned eSNP
+  and its effect size and type (global/group specific), and other group effects.
+
+Note that when `splatPopSimulate()` is run, these to objects are contained in
+the output SingleCellExperiment object (details below). Let's look at a 
+snapshot of some simulated means and the corresponding key...
+
+```{r splatPopSimulateMeans}
+vcf <- mockVCF(n.samples = 6)
+gff <- mockGFF(n.genes = 100)
+
+sim.means <- splatPopSimulateMeans(vcf = vcf, gff = gff,
+                                   params = newSplatPopParams())
+
+round(sim.means$means[1:5, 1:6], digits = 2)
+
+print(sim.means$key[1:5, ], digits = 2)
+```
+
+### Other examples
+
+**Replicate a simulation by providing a gene key** 
+
+As described above, information about genes can also be provided in a data.frame
+using the `key` argument. If you provide `splatPopSimulateMeans()` with the key 
+output from a previous run, it will generate a new population with the same 
+properties, essentially creating a replicate. Here is a snapshot of such a 
+replicate using the key simulated above:
+
+```{r splatPopSimulateMeans-from-key}
+sim.means.rep2 <- splatPopSimulateMeans(vcf = vcf, key=sim.means$key,
+                                        params = newSplatPopParams())
+
+round(sim.means.rep2$means[1:5, 1:6], digits = 2)
+```
+
+**Use real population-scale bulk expression data** 
+
+An important step of `splatPopSimulate()` is the quantile normalization of
+simulated gene means for each sample to match a gamma distribution
+estimated from real single-cell RNA-seq data using `splatEstimate()` or 
+`splatPopEstimate()`. This step ensures that even if bulk sequencing data are
+used to estimate population parameters, the means output from
+`splatPopSimulateMeans()` will be distributed like a single-cell dataset.
+
+If you already have bulk expression data for a population, you can use this
+quantile normalization function directly on that data and use the output as
+input to `splatPopSimulateSC()`. Note that this will not simulate eQTL or group
+effects, just simulate single-cell counts using the bulk means  provided. 
+
+```{r quant-normalize-population-data}
+bulk.qnorm <- splatPopQuantNorm(newSplatPopParams(), bulk.means)
+round(bulk.qnorm[1:5, 1:5], 3)
+```
+
+## Step 3: Simulate single cell counts
+
+Finally, single cell level data is simulated using `splatPopSimulateSC()`. 
+Running this function on its own requires the `SplatPopParams` object, and the 
+two outputs from `splatPopSimulateMeans()`: the key and the simulated means 
+matrix (or list of matrices if nGroups > 1). The user can also provide 
+additional parameters for the single-cell simulation, for example how many 
+cells to simulate. 
+
+Looking at the output of `splatPopSimulateSC()` we see that it is a single
+`SingleCellExperiment` object with a row for each feature (gene) and
+a column for each cell. The simulated counts are accessed using `counts`.
+although it can also hold other expression measures such as FPKM or TPM.
+Information about each cell (e.g. sample, group, batch) is held in the
+`colData` and information about each gene (e.g. location, eQTL effects, and 
+other data from the splatPop key) is held in the `rowData`. 
+
+```{r eqtl-splatPopSimulateSC-simple-object}
+sim.sc <- splatPopSimulateSC(params=params, 
+                             key = sim.means$key,
+                             sim.means=sim.means$means, 
+                             batchCells=50)
+sim.sc
+```
+
+We can visualize these simulations using plotting functions from **scater** like
+plotPCA... 
+
+```{r eqtl-splatPopSimulateSC-simple-plots}
+sim.sc <- logNormCounts(sim.sc)
+sim.sc <- runPCA(sim.sc, ncomponents = 10)
+plotPCA(sim.sc, colour_by = "Sample")
+```
+
+## splatPop with group, batch, and path effects
+
+Using the same methods as splat, splatPop allows you to simulate single-cell
+counts for a population with group (e.g. cell-types), batch, and path (e.g.
+developmental series) effects. Group effects are simulated by
+`splatPopSimulateMeans()` and applied to the single cell simulations in 
+`splatPopSimulateSC()`. Path and batch effects are simulated by
+`splatPopSimulateSC()`. 
+
+### Simulating population scale single-cell data with group effects
+
+The population simulated above is an example of a dataset with a single cell
+type across many samples. However, splatPop also allows you to simulate
+population-scale data for a mixture of cell-types (i.e. groups).
+
+Two types of group effects are included: group-eQTL and group-differential 
+expression (DE) effects. The number of groups to simulate is set using the 
+*group.prob* parameter in `SplatPopParams`. The DE effects are  implemented as
+in the `splat` simulation, with the user able to control  `splatPopParam`
+parameters including *de.prob*, *de.downProb*, *de.facLoc*, and *de.facScale*.
+For group-specific eQTL, the proportion of eQTL to designate as group-specific
+eQTL is set using *eqtl.group.specific*. 
+
+When used to simulate single-cell data with group-specific effects,
+`splatSimulatePop`  also outputs:
+
+* **Cell information (`colData`)**
+    * `Group` - The group ID for each cell.
+
+```{r group-specific-eQTL-simulations}
+params.group <- newSplatPopParams(nGenes = 50,
+                                  batchCells = 40,
+                                  group.prob = c(0.5, 0.5))
+
+sim.sc.gr2 <- splatPopSimulate(vcf = vcf, params = params.group)
+
+sim.sc.gr2 <- logNormCounts(sim.sc.gr2)
+sim.sc.gr2 <- runPCA(sim.sc.gr2, ncomponents = 10)
+plotPCA(sim.sc.gr2, colour_by = "Group", shape_by = "Sample")
+```
+
+From the PCA plot above you can see that in this simulation the sample effect
+outweighs the group effect. But we can tune these parameters to change the 
+relative weight of these effects. First we can decrease the sample effect by
+increasing the similarity.scale parameter. And second we can increase the group
+effect by adjusting the *eqtl.group.specific* and *de* parameters: 
+
+```{r group-specific-eQTL-simulations-bigger}
+params.group <- newSplatPopParams(batchCells = 40,
+                                  nGenes = 50,
+                                  similarity.scale = 6,
+                                  eqtl.group.specific = 0.6,
+                                  de.prob = 0.5,
+                                  de.facLoc = 0.5, 
+                                  de.facScale = 0.4,
+                                  group.prob = c(0.5, 0.5))
+
+sim.sc.gr2 <- splatPopSimulate(vcf = vcf, params = params.group)
+
+sim.sc.gr2 <- logNormCounts(sim.sc.gr2)
+sim.sc.gr2 <- runPCA(sim.sc.gr2, ncomponents = 10)
+plotPCA(sim.sc.gr2, colour_by = "Group", shape_by = "Sample")
+```
+
+### Simulate SC data for population with path and batch effects
+
+Like splat, splatPop also allows you to simulate single-cell data with path or 
+batch effects using the `method` tag in `splatSimulatePop`. Note that you can
+also set *method = group*, but this is done automatically by setting the 
+*group.prob* parameter. For more information about these settings, see the
+[Splat parameters vignette](splat_params.html).
+
+#### Batch effects
+
+```{r simulate-population-with-batch-effects}
+params.batches <- newSplatPopParams(batchCells = c(20, 20),
+                                    nGenes = 50,
+                                    similarity.scale = 5,
+                                    batch.facLoc = 0.3,
+                                    batch.facScale = 0.3)
+
+sim.pop.batches <- splatPopSimulate(vcf = vcf, params = params.batches)
+sim.pop.batches <- logNormCounts(sim.pop.batches)
+sim.pop.batches <- runPCA(sim.pop.batches, ncomponents = 10)
+plotPCA(sim.pop.batches, colour_by = "Batch", shape_by = "Sample",
+        ncomponents = 5:6)
+
+```
+
+#### Path effects
+
+```{r simulate-population-with-path-effects}
+params.paths <- newSplatPopParams(batchCells = 40,
+                                  nGenes = 50,
+                                  similarity.scale = 6,
+                                  de.facLoc = 0.5,
+                                  de.facScale = 0.5,
+                                  de.prob = 0.5)
+
+sim.pop.paths <- splatPopSimulate(vcf = vcf, params = params.paths,
+                                  method = "paths")
+sim.pop.paths <- logNormCounts(sim.pop.paths)
+sim.pop.paths <- runPCA(sim.pop.paths, ncomponents = 10)
+plotPCA(sim.pop.paths, colour_by = "Step", shape_by = "Sample", 
+        ncomponents = 5:6)
+```
diff --git a/vignettes/splat_params.Rmd b/vignettes/splat_params.Rmd
index 90641fad..28c63699 100644
--- a/vignettes/splat_params.Rmd
+++ b/vignettes/splat_params.Rmd
@@ -1,6 +1,8 @@
 ---
 title: "Splat simulation parameters"
-date: "Last updated: 18 April 2019"
+author: "Luke Zappia"
+package: splatter
+date: "Last updated: 19 October 2020"
 output:
     BiocStyle::html_document:
         toc: true
@@ -11,11 +13,8 @@ vignette: >
   %\VignetteEncoding{UTF-8}
 ---
 
-```{r, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
-)
+```{r knitr-options, include = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
 ```
 
 This vignette describes the Splat simulation model and the parameters it uses
diff --git a/vignettes/splatter.Rmd b/vignettes/splatter.Rmd
index 260eafd6..5c5e37e4 100644
--- a/vignettes/splatter.Rmd
+++ b/vignettes/splatter.Rmd
@@ -1,10 +1,15 @@
 ---
 title: "Introduction to Splatter"
-author: "Luke Zappia"
-date: "`r Sys.Date()`"
+author:
+  - "Luke Zappia"
+  - "Belinda Phipson"
+  - "Alicia Oshlack"
+package: splatter
+date: "Last updated: 19 October 2020"
 output:
     BiocStyle::html_document:
         toc: true
+        toc_float: true
 vignette: >
   %\VignetteIndexEntry{An introduction to the Splatter package}
   %\VignetteEngine{knitr::rmarkdown}
@@ -12,10 +17,7 @@ vignette: >
 ---
 
 ```{r knitr-options, echo = FALSE, message = FALSE, warning = FALSE}
-# To render an HTML version that works nicely with github and web pages, do:
-# rmarkdown::render("vignettes/splatter.Rmd", "all")
-knitr::opts_chunk$set(fig.align = 'center', fig.width = 6, fig.height = 5,
-                      dev = 'png')
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
 
 # Use exact BSPARAM to avoid warnings
 options(BiocSingularParam.default = BiocSingular::ExactParam())
@@ -31,9 +33,7 @@ introduction to Splatter's functionality.
 
 Splatter can be installed from Bioconductor:
 
-```{r install, eval = FALSE}
-if (!requireNamespace("BiocManager", quietly=TRUE))
-    install.packages("BiocManager")
+```{r install-bioc, eval = FALSE}
 BiocManager::install("splatter")
 ```
 
@@ -52,10 +52,12 @@ Splatter. Here is an example a mock dataset generated with the `scater` package:
 
 ```{r quickstart}
 # Load package
-library(splatter)
+suppressPackageStartupMessages({
+  library(splatter)
+  library(scater)
+})
 
 # Create mock data
-library(scater)
 set.seed(1)
 sce <- mockSCE()
 
@@ -238,9 +240,9 @@ plotPCA(sim)
 (**NOTE:** Your values and plots may look different as the simulation is random
 and produces different results each time it is run.)
 
-For more details about the `SingleCellExperiment` object refer to the [vignette]
-[SCE-vignette]. For information about what you can do with `scater` refer to the
-`scater` documentation and [vignette][scater-vignette].
+For more details about the `SingleCellExperiment` object refer to the
+[vignette][SCE-vignette]. For information about what you can do with `scater`
+refer to the `scater` documentation and [vignette][scater-vignette].
 
 The `splatSimulate` function outputs the following additional information about
 the simulation:
@@ -323,6 +325,8 @@ to the final, differentiated, cell type. By setting additional parameters it is
 possible to simulate more complex process (for example multiple mature cell
 types from a single progenitor).
 
+
+
 ## Batch effects
 
 Another factor that is important in the analysis of any sequencing experiment
@@ -365,6 +369,17 @@ To simulate a single population use `splatSimulateSingle()` (equivalent to
 simulate paths use `splatSimulatePaths()` (equivalent to
 `splatSimulate(method = "paths")`).
 
+## splatPop: Simulating populations
+
+splatPop uses the splat model to simulate single cell count data across a
+population with relationship structure including expression quantitative loci
+(eQTL) effects. The major addition in splatPop is the `splatPopSimulateMeans` 
+function, which simulates gene means for each gene for each individual using
+parameters estimated from real data. These simulated means are then used as 
+input to`splatPopSimulateSC`, which is essentially a wrapper around the base 
+`splatSimulate`. For more information on generating population scale single-cell
+count data, see the [splatPop vignette](splatPop.html).
+
 # Other simulations
 
 As well as it's own Splat simulation method the Splatter package contains