Skip to content

Commit

Permalink
v0.5.3: minor updates to make the package cran-compliant.
Browse files Browse the repository at this point in the history
  • Loading branch information
jakob-wirbel committed Aug 23, 2024
1 parent 4f540ce commit f6a0708
Show file tree
Hide file tree
Showing 27 changed files with 330 additions and 3,345 deletions.
72 changes: 55 additions & 17 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,34 +1,72 @@
Package: SIMBA
Package: simbaR
Type: Package
Title: Simulation of Microbiome data with Biological Accuracy
Version: 0.5.1
Title: Simulation of Microbiome Data with Biological Accuracy
Version: 0.5.3
Authors@R: c(person("Jakob", "Wirbel", role = c("aut", "cre"),
email = "jakob.wirbel@embl.de",
email = "jakob.wirbel@gmail.com",
comment = c(ORCID = "0000-0002-4073-3562")),
person("Morgan", "Essex",
email = "morgan.essex@embl.de", role = c("aut")),
email = "essexmorgan@gmail.com", role = c("aut")),
person("Sofia", "Forslund",
email = "Sofia.Forslund@mdc-berlin.de", role = c("aut")),
person("Georg", "Zeller",
email = "zeller@embl.de", role = c("aut")))
Maintainer: Jakob Wirbel <jakob.wirbel@embl.de>
Description: Based on real data, the package simulates new metagenomic data
by re-sampling real samples and then implants differentially abundant
features. Additionally, the package can simulate confounding factors based
on metadata variables in the real data. The simulations are stored in an
`.h5` file, which is then the basis for downstream benchmarking,
involving i) reality assessment of the simulations, ii) testing for
differential abundance, and iii) evaluation of the output from
differential abundance testing methods.
email = "georg.zeller@gmail.com", role = c("aut")))
Maintainer: Jakob Wirbel <jakob.wirbel@gmail.com>
Description: Based on real microbiome data, the package simulates new metagenomic data by re-sampling real samples and then implants differentially abundant features. Additionally, the package can simulate confounding factors based on metadata variables in the real data. The simulations are stored in an `.h5` file, which is then the basis for downstream benchmarking, involving i) reality assessment of the simulations, ii) testing for differential abundance, and iii) evaluation of the output from differential abundance testing methods. See Wirbel, Essex, et al. for more information on benchmarking differential abundance testing methods <doi:10.1101/2022.05.09.491139>.
Depends:
R (>= 4.0.0),
rhdf5
Imports:
ineq,
labdsv,
vegan,
progress,
pROC,
matrixStats,
phyloseq,
nlme,
ggplot2,
tmvtnorm
Suggests:
knitr,
rmarkdown,
tidyverse,
patchwork
dplyr,
stringr,
purrr,
patchwork,
BiocStyle,
MASS,
MAST,
edgeR,
exactRankTests,
limma,
coin,
scde,
lmerTest,
fastANCOM,
SingleCellExperiment,
distinct,
corncob,
ANCOMBC,
metagenomeSeq,
LDM,
SIAMCAT,
DESeq2,
LinDA,
ALDEx2,
ZINQ,
ZIBSeq,
sparseDOSSA,
mvtnorm,
SpiecEasi,
TailRank,
GUniFrac,
car,
mixOmics,
lmtest
VignetteBuilder: knitr
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
25 changes: 25 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,30 @@ importFrom(matrixStats,rowQuantiles)
importFrom(matrixStats,rowVars)
importFrom(pROC,roc)
importFrom(progress,progress_bar)
importFrom(stats,anova)
importFrom(stats,as.formula)
importFrom(stats,coefficients)
importFrom(stats,cor)
importFrom(stats,cov2cor)
importFrom(stats,dnorm)
importFrom(stats,fisher.test)
importFrom(stats,formula)
importFrom(stats,kruskal.test)
importFrom(stats,ks.test)
importFrom(stats,lm)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,p.adjust)
importFrom(stats,pnorm)
importFrom(stats,qnbinom)
importFrom(stats,quantile)
importFrom(stats,residuals)
importFrom(stats,rgamma)
importFrom(stats,rmultinom)
importFrom(stats,rnbinom)
importFrom(stats,sd)
importFrom(stats,t.test)
importFrom(stats,var)
importFrom(stats,wilcox.test)
importFrom(vegan,adonis)
importFrom(vegan,vegdist)
37 changes: 27 additions & 10 deletions R/data_docs.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
#' @title Toy example
#' @title SIMBA example features
#'
#' @description Feature and metadata table from the dataset from Zeevi et al
#' (see \url{https://doi.org/10.1016/j.cell.2015.11.001}), containing microbial
#' features profiled with mOTUs2
#' (see \url{http://dx.doi.org/10.1038/s41467-019-08844-4}). For some
#' patients, more than one same has been collected.
#' @description Taxonomic profiles from the study by Zeevi et al. Cell 2014
#' (see \doi{10.1016/j.cell.2015.11.001}),
#' containing 1952 features from 1181 samples, created by the motus profiler.
#'
#' Mainly used for running the examples in the function documentation and in
#' the package vignette.
#' Mainly used for running the examples in the function documentation and
#' in the vignette.
#'
#' @encoding UTF-8
#'
#' @name toy_example
#' @name toy.feat
#'
#' @source \url{https://doi.org/10.1016/j.cell.2015.11.001}
#' @source \doi{10.1016/j.cell.2015.11.001}
#'
#' @docType data
#'
#' @keywords data
NULL

#' @title SIMBA example metadata
#'
#' @description Metadata from the study by Zeevi et al. Cell 2014
#' (see \doi{10.1016/j.cell.2015.11.001}).
#'
#' Mainly used for running the examples in the function documentation and
#' in the vignette.
#'
#' @encoding UTF-8
#'
#' @name toy.meta
#'
#' @source \doi{10.1016/j.cell.2015.11.001}
#'
#' @docType data
#'
#' @keywords data
NULL
9 changes: 6 additions & 3 deletions R/eval_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @description This function takes the results of the \link{apply.test}
#' function and calculates different evaluation measures (see Details).
#'
#' @usage eval.test(sim.location, group, res.mat, alpha = 0.05)
#' @usage eval.test(sim.location, group, res.mat, adjust='BH', alpha = 0.05)
#'
#' @param sim.location file name for the .h5 file containing the simulations
#'
Expand All @@ -18,6 +18,9 @@
#'
#' @param res.mat matrix, output from the \link{apply.test} function
#'
#' @param adjust character, indicate the multiple hypothesis testing to be
#' performed on the P-values, defaults to "BH"
#'
#' @param alpha numeric, significance threshold at which the test will be
#' evaluated, defaults to \code{0.05}
#'
Expand Down Expand Up @@ -105,9 +108,9 @@ check.eval.parameters <- function(sim.location, group,
if (mode(adjust)!='character' | length(adjust)!=1){
stop("Parameter 'adjust' should be a character and of length 1!")
}
if (!adjust %in% p.adjust.methods){
if (!adjust %in% stats::p.adjust.methods){
stop("Parameter 'adjust' must be one of these: c('",
paste(p.adjust.methods, collapse = "', '"), "')")
paste(stats::p.adjust.methods, collapse = "', '"), "')")
}
# get marker features
this <- h5read(file = sim.location, name = group)
Expand Down
4 changes: 2 additions & 2 deletions R/helper_ancom_2.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ ANCOM = function(feature_table, meta_data, struc_zero = NULL, main_var, p_adj_me
if (is.null(rand_formula) & is.null(adj_formula)) {
# Basic model
# Whether the main variable of interest has two levels or more?
if (length(unique(meta_data%>%pull(main_var))) == 2) {
if (length(unique(dplyr::pull(meta_data, main_var))) == 2) {
# Two levels: Wilcoxon rank-sum test
tfun = stats::wilcox.test
} else{
Expand Down Expand Up @@ -237,7 +237,7 @@ ANCOM = function(feature_table, meta_data, struc_zero = NULL, main_var, p_adj_me
out_comp = data.frame(taxa_id, W, row.names = NULL, check.names = FALSE)
# Declare a taxon to be differentially abundant based on the quantile of W statistic.
# We perform (n_taxa - 1) hypothesis testings on each taxon, so the maximum number of rejections is (n_taxa - 1).
out_comp = out_comp%>%mutate(detected_0.9 = ifelse(W > 0.9 * (n_taxa -1), TRUE, FALSE),
out_comp = dplyr::mutate(out_comp, detected_0.9 = ifelse(W > 0.9 * (n_taxa -1), TRUE, FALSE),
detected_0.8 = ifelse(W > 0.8 * (n_taxa -1), TRUE, FALSE),
detected_0.7 = ifelse(W > 0.7 * (n_taxa -1), TRUE, FALSE),
detected_0.6 = ifelse(W > 0.6 * (n_taxa -1), TRUE, FALSE))
Expand Down
10 changes: 6 additions & 4 deletions R/helper_clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ validate.original.data <- function(d.list, sim.method){
# features
feat.all <- do.call(cbind, feat.final)
# metadata
colnames.final <- reduce(colnames.meta, intersect)
colnames.final <- purrr::reduce(colnames.meta, intersect)
for (j in seq_along(meta.final)){
meta.final[[j]] <- meta.final[[j]][,colnames.final]
}
Expand Down Expand Up @@ -442,9 +442,11 @@ confounder.check <- function(feat, meta){
}

# create plot
ggplot(df.plot.all, aes(x=effect.size, y=-log10(p.val))) +
geom_point() +
facet_grid(~variable)
ggplot2::ggplot(df.plot.all,
ggplot2::aes(x=df.plot.all$effect.size,
y=-log10(df.plot.all$p.val))) +
ggplot2::geom_point() +
ggplot2::facet_grid(~variable)
}

# check simulation parameters
Expand Down
19 changes: 10 additions & 9 deletions R/helper_hawinkel.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ simulate.betabin <- function(feat, meta, sim.out, sim.params){
message("++ Only ", n.cores, " cores detected. It would be better to",
" have more cores available!")
}
res <- spiec.easi(t(feat), verbose=TRUE, pulsar.params=list(ncores=n.cores))
correlation <- getOptCov(res)
res <- SpiecEasi::spiec.easi(t(feat), verbose=TRUE, pulsar.params=list(ncores=n.cores))
correlation <- SpiecEasi::getOptCov(res)

# calculate rhos/phis
message("++ Starting to estimate parameters ",
Expand Down Expand Up @@ -149,8 +149,8 @@ simulate.negbin <- function(feat, meta, sim.out, sim.params){
message("++ Only ", n.cores, " cores detected. It would be better to",
" have more cores available!")
}
res <- spiec.easi(t(feat), verbose=TRUE, pulsar.params=list(ncores=n.cores))
correlation <- getOptCov(res)
res <- SpiecEasi::spiec.easi(t(feat), verbose=TRUE, pulsar.params=list(ncores=n.cores))
correlation <- SpiecEasi::getOptCov(res)
} else {
correlation <- NULL
}
Expand Down Expand Up @@ -323,7 +323,7 @@ simulate.markers.betabin <- function(pis, theta,
for(nRun in seq_along(sampleSizes)){
indSel <- (samSizeSeq[nRun] + 1L):samSizeSeq[nRun + 1L]
dmData[indSel, ] <- rmvbetabin(n=sampleSizes[nRun], pi=pis.all[, nRun],
libSize=libSizesOrig[indSel],
libSizes=libSizesOrig[indSel],
Sigma=correlation, theta = theta)
}

Expand Down Expand Up @@ -417,10 +417,11 @@ rmvnegbin <- function(n, mu, Sigma, ks, ...) {
if (dim(mu)[2] != dim(Sigma)[2])
stop("Sigma and mu dimensions don't match")
if (missing(ks)) {
ks <- unlist(lapply(1:length(SDs), function(i) .negbin_getK(mu[i], SDs[i])))
# ks <- unlist(lapply(1:length(SDs), function(i) .negbin_getK(mu[i], SDs[i])))
stop("Problem with this dataset!")
}
d <- dim(mu)[2]
normd <- rmvnorm(n, rep(0, d), Sigma = Cor) #The normal-to-anything framework
normd <- mvtnorm::rmvnorm(n, rep(0, d), Sigma = Cor) #The normal-to-anything framework
unif <- pnorm(normd)
data <- t(qnbinom(t(unif), mu = t(mu), size = ks, ...))
data <- fixInf(data)
Expand All @@ -431,7 +432,7 @@ rmvnegbin <- function(n, mu, Sigma, ks, ...) {
# on the Tailrank package
#' @keywords internal
qbetabin = function(p, N, u, v) {
pp <- cumsum(dbb(0:N, N, u, v))
pp <- cumsum(TailRank::dbb(0:N, N, u, v))
sapply(p, function(x) sum(pp < x, na.rm = TRUE))
}

Expand All @@ -450,7 +451,7 @@ rmvbetabin = function(n, pi, Sigma, theta, libSizes, ...) {
stop("No overdispersion parameter supplied")
}
d <- length(pi)
normd <- rmvnorm(n, rep(0, d), Sigma = Cor) #The normal-to-anything framework
normd <- mvtnorm::rmvnorm(n, rep(0, d), Sigma = Cor) #The normal-to-anything framework
unif <- pnorm(normd)
data <- mapply(unif, rep(pi, each = nrow(unif)), libSizes,
FUN = function(u, p, l) {
Expand Down
Loading

0 comments on commit f6a0708

Please sign in to comment.