diff --git a/NAMESPACE b/NAMESPACE index e9b900946..ee550dd9e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(addAlpha) export(addCluster) export(addContaminantQC) export(addDPCoA) +export(addDissimilarity) export(addDivergence) export(addDominant) export(addLDA) @@ -50,6 +51,7 @@ export(getBestDMNFit) export(getCrossAssociation) export(getDMN) export(getDPCoA) +export(getDissimilarity) export(getDominant) export(getExperimentCrossAssociation) export(getExperimentCrossCorrelation) @@ -149,6 +151,7 @@ exportMethods(addAlpha) exportMethods(addCCA) exportMethods(addCluster) exportMethods(addContaminantQC) +exportMethods(addDissimilarity) exportMethods(addDivergence) exportMethods(addDominant) exportMethods(addHierarchyTree) @@ -188,6 +191,7 @@ exportMethods(getCCA) exportMethods(getCrossAssociation) exportMethods(getDMN) exportMethods(getDPCoA) +exportMethods(getDissimilarity) exportMethods(getDominant) exportMethods(getExperimentCrossAssociation) exportMethods(getExperimentCrossCorrelation) @@ -236,6 +240,7 @@ exportMethods(rarefyAssay) exportMethods(relAbundanceCounts) exportMethods(relabundance) exportMethods(right_join) +exportMethods(runJSD) exportMethods(runOverlap) exportMethods(splitOn) exportMethods(subsampleCounts) @@ -344,9 +349,7 @@ importFrom(ape,collapse.singles) importFrom(ape,cophenetic.phylo) importFrom(ape,drop.tip) importFrom(ape,has.singles) -importFrom(ape,is.rooted) importFrom(ape,read.tree) -importFrom(ape,root) importFrom(bluster,clusterRows) importFrom(decontam,isContaminant) importFrom(decontam,isNotContaminant) @@ -399,6 +402,7 @@ importFrom(utils,read.table) importFrom(utils,unzip) importFrom(vegan,"sppscores<-") importFrom(vegan,anova.cca) +importFrom(vegan,avgdist) importFrom(vegan,betadisper) importFrom(vegan,monoMDS) importFrom(vegan,scores) diff --git a/R/addDissimilarity.R b/R/addDissimilarity.R new file mode 100644 index 000000000..3d7dd6e05 --- /dev/null +++ b/R/addDissimilarity.R @@ -0,0 +1,379 @@ +#' Calculate dissimilarities +#' +#' These functions are designed to calculate dissimilarities on data stored +#' within a +#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +#' object. For overlap, Unifrac, and Jensen-Shannon Divergence (JSD) +#' dissimilarities, the functions use mia internal functions, while for other +#' types of dissimilarities, they rely on \code{\link[vegan:vegdist]{vegdist}} +#' by default. +#' +#' @param x \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +#' or \code{matrix}. +#' +#' @param method \code{Character scalar}. Specifies which dissimilarity to +#' calculate. +#' +#' @param name \code{Character scalar}. The name to be used to store the result +#' in metadata of the output. (Default: \code{method}) +#' +#' @param assay.type \code{Character scalar}. Specifies which assay to use for +#' calculation. (Default: \code{"counts"}) +#' +#' @param niter The number of iterations performed. If \code{NULL}, +#' rarefaction is disabled. (Default: \code{NULL}) +#' +#' @param transposed \code{Logical scalar}. Specifies if x is transposed with +#' cells in rows. (Default: \code{FALSE}) +#' +#' @param tree.name (Unifrac) \code{Character scalar}. Specifies the name of the +#' tree from \code{rowTree(x)} that is used in calculation. Disabled when +#' \code{tree} is specified. (Default: \code{"phylo"}) +#' +#' @param tree (Unifrac) \code{phylo}. A phylogenetic tree used in calculation. +#' (Default: \code{NULL}) +#' +#' @param ... other arguments passed onto \code{\link[vegan:avgdist]{avgdist}}, +#' \code{\link[vegan:vegdist]{vegdist}}, or onto mia internal functions: +#' +#' \itemize{ +#' \item \code{sample}: The sampling depth in rarefaction. +#' (Default: \code{min(rowSums2(x))}) +#' +#' \item \code{dis.fun}: \code{Character scalar}. Specifies the dissimilarity +#' function to be used. +#' +#' \item \code{weighted}: (Unifrac) \code{Logical scalar}. Should use +#' weighted-Unifrac calculation? +#' Weighted-Unifrac takes into account the relative abundance of +#' species/taxa shared between samples, whereas unweighted-Unifrac only +#' considers presence/absence. Default is \code{FALSE}, meaning the +#' unweighted-Unifrac dissimilarity is calculated for all pairs of samples. +#' (Default: \code{FALSE}) +#' +#' \item \code{node.label} (Unifrac) \code{character vector}. Used only if +#' \code{x} is a matrix. Specifies links between rows/columns and tips of +#' \code{tree}. The length must equal the number of rows/columns of \code{x}. +#' Furthermore, all the node labs must be present in \code{tree}. +#' +#' \item \code{chunkSize}: (JSD) \code{Integer scalar}. Defines the size of +#' data send to the individual worker. Only has an effect, if \code{BPPARAM} +#' defines more than one worker. (Default: \code{nrow(x)}) +#' +#' \item \code{BPPARAM}: (JSD) +#' \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}. +#' Specifies whether the calculation should be parallelized. +#' +#' \item \code{detection}: (Overlap) \code{Numeric scalar}. +#' Defines detection threshold for absence/presence of features. Feature that +#' has abundance under threshold in either of samples, will be discarded when +#' evaluating overlap between samples. (Default: \code{0}) +#' } +#' +#' @return +#' \code{getDissimilarity} returns a sample-by-sample dissimilarity matrix. +#' +#' \code{addDissimilarity} returns \code{x} that includes dissimilarity matrix +#' in its metadata. +#' +#' @details +#' Overlap reflects similarity between sample-pairs. When overlap is +#' calculated using relative abundances, the higher the value the higher the +#' similarity is. When using relative abundances, overlap value 1 means that +#' all the abundances of features are equal between two samples, and 0 means +#' that samples have completely different relative abundances. +#' +#' Unifrac is calculated with \code{\link[rbiom:unifrac]{rbiom:unifrac()}}. +#' +#' If rarefaction is enabled, \code{\link[vegan:avgdist]{vegan:avgdist()}} is +#' utilized. +#' +#' @name getDissimilarity +#' +#' @author +#' For overlap implementation: +#' Leo Lahti and Tuomas Borman. Contact: \url{microbiome.github.io} +#' +#' For JSD implementation: +#' Susan Holmes \email{susan@@stat.stanford.edu}. +#' Adapted for phyloseq by Paul J. McMurdie. +#' Adapted for mia by Felix G.M. Ernst +#' +#' @seealso +#' \url{http://en.wikipedia.org/wiki/Jensen-Shannon_divergence} +#' +#' @references +#' For unifrac dissimilarity: \url{http://bmf.colorado.edu/unifrac/} +#' +#' See also additional descriptions of Unifrac in the following articles: +#' +#' Lozupone, Hamady and Knight, ``Unifrac - An Online Tool for Comparing +#' Microbial Community Diversity in a Phylogenetic Context.'', BMC +#' Bioinformatics 2006, 7:371 +#' +#' Lozupone, Hamady, Kelley and Knight, ``Quantitative and qualitative (beta) +#' diversity measures lead to different insights into factors that structure +#' microbial communities.'' Appl Environ Microbiol. 2007 +#' +#' Lozupone C, Knight R. ``Unifrac: a new phylogenetic method for comparing +#' microbial communities.'' Appl Environ Microbiol. 2005 71 (12):8228-35. +#' +#' For JSD dissimilarity: +#' Jensen-Shannon Divergence and Hilbert space embedding. +#' Bent Fuglede and Flemming Topsoe University of Copenhagen, +#' Department of Mathematics +#' \url{http://www.math.ku.dk/~topsoe/ISIT2004JSD.pdf} +#' +#' @export +#' +#' @examples +#' library(mia) +#' library(scater) +#' +#' # load dataset +#' data(GlobalPatterns) +#' tse <- GlobalPatterns +#' +#' ### Overlap dissimilarity +#' +#' tse <- addDissimilarity(tse, method = "overlap", detection = 0.25) +#' metadata(tse)[["overlap"]][1:6, 1:6] +#' +#' ### JSD dissimilarity +#' +#' tse <- addDissimilarity(tse, method = "jsd") +#' metadata(tse)[["jsd"]][1:6, 1:6] +#' +#' # Multi Dimensional Scaling applied to JSD dissimilarity matrix +#' tse <- runMDS(tse, FUN = getDissimilarity, method = "overlap", +#' assay.type = "counts") +#' metadata(tse)[["MDS"]][1:6, ] +#' +#' ### Unifrac dissimilarity +#' +#' res <- getDissimilarity(tse, method = "unifrac", weighted = FALSE) +#' dim(as.matrix((res))) +#' +#' tse <- addDissimilarity(tse, method = "unifrac", weighted = TRUE) +#' metadata(tse)[["unifrac"]][1:6, 1:6] +#' +NULL + +#' @rdname getDissimilarity +#' @export +setGeneric( + "addDissimilarity", signature = c("x"), function(x, method, ...) + standardGeneric("addDissimilarity")) + +#' @rdname getDissimilarity +#' @export +setMethod( + "addDissimilarity", signature = c(x = "SummarizedExperiment"), + function(x, method, name = method, ...){ + # + res <- getDissimilarity(x, method = method, ...) + # Add matrix to original SE + x <- .add_values_to_metadata(x, names = name, value = as.matrix(res)) + return(x) + } +) + +#' @rdname getDissimilarity +#' @export +setGeneric( + "getDissimilarity", signature = c("x"), function(x, method, ...) + standardGeneric("getDissimilarity")) + +#' @rdname getDissimilarity +#' @export +setMethod( + "getDissimilarity", signature = c(x = "SummarizedExperiment"), + function( + x, method, assay.type = "counts", niter = NULL, transposed = FALSE, + tree = NULL, ...){ + # Input checks + .check_assay_present(assay.type, x) + if( !.is_non_empty_string(method) ){ + stop("'method' must be a non-empty single character value", + call. = FALSE) + } + if( !.is_a_bool(transposed) ){ + stop("'na.rm' must be TRUE or FALSE.", call. = FALSE) + } + if( !(is.null(tree) || is(tree, "phylo")) ){ + stop("'tree' must be NULL or phylo.", call. = FALSE) + } + # + # Get arguments + mat <- assay(x, assay.type) + if( !transposed ){ + mat <- t(mat) + } + args <- c( + list(x = mat, method = method, tree = tree, niter = niter), + list(...)) + # Calculate dissimilarity based on matrix + mat <- do.call(getDissimilarity, args) + return(mat) + } +) + +#' @rdname getDissimilarity +#' @export +setMethod( + "getDissimilarity", signature = c(x = "TreeSummarizedExperiment"), + function( + x, method, assay.type = "counts", tree.name = "phylo", niter = NULL, + transposed = FALSE, tree = NULL, ...){ + # Input checks + .check_assay_present(assay.type, x) + if( !.is_non_empty_string(method) ){ + stop("'method' must be a non-empty single character value", + call. = FALSE) + } + if( !.is_a_bool(transposed) ){ + stop("'na.rm' must be TRUE or FALSE.", call. = FALSE) + } + if( !(is.null(tree) || is(tree, "phylo")) ){ + stop("'tree' must be NULL or phylo.", call. = FALSE) + } + # + # Retrieve tree arguments from TreeSE object, if method is unifrac and + # user did not specify external tree + if( method %in% c("unifrac") && is.null(tree) ){ + args <- .get_tree_args( + x, method = method, assay.type = assay.type, tree.name = tree.name, + transposed = transposed, ...) + } else{ + # For other cases, do not fetch tree data from TreeSE + mat <- assay(x, assay.type) + if( !transposed ){ + mat <- t(mat) + } + args <- c( + list(x = mat, method = method, tree = tree, niter = niter), + list(...)) + } + # Calculate dissimilarity + mat <- do.call(getDissimilarity, args) + return(mat) + } +) + +#' @rdname getDissimilarity +#' @export +setMethod( + "getDissimilarity", signature = c(x = "ANY"), function( + x, method, niter = NULL, tree = NULL, ...){ + # Input check + if( !.is_a_string(method) ){ + stop("'method' must be a single character value.", call. = FALSE) + } + if( !(is.null(tree) || is(tree, "phylo")) ){ + stop("'tree' must be NULL or phylo.", call. = FALSE) + } + # + # Calculate dissimilarity + mat <- .calculate_dissimilarity( + mat = x, method = method, niter = niter, tree = tree, ...) + return(mat) + } +) + +# This function chooses right method and calculates dissimilarity matrix. +#' @importFrom vegan vegdist avgdist +.calculate_dissimilarity <- function( + mat, method, niter, dis.fun = distfun, distfun = NULL, + sample = min(rowSums2(mat)), ...){ + # input check + if( !(is.null(dis.fun) || is.function(dis.fun)) ){ + stop("'dis.fun' must be NULL or a function.", call. = FALSE) + } + if( !(is.null(niter) || .is_an_integer(niter)) ){ + stop("'niter' must be NULL or an integer.", call. = FALSE) + } + # sample is only used when niter is specified + if( !is.null(niter) && !.is_an_integer(sample) ){ + stop("'sample' must be an integer.", call. = FALSE) + } + # + # If the dissimilarity function is not specified, get default choice + if( is.null(dis.fun) ){ + if( method %in% c("overlap") ){ + dis.fun <- .get_overlap + } else if( method %in% c("unifrac") ){ + dis.fun <- .get_unifrac + } else if( method %in% c("jsd") ){ + dis.fun <- .get_jsd + } else{ + dis.fun <- vegdist + } + } + # Initialize an argument list + args <- c(list(mat), list(...)) + # If rarefaction is specified, calculate dissimilarity with vegan::avgdist + # function that utilizes the specified dissimilarity function. Otherwise, + # call the specified function directly. + if( !is.null(niter) ){ + # Remove arguments that will overlap with arguments added below + args <- args[ !names(args) %in% c("dmethod", "iterations") ] + # Add arguments specific for avgdist + args <- c(args, list( + dmethod = method, iterations = niter, sample = sample, + distfun = dis.fun)) + # Calculate dissimilarities + res <- do.call(avgdist, args) + } else{ + args <- c(args, list(method = method)) + res <- do.call(dis.fun, args) + } + return(res) +} + +# If user want to calculate unifrac dissimilarity and user wants to use tree +# data from TreeSE, this function is used to retrieve the data. +.get_tree_args <- function( + x, method, assay.type = "counts", tree.name = "phylo", + transposed = FALSE, ...){ + # Get functions and parameters based on direction + tree_present_FUN <- if (transposed) .check_colTree_present + else .check_rowTree_present + tree_FUN <- if (transposed) colTree else rowTree + links_FUN <- if (transposed) colLinks else rowLinks + margin_name <- if (transposed) "col" else "row" + # Check tree.name + tree_present_FUN(tree.name, x) + # + # Select only those features/samples that are in the tree + links <- links_FUN(x) + present_in_tree <- links[, "whichTree"] == tree.name + if( any(!present_in_tree) ){ + warning( + "Not all ", margin_name, "s were present in the ", margin_name, + "Tree specified by 'tree.name'. 'x' is subsetted.", + call. = FALSE) + # Subset the data + if( transposed ){ + x <- x[, present_in_tree] + } else{ + x <- x[present_in_tree, ] + } + } + # Get tree + tree <- tree_FUN(x, tree.name) + # Get links and take only nodeLabs + links <- links_FUN(x) + links <- links[ , "nodeLab"] + node.label <- links + # Get assay. By default, dissimilarity between samples is calculated. In + # dissimilarity functions, features must be in columns and samples in rows + # in this case. + mat <- assay(x, assay.type) + if( !transposed ){ + mat <- t(mat) + } + # Create an arument list that includes matrix, and tree-related parameters. + args <- list(x = mat, method = method, tree = tree, node.label = node.label) + args <- c(args, list(...)) + return(args) +} diff --git a/R/calculateDistance.R b/R/calculateDistance.R deleted file mode 100644 index d23f196ae..000000000 --- a/R/calculateDistance.R +++ /dev/null @@ -1,6 +0,0 @@ -# calculateDistance function is removed. This function is left for other functions -# to use -.calculate_distance <- function(mat, FUN = stats::dist, ...){ - # Distance between all samples against all samples - do.call(FUN, c(list(mat),list(...))) -} diff --git a/R/calculateJSD.R b/R/calculateJSD.R index 3b2fd13dd..19cb08413 100644 --- a/R/calculateJSD.R +++ b/R/calculateJSD.R @@ -1,97 +1,3 @@ -#' Calculate the Jensen-Shannon Divergence -#' -#' This function calculates the Jensen-Shannon Divergence (JSD) in a -#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -#' object. -#' -#' @param x a numeric matrix with samples as rows or a -#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -#' object. -#' -#' @param assay.type \code{Character scalar}. Specifies the name of the -#' assay used in calculation. (Default: \code{"counts"}) -#' -#' @param exprs_values Deprecated. Use \code{assay.type} instead. -#' -#' @param assay_name Deprecated. Use \code{assay.type} instead. -#' -#' @param chunkSize \code{Integer scalar}. Defines the size of data send -#' to the individual worker. Only has an effect, if \code{BPPARAM} defines -#' more than one worker. (Default: \code{nrow(x)}) -#' -#' @param BPPARAM A -#' \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}} -#' object specifying whether the calculation should be parallelized. -#' -#' @param transposed \code{Logical scalar}. Is x transposed with samples in rows? -#' (Default: \code{FALSE}) -#' -#' @param ... optional arguments not used. -#' -#' @return a sample-by-sample distance matrix, suitable for NMDS, etc. -#' -#' @seealso -#' \url{http://en.wikipedia.org/wiki/Jensen-Shannon_divergence} -#' -#' @references -#' Jensen-Shannon Divergence and Hilbert space embedding. -#' Bent Fuglede and Flemming Topsoe University of Copenhagen, -#' Department of Mathematics -#' \url{http://www.math.ku.dk/~topsoe/ISIT2004JSD.pdf} -#' -#' @name calculateJSD -#' -#' @author -#' Susan Holmes \email{susan@@stat.stanford.edu}. -#' Adapted for phyloseq by Paul J. McMurdie. -#' Adapted for mia by Felix G.M. Ernst -#' -#' @export -#' -#' @examples -#' data(enterotype) -#' library(scater) -#' -#' -#' jsd <- calculateJSD(enterotype) -#' class(jsd) -#' head(jsd) -#' -#' enterotype <- runMDS(enterotype, FUN = calculateJSD, name = "JSD", -#' exprs_values = "counts") -#' head(reducedDim(enterotype)) -#' head(attr(reducedDim(enterotype),"eig")) -#' attr(reducedDim(enterotype),"GOF") -NULL - -setGeneric("calculateJSD", signature = c("x"), - function(x, ...) - standardGeneric("calculateJSD")) - -#' @rdname calculateJSD -#' @export -setMethod("calculateJSD", signature = c(x = "ANY"), - function(x, ...){ - .calculate_distance(x, FUN = runJSD, ...) - } -) - -#' @rdname calculateJSD -#' -#' @importFrom SummarizedExperiment assay -#' -#' @export -setMethod("calculateJSD", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = assay_name, assay_name = exprs_values, - exprs_values = "counts", transposed = FALSE, ...){ - mat <- assay(x, assay.type) - if(!transposed){ - mat <- t(mat) - } - calculateJSD(mat, ...) - } -) - # written by Susan Holmes \email{susan@@stat.stanford.edu}. # Adapted for phyloseq by Paul J. McMurdie. # Adapted for mia by Felix G.M. Ernst @@ -113,15 +19,12 @@ setMethod("calculateJSD", signature = c(x = "SummarizedExperiment"), return(rowSums(d, na.rm = TRUE)) } -#' @rdname calculateJSD -#' #' @importFrom utils combn #' @importFrom stats as.dist #' @importFrom BiocParallel SerialParam register bplapply bpisup bpstart bpstop #' @importFrom DelayedArray getAutoBPPARAM setAutoBPPARAM -#' -#' @export -runJSD <- function(x, BPPARAM = SerialParam(), chunkSize = nrow(x)){ +#' +.get_jsd <- function(x, BPPARAM = SerialParam(), chunkSize = nrow(x), ...){ # input check if(is.null(rownames(x))){ rownames(x) <- seq_len(nrow(x)) diff --git a/R/calculateOverlap.R b/R/calculateOverlap.R index b16ccf6fc..1d264708e 100644 --- a/R/calculateOverlap.R +++ b/R/calculateOverlap.R @@ -1,133 +1,37 @@ -#' Estimate overlap -#' -#' This function calculates overlap for all sample-pairs -#' in a \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -#' object. -#' -#' @inheritParams calculateJSD -#' -#' @param detection \code{Numeric scalar}. Defines detection threshold for -#' absence/presence of features. Feature that has abundance under threshold in -#' either of samples, will be discarded when evaluating overlap between samples. -#' (Default: \code{0}) -#' -#' @param ... Optional arguments not used. -#' -#' @return calculateOverlap returns sample-by-sample distance matrix. -#' runOverlap returns \code{x} that includes overlap matrix in its -#' reducedDim. -#' -#' @details This function calculates overlap between all the sample-pairs. Overlap -#' reflects similarity between sample-pairs. -#' -#' When overlap is calculated using relative abundances, the higher the value the -#' higher the similarity is, When using relative abundances, overlap value 1 means that -#' all the abundances of features are equal between two samples, and 0 means that -#' samples have completely different relative abundances. -#' -#' @seealso -#' \code{\link[mia:calculateJSD]{calculateJSD}} -#' \code{\link[mia:calculateUnifrac]{calculateUnifrac}} -#' -#' -#' @name calculateOverlap -#' @export -#' -#' @author Leo Lahti and Tuomas Borman. Contact: \url{microbiome.github.io} -#' -#' @examples -#' data(esophagus) -#' tse <- esophagus -#' tse <- transformAssay(tse, method = "relabundance") -#' overlap <- calculateOverlap(tse, assay_name = "relabundance") -#' overlap -#' -#' # Store result to reducedDim -#' tse <- runOverlap(tse, assay.type = "relabundance", name = "overlap_between_samples") -#' head(reducedDims(tse)$overlap_between_samples) -#' -NULL - -#' @rdname calculateOverlap -#' @export -setGeneric("calculateOverlap", signature = c("x"), - function(x, assay.type = assay_name, assay_name = "counts", - detection = 0, ...) - standardGeneric("calculateOverlap")) - -#' @rdname calculateOverlap -#' @export -setMethod("calculateOverlap", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = assay_name, assay_name = "counts", - detection = 0, ...){ - ############################# INPUT CHECK ############################## - # Check assay.type - .check_assay_present(assay.type, x) - # Check detection - if (!.is_numeric_string(detection)) { - stop("'detection' must be a single numeric value or coercible to ", - "one.", - call. = FALSE) - } - detection <- as.numeric(detection) - ########################### INPUT CHECK END ############################ - # Get assay - assay <- assay(x, assay.type) - - # All the sample pairs - sample_pairs <- as.matrix(expand.grid(colnames(x), colnames(x))) +.get_overlap <- function(x, detection = 0, ...){ + ############################# INPUT CHECK ############################## + # Check detection + if (!.is_numeric_string(detection)) { + stop("'detection' must be a single numeric value or coercible to ", + "one.", + call. = FALSE) + } + detection <- as.numeric(detection) + ########################### INPUT CHECK END ############################ + x <- t(x) + # All the sample pairs + sample_pairs <- as.matrix(expand.grid(colnames(x), colnames(x))) - # Loop through all sample pairs - result <- apply(sample_pairs, 1, FUN = function(sample_pair){ - # Get samples - sample1 <- assay[ , sample_pair[1]] - sample2 <- assay[ , sample_pair[2]] - # Calculate overlap - temp_result <- .calculate_overlap(sample1, - sample2, - detection) + # Loop through all sample pairs + result <- apply(sample_pairs, 1, FUN = function(sample_pair){ + # Get samples + sample1 <- x[ , sample_pair[1]] + sample2 <- x[ , sample_pair[2]] + # Calculate overlap + temp_result <- .calculate_overlap(sample1, sample2, detection) }) - # Create a matrix from result vector and give name to rownames and colnames - result <- matrix(result, ncol = ncol(assay)) - colnames(result) <- colnames(assay) - rownames(result) <- colnames(assay) + # Create a matrix from result vector and give name to rownames and colnames + result <- matrix(result, ncol = ncol(x)) + colnames(result) <- colnames(x) + rownames(result) <- colnames(x) - # Convert into distances - result <- stats::as.dist(result) - return(result) - } -) + # Convert into distances + result <- stats::as.dist(result) + return(result) +} -#' @rdname calculateOverlap -#' @export -setGeneric("runOverlap", signature = c("x"), - function(x, ...) - standardGeneric("runOverlap")) -#' @rdname calculateOverlap -#' -#' @param name \code{Character scalar}. Specifies the name of overlap matrix that -#' is stored in reducedDim(x). (Default: \code{"overlap"}) -#' -#' @export -#' @importFrom SingleCellExperiment reducedDim<- -setMethod("runOverlap", signature = c(x = "SummarizedExperiment"), - function(x, name = "overlap", ...){ - # Check name - if(!.is_non_empty_string(name)){ - stop("'name' must be a non-empty single character value.", - call. = FALSE) - } - # Calculate overlap - mat <- calculateOverlap(x, ...) - # Convert it into matrix so that nrow equals number of samples - mat <- as.matrix(mat) - # Store it to reducedDim - reducedDim(x, name) <- mat - return(x) - } -) ################################ HELP FUNCTIONS ################################ .calculate_overlap <- function (x, y, detection) { diff --git a/R/calculateUnifrac.R b/R/calculateUnifrac.R index 898ab1f81..4bcb621e5 100644 --- a/R/calculateUnifrac.R +++ b/R/calculateUnifrac.R @@ -1,180 +1,14 @@ -#' Calculate weighted or unweighted (Fast) Unifrac distance -#' -#' This function calculates the Unifrac distance for all sample-pairs -#' in a \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -#' object. The function utilizes \code{\link[rbiom:unifrac]{rbiom:unifrac()}}. -#' -#' Please note that if \code{calculateUnifrac} is used as a \code{FUN} for -#' \code{runMDS}, the argument \code{ntop} has to be set to \code{nrow(x)}. -#' -#' @inheritParams calculateJSD -#' -#' @details -#' Please note that \code{runUnifrac} expects a matrix with samples per row -#' and not per column. This is implemented to be compatible with other -#' distance calculations such as \code{\link[stats:dist]{dist}} as much as -#' possible. -#' -#' @param tree if \code{x} is a matrix, a -#' \code{\link[TreeSummarizedExperiment:phylo]{phylo}} object matching the -#' matrix. This means that the phylo object and the columns should relate -#' to the same type of features (aka. microorganisms). -#' -#' @param node.label if \code{x} is a matrix, -#' a \code{character} vector specifying links between rows/columns and tips of \code{tree}. -#' The length must equal the number of rows/columns of \code{x}. Furthermore, all the -#' node labs must be present in \code{tree}. -#' -#' @param nodeLab Deprecated. Use \code{node.label} instead. -#' -#' @param tree.name \code{Character scalar}. Specifies the name of the -#' tree used in calculation. (Default: \code{"phylo"}) -#' -#' @param tree_name Deprecated. Use \code{tree.name} instead. -#' -#' @param weighted \code{Logical scalar}. Should use weighted-Unifrac -#' calculation? Weighted-Unifrac takes into account the relative abundance of -#' species/taxa shared between samples, whereas unweighted-Unifrac only -#' considers presence/absence. Default is \code{FALSE}, meaning the -#' unweighted-Unifrac distance is calculated for all pairs of samples. -#' (Default: \code{FALSE}) -#' -#' @param ... optional arguments not used. -#' -#' @return a sample-by-sample distance matrix, suitable for NMDS, etc. -#' -#' @references -#' \url{http://bmf.colorado.edu/unifrac/} -#' -#' See also additional descriptions of Unifrac in the following articles: -#' -#' Lozupone, Hamady and Knight, ``Unifrac - An Online Tool for Comparing -#' Microbial Community Diversity in a Phylogenetic Context.'', BMC -#' Bioinformatics 2006, 7:371 -#' -#' Lozupone, Hamady, Kelley and Knight, ``Quantitative and qualitative (beta) -#' diversity measures lead to different insights into factors that structure -#' microbial communities.'' Appl Environ Microbiol. 2007 -#' -#' Lozupone C, Knight R. ``Unifrac: a new phylogenetic method for comparing -#' microbial communities.'' Appl Environ Microbiol. 2005 71 (12):8228-35. -#' -#' @name calculateUnifrac -#' -#' @export -#' -#' @examples -#' data(esophagus) -#' library(scater) -#' calculateUnifrac(esophagus, weighted = FALSE) -#' calculateUnifrac(esophagus, weighted = TRUE) -#' # for using calculateUnifrac in conjunction with runMDS the tree argument -#' # has to be given separately. In addition, subsetting using ntop must -#' # be disabled -#' esophagus <- runMDS(esophagus, FUN = calculateUnifrac, name = "Unifrac", -#' tree = rowTree(esophagus), -#' assay.type = "counts", -#' ntop = nrow(esophagus)) -#' reducedDim(esophagus) -NULL - -#' @rdname calculateUnifrac -#' @export -setGeneric("calculateUnifrac", signature = c("x", "tree"), - function(x, tree, ... ) - standardGeneric("calculateUnifrac")) - -#' @rdname calculateUnifrac -#' @export -setMethod("calculateUnifrac", signature = c(x = "ANY", tree = "phylo"), - function(x, tree, weighted = FALSE, ...){ - if(is(x,"SummarizedExperiment")){ - stop("When providing a 'tree', please provide a matrix-like as 'x'", - " and not a 'SummarizedExperiment' object. Please consider ", - "combining both into a 'TreeSummarizedExperiment' object.", - call. = FALSE) - } - .calculate_distance(x, FUN = runUnifrac, tree = tree, - weighted = weighted, ...) - } -) - -#' @rdname calculateUnifrac -#' -#' @importFrom SummarizedExperiment assay -#' -#' @export -setMethod("calculateUnifrac", - signature = c(x = "TreeSummarizedExperiment", - tree = "missing"), - function(x, assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", - tree.name = tree_name, tree_name = "phylo", transposed = FALSE, ...){ - # Check transposed - if( !.is_a_bool(transposed) ){ - stop("'transposed' must be TRUE or FALSE.", call. = FALSE) - } - # Get functions and parameters based on direction - tree_present_FUN <- if (transposed) .check_colTree_present - else .check_rowTree_present - tree_FUN <- if (transposed) colTree else rowTree - links_FUN <- if (transposed) colLinks else rowLinks - margin_name <- if (transposed) "col" else "row" - # Check assay.type - .check_assay_present(assay.type, x) - # Check tree.name - tree_present_FUN(tree.name, x) - # - # Select only those features/samples that are in the tree - links <- links_FUN(x) - present_in_tree <- links[, "whichTree"] == tree.name - if( any(!present_in_tree) ){ - warning( - "Not all ", margin_name, "s were present in the ", margin_name, - "Tree specified by 'tree.name'. 'x' is subsetted.", - call. = FALSE) - # Subset the data - if( transposed ){ - x <- x[, present_in_tree] - } else{ - x <- x[present_in_tree, ] - } - } - # Get assay and transpose it if specified. Features must be in columns - # and samples in rows. - mat <- assay(x, assay.type) - if( !transposed ){ - mat <- t(mat) - } - # Get tree - tree <- tree_FUN(x, tree.name) - # Get links and take only nodeLabs - links <- links_FUN(x) - links <- links[ , "nodeLab" ] - # Calculate unifrac - res <- calculateUnifrac(mat, tree = tree, node.label = links, ...) - return(res) - } -) - -################################################################################ -#' @rdname calculateUnifrac -#' #' @importFrom ape drop.tip #' @importFrom rbiom unifrac -#' @export -runUnifrac <- function( +.get_unifrac <- function( x, tree, weighted = FALSE, node.label = nodeLab, nodeLab = NULL, ...){ + # Transpose the matrix so that the orientation is the same as in other + # dissimilatity methods + x <- t(x) # Check x if( !is.matrix(as.matrix(x)) ){ stop("'x' must be a matrix", call. = FALSE) } - # x has samples as row. Therefore transpose. This benchmarks faster than - # converting the function to work with the input matrix as is - x <- try(t(x), silent = TRUE) - if(is(x,"try-error")){ - stop("The input to 'runUnifrac' must be a matrix-like object: ", - as.character(x), call. = FALSE) - } # input check if(!.is_a_bool(weighted)){ stop("'weighted' must be TRUE or FALSE.", call. = FALSE) @@ -223,14 +57,7 @@ runUnifrac <- function( # Merge assay so that each row represent single tip. It might be that # multiple rows are linked to single tip. x <- .merge_assay_by_rows(x, node.label, ...) - # Remove those tips that are not present in the data - if( any(!tree$tip.label %in% rownames(x)) ){ - tree <- drop.tip( - tree, tree$tip.label[!tree$tip.label %in% rownames(x)]) - warning( - "The tree is pruned so that tips that cannot be found from ", - "the abundance matrix are removed.", call. = FALSE) - } + # Calculate unifrac. Use implementation from rbiom package res <- unifrac(x, tree = tree, weighted = weighted) return(res) @@ -253,20 +80,3 @@ runUnifrac <- function( x <- x[ node.label, ] return(x) } - -#' @importFrom ape is.rooted root -.norm_tree_to_be_rooted <- function(tree, names){ - if( !is.rooted(tree) ){ - randoroot <- sample(names, 1) - warning("Randomly assigning root as -- ", randoroot, " -- in the", - " phylogenetic tree in the data you provided.", call. = FALSE) - tree <- root(phy = tree, outgroup = randoroot, - resolve.root = TRUE, interactive = FALSE) - if( !is.rooted(tree) ){ - stop("Problem automatically rooting tree. Make sure your tree ", - "is rooted before attempting Unifrac calculation. See ", - "?ape::root", call. = FALSE) - } - } - tree -} diff --git a/R/decontam.R b/R/decontam.R index 327accfb1..ce48b532e 100644 --- a/R/decontam.R +++ b/R/decontam.R @@ -5,7 +5,8 @@ #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} #' objects. #' -#' @inheritParams calculateJSD +#' @inheritParams getDissimilarity +#' @inheritParams getDominant #' #' @param seqtab,x #' a \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} diff --git a/R/deprecate.R b/R/deprecate.R index d39d26331..ee51c26aa 100644 --- a/R/deprecate.R +++ b/R/deprecate.R @@ -4,6 +4,10 @@ #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} #' object. #' @param value a matrix to store as the \sQuote{relabundance} assay +#' +#' @param mat An abundance matrix +#' +#' @param tree A phylogenetic tree #' #' @param ... Additional parameters. See dedicated function. #' @@ -1179,3 +1183,98 @@ setReplaceMethod("relabundance", signature = c(x = "SummarizedExperiment"), x } ) + +#' @rdname deprecate +#' @export +setGeneric("runOverlap", signature = c("x"), + function(x, ...) + standardGeneric("runOverlap")) + +#' @rdname deprecate +#' @export +setMethod("runOverlap", signature = c(x = "SummarizedExperiment"), + function(x, ...){ + .Deprecated(msg = paste0("'runOverlap' is deprecated\n", + "Use 'addDissimilarity' with parameter ", + "method = 'overlap' instead.")) + addDissimilarity(x, method = "overlap", ...) + } +) + +#' @rdname deprecate +#' @export +setGeneric("calculateOverlap", signature = c("x"), + function(x, ...) + standardGeneric("calculateOverlap")) + +#' @rdname deprecate +#' @export +setMethod("calculateOverlap", signature = c(x = "SummarizedExperiment"), + function(x, ...){ + .Deprecated(msg = paste0("'calculateOverlap' is deprecated\n", + "Use 'getDissimilarity' with parameter ", + "method = 'overlap' instead.")) + getDissimilarity(x, method = "overlap", ...) + } +) + +#' @rdname deprecate +#' @export +setGeneric("calculateJSD", signature = c("x"), + function(x, ...) + standardGeneric("calculateJSD")) + +#' @rdname deprecate +#' @export +setMethod("calculateJSD", signature = c(x = "SummarizedExperiment"), + function(x, ...){ + .Deprecated(msg = paste0("'calculateJSD' is deprecated\n", + "Use 'getDissimilarity' with parameter ", + "method = 'jsd' instead.")) + getDissimilarity(x, method = "jsd", ...) + } + +) + +#' @rdname deprecate +#' @export +setGeneric("runJSD", signature = c("x"), + function(x, ...) + standardGeneric("runJSD")) + +#' @rdname deprecate +#' @export +setMethod("runJSD", signature = c(x = "SummarizedExperiment"), + function(x, ...){ + .Deprecated(msg = paste0("'runJSD' is deprecated\n", + "Use 'getDissimilarity' with parameter ", + "method = 'jsd' instead.")) + getDissimilarity(x, method = "jsd", ...) + } +) + +#' @rdname deprecate +#' @export +setGeneric("calculateUnifrac", signature = c("x"), + function(x, ...) + standardGeneric("calculateUnifrac")) + +#' @rdname deprecate +#' @export +setMethod("calculateUnifrac", signature = c(x = "TreeSummarizedExperiment"), + function(x, ...){ + .Deprecated(msg = paste0("'calculateUnifrac' is deprecated\n", + "Use 'getDissimilarity' with parameter ", + "method = 'unifrac' instead.")) + getDissimilarity(x, method = "unifrac", ...) + } +) + +#' @rdname deprecate +#' @export +runUnifrac <- function(mat, tree, ...){ + .Deprecated(msg = paste0("'runUnifrac' is deprecated\n", + "Use 'getDissimilarity' with parameter ", + "method = 'unifrac' instead.")) + getDissimilarity(t(mat), tree = tree, method = "unifrac", ...) +} \ No newline at end of file diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R index 6486283f1..f2d0af9a9 100644 --- a/R/estimateDiversity.R +++ b/R/estimateDiversity.R @@ -262,7 +262,6 @@ NULL # Check assay and check that vegan package is available since it is # required in some of the diversity calculations. .check_assay_present(assay.type, x) - .require_package("vegan") # # Calculate specified diversity indices dvrsts <- BiocParallel::bplapply( diff --git a/R/getDominant.R b/R/getDominant.R index c48ee74b5..2e3bda36c 100644 --- a/R/getDominant.R +++ b/R/getDominant.R @@ -7,7 +7,7 @@ #' @inheritParams getPrevalence #' #' @param name \code{Character scalar}. A name for the column of the -#' \code{colData} where results will be stored. (Default: \code{"dominant_taxa"}) +#' \code{colData} where results will be stored. (Default: \code{"dominant_taxa"}) #' #' @param other.name \code{Character scalar}. A name for features that are not #' included in n the most frequent dominant features in the data. (Default: \code{"Other"}) diff --git a/R/getPrevalence.R b/R/getPrevalence.R index 9fccc9a5d..93664ac8f 100644 --- a/R/getPrevalence.R +++ b/R/getPrevalence.R @@ -3,7 +3,11 @@ #' These functions calculate the population prevalence for taxonomic ranks in a #' \code{\link{SummarizedExperiment-class}} object. #' -#' @inheritParams calculateJSD +#' @inheritParams getDissimilarity +#' +#' @param x \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}. +#' +#' @param assay_name Deprecated. Use \code{assay.type} instead. #' #' @param detection \code{Numeric scalar}. Detection threshold for absence/presence. #' If \code{as_relative = FALSE}, diff --git a/R/meltAssay.R b/R/meltAssay.R index 0a2cd23e7..c656fbcad 100644 --- a/R/meltAssay.R +++ b/R/meltAssay.R @@ -10,9 +10,10 @@ #' \code{rowData} contains a column \dQuote{FeatureID}, they will be renamed to #' \dQuote{SampleID_col} and \dQuote{FeatureID_row}, if row names or column #' names are set. -#' -#' @inheritParams calculateJSD #' +#' @inheritParams getDominant +#' @inheritParams getDissimilarity +#' #' @param add.col \code{Logical scalar}. \code{NULL} ,or \code{character vector}. Used to #' select information from the \code{colData} to add to the molten assay data. #' If \code{add.col = NULL} no data will be added, if diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 92302dd38..61a229902 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -1,6 +1,7 @@ #' Merge SE objects into single SE object. #' #' @inheritParams rarefyAssay +#' @inheritParams getDominant #' #' @param y a \code{\link{SummarizedExperiment}} object when \code{x} is a #' \code{\link{SummarizedExperiment}} object. Disabled when \code{x} is a list. diff --git a/R/rarefyAssay.R b/R/rarefyAssay.R index 6f90c5164..c38373f8f 100644 --- a/R/rarefyAssay.R +++ b/R/rarefyAssay.R @@ -20,7 +20,8 @@ #' before implement this function. #' #' @inheritParams transformAssay -#' +#' @inheritParams getDominant +#' #' @param sample \code{Integer scalar}. Indicates the number of counts being #' simulated i.e. rarefying depth. This can equal to lowest number of total #' counts found in a sample or a user specified number. diff --git a/R/runCCA.R b/R/runCCA.R index 80b1697b3..81ac0c83d 100644 --- a/R/runCCA.R +++ b/R/runCCA.R @@ -3,7 +3,8 @@ #' These functions perform Canonical Correspondence Analysis on data stored #' in a \code{SummarizedExperiment}. #' -#' @inheritParams calculateJSD +#' @inheritParams getDominant +#' @inheritParams getDissimilarity #' #' @details #' For \code{run*} a @@ -42,6 +43,8 @@ #' 'wa' (site scores found as weighted averages (cca) or weighted sums (rda) of #' v with weights Xbar, but the multiplying effect of eigenvalues removed) or #' 'u' ((weighted) orthonormal site scores). (Default: \code{'wa'}) +#' +#' @param exprs_values Deprecated. Use \code{assay.type} instead. #' #' @param ... additional arguments passed to vegan::cca or vegan::dbrda and #' other internal functions. @@ -172,7 +175,6 @@ setGeneric("addRDA", signature = c("x"), #' @importFrom stats as.formula .calculate_cca <- function(x, formula, variables, scores, scale = TRUE, ...){ - .require_package("vegan") # input check if(!.is_a_bool(scale)){ stop("'scale' must be TRUE or FALSE.", call. = FALSE) @@ -320,7 +322,6 @@ runCCA <- function(x,...){ #' @importFrom vegan sppscores<- .calculate_rda <- function( x, formula, variables, scores, method = distance, distance = "euclidean", ...){ - .require_package("vegan") # # Transpose and ensure that the table is in matrix format x <- as.matrix(t(x)) diff --git a/R/runDPCoA.R b/R/runDPCoA.R index 41c05d06c..710624e7d 100644 --- a/R/runDPCoA.R +++ b/R/runDPCoA.R @@ -4,7 +4,8 @@ #' \code{ade4} package in typical fashion. Results are stored in the #' \code{reducedDims} and are available for all the expected functions. #' -#' @inheritParams calculateUnifrac +#' @inheritParams getDominant +#' @inheritParams getDissimilarity #' #' @details #' For \code{addDPCoA} a \linkS4class{TreeSummarizedExperiment} containing the @@ -35,6 +36,14 @@ #' #' @param altexp \code{Character scalar} or \code{integer scalar}. Specifies an #' alternative experiment containing the input data. (Default: \code{NULL}) +#' +#' @param exprs_values Deprecated. Use \code{assay.type} instead. +#' +#' @param tree.name \code{Character scalar}. Specifies the name of the +#' tree to be included in the phyloseq object that is created, +#' (Default: \code{"phylo"}) +#' +#' @param tree_name Deprecated. Use \code{tree.name} instead. #' #' @param ... Currently not used. #' diff --git a/R/runNMDS.R b/R/runNMDS.R index fc0e1db55..a84b432c1 100644 --- a/R/runNMDS.R +++ b/R/runNMDS.R @@ -3,8 +3,9 @@ #' Perform non-metric multi-dimensional scaling (nMDS) on samples, based on the #' data in a \code{SingleCellExperiment} object. #' +#' @inheritParams getDominant #' @inheritParams runDPCoA -#' +#' #' @details #' For \code{addNMDS} a \linkS4class{SingleCellExperiment} #' @@ -24,6 +25,9 @@ #' \code{\link[vegan:monoMDS]{vegan::monoMDS}} #' #' @param nmdsFUN Deprecated. Use \code{nmds.fun} instead. +#' +#' @param name \code{Character scalar}. A name for the column of the +#' \code{colData} where results will be stored. (Default: \code{"NMDS"}) #' #' @param ... additional arguments to pass to \code{FUN} and #' \code{nmds.fun}. @@ -35,6 +39,8 @@ #' dimred is specified. #' #' @param n_dimred Deprecated. Use \code{ndimred} instead. +#' +#' @param exprs_values Deprecated. Use \code{assay.type} instead. #' #' @return For \code{getNMDS}, a matrix is returned containing the MDS #' coordinates for each sample (row) and dimension (column). diff --git a/R/transformCounts.R b/R/transformCounts.R index a7f18e584..60cda2568 100644 --- a/R/transformCounts.R +++ b/R/transformCounts.R @@ -3,7 +3,8 @@ #' Variety of transformations for abundance data, stored in \code{assay}. #' See details for options. #' -#' @inheritParams calculateJSD +#' @inheritParams getDominant +#' @inheritParams getDissimilarity #' #' @param method \code{Character scalar}. Specifies the transformation #' method. diff --git a/R/utils.R b/R/utils.R index 5df70d40b..ec3f9a5ad 100644 --- a/R/utils.R +++ b/R/utils.R @@ -385,6 +385,10 @@ "Therefore, it is converted to TreeSummarizedExperiment.", call. = FALSE) } + if( !identical(rownames(as.matrix(values)), colnames(x)) ){ + stop("Rownames of the matrix should match with colnames(x).", + " The result is not added to reducedDims.") + } # Throw warning if values of reducedDim are overwritten if( name %in% names(reducedDims(x)) ){ warning( diff --git a/man/agglomerate-methods.Rd b/man/agglomerate-methods.Rd index 180f7388a..6fa459ab0 100644 --- a/man/agglomerate-methods.Rd +++ b/man/agglomerate-methods.Rd @@ -105,9 +105,7 @@ unsplitByRanks(x, ...) ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{arguments passed to \code{agglomerateByRank} function for \code{SummarizedExperiment} objects and other functions. diff --git a/man/agglomerateByPrevalence.Rd b/man/agglomerateByPrevalence.Rd index 65b801b61..ec3247212 100644 --- a/man/agglomerateByPrevalence.Rd +++ b/man/agglomerateByPrevalence.Rd @@ -26,9 +26,7 @@ agglomerateByPrevalence(x, ...) ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{arguments passed to \code{agglomerateByRank} function for \code{SummarizedExperiment} objects and other functions. diff --git a/man/calculateJSD.Rd b/man/calculateJSD.Rd deleted file mode 100644 index caa62d63d..000000000 --- a/man/calculateJSD.Rd +++ /dev/null @@ -1,84 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculateJSD.R -\name{calculateJSD} -\alias{calculateJSD} -\alias{calculateJSD,ANY-method} -\alias{calculateJSD,SummarizedExperiment-method} -\alias{runJSD} -\title{Calculate the Jensen-Shannon Divergence} -\usage{ -\S4method{calculateJSD}{ANY}(x, ...) - -\S4method{calculateJSD}{SummarizedExperiment}( - x, - assay.type = assay_name, - assay_name = exprs_values, - exprs_values = "counts", - transposed = FALSE, - ... -) - -runJSD(x, BPPARAM = SerialParam(), chunkSize = nrow(x)) -} -\arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} - -\item{...}{optional arguments not used.} - -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} - -\item{assay_name}{Deprecated. Use \code{assay.type} instead.} - -\item{exprs_values}{Deprecated. Use \code{assay.type} instead.} - -\item{transposed}{\code{Logical scalar}. Is x transposed with samples in rows? -(Default: \code{FALSE})} - -\item{BPPARAM}{A -\code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}} -object specifying whether the calculation should be parallelized.} - -\item{chunkSize}{\code{Integer scalar}. Defines the size of data send -to the individual worker. Only has an effect, if \code{BPPARAM} defines -more than one worker. (Default: \code{nrow(x)})} -} -\value{ -a sample-by-sample distance matrix, suitable for NMDS, etc. -} -\description{ -This function calculates the Jensen-Shannon Divergence (JSD) in a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object. -} -\examples{ -data(enterotype) -library(scater) - - -jsd <- calculateJSD(enterotype) -class(jsd) -head(jsd) - -enterotype <- runMDS(enterotype, FUN = calculateJSD, name = "JSD", - exprs_values = "counts") -head(reducedDim(enterotype)) -head(attr(reducedDim(enterotype),"eig")) -attr(reducedDim(enterotype),"GOF") -} -\references{ -Jensen-Shannon Divergence and Hilbert space embedding. -Bent Fuglede and Flemming Topsoe University of Copenhagen, -Department of Mathematics -\url{http://www.math.ku.dk/~topsoe/ISIT2004JSD.pdf} -} -\seealso{ -\url{http://en.wikipedia.org/wiki/Jensen-Shannon_divergence} -} -\author{ -Susan Holmes \email{susan@stat.stanford.edu}. -Adapted for phyloseq by Paul J. McMurdie. -Adapted for mia by Felix G.M. Ernst -} diff --git a/man/calculateOverlap.Rd b/man/calculateOverlap.Rd deleted file mode 100644 index 9b20e91f4..000000000 --- a/man/calculateOverlap.Rd +++ /dev/null @@ -1,87 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculateOverlap.R -\name{calculateOverlap} -\alias{calculateOverlap} -\alias{calculateOverlap,SummarizedExperiment-method} -\alias{runOverlap} -\alias{runOverlap,SummarizedExperiment-method} -\title{Estimate overlap} -\usage{ -calculateOverlap( - x, - assay.type = assay_name, - assay_name = "counts", - detection = 0, - ... -) - -\S4method{calculateOverlap}{SummarizedExperiment}( - x, - assay.type = assay_name, - assay_name = "counts", - detection = 0, - ... -) - -runOverlap(x, ...) - -\S4method{runOverlap}{SummarizedExperiment}(x, name = "overlap", ...) -} -\arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} - -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} - -\item{assay_name}{Deprecated. Use \code{assay.type} instead.} - -\item{detection}{\code{Numeric scalar}. Defines detection threshold for -absence/presence of features. Feature that has abundance under threshold in -either of samples, will be discarded when evaluating overlap between samples. -(Default: \code{0})} - -\item{...}{Optional arguments not used.} - -\item{name}{\code{Character scalar}. Specifies the name of overlap matrix that -is stored in reducedDim(x). (Default: \code{"overlap"})} -} -\value{ -calculateOverlap returns sample-by-sample distance matrix. -runOverlap returns \code{x} that includes overlap matrix in its -reducedDim. -} -\description{ -This function calculates overlap for all sample-pairs -in a \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object. -} -\details{ -This function calculates overlap between all the sample-pairs. Overlap -reflects similarity between sample-pairs. - -When overlap is calculated using relative abundances, the higher the value the -higher the similarity is, When using relative abundances, overlap value 1 means that -all the abundances of features are equal between two samples, and 0 means that -samples have completely different relative abundances. -} -\examples{ -data(esophagus) -tse <- esophagus -tse <- transformAssay(tse, method = "relabundance") -overlap <- calculateOverlap(tse, assay_name = "relabundance") -overlap - -# Store result to reducedDim -tse <- runOverlap(tse, assay.type = "relabundance", name = "overlap_between_samples") -head(reducedDims(tse)$overlap_between_samples) - -} -\seealso{ -\code{\link[mia:calculateJSD]{calculateJSD}} -\code{\link[mia:calculateUnifrac]{calculateUnifrac}} -} -\author{ -Leo Lahti and Tuomas Borman. Contact: \url{microbiome.github.io} -} diff --git a/man/calculateUnifrac.Rd b/man/calculateUnifrac.Rd deleted file mode 100644 index 6b3022bfa..000000000 --- a/man/calculateUnifrac.Rd +++ /dev/null @@ -1,121 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculateUnifrac.R -\name{calculateUnifrac} -\alias{calculateUnifrac} -\alias{calculateUnifrac,ANY,phylo-method} -\alias{calculateUnifrac,TreeSummarizedExperiment,missing-method} -\alias{runUnifrac} -\title{Calculate weighted or unweighted (Fast) Unifrac distance} -\usage{ -calculateUnifrac(x, tree, ...) - -\S4method{calculateUnifrac}{ANY,phylo}(x, tree, weighted = FALSE, ...) - -\S4method{calculateUnifrac}{TreeSummarizedExperiment,missing}( - x, - assay.type = assay_name, - assay_name = exprs_values, - exprs_values = "counts", - tree.name = tree_name, - tree_name = "phylo", - transposed = FALSE, - ... -) - -runUnifrac( - x, - tree, - weighted = FALSE, - node.label = nodeLab, - nodeLab = NULL, - ... -) -} -\arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} - -\item{tree}{if \code{x} is a matrix, a -\code{\link[TreeSummarizedExperiment:phylo]{phylo}} object matching the -matrix. This means that the phylo object and the columns should relate -to the same type of features (aka. microorganisms).} - -\item{...}{optional arguments not used.} - -\item{weighted}{\code{Logical scalar}. Should use weighted-Unifrac -calculation? Weighted-Unifrac takes into account the relative abundance of -species/taxa shared between samples, whereas unweighted-Unifrac only -considers presence/absence. Default is \code{FALSE}, meaning the -unweighted-Unifrac distance is calculated for all pairs of samples. -(Default: \code{FALSE})} - -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} - -\item{assay_name}{Deprecated. Use \code{assay.type} instead.} - -\item{exprs_values}{Deprecated. Use \code{assay.type} instead.} - -\item{tree.name}{\code{Character scalar}. Specifies the name of the -tree used in calculation. (Default: \code{"phylo"})} - -\item{tree_name}{Deprecated. Use \code{tree.name} instead.} - -\item{transposed}{\code{Logical scalar}. Is x transposed with samples in rows? -(Default: \code{FALSE})} - -\item{node.label}{if \code{x} is a matrix, -a \code{character} vector specifying links between rows/columns and tips of \code{tree}. -The length must equal the number of rows/columns of \code{x}. Furthermore, all the -node labs must be present in \code{tree}.} - -\item{nodeLab}{Deprecated. Use \code{node.label} instead.} -} -\value{ -a sample-by-sample distance matrix, suitable for NMDS, etc. -} -\description{ -This function calculates the Unifrac distance for all sample-pairs -in a \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -object. The function utilizes \code{\link[rbiom:unifrac]{rbiom:unifrac()}}. -} -\details{ -Please note that if \code{calculateUnifrac} is used as a \code{FUN} for -\code{runMDS}, the argument \code{ntop} has to be set to \code{nrow(x)}. - -Please note that \code{runUnifrac} expects a matrix with samples per row -and not per column. This is implemented to be compatible with other -distance calculations such as \code{\link[stats:dist]{dist}} as much as -possible. -} -\examples{ -data(esophagus) -library(scater) -calculateUnifrac(esophagus, weighted = FALSE) -calculateUnifrac(esophagus, weighted = TRUE) -# for using calculateUnifrac in conjunction with runMDS the tree argument -# has to be given separately. In addition, subsetting using ntop must -# be disabled -esophagus <- runMDS(esophagus, FUN = calculateUnifrac, name = "Unifrac", - tree = rowTree(esophagus), - assay.type = "counts", - ntop = nrow(esophagus)) -reducedDim(esophagus) -} -\references{ -\url{http://bmf.colorado.edu/unifrac/} - -See also additional descriptions of Unifrac in the following articles: - -Lozupone, Hamady and Knight, ``Unifrac - An Online Tool for Comparing -Microbial Community Diversity in a Phylogenetic Context.'', BMC -Bioinformatics 2006, 7:371 - -Lozupone, Hamady, Kelley and Knight, ``Quantitative and qualitative (beta) -diversity measures lead to different insights into factors that structure -microbial communities.'' Appl Environ Microbiol. 2007 - -Lozupone C, Knight R. ``Unifrac: a new phylogenetic method for comparing -microbial communities.'' Appl Environ Microbiol. 2005 71 (12):8228-35. -} diff --git a/man/deprecate.Rd b/man/deprecate.Rd index d4df00e0e..6bcb5b9ae 100644 --- a/man/deprecate.Rd +++ b/man/deprecate.Rd @@ -137,6 +137,17 @@ \alias{relabundance<-} \alias{relabundance,SummarizedExperiment-method} \alias{relabundance<-,SummarizedExperiment-method} +\alias{runOverlap} +\alias{runOverlap,SummarizedExperiment-method} +\alias{calculateOverlap} +\alias{calculateOverlap,SummarizedExperiment-method} +\alias{calculateJSD} +\alias{calculateJSD,SummarizedExperiment-method} +\alias{runJSD} +\alias{runJSD,SummarizedExperiment-method} +\alias{calculateUnifrac} +\alias{calculateUnifrac,TreeSummarizedExperiment-method} +\alias{runUnifrac} \title{These functions will be deprecated. Please use other functions instead.} \usage{ cluster(x, ...) @@ -408,6 +419,28 @@ relabundance(x) <- value \S4method{relabundance}{SummarizedExperiment}(x) \S4method{relabundance}{SummarizedExperiment}(x) <- value + +runOverlap(x, ...) + +\S4method{runOverlap}{SummarizedExperiment}(x, ...) + +calculateOverlap(x, ...) + +\S4method{calculateOverlap}{SummarizedExperiment}(x, ...) + +calculateJSD(x, ...) + +\S4method{calculateJSD}{SummarizedExperiment}(x, ...) + +runJSD(x, ...) + +\S4method{runJSD}{SummarizedExperiment}(x, ...) + +calculateUnifrac(x, ...) + +\S4method{calculateUnifrac}{TreeSummarizedExperiment}(x, ...) + +runUnifrac(mat, tree, ...) } \arguments{ \item{x}{A @@ -417,6 +450,10 @@ object.} \item{...}{Additional parameters. See dedicated function.} \item{value}{a matrix to store as the \sQuote{relabundance} assay} + +\item{mat}{An abundance matrix} + +\item{tree}{A phylogenetic tree} } \description{ These functions will be deprecated. Please use other functions instead. diff --git a/man/getDissimilarity.Rd b/man/getDissimilarity.Rd new file mode 100644 index 000000000..4bc144d4b --- /dev/null +++ b/man/getDissimilarity.Rd @@ -0,0 +1,196 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/addDissimilarity.R +\name{getDissimilarity} +\alias{getDissimilarity} +\alias{addDissimilarity} +\alias{addDissimilarity,SummarizedExperiment-method} +\alias{getDissimilarity,SummarizedExperiment-method} +\alias{getDissimilarity,TreeSummarizedExperiment-method} +\alias{getDissimilarity,ANY-method} +\title{Calculate dissimilarities} +\usage{ +addDissimilarity(x, method, ...) + +\S4method{addDissimilarity}{SummarizedExperiment}(x, method, name = method, ...) + +getDissimilarity(x, method, ...) + +\S4method{getDissimilarity}{SummarizedExperiment}( + x, + method, + assay.type = "counts", + niter = NULL, + transposed = FALSE, + tree = NULL, + ... +) + +\S4method{getDissimilarity}{TreeSummarizedExperiment}( + x, + method, + assay.type = "counts", + tree.name = "phylo", + niter = NULL, + transposed = FALSE, + tree = NULL, + ... +) + +\S4method{getDissimilarity}{ANY}(x, method, niter = NULL, tree = NULL, ...) +} +\arguments{ +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +or \code{matrix}.} + +\item{method}{\code{Character scalar}. Specifies which dissimilarity to +calculate.} + +\item{...}{other arguments passed onto \code{\link[vegan:avgdist]{avgdist}}, +\code{\link[vegan:vegdist]{vegdist}}, or onto mia internal functions: + +\itemize{ +\item \code{sample}: The sampling depth in rarefaction. +(Default: \code{min(rowSums2(x))}) + +\item \code{dis.fun}: \code{Character scalar}. Specifies the dissimilarity +function to be used. + +\item \code{weighted}: (Unifrac) \code{Logical scalar}. Should use +weighted-Unifrac calculation? +Weighted-Unifrac takes into account the relative abundance of +species/taxa shared between samples, whereas unweighted-Unifrac only +considers presence/absence. Default is \code{FALSE}, meaning the +unweighted-Unifrac dissimilarity is calculated for all pairs of samples. +(Default: \code{FALSE}) + +\item \code{node.label} (Unifrac) \code{character vector}. Used only if +\code{x} is a matrix. Specifies links between rows/columns and tips of +\code{tree}. The length must equal the number of rows/columns of \code{x}. +Furthermore, all the node labs must be present in \code{tree}. + +\item \code{chunkSize}: (JSD) \code{Integer scalar}. Defines the size of +data send to the individual worker. Only has an effect, if \code{BPPARAM} +defines more than one worker. (Default: \code{nrow(x)}) + +\item \code{BPPARAM}: (JSD) +\code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}. +Specifies whether the calculation should be parallelized. + +\item \code{detection}: (Overlap) \code{Numeric scalar}. +Defines detection threshold for absence/presence of features. Feature that +has abundance under threshold in either of samples, will be discarded when +evaluating overlap between samples. (Default: \code{0}) +}} + +\item{name}{\code{Character scalar}. The name to be used to store the result +in metadata of the output. (Default: \code{method})} + +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} + +\item{niter}{The number of iterations performed. If \code{NULL}, +rarefaction is disabled. (Default: \code{NULL})} + +\item{transposed}{\code{Logical scalar}. Specifies if x is transposed with +cells in rows. (Default: \code{FALSE})} + +\item{tree}{(Unifrac) \code{phylo}. A phylogenetic tree used in calculation. +(Default: \code{NULL})} + +\item{tree.name}{(Unifrac) \code{Character scalar}. Specifies the name of the +tree from \code{rowTree(x)} that is used in calculation. Disabled when +\code{tree} is specified. (Default: \code{"phylo"})} +} +\value{ +\code{getDissimilarity} returns a sample-by-sample dissimilarity matrix. + +\code{addDissimilarity} returns \code{x} that includes dissimilarity matrix +in its metadata. +} +\description{ +These functions are designed to calculate dissimilarities on data stored +within a +\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +object. For overlap, Unifrac, and Jensen-Shannon Divergence (JSD) +dissimilarities, the functions use mia internal functions, while for other +types of dissimilarities, they rely on \code{\link[vegan:vegdist]{vegdist}} +by default. +} +\details{ +Overlap reflects similarity between sample-pairs. When overlap is +calculated using relative abundances, the higher the value the higher the +similarity is. When using relative abundances, overlap value 1 means that +all the abundances of features are equal between two samples, and 0 means +that samples have completely different relative abundances. + +Unifrac is calculated with \code{\link[rbiom:unifrac]{rbiom:unifrac()}}. + +If rarefaction is enabled, \code{\link[vegan:avgdist]{vegan:avgdist()}} is +utilized. +} +\examples{ +library(mia) +library(scater) + +# load dataset +data(GlobalPatterns) +tse <- GlobalPatterns + +### Overlap dissimilarity + +tse <- addDissimilarity(tse, method = "overlap", detection = 0.25) +metadata(tse)[["overlap"]][1:6, 1:6] + +### JSD dissimilarity + +tse <- addDissimilarity(tse, method = "jsd") +metadata(tse)[["jsd"]][1:6, 1:6] + +# Multi Dimensional Scaling applied to JSD dissimilarity matrix +tse <- runMDS(tse, FUN = getDissimilarity, method = "overlap", + assay.type = "counts") +metadata(tse)[["MDS"]][1:6, ] + +### Unifrac dissimilarity + +res <- getDissimilarity(tse, method = "unifrac", weighted = FALSE) +dim(as.matrix((res))) + +tse <- addDissimilarity(tse, method = "unifrac", weighted = TRUE) +metadata(tse)[["unifrac"]][1:6, 1:6] + +} +\references{ +For unifrac dissimilarity: \url{http://bmf.colorado.edu/unifrac/} + +See also additional descriptions of Unifrac in the following articles: + +Lozupone, Hamady and Knight, ``Unifrac - An Online Tool for Comparing +Microbial Community Diversity in a Phylogenetic Context.'', BMC +Bioinformatics 2006, 7:371 + +Lozupone, Hamady, Kelley and Knight, ``Quantitative and qualitative (beta) +diversity measures lead to different insights into factors that structure +microbial communities.'' Appl Environ Microbiol. 2007 + +Lozupone C, Knight R. ``Unifrac: a new phylogenetic method for comparing +microbial communities.'' Appl Environ Microbiol. 2005 71 (12):8228-35. + +For JSD dissimilarity: +Jensen-Shannon Divergence and Hilbert space embedding. +Bent Fuglede and Flemming Topsoe University of Copenhagen, +Department of Mathematics +\url{http://www.math.ku.dk/~topsoe/ISIT2004JSD.pdf} +} +\seealso{ +\url{http://en.wikipedia.org/wiki/Jensen-Shannon_divergence} +} +\author{ +For overlap implementation: +Leo Lahti and Tuomas Borman. Contact: \url{microbiome.github.io} + +For JSD implementation: +Susan Holmes \email{susan@stat.stanford.edu}. +Adapted for phyloseq by Paul J. McMurdie. +Adapted for mia by Felix G.M. Ernst +} diff --git a/man/getDominant.Rd b/man/getDominant.Rd index c16c1beba..1f999098e 100644 --- a/man/getDominant.Rd +++ b/man/getDominant.Rd @@ -41,12 +41,10 @@ addDominant(x, name = "dominant_taxa", other.name = "Other", n = NULL, ...) ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/getPrevalence.Rd b/man/getPrevalence.Rd index f8c742a15..9bdf3c6fc 100644 --- a/man/getPrevalence.Rd +++ b/man/getPrevalence.Rd @@ -102,9 +102,7 @@ getPrevalentAbundance( \S4method{getPrevalentAbundance}{SummarizedExperiment}(x, assay.type = assay_name, assay_name = "counts", ...) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{additional arguments \itemize{ @@ -139,8 +137,8 @@ detection and prevalence cutoffs be included? (Default: \code{FALSE})} \item{na.rm}{\code{Logical scalar}. Should NA values be omitted when calculating prevalence? (Default: \code{TRUE})} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/hierarchy-tree.Rd b/man/hierarchy-tree.Rd index d7df639d2..68f873314 100644 --- a/man/hierarchy-tree.Rd +++ b/man/hierarchy-tree.Rd @@ -17,9 +17,7 @@ addHierarchyTree(x, ...) \S4method{addHierarchyTree}{SummarizedExperiment}(x, ...) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{optional arguments not used currently.} } diff --git a/man/isContaminant.Rd b/man/isContaminant.Rd index 7e3cecbb3..14a1825e9 100644 --- a/man/isContaminant.Rd +++ b/man/isContaminant.Rd @@ -47,8 +47,8 @@ addNotContaminantQC(x, name = "isNotContaminant", ...) \arguments{ \item{seqtab, x}{a \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/meltSE.Rd b/man/meltSE.Rd index 99a9263e2..162746b35 100644 --- a/man/meltSE.Rd +++ b/man/meltSE.Rd @@ -37,12 +37,10 @@ meltSE( ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/mergeSEs.Rd b/man/mergeSEs.Rd index 049755487..55f669b8f 100644 --- a/man/mergeSEs.Rd +++ b/man/mergeSEs.Rd @@ -29,14 +29,12 @@ mergeSEs(x, ...) \S4method{mergeSEs}{list}(x, ...) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{optional arguments (not used).} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/rarefyAssay.Rd b/man/rarefyAssay.Rd index 6b339144d..e666a14c0 100644 --- a/man/rarefyAssay.Rd +++ b/man/rarefyAssay.Rd @@ -30,12 +30,10 @@ rarefyAssay( ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/runCCA.Rd b/man/runCCA.Rd index e80403ff9..b9b5649a2 100644 --- a/man/runCCA.Rd +++ b/man/runCCA.Rd @@ -66,9 +66,7 @@ calculateRDA(x, ...) runRDA(x, ...) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{additional arguments passed to vegan::cca or vegan::dbrda and other internal functions. @@ -112,8 +110,8 @@ into a CA analysis and dbRDA into PCoA/MDS.} multivariate homogeneity of group dispersions be performed. (Default: \code{TRUE})} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/man/runDPCoA.Rd b/man/runDPCoA.Rd index d59f42642..e74e92259 100644 --- a/man/runDPCoA.Rd +++ b/man/runDPCoA.Rd @@ -40,9 +40,7 @@ addDPCoA(x, ..., altexp = NULL, name = "DPCoA") runDPCoA(x, ...) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{y}{a \code{dist} or a symmetric \code{matrix} compatible with \code{ade4:dpcoa}} @@ -65,18 +63,19 @@ integer vector of row indices or a logical vector. (Default: \code{NULL})} \item{scale}{\code{Logical scalar}. Should the expression values be standardized? (Default: \code{FALSE})} -\item{transposed}{\code{Logical scalar}. Is x transposed with samples in rows? -(Default: \code{FALSE})} +\item{transposed}{\code{Logical scalar}. Specifies if x is transposed with +cells in rows. (Default: \code{FALSE})} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} \item{exprs_values}{Deprecated. Use \code{assay.type} instead.} \item{tree.name}{\code{Character scalar}. Specifies the name of the -tree used in calculation. (Default: \code{"phylo"})} +tree to be included in the phyloseq object that is created, +(Default: \code{"phylo"})} \item{tree_name}{Deprecated. Use \code{tree.name} instead.} diff --git a/man/runNMDS.Rd b/man/runNMDS.Rd index f0791a676..20ac3b739 100644 --- a/man/runNMDS.Rd +++ b/man/runNMDS.Rd @@ -56,9 +56,7 @@ addNMDS(x, ..., altexp = NULL, name = "NMDS") runNMDS(x, ...) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{additional arguments to pass to \code{FUN} and \code{nmds.fun}.} @@ -89,8 +87,8 @@ integer vector of row indices or a logical vector. (Default: \code{NULL})} \item{scale}{\code{Logical scalar}. Should the expression values be standardized? (Default: \code{FALSE})} -\item{transposed}{\code{Logical scalar}. Is x transposed with samples in rows? -(Default: \code{FALSE})} +\item{transposed}{\code{Logical scalar}. Specifies if x is transposed with +cells in rows. (Default: \code{FALSE})} \item{keep.dist}{\code{Logical scalar}. Indicates whether the \code{dist} object calculated by \code{FUN} should be stored as \sQuote{dist} attribute of @@ -99,8 +97,8 @@ the matrix returned/stored by \code{getNMDS}/ \code{addNMDS}. (Default: \item{keep_dist}{Deprecated. Use \code{keep.dist} instead.} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} @@ -118,7 +116,7 @@ dimred is specified.} alternative experiment containing the input data. (Default: \code{NULL})} \item{name}{\code{Character scalar}. A name for the column of the -\code{colData} where results will be stored. (Default: \code{"DPCoA"})} +\code{colData} where results will be stored. (Default: \code{"NMDS"})} } \value{ For \code{getNMDS}, a matrix is returned containing the MDS diff --git a/man/splitOn.Rd b/man/splitOn.Rd index 0a54c5a6f..f03e5f9e7 100644 --- a/man/splitOn.Rd +++ b/man/splitOn.Rd @@ -35,9 +35,7 @@ unsplitOn(x, ...) ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{...}{Arguments passed to \code{agglomerateByVariable} function for \code{SummarizedExperiment} objects and other functions. diff --git a/man/summaries.Rd b/man/summaries.Rd index 9239b54a5..09fbf1d9c 100644 --- a/man/summaries.Rd +++ b/man/summaries.Rd @@ -42,9 +42,7 @@ summarizeDominance(x, group = NULL, name = "dominant_taxa", ...) \S4method{summary}{SummarizedExperiment}(object, assay.type = assay_name, assay_name = "counts") } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{top}{\code{Numeric scalar}. Determines how many top taxa to return. Default is to return top five taxa. (Default: \code{5})} diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd index 700c9719e..f125981fa 100644 --- a/man/taxonomy-methods.Rd +++ b/man/taxonomy-methods.Rd @@ -69,9 +69,7 @@ mapTaxonomy(x, ...) IdTaxaToDataFrame(from) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} \item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a value of \code{taxonomyRanks()} function.} diff --git a/man/transformAssay.Rd b/man/transformAssay.Rd index c27650683..a72baaf1d 100644 --- a/man/transformAssay.Rd +++ b/man/transformAssay.Rd @@ -32,12 +32,10 @@ transformAssay( ) } \arguments{ -\item{x}{a numeric matrix with samples as rows or a -\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -object.} +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} -\item{assay.type}{\code{Character scalar}. Specifies the name of the -assay used in calculation. (Default: \code{"counts"})} +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 0a79e726f..9cdb3d10c 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -59,9 +59,7 @@ reference: - addAlpha - subtitle: Dissimilarity - contents: - - calculateUnifrac - - calculateJSD - - calculateOverlap + - getDissimilarity - addDivergence - subtitle: Ordination - contents: diff --git a/tests/testthat/test-5Unifrac.R b/tests/testthat/test-5Unifrac.R index fdecba388..212dd834e 100644 --- a/tests/testthat/test-5Unifrac.R +++ b/tests/testthat/test-5Unifrac.R @@ -5,45 +5,47 @@ test_that("Unifrac beta diversity", { tse <- transformAssay(tse, assay.type="counts", method="relabundance") expect_error( - calculateUnifrac(tse, assay.type = "test", tree.name = "phylo", - weighted = FALSE) + getDissimilarity(tse, method = "unifrac",assay.type = "test", + tree.name = "phylo", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = 2, tree.name = "phylo", - weighted = FALSE) + getDissimilarity(tse, method = "unifrac", assay.type = 2, + tree.name = "phylo", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = TRUE, tree.name = "phylo", - weighted = FALSE) + getDissimilarity(tse, method = "unifrac", assay.type = TRUE, + tree.name = "phylo", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree.name = "test", - weighted = FALSE) + getDissimilarity(tse, method = "unifrac", assay.type = "counts", + tree.name = "test", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree.name = 1, - weighted = FALSE) + getDissimilarity(tse, method = "unifrac", assay.type = "counts", + tree.name = 1, weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree.name = TRUE, - weighted = "FALSE") + getDissimilarity(tse, method = "unifrac", assay.type = "counts", + tree.name = TRUE, weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree.name = "phylo", - weighted = 1) + getDissimilarity(tse, method = "unifrac", assay.type = "counts", + tree.name = "phylo", weighted = 1) ) data(GlobalPatterns, package="mia") tse <- GlobalPatterns # Calculate unweighted unifrac - unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = FALSE)) + unifrac_mia <- as.matrix(getDissimilarity(tse, method = "unifrac", + weighted = FALSE)) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse), weighted = FALSE, rowTree(tse))) expect_equal(unifrac_mia, unifrac_rbiom) # Calculate weighted unifrac. Allow tolerance since weighted unifrac # calculation in rbiom has some stochasticity. That is most likely due # multithreading and complex structure of tree (loops). - unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = TRUE)) + unifrac_mia <- as.matrix(getDissimilarity(tse, method = "unifrac", + weighted = TRUE)) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse), weighted = TRUE, rowTree(tse))) expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 1e-3) @@ -60,7 +62,8 @@ test_that("Unifrac beta diversity", { tse_ref <- tse_ref[ rowLinks(tse_ref)[["whichTree"]] == "tree2", ] # Calculate unweighted unifrac expect_warning( - unifrac_mia <- calculateUnifrac(tse, weighted = FALSE, tree.name = "tree2") + unifrac_mia <- getDissimilarity(tse, method = "unifrac", + weighted = FALSE, tree.name = "tree2") ) unifrac_mia <- as.matrix(unifrac_mia) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = FALSE, @@ -68,28 +71,31 @@ test_that("Unifrac beta diversity", { expect_equal(unifrac_mia, unifrac_rbiom) # Calculate weighted unifrac expect_warning( - unifrac_mia <- calculateUnifrac(tse, weighted = TRUE, tree.name = "tree2") + unifrac_mia <- getDissimilarity(tse, method = "unifrac", + weighted = TRUE, tree.name = "tree2") ) unifrac_mia <- as.matrix(unifrac_mia) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = TRUE, rowTree(tse_ref))) expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 1e-3) - # Test the function with agglomerated data. runUnifrac renames rownames - # based on tips and links to them. Then it also prunes the tree so that - # rows are in tips. + # Test the function with agglomerated data. .get_unifrac renames + # rownames based on tips and links to them. Then it also prunes the tree so + # that rows are in tips. tse <- GlobalPatterns tse <- agglomerateByRank(tse, rank = "Species") tse_ref <- tse rownames(tse_ref) <- rowLinks(tse_ref)[["nodeLab"]] rowTree(tse_ref) <- .prune_tree(rowTree(tse_ref), rowLinks(tse_ref)[["nodeLab"]]) # Calculate unweighted unifrac - unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = FALSE)) + unifrac_mia <- as.matrix(getDissimilarity(tse, method = "unifrac", + weighted = FALSE)) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = FALSE, rowTree(tse_ref))) # Calculate weighted unifrac. No tolerance needed since the tree has # simpler structure after pruning. - unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = TRUE)) + unifrac_mia <- as.matrix(getDissimilarity(tse, method = "unifrac", + weighted = TRUE)) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = TRUE, rowTree(tse_ref))) expect_equal(unifrac_mia, unifrac_rbiom) diff --git a/tests/testthat/test-5calculateOverlap.R b/tests/testthat/test-5calculateOverlap.R index be580f640..9e0c360c9 100644 --- a/tests/testthat/test-5calculateOverlap.R +++ b/tests/testthat/test-5calculateOverlap.R @@ -1,19 +1,20 @@ -context("calculateOverlap") +context("Overlap") -test_that("calculateOverlap", { +test_that("Overlap", { # Get data data(esophagus, package="mia") tse <- esophagus # Test input - expect_error(calculateOverlap(tse, assay.type = "relabundance", detection = 0.15)) - expect_error(calculateOverlap(tse, detection = "TEST")) - expect_error(calculateOverlap(tse, detection = TRUE)) + expect_error( + getDissimilarity(tse, method = "overlap", assay.type = "relabundance", detection = 0.15)) + expect_error(getDissimilarity(tse, method = "overlap", detection = "TEST")) + expect_error(getDissimilarity(tse, method = "overlap", detection = TRUE)) # Calculate overlap tse <- transformAssay(tse, method = "relabundance") - result <- calculateOverlap(tse, assay.type = "relabundance", detection = 0.15) + result <- getDissimilarity(tse, method = "overlap", assay.type = "relabundance", detection = 0.15) # Test output expect_true(class(result) == "dist") @@ -25,7 +26,7 @@ test_that("calculateOverlap", { 0.3634972, 0.4111838, 0.3634972, 0.0000000), nrow=3) expect_equal(round(unname(result), 7), round(reference), 7) # Test with different detection threshold - result <- calculateOverlap(tse, assay.type = "relabundance", detection = 0) + result <- getDissimilarity(tse, method = "overlap", assay.type = "relabundance", detection = 0) result <- as.matrix(result) # Reference @@ -33,12 +34,10 @@ test_that("calculateOverlap", { 0.8390008, 0.9038734, 0.8390008, 0.00000000), nrow=3) expect_equal(round(unname(result), 7), round(reference, 7)) - # Test runOverlap - expect_error(runOverlap(tse, name = 1)) - expect_error(runOverlap(tse, name = c("A", "B"))) - expect_error(runOverlap(tse, name = TRUE)) - expect_equal( reducedDim(runOverlap(tse)), as.matrix(calculateOverlap(tse)) ) - expect_equal( reducedDim(runOverlap(tse, assay.type = "relabundance", detection = 0.08)), - as.matrix(calculateOverlap(tse, assay.type = "relabundance", detection = 0.08)) ) + # Test .calculate_overlap + expect_equal( metadata(addDissimilarity(tse, "overlap"))[["overlap"]], + as.matrix(getDissimilarity(tse, method = "overlap")) ) + expect_equal( metadata(addDissimilarity(tse, method = "overlap", assay.type = "relabundance", detection = 0.08))[["overlap"]], + as.matrix(getDissimilarity(tse, method = "overlap", assay.type = "relabundance", detection = 0.08)) ) }) diff --git a/tests/testthat/test-5getDissimilarity.R b/tests/testthat/test-5getDissimilarity.R new file mode 100644 index 000000000..5c97f8f08 --- /dev/null +++ b/tests/testthat/test-5getDissimilarity.R @@ -0,0 +1,31 @@ +context("Dissimilarity calculation") +test_that("Dissimilarity calculation", { + data(GlobalPatterns, package="mia") + tse <- GlobalPatterns + # Test if Unifrac dissimilarity works with external tree + expect_equal(getDissimilarity(tse, method = "unifrac"), + getDissimilarity(tse, tree = rowTree(tse), method = "unifrac")) + # Test if results from vegan::vegdist are equal to getDissimilarity results + mat <- assay(tse, "counts") + expect_equal(as.matrix(vegan::vegdist(t(mat), "euclidean")), + as.matrix(getDissimilarity(tse, method = "euclidean"))) + tse <- GlobalPatterns + # Test if Unifrac dissimilarity works with external tree + expect_equal(getDissimilarity(tse, method = "unifrac"), + getDissimilarity(tse, tree = rowTree(tse), method = "unifrac")) + # Test rarefaction + ntop <- 5 + tse_sub <- tse[head(rev(order(rowSds(assay(tse, "counts")))), ntop), ] + mat <- assay(tse_sub, "counts") + clr <- function (x) { + vegan::decostand(x, method="clr", pseudocount=1) + } + set.seed(123) + res1 <- vegan::avgdist(t(mat), distfun = vegdist, dmethod = "euclidean", + sample = min(colSums2(mat)), + iterations = 10, transf = clr) + set.seed(123) + res2 <- getDissimilarity(tse_sub, method = "euclidean", niter = 10, + transf = clr) + expect_equal(as.matrix(res1), as.matrix(res2)) +}) diff --git a/vignettes/mia.Rmd b/vignettes/mia.Rmd index ad8358d30..a4db08db9 100644 --- a/vignettes/mia.Rmd +++ b/vignettes/mia.Rmd @@ -249,12 +249,12 @@ altExp(tse,"Genus") <- runMDS(altExp(tse,"Genus"), keep_dist = TRUE) ``` -JSD and UniFrac are implemented in `mia` as well. `calculateJSD` can be used +JSD and UniFrac are implemented in `mia` as well. `getJSD` can be used as a drop-in replacement in the example above (omit the `method` argument as well) to calculate the JSD. For calculating the UniFrac distance via -`calculateUniFrac` either a `TreeSummarizedExperiment` must be used or a tree -supplied via the `tree` argument. For more details see `?calculateJSD`, -`?calculateUnifrac` or `?getDPCoA`. +`getUniFrac` either a `TreeSummarizedExperiment` must be used or a tree +supplied via the `tree` argument. For more details see `?getJSD`, +`?getUnifrac` or `?getDPCoA`. `runMDS` performs the decomposition. Alternatively `addNMDS` can also be used.