From ef406c711cefb5fa0d92f0444f1f65218180c201 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Wed, 8 Jan 2025 15:02:49 +0200 Subject: [PATCH 1/6] Utilize vegan::rrarefy --- NAMESPACE | 1 + R/rarefyAssay.R | 162 +++++++++++++++++++------------ man/mergeSEs.Rd | 3 - man/rarefyAssay.Rd | 44 +++++---- tests/testthat/test-8subsample.R | 28 +++--- 5 files changed, 145 insertions(+), 93 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 771585489..0366bc059 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -420,5 +420,6 @@ importFrom(vegan,dbrda) importFrom(vegan,eigenvals) importFrom(vegan,monoMDS) importFrom(vegan,permutest) +importFrom(vegan,rrarefy) importFrom(vegan,scores) importFrom(vegan,vegdist) diff --git a/R/rarefyAssay.R b/R/rarefyAssay.R index 1c5a152a9..c36fbcda1 100644 --- a/R/rarefyAssay.R +++ b/R/rarefyAssay.R @@ -18,7 +18,12 @@ #' #' To maintain the reproducibility, please define the seed using set.seed() #' before implement this function. -#' +#' +#' When \code{replace = FALSE}, the function uses internally +#' \code{vegan::rarefy} while with replacement enabled the function utilizes +#' own implementation, inspired by \code{phyloseq::rarefy_even_depth}. +#' +#' # #' @inheritParams transformAssay #' @inheritParams getDominant #' @@ -28,15 +33,15 @@ #' #' @param min_size Deprecated. Use \code{sample} instead. #' -#' @param replace \code{Logical scalar}. The default is with -#' replacement (\code{replace=TRUE}). -#' See \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} -#' for details on implications of this parameter. (Default: \code{TRUE}) -#' -#' @param verbose \code{Logical scalar}. Choose whether to show messages. -#' (Default: \code{TRUE}) +#' @param replace \code{Logical scalar}. Whether to åperform subsampling with +#' replacement. Ths works similarly to \code{sample(..., replace = TRUE)}. +#' (Default: \code{FALSE}) #' -#' @param ... additional arguments not used +#' @param ... optional arguments: +#' \itemize{ +#' \item \code{verbose}: \code{Logical scalar}. Choose whether to show +#' messages. (Default: \code{TRUE}) +#' } #' #' @references #' McMurdie PJ, Holmes S. Waste not, want not: why rarefying microbiome data @@ -67,14 +72,20 @@ #' dim(tse) #' dim(assay(tse_subsampled, "subsampled")) #' +#' @seealso +#' \itemize{ +#' \item \code{\link[vegan:rrarefy]{vegan::rrarefy}} +#' \item \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} +#' } +#' NULL #' @rdname rarefyAssay #' @export setGeneric("rarefyAssay", signature = c("x"), function( x, assay.type = assay_name, assay_name = "counts", - sample = min_size, min_size = min(colSums2(assay(x))), - replace = TRUE, name = "subsampled", verbose = TRUE, ...) + sample = min_size, min_size = min(colSums2(assay(x, assay.type))), + replace = FALSE, name = "subsampled", ...) standardGeneric("rarefyAssay")) #' @importFrom SummarizedExperiment assay assay<- @@ -84,22 +95,20 @@ setGeneric("rarefyAssay", signature = c("x"), function( setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), function(x, assay.type = assay_name, assay_name = "counts", sample = min_size, min_size = min(colSums2(assay(x, assay.type))), - replace = TRUE, name = "subsampled", verbose = TRUE, ...){ + replace = FALSE, name = "subsampled", ...){ # Input check # Check that assay name is correct and that assay is counts table. .check_assay_present(assay.type, x) - if( any(assay(x, assay.type) %% 1 != 0) ){ - warning("assay contains non-integer values. Only counts table ", - "is applicable...", call. = FALSE) + if( any(assay(x, assay.type) %% 1 != 0 & + !is.na(assay(x, assay.type)) ) ){ + stop("assay contains non-integer values. Only counts table ", + "is applicable.", call. = FALSE) } - if(any(assay(x, assay.type) < 0)){ + if( any(assay(x, assay.type) < 0 & !is.na(assay(x, assay.type))) ){ stop("assay contains strictly-negative values. Only counts ", "table is applicable...", call. = FALSE) } - # Check that verbose and replace are boolean values - if( !.is_a_bool(verbose) ){ - stop("'verbose' must be TRUE or FALSE.", call. = FALSE) - } + # Check that replace is boolean values if( !.is_a_bool(replace) ){ stop("`replace` must be TRUE or FALSE.", call. = FALSE) } @@ -109,61 +118,90 @@ setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), "different from 'assay.type'.", call. = FALSE) } # Check sample. It must be single positive integer value. - if(!is.numeric(sample) || length(sample) != 1 || + if( is.na(sample) || !is.numeric(sample) || length(sample) != 1 || sample %% 1 != 0 && sample <= 0 ){ stop("'sample' needs to be a positive integer value.", call. = FALSE) } # Input check end - - # 'sample' determines the number of reads subsampled from samples. - # This means that every samples should have at least 'sample' of reads. - # If they do not have, drop those samples at this point. - # Get those sample names that we are going to remove due to too - # small number of reads. - rm_samples <- colSums2(assay(x, assay.type)) < sample - if( any(rm_samples) ){ - # Remove sample(s) from TreeSE (or keep rest of the samples) - x <- x[ , !rm_samples, drop = FALSE] - # Return NULL, if no samples were found after subsampling - if( ncol(x) == 0 ){ - stop("No samples were found after subsampling. Consider ", - "lower 'sample'.", call. = FALSE) - } - # Give message which samples were removed - if( verbose ){ - message( - sum(rm_samples), " samples removed because they contained ", - "fewer reads than `sample`.") - } - } - # Subsample specified assay. - mat <- apply( - assay(x, assay.type), 2, - .subsample_assay, sample = sample, replace = replace) - # Add rownames to new assay. The returned value from .subsample_assay - # is a vector that do not have feature names. - rownames(mat) <- rownames(x) - # remove features not present in any samples after subsampling - feat_inc <- rowSums2(mat, na.rm = TRUE) > 0 - mat <- mat[feat_inc, ] - # Give message if some features were dropped - if( verbose && any(!feat_inc) ){ - message( - sum(!feat_inc), " features removed because they are not ", - "present in all samples after subsampling." - ) - } + # Remove samples that do not have enoguh counts + x <- .remove_samples_below_counts_th(x, assay.type, sample, ...) + # Subsample specified assay + mat <- .get_subsamples_matrix(x, assay.type, sample, replace, ...) # Subset the TreeSE based on new feature-set x <- x[rownames(mat), ] # Add new assay to TreeSE assay(x, name, withDimnames = FALSE) <- mat - # Add info on sample to metadata - x <- .add_values_to_metadata(x, "rarefyAssay_sample", min_size) return(x) } ) +# 'sample' determines the number of reads subsampled from samples. +# This means that every samples should have at least 'sample' of reads. +# If they do not have, drop those samples at this point. +# Get those sample names that we are going to remove due to too +# small number of reads. +.remove_samples_below_counts_th <- function( + x, assay.type, sample, verbose = TRUE, ...){ + # + if( !.is_a_bool(verbose) ){ + stop("'verbose' must be TRUE or FALSE.", call. = FALSE) + } + # + rm_samples <- colSums2(assay(x, assay.type), na.rm = TRUE) < sample + if( any(rm_samples) ){ + # Remove sample(s) from TreeSE (or keep rest of the samples) + x <- x[ , !rm_samples, drop = FALSE] + # Return NULL, if no samples were found after subsampling + if( ncol(x) == 0 ){ + stop("No samples were found after subsampling. Consider ", + "lower 'sample'.", call. = FALSE) + } + # Give message which samples were removed + if( verbose ){ + message( + sum(rm_samples), " samples removed because they contained ", + "fewer reads than `sample`.") + } + } + return(x) +} + +# This function gets abundance table as input and returns subsampled matrix. +#' @importFrom vegan rrarefy +.get_subsamples_matrix <- function( + x, assay.type, sample, replace, verbose = TRUE, ...){ + # + if( !.is_a_bool(verbose) ){ + stop("'verbose' must be TRUE or FALSE.", call. = FALSE) + } + # + # We use vegan::rrarefy by default. However, as it does not support + # replace=TRUE, we use our own implementation in this case. + mat <- assay(x, assay.type) + if( replace ){ + # Loop throguh samples and subsample samples one-by-one + mat <- apply( + mat, 2, .subsample_assay, sample = sample, replace = replace) + rownames(mat) <- rownames(x) + } else{ + mat <- t( vegan::rrarefy(t(mat), sample) ) + } + # remove features not present in any samples after subsampling + feat_inc <- rowSums2(mat, na.rm = TRUE) > 0 + mat <- mat[feat_inc, ] + # Give message if some features were dropped + if( verbose && any(!feat_inc) ){ + message( + sum(!feat_inc), " features removed because they are not ", + "present in any of the samples after subsampling." + ) + } + # Add info on sample to attributes + attr(mat, "subsample") <- sample + return(mat) +} + ## Modified Sub sampling function from phyloseq internals .subsample_assay <- function(x, sample, replace){ # Create replacement species vector diff --git a/man/mergeSEs.Rd b/man/mergeSEs.Rd index 5b76bd0b6..8a98ea721 100644 --- a/man/mergeSEs.Rd +++ b/man/mergeSEs.Rd @@ -63,9 +63,6 @@ taxonomy information along with OTU number matches. \item{collapse_features}{Deprecated. Use \code{collapse.rows} instead.} -\item{verbose}{\code{Logical scalar}. Choose whether to show messages. -(Default: \code{TRUE})} - \item{y}{a \code{\link{SummarizedExperiment}} object when \code{x} is a \code{\link{SummarizedExperiment}} object. Disabled when \code{x} is a list.} } diff --git a/man/rarefyAssay.Rd b/man/rarefyAssay.Rd index 5ffb2a3bc..997f193c6 100644 --- a/man/rarefyAssay.Rd +++ b/man/rarefyAssay.Rd @@ -10,10 +10,9 @@ rarefyAssay( assay.type = assay_name, assay_name = "counts", sample = min_size, - min_size = min(colSums2(assay(x))), - replace = TRUE, + min_size = min(colSums2(assay(x, assay.type))), + replace = FALSE, name = "subsampled", - verbose = TRUE, ... ) @@ -23,9 +22,8 @@ rarefyAssay( assay_name = "counts", sample = min_size, min_size = min(colSums2(assay(x, assay.type))), - replace = TRUE, + replace = FALSE, name = "subsampled", - verbose = TRUE, ... ) } @@ -43,18 +41,18 @@ counts found in a sample or a user specified number.} \item{min_size}{Deprecated. Use \code{sample} instead.} -\item{replace}{\code{Logical scalar}. The default is with -replacement (\code{replace=TRUE}). -See \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} -for details on implications of this parameter. (Default: \code{TRUE})} +\item{replace}{\code{Logical scalar}. Whether to åperform subsampling with +replacement. Ths works similarly to \code{sample(..., replace = TRUE)}. +(Default: \code{FALSE})} \item{name}{\code{Character scalar}. A name for the column of the \code{colData} where results will be stored. (Default: \code{"method"})} -\item{verbose}{\code{Logical scalar}. Choose whether to show messages. -(Default: \code{TRUE})} - -\item{...}{additional arguments not used} +\item{...}{optional arguments: +\itemize{ +\item \code{verbose}: \code{Logical scalar}. Choose whether to show +messages. (Default: \code{TRUE}) +}} } \value{ \code{rarefyAssay} return \code{x} with subsampled data. @@ -66,18 +64,24 @@ for details on implications of this parameter. (Default: \code{TRUE})} subsampled assay. } \details{ -Although the subsampling approach is highly debated in microbiome research, -we include the \code{rarefyAssay} function because there may be some +Although the subsampling approach is highly debated in microbiome research, +we include the \code{rarefyAssay} function because there may be some instances where it can be useful. -Note that the output of \code{rarefyAssay} is not the equivalent as the +Note that the output of \code{rarefyAssay} is not the equivalent as the input and any result have to be verified with the original dataset. Subsampling/Rarefying may undermine downstream analyses and have unintended consequences. Therefore, make sure this normalization is appropriate for your data. -To maintain the reproducibility, please define the seed using set.seed() +To maintain the reproducibility, please define the seed using set.seed() before implement this function. + +When \code{replace = FALSE}, the function uses internally +\code{vegan::rarefy} while with replacement enabled the function utilizes +own implementation, inspired by \code{phyloseq::rarefy_even_depth}. + +# } \examples{ # When samples in TreeSE are less than specified sample, they will be removed. @@ -105,3 +109,9 @@ Zaneveld JR, Vázquez-Baeza Y, Birmingham A, Hyde ER. Normalization and microbial differential abundance strategies depend upon data characteristics. Microbiome. 2017 Dec;5(1):1-8. } +\seealso{ +\itemize{ +\item \code{\link[vegan:rrarefy]{vegan::rrarefy}} +\item \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} +} +} diff --git a/tests/testthat/test-8subsample.R b/tests/testthat/test-8subsample.R index f600b4d2a..dadabedb6 100644 --- a/tests/testthat/test-8subsample.R +++ b/tests/testthat/test-8subsample.R @@ -36,28 +36,34 @@ test_that("rarefyAssay", { expect_equal(unname(colSums2(assay(tse.subsampled, "subsampled"))), expColSums) # When replace = FALSE + sample <- 60000 seed = 1938 set.seed(seed) tse.subsampled.rp <- rarefyAssay( GlobalPatterns, - sample = 60000, + sample = sample, name = "subsampled", replace = FALSE) # check number of features removed is correct - expnFeaturesRemovedRp <- 6731 - obsnFeaturesRemovedRp <- nrow(GlobalPatterns) - nrow(tse.subsampled.rp) - expect_equal(obsnFeaturesRemovedRp, expnFeaturesRemovedRp) + set.seed(seed) + mat <- assay(GlobalPatterns, "counts") + # When replace = FALSE, the function utilize vegan::rrarefy + suppressWarnings( + sub_mat <- t( vegan::rrarefy(t(mat), sample = sample) ) + ) + # The function removes those samples whose library size is lower than + # 'sample' + sub_mat <- sub_mat[, colSums(mat) > sample] + # Moreover, it removes those features that are not present in any of the + # samples + sub_mat <- sub_mat[rowSums(sub_mat) > 0, ] + expect_equal(assay(tse.subsampled.rp, "subsampled"), sub_mat, check.attributes = FALSE) # check if all samples subsampled to even depth - expColSumsRp <- rep(60000, 25) + expColSumsRp <- rep(sample, 25) expect_equal(unname(colSums2(assay(tse.subsampled.rp, "subsampled"))), expColSumsRp) # check if same Features removed - obsFeaturesRemovedRp <- rownames(GlobalPatterns)[!rownames(GlobalPatterns) %in% rownames(tse.subsampled.rp)] - - expFeaturesRemovedRP <- c("522457","951","586076","244960","215972", - "31759","30678","138353","406058","1126") - - expect_equal(obsFeaturesRemovedRp[1:10], expFeaturesRemovedRP) + expect_true( all(rownames(tse.subsampled.rp) == rownames(sub_mat)) ) }) From 1d23fca0465eec8f2f66cfa5b05be6d7015aba8e Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Wed, 8 Jan 2025 15:06:09 +0200 Subject: [PATCH 2/6] up --- R/rarefyAssay.R | 1 - man/rarefyAssay.Rd | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/R/rarefyAssay.R b/R/rarefyAssay.R index c36fbcda1..8a9e34d6e 100644 --- a/R/rarefyAssay.R +++ b/R/rarefyAssay.R @@ -23,7 +23,6 @@ #' \code{vegan::rarefy} while with replacement enabled the function utilizes #' own implementation, inspired by \code{phyloseq::rarefy_even_depth}. #' -#' # #' @inheritParams transformAssay #' @inheritParams getDominant #' diff --git a/man/rarefyAssay.Rd b/man/rarefyAssay.Rd index 997f193c6..ce7ef94b8 100644 --- a/man/rarefyAssay.Rd +++ b/man/rarefyAssay.Rd @@ -64,24 +64,22 @@ messages. (Default: \code{TRUE}) subsampled assay. } \details{ -Although the subsampling approach is highly debated in microbiome research, -we include the \code{rarefyAssay} function because there may be some +Although the subsampling approach is highly debated in microbiome research, +we include the \code{rarefyAssay} function because there may be some instances where it can be useful. -Note that the output of \code{rarefyAssay} is not the equivalent as the +Note that the output of \code{rarefyAssay} is not the equivalent as the input and any result have to be verified with the original dataset. Subsampling/Rarefying may undermine downstream analyses and have unintended consequences. Therefore, make sure this normalization is appropriate for your data. -To maintain the reproducibility, please define the seed using set.seed() +To maintain the reproducibility, please define the seed using set.seed() before implement this function. When \code{replace = FALSE}, the function uses internally \code{vegan::rarefy} while with replacement enabled the function utilizes own implementation, inspired by \code{phyloseq::rarefy_even_depth}. - -# } \examples{ # When samples in TreeSE are less than specified sample, they will be removed. From 5006e83115bf96f6aee751c55294ae1f4bbb5b80 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Fri, 17 Jan 2025 22:59:11 +0200 Subject: [PATCH 3/6] up --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3aaff563a..f054f76b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.15.12 +Version: 1.15.13 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", From b92c3d03ecb1758668585a21b2c063feceb012dc Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Sat, 18 Jan 2025 09:36:22 +0200 Subject: [PATCH 4/6] up --- man/getCrossAssociation.Rd | 43 +++++++++++++++++++++++++++----------- man/rarefyAssay.Rd | 10 +++++---- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/man/getCrossAssociation.Rd b/man/getCrossAssociation.Rd index 4578240ac..5e47ecf80 100644 --- a/man/getCrossAssociation.Rd +++ b/man/getCrossAssociation.Rd @@ -66,6 +66,16 @@ corresponding variable-pair. This decreases the number of calculations in and give numeric values as an output. Adjust \code{method} and other parameters correspondingly. Supported functions are, for example, \code{stats::dist} and \code{vegan::vegdist}. + +\item \code{dimred1} \code{Character scalar} or \code{numeric scalar}. +Specifies reduced dimensionality from the \code{reducedDim} of experiment +\enumerate{ +\item (Default: \code{NULL}) +} + +\item \code{dimred2} \code{Character scalar} or \code{numeric scalar}. +Specifies reduced dimensionality from the \code{reducedDim} of experiment +2. (Default: \code{NULL}) }} \item{experiment1}{\code{Character scalar} or \code{numeric scalar}. @@ -91,12 +101,14 @@ assay in experiment 2 to be transformed.. (Default: \code{"counts"})} \item{assay_name2}{Deprecated. Use \code{assay.type2} instead.} \item{altexp1}{\code{Character scalar} or \code{numeric scalar}. Specifies -alternative experiment from the altExp of experiment 1. If NULL, then the -experiment is itself and altExp option is disabled. (Default: \code{NULL})} +alternative experiment from the \code{altExp} of experiment 1. If NULL, then +the experiment is itself and altExp option is disabled. +(Default: \code{NULL})} \item{altexp2}{\code{Character scalar} or \code{numeric scalar}. Specifies -alternative experiment from the altExp of experiment 2. If NULL, then the -experiment is itself and altExp option is disabled. (Default: \code{NULL})} +alternative experiment from the \code{altExp} of experiment 2. If NULL, then +the experiment is itself and altExp option is disabled. +(Default: \code{NULL})} \item{col.var1}{\code{Character scalar}. Specifies column(s) from \code{colData} of experiment 1. If col.var1 is used, assay.type1 is disabled. @@ -224,7 +236,7 @@ result <- getCrossAssociation( # Show first 5 entries head(result, 5) -# If test.signif = TRUE, then getCrossAssociation additionally returns +# If test.signif = TRUE, then getCrossAssociation additionally returns # significances # filter.self.cor = TRUE filters self correlations # p.adj.threshold can be used to filter those features that do not @@ -234,7 +246,7 @@ result <- getCrossAssociation( filter.self.cor = TRUE, p.adj.threshold = 0.05, test.signif = TRUE) # Show first 5 entries head(result, 5) - + # Returned value is a list of matrices names(result) @@ -243,19 +255,19 @@ names(result) result <- getCrossAssociation( mae[[1]], mae[[1]], by = 2, paired = FALSE, association.fun = getDissimilarity, method = "bray") - + # If experiments are equal and measure is symmetric # (e.g., taxa1 vs taxa2 == taxa2 vs taxa1), # it is possible to speed-up calculations by calculating association only # for unique variable-pairs. Use "symmetric" to choose whether to measure # association for only other half of of variable-pairs. result <- getCrossAssociation( - mae, experiment1 = "microbiota", experiment2 = "microbiota", + mae, experiment1 = "microbiota", experiment2 = "microbiota", assay.type1 = "counts", assay.type2 = "counts", symmetric = TRUE) # For big data sets, the calculations might take a long time. -# To speed them up, you can take a random sample from the data. -# When dealing with complex biological problems, random samples can be +# To speed them up, you can take a random sample from the data. +# When dealing with complex biological problems, random samples can be # enough to describe the data. Here, our random sample is 30 \% of whole data. sample_size <- 0.3 tse <- mae[[1]] @@ -270,7 +282,7 @@ mae[[1]] <- addAlpha(mae[[1]]) # an assay named assay.type from assay slot, it fetches a column named # col.var from colData. result <- getCrossAssociation( - mae[[1]], assay.type1 = "counts", + mae[[1]], assay.type1 = "counts", col.var2 = c("shannon_diversity", "coverage_diversity"), test.signif = TRUE) @@ -288,5 +300,12 @@ res <- getCrossAssociation( assay.type2 = "nmr" ) - +# To calculate correlation of features to principal coordinates, you have to +# first calculate PCoA... +tse <- runMDS( + tse, assay.type = "rclr", FUN = getDissimilarity, method = "euclidean") +# ...then calculate the correlation. +res <- getCrossAssociation(tse, assay.type1 = "rclr", dimred2 = "MDS") +head(res) + } diff --git a/man/rarefyAssay.Rd b/man/rarefyAssay.Rd index 6eeb8f958..5a5245ffc 100644 --- a/man/rarefyAssay.Rd +++ b/man/rarefyAssay.Rd @@ -10,9 +10,10 @@ rarefyAssay( assay.type = assay_name, assay_name = "counts", sample = min_size, - min_size = min(colSums2(assay(x, assay.type))), - replace = FALSE, + min_size = min(colSums2(assay(x))), + replace = TRUE, name = "subsampled", + verbose = TRUE, ... ) @@ -21,9 +22,10 @@ rarefyAssay( assay.type = assay_name, assay_name = "counts", sample = min_size, - min_size = min(colSums2(assay(x, assay.type))), - replace = FALSE, + min_size = min(colSums2(assay(x))), + replace = TRUE, name = "subsampled", + verbose = TRUE, ... ) } From 65b6d19909ad15fd9da76b0396e6fcc7801fc4c6 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Sat, 18 Jan 2025 10:06:19 +0200 Subject: [PATCH 5/6] up --- R/AllGenerics.R | 6 +- R/getCrossAssociation.R | 1 + R/mergeSEs.R | 159 +++++++++++++++++++------------------ man/getCrossAssociation.Rd | 1 + man/mergeSEs.Rd | 5 +- man/rarefyAssay.Rd | 29 +++---- 6 files changed, 97 insertions(+), 104 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 20668220f..a2c9ead77 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -200,11 +200,7 @@ setGeneric("mergeSEs", signature = c("x"), function(x, ... ) #' @rdname rarefyAssay #' @export -setGeneric("rarefyAssay", signature = c("x"), - function( - x, assay.type = assay_name, assay_name = "counts", - sample = min_size, min_size = min(colSums2(assay(x))), - replace = TRUE, name = "subsampled", verbose = TRUE, ...) +setGeneric("rarefyAssay", signature = c("x"), function(x, ...) standardGeneric("rarefyAssay")) #' @rdname runCCA diff --git a/R/getCrossAssociation.R b/R/getCrossAssociation.R index 8f969e691..edf3630bc 100644 --- a/R/getCrossAssociation.R +++ b/R/getCrossAssociation.R @@ -253,6 +253,7 @@ #' #' # To calculate correlation of features to principal coordinates, you have to #' # first calculate PCoA... +#' library(scater) #' tse <- runMDS( #' tse, assay.type = "rclr", FUN = getDissimilarity, method = "euclidean") #' # ...then calculate the correlation. diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 7c1b6eca5..a31a32ad9 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -1,26 +1,26 @@ #' Merge SE objects into single SE object. -#' +#' #' @inheritParams rarefyAssay #' @inheritParams getDominant -#' +#' #' @param y a \code{\link{SummarizedExperiment}} object when \code{x} is a #' \code{\link{SummarizedExperiment}} object. Disabled when \code{x} is a list. -#' +#' #' @param join \code{Character scalar}. A value for selecting the joining #' method. Must be 'full', 'inner', 'left', or 'right'. 'left' and 'right' are #' disabled when more than two objects are being merged. #' (Default: \code{"full"}) -#' +#' #' @param missing.values \code{NA}, \code{0} or \code{Character scalar}. #' Specifies the notation of missing values. (By default: \code{NA}) -#' +#' #' @param missing_values Deprecated. Use \code{missing.values} instead. -#' +#' #' @param collapse.cols \code{Logical scalar}. Determines whether to collapse #' identically named samples to one. (Default: \code{FALSE}) -#' +#' #' @param collapse_samples Deprecated. Use \code{collapse.cols} instead. -#' +#' #' @param collapse.rows \code{Logical scalar}. Selects whether to collapse #' identically named features to one. Since all taxonomy information is #' taken into account, @@ -30,9 +30,12 @@ #' option, it is possible to specify whether these strains are combined if their #' taxonomy information along with OTU number matches. #' (Default: \code{TRUE}) -#' +#' #' @param collapse_features Deprecated. Use \code{collapse.rows} instead. #' +#' @param verbose \code{Logical scalar}. Choose whether to show +#' messages. (Default: \code{TRUE}) +#' #' @param ... optional arguments (not used). #' #' @return A single \code{SummarizedExperiment} object. @@ -45,14 +48,14 @@ #' \code{rownames} and #' \code{colnames}. \code{rowTree} and \code{colTree} are preserved if linkage #' between rows/cols and the tree is found. -#' +#' #' Equally named rows are interpreted as equal. Further -#' matching based on \code{rowData} is not done. For samples, collapsing -#' is disabled by default meaning that equally named samples that are stored -#' in different objects are interpreted as unique. Collapsing can be enabled +#' matching based on \code{rowData} is not done. For samples, collapsing +#' is disabled by default meaning that equally named samples that are stored +#' in different objects are interpreted as unique. Collapsing can be enabled #' with \code{collapse.cols = TRUE} when equally named samples describe the same -#' sample. -#' +#' sample. +#' #' If, for example, all rows are not shared with #' individual objects, there are missing values in \code{assays}. #' The notation of missing @@ -60,31 +63,31 @@ #' of #' \code{TreeSummarizedExperiment} objects, also \code{rowTree}, \code{colTree}, #' and -#' \code{referenceSeq} are preserved if possible. The data is preserved if +#' \code{referenceSeq} are preserved if possible. The data is preserved if #' all the rows or columns can be found from it. -#' -#' Compared to \code{cbind} and \code{rbind} \code{mergeSEs} -#' allows more freely merging since \code{cbind} and \code{rbind} expect +#' +#' Compared to \code{cbind} and \code{rbind} \code{mergeSEs} +#' allows more freely merging since \code{cbind} and \code{rbind} expect #' that rows and columns are matching, respectively. -#' +#' #' You can choose joining methods from \code{'full'}, \code{'inner'}, -#' \code{'left'}, and \code{'right'}. In all the methods, all the samples are +#' \code{'left'}, and \code{'right'}. In all the methods, all the samples are #' included in the result object. However, with different methods, it is -#' possible +#' possible #' to choose which rows are included. -#' +#' #' \itemize{ #' \item{\code{full} -- all unique features} #' \item{\code{inner} -- all shared features} #' \item{\code{left} -- all the features of the first object} #' \item{\code{right} -- all the features of the second object} #' } -#' +#' #' The output depends on the input. If the input contains #' \code{SummarizedExperiment} #' object, then the output will be \code{SummarizedExperiment}. When all the #' input -#' objects belong to \code{TreeSummarizedExperiment}, the output will be +#' objects belong to \code{TreeSummarizedExperiment}, the output will be #' \code{TreeSummarizedExperiment}. #' #' @seealso @@ -99,42 +102,42 @@ #' #' @name mergeSEs #' @export -#' +#' #' @examples #' data(GlobalPatterns) #' data(esophagus) #' data(enterotype) -#' +#' #' # Take only subsets so that it wont take so long #' tse1 <- GlobalPatterns[1:100, ] #' tse2 <- esophagus #' tse3 <- enterotype[1:100, ] -#' +#' #' # Merge two TreeSEs #' tse <- mergeSEs(tse1, tse2) -#' +#' #' # Merge a list of TreeSEs #' list <- SimpleList(tse1, tse2, tse3) #' tse <- mergeSEs(list, assay.type = "counts", missing.values = 0) #' tse -#' +#' #' # With 'join', it is possible to specify the merging method. Subsets are used #' # here just to show the functionality #' tse_temp <- mergeSEs(tse[1:10, 1:10], tse[5:100, 11:20], join = "left") #' tse_temp -#' +#' #' # If your objects contain samples that describe one and same sample, #' # you can collapse equally named samples to one by specifying 'collapse.cols' -#' tse_temp <- mergeSEs(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), +#' tse_temp <- mergeSEs(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), #' collapse.cols = TRUE, #' join = "inner") #' tse_temp -#' +#' #' # Merge all available assays #' tse <- transformAssay(tse, method="relabundance") #' ts1 <- transformAssay(tse1, method="relabundance") #' tse_temp <- mergeSEs(tse, tse1, assay.type = assayNames(tse)) -#' +#' NULL ###################### Function for SimpleList of TreeSEs ###################### @@ -143,12 +146,12 @@ NULL #' @export setMethod("mergeSEs", signature = c(x = "SimpleList"), function(x, assay.type="counts", assay_name = NULL, join = "full", - missing.values = missing_values, missing_values = NA, - collapse.cols = collapse_samples, collapse_samples = FALSE, + missing.values = missing_values, missing_values = NA, + collapse.cols = collapse_samples, collapse_samples = FALSE, collapse.rows = collapse_features, collapse_features = TRUE, verbose = TRUE, ... ){ ################## Input check ################## - # Check the objects + # Check the objects class <- .check_objects_and_give_class(x) if (!is.null(assay_name) & is.null(assay.type)) { .Deprecated(new="assay.type", old="assay_name", msg=paste0( @@ -227,7 +230,7 @@ setMethod("mergeSEs", signature = c(x = "SummarizedExperiment"), if( !(is(y, "SummarizedExperiment")) ){ stop("'y' must be a 'SummarizedExperiment' object.", call. = FALSE) - } + } ################ Input check end ################ # Create a list based on TreeSEs list <- SimpleList(x, y) @@ -300,7 +303,7 @@ setMethod("mergeSEs", signature = c(x = "list"), if( verbose ){ message("\r", i+1, "/", length(x)+1, appendLF = FALSE) } - + # Get the ith object temp <- x[[i]] # Add rownames to rowData so that full matches are found @@ -326,7 +329,7 @@ setMethod("mergeSEs", signature = c(x = "list"), if( class == "TreeSummarizedExperiment" ){ tse_args <- .get_TreeSE_args(temp, tse_args) } - + # Create an object tse <- do.call(FUN_constructor, args = args) } @@ -411,7 +414,7 @@ setMethod("mergeSEs", signature = c(x = "list"), if( verbose ){ message("Adding referenceSeqs...") } - + # Get the rownames that are included in reference sequences rows_that_have_seqs <- lapply(refSeqs, FUN = function(x){ names(x[[1]]) @@ -427,7 +430,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Get the maximum number of DNA sets that individual TreeSE had / max # number of sets that individual rownames set had. max_numrow <- max(lengths(refSeqs)) - + # Initialize a list result_list <- list() # Loop from 1 to max number of DNA sets @@ -487,7 +490,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # All rownames/colnames should be included in trees/links if( !all(names %in% links[["names"]]) || is.null(names) || length(names) == 0 ){ - warning(MARGIN, "Tree(s) does not match with the data so it ", + warning(MARGIN, "Tree(s) does not match with the data so it ", "is discarded.", call. = FALSE) return(tse) } @@ -568,12 +571,12 @@ setMethod("mergeSEs", signature = c(x = "list"), if( !is.null(tse@rowTree) ){ # Get trees that will be added trees_add <- tse@rowTree - # Get rowLinks, convert them to basic DataFrame, + # Get rowLinks, convert them to basic DataFrame, # so that additional column can be added links <- DataFrame(rowLinks(tse)) # Add rownames as one of the columns links$names <- rownames(tse) - + # If there is no data yet / if rowTree arguments are NULL if( is.null(tse_args$rowTrees) ){ # Get the tree data as a list. Tree is as a list, and links as DF @@ -588,7 +591,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # How many trees there already are tree_num_before <- length(tse_args$rowTrees$tree) # Get unique names - unique_names <- make.unique( + unique_names <- make.unique( names( c(tse_args$rowTrees$tree, trees_add) ) ) # Update the names of current data @@ -598,12 +601,12 @@ setMethod("mergeSEs", signature = c(x = "list"), # Get corresponding current names names_add <- names(trees_add) # Update tree names from links - links[ , "whichTree" ] <- + links[ , "whichTree" ] <- unique_names_add[ match( links[ , "whichTree" ], names_add ) ] # Update tree names names(trees_add) <- unique_names_add # Add data to a list - tse_args$rowTrees <- list( + tse_args$rowTrees <- list( trees = c(tse_args$rowTrees$trees, trees_add), links = rbind(tse_args$rowTrees$links, links) ) @@ -613,12 +616,12 @@ setMethod("mergeSEs", signature = c(x = "list"), if( !is.null(tse@colTree) ){ # Get trees that will be added trees_add <- tse@rowTree - # Get colLinks, convert them to basic DataFrame, + # Get colLinks, convert them to basic DataFrame, # so that additional column can be added links <- DataFrame(colLinks(tse)) # Add colnames as one of the columns links$names <- colnames(tse) - + # If there is no data yet / if colTree arguments are NULL if( is.null(tse_args$colTrees) ){ # Get the tree data as a list. Tree is as a list, and links as DF @@ -633,7 +636,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # How many trees there already are tree_num_before <- length(tse_args$colTrees$tree) # Get unique names - unique_names <- make.unique( + unique_names <- make.unique( names( c(tse_args$colTrees$tree, trees_add) ) ) # Update the names of current data @@ -643,12 +646,12 @@ setMethod("mergeSEs", signature = c(x = "list"), # Get corresponding current names names_add <- names(trees_add) # Update tree names from links - links[ , "whichTree" ] <- + links[ , "whichTree" ] <- unique_names_add[ match( links[ , "whichTree" ], names_add ) ] # Update tree names names(trees_add) <- unique_names_add # Add data to a list - tse_args$rowTrees <- list( + tse_args$rowTrees <- list( trees = c(tse_args$colTrees$trees, trees_add), links = rbind(tse_args$colTrees$links, links) ) @@ -673,14 +676,14 @@ setMethod("mergeSEs", signature = c(x = "list"), tse_args$refSeqs <- refSeqs } else{ # otherwise add data to a list - tse_args$refSeqs <- c( tse_args$refSeqs, refSeqs ) + tse_args$refSeqs <- c( tse_args$refSeqs, refSeqs ) } } return(tse_args) } ######################## .get_SummarizedExperiment_data ######################## -# This function gets the desired data from one SE object and creates a list of +# This function gets the desired data from one SE object and creates a list of # arguments containing the data # Arguments of SCE and TreeSE are also fetched with this function. # TreeSE-specific slots are collected with different function so that they are @@ -701,7 +704,7 @@ setMethod("mergeSEs", signature = c(x = "list"), metadata = metadata ) return(args) - + } ######################## .check_objects_and_give_class ######################### @@ -713,7 +716,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Allowed classes allowed_classes <- c("TreeSummarizedExperiment", "SingleCellExperiment", "SummarizedExperiment") - + # Get the class based on hierarchy TreeSE --> SCE --> SE # and check that objects are in correct format classes <- lapply(x, .check_object_for_merge) @@ -726,7 +729,7 @@ setMethod("mergeSEs", signature = c(x = "list"), } else { class <- allowed_classes[3] } - + # If there are multiple classes, give a warning if( length(unique( classes )) > 1 ){ warning("The Input consist of multiple classes. ", @@ -766,7 +769,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Get class class <- class(x) return(class) - + } ########################### .assays_cannot_be_found ############################ # This function checks that the assay(s) can be found from TreeSE objects of @@ -816,7 +819,7 @@ setMethod("mergeSEs", signature = c(x = "list"), { .check_assay_present(assay.type, tse) return(TRUE) - + }, error = function(cond) { return(FALSE) @@ -882,7 +885,7 @@ setMethod("mergeSEs", signature = c(x = "list"), names(assays) <- assay.type # Combine metadata metadata <- c( metadata(tse1), metadata(tse2) ) - + # Create a list of data args <- list(assays = assays, rowData = rowdata, @@ -902,16 +905,16 @@ setMethod("mergeSEs", signature = c(x = "list"), # Take assays assay1 <- assay(tse1, assay.type) assay2 <- assay(tse2, assay.type) - + # Merge two assays into one assay <- .join_two_tables(assay1, assay2, join) - + # Convert into matrix assay <- as.matrix(assay) - + # Fill missing values assay[ is.na(assay) ] <- missing.values - + # Order the assay based on rowData and colData assay <- assay[ match(rownames(rd), rownames(assay)), , drop = FALSE ] assay <- assay[ , match(rownames(cd), colnames(assay)), drop = FALSE] @@ -927,7 +930,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Take rowDatas rd1 <- rowData(tse1) rd2 <- rowData(tse2) - + # Convert column names to lower if( length(colnames(rd1)) > 0 ){ colnames(rd1) <- tolower(colnames(rd1)) @@ -935,10 +938,10 @@ setMethod("mergeSEs", signature = c(x = "list"), if( length(colnames(rd2)) > 0 ){ colnames(rd2) <- tolower(colnames(rd2)) } - + # Merge rowdata rd <- .join_two_tables(rd1, rd2, join) - + # There might be duplicated rownames. This might occur when there are # features with equal taxonomy data but merged datasets have some # additional info that do not match with each other. --> collapse @@ -991,9 +994,9 @@ setMethod("mergeSEs", signature = c(x = "list"), rownames(rd) <- rd[["rownames_merge_ID"]] rd[["rownames_merge_ID"]] <- NULL } - + } - + # Get column indices that match with taxonomy ranks ranks_ind <- match( TAXONOMY_RANKS, colnames(rd) ) # Remove NAs @@ -1012,7 +1015,7 @@ setMethod("mergeSEs", signature = c(x = "list"), substr(rank_names, 2, nchar(rank_names)), sep = "") # Add new names to colnames of rd_rank colnames(rd_rank) <- new_rank_names - + # Combine columns rd <- cbind(rd_rank, rd_other) } @@ -1028,12 +1031,12 @@ setMethod("mergeSEs", signature = c(x = "list"), # Take colDatas cd1 <- colData(tse1) cd2 <- colData(tse2) - + # Merge coldata cd <- .join_two_tables(cd1, cd2, join = "full") # Convert into DataFrame cd <- DataFrame(cd) - + return(cd) } @@ -1058,11 +1061,11 @@ setMethod("mergeSEs", signature = c(x = "list"), left = FALSE, right = TRUE ) - + # Ensure that the data is always data.frame df1 <- as.data.frame(df1) df2 <- as.data.frame(df2) - + # STEP 1: Check whether variables can be merged; if their classes are equal. # Adjust colnames if their classes differ if( ncol(df1) > 0 && ncol(df2) > 0 ){ @@ -1091,7 +1094,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Take into account if other df has only NA values in column --> class # of that variable can be wrong --> use known class classes$no_match <- FALSE - classes[classes$found_both, "no_match"] <- + classes[classes$found_both, "no_match"] <- classes[classes$found_both, "class.x"] != classes[ classes$found_both, "class.y"] & classes$not_na.x[ classes$found_both] & classes$not_na.y[classes$found_both] @@ -1131,11 +1134,11 @@ setMethod("mergeSEs", signature = c(x = "list"), # Add rownames to one of the columns df1$rownames_merge_ID <- rownames(df1) df2$rownames_merge_ID <- rownames(df2) - + # STEP 2: merge # Finally merge data frames into one data frame df <- merge(df1, df2, all.x = all.x, all.y = all.y) - + # STEP 3: polish # Get column names because they can be changed when class is changed colnames <- colnames(df) diff --git a/man/getCrossAssociation.Rd b/man/getCrossAssociation.Rd index 5e47ecf80..886cbb6b2 100644 --- a/man/getCrossAssociation.Rd +++ b/man/getCrossAssociation.Rd @@ -302,6 +302,7 @@ res <- getCrossAssociation( # To calculate correlation of features to principal coordinates, you have to # first calculate PCoA... +library(scater) tse <- runMDS( tse, assay.type = "rclr", FUN = getDissimilarity, method = "euclidean") # ...then calculate the correlation. diff --git a/man/mergeSEs.Rd b/man/mergeSEs.Rd index f19931aa7..12811a042 100644 --- a/man/mergeSEs.Rd +++ b/man/mergeSEs.Rd @@ -65,6 +65,9 @@ taxonomy information along with OTU number matches. \item{collapse_features}{Deprecated. Use \code{collapse.rows} instead.} +\item{verbose}{\code{Logical scalar}. Choose whether to show +messages. (Default: \code{TRUE})} + \item{y}{a \code{\link{SummarizedExperiment}} object when \code{x} is a \code{\link{SummarizedExperiment}} object. Disabled when \code{x} is a list.} } @@ -149,7 +152,7 @@ tse_temp # If your objects contain samples that describe one and same sample, # you can collapse equally named samples to one by specifying 'collapse.cols' -tse_temp <- mergeSEs(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), +tse_temp <- mergeSEs(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), collapse.cols = TRUE, join = "inner") tse_temp diff --git a/man/rarefyAssay.Rd b/man/rarefyAssay.Rd index 5a5245ffc..242e5bb32 100644 --- a/man/rarefyAssay.Rd +++ b/man/rarefyAssay.Rd @@ -5,33 +5,28 @@ \alias{rarefyAssay,SummarizedExperiment-method} \title{Subsample Counts} \usage{ -rarefyAssay( - x, - assay.type = assay_name, - assay_name = "counts", - sample = min_size, - min_size = min(colSums2(assay(x))), - replace = TRUE, - name = "subsampled", - verbose = TRUE, - ... -) +rarefyAssay(x, ...) \S4method{rarefyAssay}{SummarizedExperiment}( x, assay.type = assay_name, assay_name = "counts", sample = min_size, - min_size = min(colSums2(assay(x))), - replace = TRUE, + min_size = min(colSums2(assay(x, assay.type))), + replace = FALSE, name = "subsampled", - verbose = TRUE, ... ) } \arguments{ \item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} +\item{...}{optional arguments: +\itemize{ +\item \code{verbose}: \code{Logical scalar}. Choose whether to show +messages. (Default: \code{TRUE}) +}} + \item{assay.type}{\code{Character scalar}. Specifies which assay to use for calculation. (Default: \code{"counts"})} @@ -49,12 +44,6 @@ replacement. Ths works similarly to \code{sample(..., replace = TRUE)}. \item{name}{\code{Character scalar}. The name for the transformed assay to be stored. (Default: \code{method})} - -\item{...}{optional arguments: -\itemize{ -\item \code{verbose}: \code{Logical scalar}. Choose whether to show -messages. (Default: \code{TRUE}) -}} } \value{ \code{rarefyAssay} return \code{x} with subsampled data. From d01f785b087d171ce0381ce62e31c0fea94399bb Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Sat, 18 Jan 2025 10:50:02 +0200 Subject: [PATCH 6/6] up --- tests/testthat/test-5Unifrac.R | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/testthat/test-5Unifrac.R b/tests/testthat/test-5Unifrac.R index 212dd834e..0c1036d2e 100644 --- a/tests/testthat/test-5Unifrac.R +++ b/tests/testthat/test-5Unifrac.R @@ -3,36 +3,36 @@ test_that("Unifrac beta diversity", { data(esophagus, package="mia") tse <- esophagus tse <- transformAssay(tse, assay.type="counts", method="relabundance") - + expect_error( - getDissimilarity(tse, method = "unifrac",assay.type = "test", + getDissimilarity(tse, method = "unifrac",assay.type = "test", tree.name = "phylo", weighted = FALSE) ) expect_error( - getDissimilarity(tse, method = "unifrac", assay.type = 2, + getDissimilarity(tse, method = "unifrac", assay.type = 2, tree.name = "phylo", weighted = FALSE) ) expect_error( - getDissimilarity(tse, method = "unifrac", assay.type = TRUE, + getDissimilarity(tse, method = "unifrac", assay.type = TRUE, tree.name = "phylo", weighted = FALSE) ) expect_error( - getDissimilarity(tse, method = "unifrac", assay.type = "counts", + getDissimilarity(tse, method = "unifrac", assay.type = "counts", tree.name = "test", weighted = FALSE) ) expect_error( - getDissimilarity(tse, method = "unifrac", assay.type = "counts", + getDissimilarity(tse, method = "unifrac", assay.type = "counts", tree.name = 1, weighted = FALSE) ) expect_error( - getDissimilarity(tse, method = "unifrac", assay.type = "counts", + getDissimilarity(tse, method = "unifrac", assay.type = "counts", tree.name = TRUE, weighted = FALSE) ) expect_error( - getDissimilarity(tse, method = "unifrac", assay.type = "counts", + getDissimilarity(tse, method = "unifrac", assay.type = "counts", tree.name = "phylo", weighted = 1) ) - + data(GlobalPatterns, package="mia") tse <- GlobalPatterns # Calculate unweighted unifrac @@ -44,12 +44,12 @@ test_that("Unifrac beta diversity", { # Calculate weighted unifrac. Allow tolerance since weighted unifrac # calculation in rbiom has some stochasticity. That is most likely due # multithreading and complex structure of tree (loops). - unifrac_mia <- as.matrix(getDissimilarity(tse, method = "unifrac", + unifrac_mia <- as.matrix(getDissimilarity(tse, method = "unifrac", weighted = TRUE)) unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse), weighted = TRUE, rowTree(tse))) - expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 1e-3) - + expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 5e-3) + # Test that the function works correctly when there are multiple trees. # The function should subset the data based on tree. tse <- GlobalPatterns @@ -78,9 +78,9 @@ test_that("Unifrac beta diversity", { unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = TRUE, rowTree(tse_ref))) expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 1e-3) - - # Test the function with agglomerated data. .get_unifrac renames - # rownames based on tips and links to them. Then it also prunes the tree so + + # Test the function with agglomerated data. .get_unifrac renames + # rownames based on tips and links to them. Then it also prunes the tree so # that rows are in tips. tse <- GlobalPatterns tse <- agglomerateByRank(tse, rank = "Species")