From 331e3e16ce6a6d858c0dbaa9757eb93cc0d821da Mon Sep 17 00:00:00 2001
From: TuomasBorman
Date: Tue, 14 Jan 2025 18:41:43 +0200
Subject: [PATCH 1/2] Improve getTaxonomyLabels

---
 R/taxonomy.R | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/R/taxonomy.R b/R/taxonomy.R
index 4acbdd96f..a26c14366 100644
--- a/R/taxonomy.R
+++ b/R/taxonomy.R
@@ -68,7 +68,14 @@
 #'
 #' @param use_grepl Deprecated. Use \code{use.grepl} instead.
 #'
-#' @param ... optional arguments not used currently.
+#' @param ... additional arguments
+#' \itemize{
+#' \item \code{lowest.rank}: The lowest taxonomy level to be considered in
+#' \code{getTaxonomyLabels}. Ranks lower than this will be collapsed into rank
+#' specified by \code{lowest.rank}. For example, if genus level is specified,
+#' species will be collapsed into genus. If \code{NULL}, the data is not
+#' collapsed. (Default: \code{NULL})
+#' }
 #'
 #' @param ranks \code{Character vector}. A vector of ranks to be set.
 #' @details
@@ -339,9 +346,15 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"),
             stop("'resolve.loops' must be TRUE or FALSE.", call. = FALSE)
         }
         #
-        dup <- duplicated(rowData(x)[,taxonomyRanks(x)])
+        # Collapse taxonomy ranks if user has specified so
+        x <- .collapse_lowest_taxonomy_ranks(x, ...)
+
+        dup <- duplicated(rowData(x)[,taxonomyRanks(x), drop = FALSE])
         if(any(dup)){
-            td <- apply(rowData(x)[,taxonomyRanks(x)],1L,paste,collapse = "___")
+            td <- apply(
+                rowData(x)[,taxonomyRanks(x), drop = FALSE],
+                1L,
+                paste, collapse = "___")
             td_non_dup <- td[!dup]
             m <- match(td, td_non_dup)
         }
@@ -362,6 +375,38 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"),
     }
 )
 
+# This function is for collapsing the lowest taxonomy ranks into single value.
+# For instance, if user specifies genus rank, genus and species are collapsed
+# into one, and species rank is removed from the taxonomy table. If family is
+# specified, along with these two, also family is incorporated into this value.
+.collapse_lowest_taxonomy_ranks <- function(
+        x, lowest.rank = NULL, empty.fields = c(NA, "", " ", "\t", "-", "_"),
+        ...){
+    # By default, we keep the taxonomy table untouched.
+    if( !is.null(lowest.rank) ){
+        .check_taxonomic_rank(lowest.rank, x)
+        # Get available taxonomy ranks
+        available_ranks <- taxonomyRanks(x)
+        # Get names of the ranks to collapse. Columns are selected by name, as
+        # positions in taxonomyRanks(x) need not match positions in rowData(x).
+        mod_ranks <- available_ranks[seq(
+            which(available_ranks == lowest.rank), length(available_ranks))]
+        # For each row, collapse ranks into one
+        new_rank <- apply(rowData(x)[, mod_ranks, drop = FALSE], 1, function(r){
+            # Check if empty, and replace with NA if it is
+            r[ r %in% empty.fields ] <- NA
+            # Collapse values
+            return(paste(na.omit(r), collapse = "_"))
+        })
+        # Remove the collapsed ranks from the original table
+        keep <- !colnames(rowData(x)) %in% mod_ranks
+        rowData(x) <- rowData(x)[, keep, drop = FALSE]
+        # Add the collapsed values to the taxonomy table
+        rowData(x)[[lowest.rank]] <- unname(new_rank)
+    }
+    return(x)
+}
+
 #' @importFrom IRanges CharacterList LogicalList
 .get_tax_ranks_selected <- function(x,rd, tax_cols, empty.fields){
     # We need DataFrame here to handle cases with a single entry in tax_cols
@@ -445,6 +490,8 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"),
 #'
 #' @inheritParams taxonomy-methods
 #'
+#' @param ... optional arguments not used currently.
+#'
 #' @details
 #'
 #' \code{addHierarchyTree} calculates a hierarchy tree from the available

From c9735433c536a142b1b94204254a21816d79ae0e Mon Sep 17 00:00:00 2001
From: TuomasBorman
Date: Tue, 14 Jan 2025 19:01:50 +0200
Subject: [PATCH 2/2] up

---
 man/taxonomy-methods.Rd | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd
index 3617c6e2b..bae2fb1f4 100644
--- a/man/taxonomy-methods.Rd
+++ b/man/taxonomy-methods.Rd
@@ -78,7 +78,14 @@ value of \code{taxonomyRanks()} function.}
 regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be
 removed if \code{na.rm = TRUE} before agglomeration.}
 
-\item{...}{optional arguments not used currently.}
+\item{...}{additional arguments
+\itemize{
+\item \code{lowest.rank}: The lowest taxonomy level to be considered in
+\code{getTaxonomyLabels}. Ranks lower than this will be collapsed into rank
+specified by \code{lowest.rank}. For example, if genus level is specified,
+species will be collapsed into genus. If \code{NULL}, the data is not
+collapsed. (Default: \code{NULL})
+}}
 
 \item{ranks}{\code{Character vector}. A vector of ranks to be set.}
 