From 964e2faf8c06b93a73400f08a2fc574145adca82 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Fri, 17 Jun 2022 11:37:55 -0400 Subject: [PATCH 01/13] Update `case_when()` to use `vec_case_when()` --- R/case_when.R | 264 ++++++++-------- R/vec-case-when.R | 215 +++++++++++++ man/case_when.Rd | 119 ++++---- tests/testthat/_snaps/case-when.md | 59 +++- tests/testthat/_snaps/vec-case-when.md | 239 +++++++++++++++ tests/testthat/test-case-when.R | 83 ++++- tests/testthat/test-vec-case-when.R | 403 +++++++++++++++++++++++++ 7 files changed, 1180 insertions(+), 202 deletions(-) create mode 100644 R/vec-case-when.R create mode 100644 tests/testthat/_snaps/vec-case-when.md create mode 100644 tests/testthat/test-vec-case-when.R diff --git a/R/case_when.R b/R/case_when.R index 4c86c84f8c..9fd2e12f86 100644 --- a/R/case_when.R +++ b/R/case_when.R @@ -1,95 +1,94 @@ -#' A general vectorised if +#' A general vectorised if-else #' -#' This function allows you to vectorise multiple [if_else()] -#' statements. It is an R equivalent of the SQL `CASE WHEN` statement. -#' If no cases match, `NA` is returned. +#' This function allows you to vectorise multiple [if_else()] statements. It is +#' an R equivalent of the SQL `CASE WHEN` statement. If no cases match, a +#' missing value is returned unless a `.default` is supplied. #' -#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A sequence of two-sided formulas. The left hand side (LHS) -#' determines which values match this case. The right hand side (RHS) -#' provides the replacement value. +#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A sequence of two-sided +#' formulas. The left hand side (LHS) determines which values match this case. +#' The right hand side (RHS) provides the replacement value. #' -#' The LHS must evaluate to a logical vector. The RHS does not need to be -#' logical, but all RHSs must evaluate to the same type of vector. +#' The LHS inputs must evaluate to logical vectors. #' -#' Both LHS and RHS may have the same length of either 1 or `n`. The -#' value of `n` must be consistent across all cases. The case of -#' `n == 0` is treated as a variant of `n != 1`. +#' The RHS inputs will be coerced to their common type. +#' +#' All inputs will be recycled to their common size. That said, we encourage +#' all LHS inputs to be the same size, and encourage the RHS inputs to be +#' either size 1 or the same size as the LHS inputs. This produces the most +#' predictable and stable code. #' #' `NULL` inputs are ignored. +#' +#' @param .default The value used when all of the LHS inputs return either +#' `FALSE` or `NA`. +#' +#' `.default` must be size 1 or the same size as the common size computed +#' from `...`. +#' +#' `.default` participates in the computation of the common type with the RHS +#' inputs. +#' +#' If `NULL`, the default, a missing value will be used. +#' +#' @param .ptype An optional prototype declaring the desired output type. If +#' supplied, this overrides the common type of the RHS inputs. +#' +#' @param .size An optional size declaring the desired output size. If supplied, +#' this overrides the common size computed from `...`. +#' +#' @return A vector with the same size as the common size computed from the +#' inputs in `...` and the same type as the common type of the RHS inputs +#' in `...`. +#' #' @export -#' @return A vector of length 1 or `n`, matching the length of the logical -#' input or output vectors, with the type (and attributes) of the first -#' RHS. Inconsistent lengths or types will generate an error. #' @examples -#' x <- 1:50 +#' x <- 1:70 #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", -#' TRUE ~ as.character(x) +#' .default = as.character(x) #' ) #' #' # Like an if statement, the arguments are evaluated in order, so you must #' # proceed from the most specific to the most general. This won't work: #' case_when( -#' TRUE ~ as.character(x), -#' x %% 5 == 0 ~ "fizz", -#' x %% 7 == 0 ~ "buzz", -#' x %% 35 == 0 ~ "fizz buzz" -#' ) -#' -#' # If none of the cases match, NA is used: -#' case_when( #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", -#' x %% 35 == 0 ~ "fizz buzz" +#' x %% 35 == 0 ~ "fizz buzz", +#' .default = as.character(x) #' ) #' -#' # Note that NA values in the vector x do not get special treatment. If you want -#' # to explicitly handle NA values you can use the `is.na` function: -#' x[2:4] <- NA_real_ +#' # If none of the cases match and no `.default` is supplied, NA is used: #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", -#' is.na(x) ~ "nope", -#' TRUE ~ as.character(x) #' ) #' -#' # All RHS values need to be of the same type. Inconsistent types will throw an error. -#' # This applies also to NA values used in RHS: NA is logical, use -#' # typed values like NA_real_, NA_complex, NA_character_, NA_integer_ as appropriate. +#' # Note that `NA` values on the LHS are treated like `FALSE` and will be +#' # assigned the `.default` value. You must handle them explicitly if you +#' # want to use a different value. The exact way to handle missing values is +#' # dependent on the set of LHS conditions you use. +#' x[2:4] <- NA_real_ #' case_when( -#' x %% 35 == 0 ~ NA_character_, +#' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", -#' TRUE ~ as.character(x) -#' ) -#' case_when( -#' x %% 35 == 0 ~ 35, -#' x %% 5 == 0 ~ 5, -#' x %% 7 == 0 ~ 7, -#' TRUE ~ NA_real_ +#' is.na(x) ~ "nope", +#' .default = as.character(x) #' ) #' -#' # case_when() evaluates all RHS expressions, and then constructs its +#' # `case_when()` evaluates all RHS expressions, and then constructs its #' # result by extracting the selected (via the LHS expressions) parts. -#' # In particular NaNs are produced in this case: +#' # In particular `NaN`s are produced in this case: #' y <- seq(-2, 2, by = .5) #' case_when( #' y >= 0 ~ sqrt(y), -#' TRUE ~ y +#' .default = y #' ) #' -#' # This throws an error as NA is logical not numeric -#' try(case_when( -#' x %% 35 == 0 ~ 35, -#' x %% 5 == 0 ~ 5, -#' x %% 7 == 0 ~ 7, -#' TRUE ~ NA -#' )) -#' -#' # case_when is particularly useful inside mutate when you want to +#' # `case_when()` is particularly useful inside `mutate()` when you want to #' # create a new variable that relies on a complex combination of existing #' # variables #' starwars %>% @@ -97,8 +96,8 @@ #' mutate( #' type = case_when( #' height > 200 | mass > 200 ~ "large", -#' species == "Droid" ~ "robot", -#' TRUE ~ "other" +#' species == "Droid" ~ "robot", +#' .default = "other" #' ) #' ) #' @@ -109,8 +108,8 @@ #' case_character_type <- function(height, mass, species) { #' case_when( #' height > 200 | mass > 200 ~ "large", -#' species == "Droid" ~ "robot", -#' TRUE ~ "other" +#' species == "Droid" ~ "robot", +#' .default = "other" #' ) #' } #' @@ -128,66 +127,113 @@ #' # no `else` clause: #' case_character_type <- function(height, mass, species, robots = TRUE) { #' case_when( -#' height > 200 | mass > 200 ~ "large", +#' height > 200 | mass > 200 ~ "large", #' if (robots) species == "Droid" ~ "robot", -#' TRUE ~ "other" +#' .default = "other" #' ) #' } #' #' starwars %>% #' mutate(type = case_character_type(height, mass, species, robots = FALSE)) %>% #' pull(type) -case_when <- function(...) { - fs <- compact_null(list2(...)) - n <- length(fs) +case_when <- function(..., + .default = NULL, + .ptype = NULL, + .size = NULL) { + args <- list2(...) + default_env <- caller_env() + dots_env <- current_env() error_call <- current_env() - if (n == 0) { - abort("No cases provided.", call = error_call) - } - query <- vector("list", n) - value <- vector("list", n) + args <- case_when_formula_evaluate( + args = args, + size = .size, + default_env = default_env, + dots_env = dots_env, + error_call = error_call + ) + + conditions <- args$conditions + values <- args$values + + vec_case_when( + conditions = conditions, + values = values, + default = .default, + default_arg = ".default", + ptype = .ptype, + size = .size + ) +} + +case_when_formula_evaluate <- function(args, + size, + default_env, + dots_env, + error_call) { + # `case_when()`'s formula interface compacts `NULL`s + args <- compact_null(args) + n_args <- length(args) + + conditions <- vector("list", n_args) + values <- vector("list", n_args) - default_env <- caller_env() quos_pairs <- map2( - fs, seq_along(fs), - validate_formula, default_env = default_env, dots_env = current_env(), error_call = error_call + .x = args, + .y = seq_len(n_args), + .f = validate_and_split_formula, + default_env = default_env, + dots_env = dots_env, + error_call = error_call ) - for (i in seq_len(n)) { + for (i in seq_len(n_args)) { pair <- quos_pairs[[i]] - query[[i]] <- eval_tidy(pair$lhs, env = default_env) - value[[i]] <- eval_tidy(pair$rhs, env = default_env) - - if (!is.logical(query[[i]])) { - abort_case_when_logical(pair$lhs, i, query[[i]], error_call = error_call) - } + conditions[[i]] <- eval_tidy(pair$lhs, env = default_env) + values[[i]] <- eval_tidy(pair$rhs, env = default_env) } - m <- validate_case_when_length(query, value, fs, error_call = error_call) + # Add the expressions as names for `conditions` and `values` for nice errors. + # These names also get passed on to `vec_case_when()`. + condition_names <- map(quos_pairs, function(pair) pair$lhs) + condition_names <- map_chr(condition_names, as_label) + names(conditions) <- condition_names - out <- value[[1]][rep(NA_integer_, m)] - replaced <- rep(FALSE, m) + value_names <- map(quos_pairs, function(pair) pair$rhs) + value_names <- map_chr(value_names, as_label) + names(values) <- value_names - for (i in seq_len(n)) { - out <- replace_with(out, query[[i]] & !replaced, value[[i]], NULL, error_call = error_call) - replaced <- replaced | (query[[i]] & !is.na(query[[i]])) - } + # `case_when()`'s formula interface finds the common size of ALL of its inputs. + # This is what allows `TRUE ~` to work. + size <- vec_size_common(!!!conditions, !!!values, .size = size, .call = error_call) - out + conditions <- vec_recycle_common(!!!conditions, .size = size, .call = error_call) + values <- vec_recycle_common(!!!values, .size = size, .call = error_call) + + list( + conditions = conditions, + values = values + ) } -validate_formula <- function(x, i, default_env, dots_env, error_call = caller_env()) { - # Formula might be quosured +validate_and_split_formula <- function(x, + i, + default_env, + dots_env, + error_call) { if (is_quosure(x)) { + # We specially handle quosures, assuming they hold formulas default_env <- quo_get_env(x) x <- quo_get_expr(x) } if (!is_formula(x)) { - arg <- substitute(...(), dots_env)[[1]] - abort_case_when_formula(arg, i, x, error_call = error_call) + arg <- substitute(...(), dots_env)[[i]] + deparsed <- fmt_obj1(deparse_trunc(arg)) + type <- friendly_type_of(x) + message <- glue("Case {i} ({deparsed}) must be a two-sided formula, not {type}.") + abort(message, call = error_call) } if (is_null(f_lhs(x))) { abort("Formulas must be two-sided.", call = error_call) @@ -201,41 +247,3 @@ validate_formula <- function(x, i, default_env, dots_env, error_call = caller_en rhs = new_quosure(f_rhs(x), env) ) } - -abort_case_when_formula <- function(arg, i, obj, error_call = caller_env()) { - deparsed <- fmt_obj1(deparse_trunc(arg)) - type <- friendly_type_of(obj) - msg <- glue("Case {i} ({deparsed}) must be a two-sided formula, not {type}.") - abort(msg, call = error_call) -} - -abort_case_when_logical <- function(lhs, i, query, error_call = caller_env()) { - deparsed <- fmt_obj1(deparse_trunc(quo_squash(lhs))) - type <- friendly_type_of(query) - msg <- glue("LHS of case {i} ({deparsed}) must be a logical vector, not {type}.") - abort(msg, call = error_call) -} - -validate_case_when_length <- function(query, value, fs, error_call = caller_env()) { - lhs_lengths <- lengths(query) - rhs_lengths <- lengths(value) - all_lengths <- unique(c(lhs_lengths, rhs_lengths)) - - if (length(all_lengths) <= 1) { - return(all_lengths[[1]]) - } - - non_atomic_lengths <- all_lengths[all_lengths != 1] - len <- non_atomic_lengths[[1]] - - if (length(non_atomic_lengths) == 1) { - return(len) - } - - inconsistent_lengths <- non_atomic_lengths[-1] - lhs_problems <- lhs_lengths %in% inconsistent_lengths - rhs_problems <- rhs_lengths %in% inconsistent_lengths - problems <- lhs_problems | rhs_problems - - check_length_val(inconsistent_lengths, len, header = fmt_calls(fs[problems]), error_call = error_call) -} diff --git a/R/vec-case-when.R b/R/vec-case-when.R new file mode 100644 index 0000000000..e2ca3526df --- /dev/null +++ b/R/vec-case-when.R @@ -0,0 +1,215 @@ +vec_case_when <- function(conditions, + values, + ..., + conditions_arg = "", + values_arg = "", + default = NULL, + default_arg = "default", + ptype = NULL, + size = NULL, + call = caller_env()) { + check_dots_empty0(..., call = call) + + vec_check_list(conditions, arg = "conditions", call = call) + vec_check_list(values, arg = "values", call = call) + + list_check_all_vectors(values, arg = values_arg, call = call) + + n_conditions <- length(conditions) + n_values <- length(values) + + if (n_conditions != n_values) { + message <- glue( + "The number of supplied conditions ({n_conditions}) must equal ", + "the number of supplied values ({n_values})." + ) + abort(message, call = call) + } + if (n_conditions == 0L) { + abort("At least one condition must be supplied.", call = call) + } + + if (!is_string(conditions_arg)) { + abort("`conditions_arg` must be a string.", call = call) + } + if (!is_string(values_arg)) { + abort("`values_arg` must be a string.", call = call) + } + if (!is_string(default_arg)) { + abort("`default_arg` must be a string.", call = call) + } + + condition_args <- list_names(conditions, arg = conditions_arg) + value_args <- list_names(values, arg = values_arg) + + names(conditions) <- condition_args + names(values) <- value_args + + for (i in seq_len(n_conditions)) { + condition <- conditions[[i]] + condition_arg <- condition_args[[i]] + + vec_assert( + x = condition, + ptype = logical(), + arg = condition_arg, + call = call + ) + } + + size <- vec_size_common( + !!!conditions, + .size = size, + .call = call + ) + + # Allow `default` to participate in common type determination. + # In terms of size/ptype behavior it is exactly like any other `values` element. + # Have to collect inputs and splice them in all at once due to + # https://github.com/r-lib/vctrs/issues/1570 + extras <- list(default) + names(extras) <- default_arg + everything <- c(values, extras) + + ptype <- vec_ptype_common( + !!!everything, + .ptype = ptype, + .call = call + ) + + # Cast early to generate correct error message indices + values <- vec_cast_common( + !!!values, + .to = ptype, + .call = call + ) + + if (is.null(default)) { + default <- vec_init(ptype) + } else { + default <- vec_cast( + x = default, + to = ptype, + x_arg = default_arg, + call = call + ) + } + + # Check for correct sizes + for (i in seq_len(n_conditions)) { + condition <- conditions[[i]] + condition_arg <- condition_args[[i]] + + vec_assert(condition, size = size, arg = condition_arg, call = call) + } + + value_sizes <- list_sizes(values) + + for (i in seq_len(n_values)) { + value_size <- value_sizes[[i]] + + if (value_size != 1L) { + value <- values[[i]] + value_arg <- value_args[[i]] + + vec_assert(value, size = size, arg = value_arg, call = call) + } + } + + default_size <- vec_size(default) + if (default_size != 1L) { + vec_assert(default, size = size, arg = default_arg, call = call) + } + + n_used <- 0L + locs <- vector("list", n_values) + + # Starts as unused. Any `TRUE` value in `condition` flips it to used. + are_unused <- vec_rep(TRUE, times = size) + + for (i in seq_len(n_conditions)) { + if (!any(are_unused)) { + # Early exit if all values are matched, for performance + break + } + + condition <- conditions[[i]] + + # Treat `NA` in `condition` as `FALSE`. + # `TRUE & NA == NA`, `FALSE & NA == FALSE`. + # `which()` drops `NA`s + loc <- are_unused & condition + loc <- which(loc) + + locs[[i]] <- loc + + are_unused[loc] <- FALSE + n_used <- n_used + 1L + } + + if (n_used == n_conditions && any(are_unused)) { + # If all of the `conditions` are used, then we check if we need `default` + are_unused <- which(are_unused) + + n_used <- n_used + 1L + n_values <- n_values + 1L + + locs[[n_values]] <- are_unused + values[[n_values]] <- default + value_sizes[[n_values]] <- default_size + } + + for (i in seq_len(n_used)) { + loc <- locs[[i]] + value <- values[[i]] + value_size <- value_sizes[[i]] + + if (value_size == 1L) { + # Recycle "up" + value <- vec_recycle(value, size = vec_size(loc)) + } else { + # Slice "down" + value <- vec_slice(value, loc) + } + + values[[i]] <- value + } + + # Remove names used for error messages. We don't want them in the result. + values <- unname(values) + + if (n_used != n_values) { + # Trim to only what will be used to fill the result + seq_used <- seq_len(n_used) + values <- values[seq_used] + locs <- locs[seq_used] + } + + vec_unchop( + x = values, + indices = locs, + ptype = ptype + ) +} + +list_names <- function(x, arg = "") { + names <- names2(x) + unnamed <- names == "" + + if (arg == "") { + loc_unnamed <- which(unnamed) + names[loc_unnamed] <- vec_paste0("..", loc_unnamed) + } else { + loc_named <- which(!unnamed) + loc_unnamed <- which(unnamed) + names[loc_named] <- vec_paste0(arg, "$", names[loc_named]) + names[loc_unnamed] <- vec_paste0(arg, "[[", loc_unnamed, "]]") + } + + names +} + +vec_paste0 <- function (...) { + args <- vec_recycle_common(...) + exec(paste0, !!!args) +} diff --git a/man/case_when.Rd b/man/case_when.Rd index 993b1d89c5..5f5bd7c30f 100644 --- a/man/case_when.Rd +++ b/man/case_when.Rd @@ -2,104 +2,101 @@ % Please edit documentation in R/case_when.R \name{case_when} \alias{case_when} -\title{A general vectorised if} +\title{A general vectorised if-else} \usage{ -case_when(...) +case_when(..., .default = NULL, .ptype = NULL, .size = NULL) } \arguments{ -\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A sequence of two-sided formulas. The left hand side (LHS) -determines which values match this case. The right hand side (RHS) -provides the replacement value. +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A sequence of two-sided +formulas. The left hand side (LHS) determines which values match this case. +The right hand side (RHS) provides the replacement value. -The LHS must evaluate to a logical vector. The RHS does not need to be -logical, but all RHSs must evaluate to the same type of vector. +The LHS inputs must evaluate to logical vectors. -Both LHS and RHS may have the same length of either 1 or \code{n}. The -value of \code{n} must be consistent across all cases. The case of -\code{n == 0} is treated as a variant of \code{n != 1}. +The RHS inputs will be coerced to their common type. + +All inputs will be recycled to their common size. That said, we encourage +all LHS inputs to be the same size, and encourage the RHS inputs to be +either size 1 or the same size as the LHS inputs. This produces the most +predictable and stable code. \code{NULL} inputs are ignored.} + +\item{.default}{The value used when all of the LHS inputs return either +\code{FALSE} or \code{NA}. + +\code{.default} must be size 1 or the same size as the common size computed +from \code{...}. + +\code{.default} participates in the computation of the common type with the RHS +inputs. + +If \code{NULL}, the default, a missing value will be used.} + +\item{.ptype}{An optional prototype declaring the desired output type. If +supplied, this overrides the common type of the RHS inputs.} + +\item{.size}{An optional size declaring the desired output size. If supplied, +this overrides the common size computed from \code{...}.} } \value{ -A vector of length 1 or \code{n}, matching the length of the logical -input or output vectors, with the type (and attributes) of the first -RHS. Inconsistent lengths or types will generate an error. +A vector with the same size as the common size computed from the +inputs in \code{...} and the same type as the common type of the RHS inputs +in \code{...}. } \description{ -This function allows you to vectorise multiple \code{\link[=if_else]{if_else()}} -statements. It is an R equivalent of the SQL \verb{CASE WHEN} statement. -If no cases match, \code{NA} is returned. +This function allows you to vectorise multiple \code{\link[=if_else]{if_else()}} statements. It is +an R equivalent of the SQL \verb{CASE WHEN} statement. If no cases match, a +missing value is returned unless a \code{.default} is supplied. } \examples{ -x <- 1:50 +x <- 1:70 case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", - TRUE ~ as.character(x) + .default = as.character(x) ) # Like an if statement, the arguments are evaluated in order, so you must # proceed from the most specific to the most general. This won't work: -case_when( - TRUE ~ as.character(x), - x \%\% 5 == 0 ~ "fizz", - x \%\% 7 == 0 ~ "buzz", - x \%\% 35 == 0 ~ "fizz buzz" -) - -# If none of the cases match, NA is used: case_when( x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", - x \%\% 35 == 0 ~ "fizz buzz" + x \%\% 35 == 0 ~ "fizz buzz", + .default = as.character(x) ) -# Note that NA values in the vector x do not get special treatment. If you want -# to explicitly handle NA values you can use the `is.na` function: -x[2:4] <- NA_real_ +# If none of the cases match and no `.default` is supplied, NA is used: case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", - is.na(x) ~ "nope", - TRUE ~ as.character(x) ) -# All RHS values need to be of the same type. Inconsistent types will throw an error. -# This applies also to NA values used in RHS: NA is logical, use -# typed values like NA_real_, NA_complex, NA_character_, NA_integer_ as appropriate. +# Note that `NA` values on the LHS are treated like `FALSE` and will be +# assigned the `.default` value. You must handle them explicitly if you +# want to use a different value. The exact way to handle missing values is +# dependent on the set of LHS conditions you use. +x[2:4] <- NA_real_ case_when( - x \%\% 35 == 0 ~ NA_character_, + x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", - TRUE ~ as.character(x) -) -case_when( - x \%\% 35 == 0 ~ 35, - x \%\% 5 == 0 ~ 5, - x \%\% 7 == 0 ~ 7, - TRUE ~ NA_real_ + is.na(x) ~ "nope", + .default = as.character(x) ) -# case_when() evaluates all RHS expressions, and then constructs its +# `case_when()` evaluates all RHS expressions, and then constructs its # result by extracting the selected (via the LHS expressions) parts. -# In particular NaNs are produced in this case: +# In particular `NaN`s are produced in this case: y <- seq(-2, 2, by = .5) case_when( y >= 0 ~ sqrt(y), - TRUE ~ y + .default = y ) -# This throws an error as NA is logical not numeric -try(case_when( - x \%\% 35 == 0 ~ 35, - x \%\% 5 == 0 ~ 5, - x \%\% 7 == 0 ~ 7, - TRUE ~ NA -)) - -# case_when is particularly useful inside mutate when you want to +# `case_when()` is particularly useful inside `mutate()` when you want to # create a new variable that relies on a complex combination of existing # variables starwars \%>\% @@ -107,8 +104,8 @@ starwars \%>\% mutate( type = case_when( height > 200 | mass > 200 ~ "large", - species == "Droid" ~ "robot", - TRUE ~ "other" + species == "Droid" ~ "robot", + .default = "other" ) ) @@ -119,8 +116,8 @@ starwars \%>\% case_character_type <- function(height, mass, species) { case_when( height > 200 | mass > 200 ~ "large", - species == "Droid" ~ "robot", - TRUE ~ "other" + species == "Droid" ~ "robot", + .default = "other" ) } @@ -138,9 +135,9 @@ starwars \%>\% # no `else` clause: case_character_type <- function(height, mass, species, robots = TRUE) { case_when( - height > 200 | mass > 200 ~ "large", + height > 200 | mass > 200 ~ "large", if (robots) species == "Droid" ~ "robot", - TRUE ~ "other" + .default = "other" ) } diff --git a/tests/testthat/_snaps/case-when.md b/tests/testthat/_snaps/case-when.md index 82569b30be..aebf61aabf 100644 --- a/tests/testthat/_snaps/case-when.md +++ b/tests/testthat/_snaps/case-when.md @@ -1,36 +1,81 @@ +# `.default` isn't part of recycling + + Code + case_when(FALSE ~ 1L, .default = 2:5) + Condition + Error in `case_when()`: + ! `.default` must have size 1, not size 4. + +# `.default` is part of common type computation + + Code + case_when(TRUE ~ 1L, .default = "x") + Condition + Error in `case_when()`: + ! Can't combine `1L` and `.default` . + +# passes through `.size` correctly + + Code + case_when(TRUE ~ 1:2, .size = 3) + Condition + Error in `case_when()`: + ! Can't recycle `1:2` (size 2) to size 3. + +# invalid type errors are correct (#6261) (#6206) + + Code + case_when(TRUE ~ 1, TRUE ~ "x") + Condition + Error in `case_when()`: + ! Can't combine `1` and `"x"` . + # case_when() give meaningful errors Code (expect_error(case_when(c(TRUE, FALSE) ~ 1:3, c(FALSE, TRUE) ~ 1:2))) Output - + Error in `case_when()`: - ! `c(TRUE, FALSE) ~ 1:3` must be length 2 or one, not 3. + ! Can't recycle `c(TRUE, FALSE)` (size 2) to match `1:3` (size 3). Code (expect_error(case_when(c(TRUE, FALSE) ~ 1, c(FALSE, TRUE, FALSE) ~ 2, c(FALSE, TRUE, FALSE, NA) ~ 3))) Output - + Error in `case_when()`: - ! `c(FALSE, TRUE, FALSE) ~ 2`, `c(FALSE, TRUE, FALSE, NA) ~ 3` must be length 2 or one, not 3, 4. + ! Can't recycle `c(TRUE, FALSE)` (size 2) to match `c(FALSE, TRUE, FALSE)` (size 3). Code (expect_error(case_when(50 ~ 1:3))) Output - + Error in `case_when()`: - ! LHS of case 1 (`50`) must be a logical vector, not a double vector. + ! `50` must be a vector with type . + Instead, it has type . Code (expect_error(case_when(paste(50)))) Output Error in `case_when()`: ! Case 1 (`paste(50)`) must be a two-sided formula, not a character vector. + Code + (expect_error(case_when(y ~ x, paste(50)))) + Output + + Error in `case_when()`: + ! Case 2 (`paste(50)`) must be a two-sided formula, not a character vector. Code (expect_error(case_when())) Output Error in `case_when()`: - ! No cases provided. + ! At least one condition must be supplied. + Code + (expect_error(case_when(NULL))) + Output + + Error in `case_when()`: + ! At least one condition must be supplied. Code (expect_error(case_when(~ 1:2))) Output diff --git a/tests/testthat/_snaps/vec-case-when.md b/tests/testthat/_snaps/vec-case-when.md new file mode 100644 index 0000000000..220b8d82bd --- /dev/null +++ b/tests/testthat/_snaps/vec-case-when.md @@ -0,0 +1,239 @@ +# `conditions` inputs can be size zero + + Code + vec_case_when(list(logical()), list(1:2)) + Condition + Error: + ! `..1` must have size 0, not size 2. + +# `values` are cast to their common type + + Code + vec_case_when(list(FALSE, TRUE), list(1, "x")) + Condition + Error: + ! Can't combine `..1` and `..2` . + +# `values` must be size 1 or same size as the `conditions` + + Code + vec_case_when(list(c(TRUE, FALSE, TRUE, TRUE)), list(1:3)) + Condition + Error: + ! `..1` must have size 4, not size 3. + +# `default` must be size 1 or same size as `conditions` (exact same as any other `values` input) + + Code + vec_case_when(list(FALSE), list(1L), default = 2:3) + Condition + Error: + ! `default` must have size 1, not size 2. + +# `default_arg` can be customized + + Code + vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") + Condition + Error: + ! `foo` must have size 1, not size 2. + +--- + + Code + vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") + Condition + Error: + ! Can't combine `..1` and `foo` . + +# `conditions_arg` is validated + + Code + vec_case_when(list(TRUE), list(1), conditions_arg = 1) + Condition + Error: + ! `conditions_arg` must be a string. + +# `values_arg` is validated + + Code + vec_case_when(list(TRUE), list(1), values_arg = 1) + Condition + Error: + ! `values_arg` must be a string. + +# `default_arg` is validated + + Code + vec_case_when(list(TRUE), list(1), default_arg = 1) + Condition + Error: + ! `default_arg` must be a string. + +# `conditions` must all be the same size + + Code + vec_case_when(list(c(TRUE, FALSE), TRUE), list(1, 2)) + Condition + Error: + ! `..2` must have size 2, not size 1. + +--- + + Code + vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2)) + Condition + Error: + ! Can't recycle `..1` (size 2) to match `..2` (size 3). + +# `conditions` must be logical (and aren't cast to logical!) + + Code + vec_case_when(list(1), list(2)) + Condition + Error: + ! `..1` must be a vector with type . + Instead, it has type . + +--- + + Code + vec_case_when(list(TRUE, 3.5), list(2, 4)) + Condition + Error: + ! `..2` must be a vector with type . + Instead, it has type . + +# `size` overrides the `conditions` sizes + + Code + vec_case_when(list(TRUE), list(1), size = 5) + Condition + Error: + ! `..1` must have size 5, not size 1. + +--- + + Code + vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2), size = 2) + Condition + Error: + ! `..2` must have size 2, not size 3. + +# `ptype` overrides the `values` types + + Code + vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) + Condition + Error: + ! Can't convert `..1` to . + +# number of `conditions` and `values` must be the same + + Code + vec_case_when(list(TRUE), list()) + Condition + Error: + ! The number of supplied conditions (1) must equal the number of supplied values (0). + +--- + + Code + vec_case_when(list(TRUE, TRUE), list(1)) + Condition + Error: + ! The number of supplied conditions (2) must equal the number of supplied values (1). + +# can't have empty inputs + + Code + vec_case_when(list(), list()) + Condition + Error: + ! At least one condition must be supplied. + +--- + + Code + vec_case_when(list(), list(), default = 1) + Condition + Error: + ! At least one condition must be supplied. + +# dots must be empty + + Code + vec_case_when(list(TRUE), list(1), 2) + Condition + Error: + ! `...` must be empty. + x Problematic argument: + * ..1 = 2 + i Did you forget to name an argument? + +# `conditions` must be a list + + Code + vec_case_when(1, list(2)) + Condition + Error: + ! `conditions` must be a list, not a number. + +# `values` must be a list + + Code + vec_case_when(list(TRUE), 1) + Condition + Error: + ! `values` must be a list, not a number. + +# named inputs show up in the error message + + Code + vec_case_when(list(x = 1.5), list(1)) + Condition + Error: + ! `x` must be a vector with type . + Instead, it has type . + +--- + + Code + vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") + Condition + Error: + ! `foo$x` must be a vector with type . + Instead, it has type . + +--- + + Code + vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) + Condition + Error: + ! `x` must have size 2, not size 1. + +--- + + Code + vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") + Condition + Error: + ! `foo$x` must have size 2, not size 1. + +--- + + Code + vec_case_when(list(TRUE, FALSE), list(1, x = "y")) + Condition + Error: + ! Can't combine `..1` and `x` . + +--- + + Code + vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") + Condition + Error: + ! Can't combine `foo[[1]]` and `foo$x` . + diff --git a/tests/testthat/test-case-when.R b/tests/testthat/test-case-when.R index 904855a7a4..9cff1dca83 100644 --- a/tests/testthat/test-case-when.R +++ b/tests/testthat/test-case-when.R @@ -43,6 +43,31 @@ test_that("NA conditions (#2927)", { ) }) +test_that("any `TRUE` overrides an `NA`", { + x <- c(1, 2, NA, 3) + expect <- c("one", "not_one", "missing", "not_one") + + # `TRUE` overriding before the `NA` + expect_identical( + case_when( + is.na(x) ~ "missing", + x == 1 ~ "one", + .default = "not_one" + ), + expect + ) + + # `TRUE` overriding after the `NA` + expect_identical( + case_when( + x == 1 ~ "one", + is.na(x) ~ "missing", + .default = "not_one" + ), + expect + ) +}) + test_that("atomic conditions (#2909)", { expect_equal( case_when( @@ -79,7 +104,7 @@ test_that("zero-length conditions and values (#3041)", { test_that("case_when can be used in anonymous functions (#3422)", { res <- tibble(a = 1:3) %>% - mutate(b = (function(x) case_when(x < 2 ~ TRUE, TRUE ~ FALSE))(a)) %>% + mutate(b = (function(x) case_when(x < 2 ~ TRUE, .default = FALSE))(a)) %>% pull() expect_equal(res, c(TRUE, FALSE, FALSE)) }) @@ -87,9 +112,9 @@ test_that("case_when can be used in anonymous functions (#3422)", { test_that("case_when() can be used inside mutate()", { out <- mtcars[1:4, ] %>% mutate(out = case_when( - cyl == 4 ~ 1, - .data[["am"]] == 1 ~ 2, - TRUE ~ 0 + cyl == 4 ~ 1, + .data[["am"]] == 1 ~ 2, + .default = 0 )) %>% pull() expect_identical(out, c(2, 2, 1, 0)) @@ -163,7 +188,7 @@ test_that("NULL inputs are compacted", { out <- case_when( x == 2 ~ TRUE, if (bool) x == 3 ~ NA, - TRUE ~ FALSE + .default = FALSE ) expect_identical(out, c(FALSE, TRUE, FALSE)) @@ -171,14 +196,54 @@ test_that("NULL inputs are compacted", { out <- case_when( x == 2 ~ TRUE, if (bool) x == 3 ~ NA, - TRUE ~ FALSE + .default = FALSE ) expect_identical(out, c(FALSE, TRUE, NA)) }) +test_that("passes through `.default` correctly", { + expect_identical(case_when(FALSE ~ 1, .default = 2), 2) + expect_identical(case_when(FALSE ~ 1:5, .default = 2), rep(2, 5)) + expect_identical(case_when(FALSE ~ 1:5, .default = 2:6), 2:6) +}) + +test_that("`.default` isn't part of recycling", { + # Because eventually we want to only take the output size from the LHS conditions, + # so having `.default` participate in the common size is a step in the wrong + # direction + expect_snapshot(error = TRUE, { + case_when(FALSE ~ 1L, .default = 2:5) + }) +}) + +test_that("`.default` is part of common type computation", { + expect_identical(case_when(TRUE ~ 1L, .default = 2), 1) + + expect_snapshot(error = TRUE, { + case_when(TRUE ~ 1L, .default = "x") + }) +}) + +test_that("passes through `.ptype` correctly", { + expect_identical(case_when(TRUE ~ 1, .ptype = integer()), 1L) +}) + +test_that("passes through `.size` correctly", { + expect_identical(case_when(TRUE ~ 1, .size = 2), c(1, 1)) + + expect_snapshot(error = TRUE, { + case_when(TRUE ~ 1:2, .size = 3) + }) +}) # Errors ------------------------------------------------------------------ +test_that("invalid type errors are correct (#6261) (#6206)", { + expect_snapshot(error = TRUE, { + case_when(TRUE ~ 1, TRUE ~ "x") + }) +}) + test_that("case_when() give meaningful errors", { expect_snapshot({ (expect_error( @@ -202,9 +267,15 @@ test_that("case_when() give meaningful errors", { (expect_error( case_when(paste(50)) )) + (expect_error( + case_when(y ~ x, paste(50)) + )) (expect_error( case_when() )) + (expect_error( + case_when(NULL) + )) (expect_error( case_when(~1:2) )) diff --git a/tests/testthat/test-vec-case-when.R b/tests/testthat/test-vec-case-when.R new file mode 100644 index 0000000000..869420a11d --- /dev/null +++ b/tests/testthat/test-vec-case-when.R @@ -0,0 +1,403 @@ +test_that("works with data frames", { + conditions <- list( + c(FALSE, TRUE, FALSE, FALSE), + c(TRUE, TRUE, FALSE, FALSE), + c(FALSE, TRUE, FALSE, TRUE) + ) + values <- list( + data_frame(x = 1, y = 2), + data_frame(x = 3, y = 4), + data_frame(x = 3:6, y = 4:7) + ) + + out <- vec_case_when(conditions, values) + + expect_identical( + out, + data_frame( + x = c(3, 1, NA, 6), + y = c(4, 2, NA, 7) + ) + ) +}) + +test_that("first `TRUE` case wins", { + conditions <- list( + c(TRUE, FALSE), + c(TRUE, TRUE), + c(TRUE, TRUE) + ) + values <- list( + 1, + 2, + 3 + ) + + expect_identical( + vec_case_when(conditions, values), + c(1, 2) + ) +}) + +test_that("can replace missing values by catching with `is.na()`", { + x <- c(1:3, NA) + + conditions <- list( + x <= 1, + x <= 2, + is.na(x) + ) + values <- list( + 1, + 2, + 0 + ) + + expect_identical( + vec_case_when(conditions, values), + c(1, 2, NA, 0) + ) +}) + +test_that("Unused logical `NA` can still be cast to `values` ptype", { + # Requires that casting happen before recycling, because it recycles + # to size zero, resulting in a logical rather than an unspecified. + expect_identical(vec_case_when(list(TRUE, FALSE), list("x", NA)), "x") + expect_identical(vec_case_when(list(FALSE, TRUE), list("x", NA)), NA_character_) +}) + +test_that("`conditions` inputs can be size zero", { + expect_identical( + vec_case_when( + list(logical(), logical()), + list(1, 2) + ), + numeric() + ) + + expect_snapshot(error = TRUE, { + vec_case_when(list(logical()), list(1:2)) + }) +}) + +test_that("retains names of `values` inputs", { + value1 <- c(x = 1, y = 2) + value2 <- c(z = 3, w = 4) + + out <- vec_case_when( + list(c(TRUE, FALSE), c(TRUE, TRUE)), + list(value1, value2) + ) + + expect_named(out, c("x", "w")) +}) + +test_that("`values` are cast to their common type", { + expect_identical(vec_case_when(list(FALSE, TRUE), list(1, 2L)), 2) + expect_identical(vec_case_when(list(FALSE, TRUE), list(1, NA)), NA_real_) + + expect_snapshot(error = TRUE, { + vec_case_when(list(FALSE, TRUE), list(1, "x")) + }) +}) + +test_that("`values` must be size 1 or same size as the `conditions`", { + expect_identical( + vec_case_when( + list(c(TRUE, TRUE)), + list(1) + ), + c(1, 1) + ) + expect_identical( + vec_case_when( + list(c(TRUE, FALSE), c(TRUE, TRUE)), + list(c(1, 2), c(3, 4)) + ), + c(1, 4) + ) + + expect_snapshot(error = TRUE, { + vec_case_when( + list(c(TRUE, FALSE, TRUE, TRUE)), + list(1:3) + ) + }) +}) + +test_that("Unhandled `NA` are given a value of `default`", { + expect_identical( + vec_case_when(list(NA), list(1)), + NA_real_ + ) + + expect_identical( + vec_case_when(list(NA), list(1), default = 2), + 2 + ) + + expect_identical( + vec_case_when( + list( + c(FALSE, NA, TRUE, FALSE), + c(NA, FALSE, TRUE, FALSE) + ), + list( + 2, + 3 + ), + default = 4 + ), + c(4, 4, 2, 4) + ) +}) + +test_that("`NA` is overridden by any `TRUE` values", { + x <- c(1, 2, NA, 3) + expect <- c("one", "not_one", "missing", "not_one") + + # `TRUE` overriding before the `NA` + conditions <- list( + is.na(x), + x == 1 + ) + values <- list( + "missing", + "one" + ) + expect_identical( + vec_case_when( + conditions, + values, + default = "not_one" + ), + expect + ) + + # `TRUE` overriding after the `NA` + conditions <- list( + x == 1, + is.na(x) + ) + values <- list( + "one", + "missing" + ) + expect_identical( + vec_case_when( + conditions, + values, + default = "not_one" + ), + expect + ) +}) + +test_that("works when there is a used `default` and no missing values", { + expect_identical(vec_case_when(list(c(TRUE, FALSE)), list(1), default = 3:4), c(1, 4)) +}) + +test_that("works when there are missing values but no `default`", { + expect_identical(vec_case_when(list(c(TRUE, NA)), list(1)), c(1, NA)) +}) + +test_that("A `NULL` `default` fills in with missing values", { + expect_identical( + vec_case_when(list(c(TRUE, FALSE, FALSE)), list(1)), + c(1, NA, NA) + ) +}) + +test_that("`default` fills in all unused slots", { + expect_identical( + vec_case_when(list(c(TRUE, FALSE, FALSE)), list(1), default = 2), + c(1, 2, 2) + ) +}) + +test_that("`default` is initialized correctly in the logical / unspecified case", { + # i.e. `vec_ptype(NA)` is unspecified but the result should be finalized to logical + expect_identical(vec_case_when(list(FALSE), list(NA)), NA) +}) + +test_that("`default` can be vectorized, and is sliced to fit as needed", { + out <- vec_case_when( + list( + c(FALSE, TRUE, FALSE, TRUE, FALSE), + c(FALSE, TRUE, FALSE, FALSE, TRUE) + ), + list( + 1:5, + 6:10 + ), + default = 11:15 + ) + + expect_identical(out, c(11L, 2L, 13L, 4L, 10L)) +}) + +test_that("`default` must be size 1 or same size as `conditions` (exact same as any other `values` input)", { + expect_snapshot(error = TRUE, { + vec_case_when(list(FALSE), list(1L), default = 2:3) + }) +}) + +test_that("`default` participates in common type determination (exact same as any other `values` input)", { + expect_identical(vec_case_when(list(FALSE), list(1L), default = 2), 2) +}) + +test_that("`default` that is an unused logical `NA` can still be cast to `values` ptype", { + # Requires that casting happen before recycling, because it recycles + # to size zero, resulting in a logical rather than an unspecified. + expect_identical(vec_case_when(list(TRUE), list("x"), default = NA), "x") +}) + +test_that("`default_arg` can be customized", { + expect_snapshot(error = TRUE, { + vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") + }) +}) + +test_that("`conditions_arg` is validated", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(1), conditions_arg = 1) + }) +}) + +test_that("`values_arg` is validated", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(1), values_arg = 1) + }) +}) + +test_that("`default_arg` is validated", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(1), default_arg = 1) + }) +}) + +test_that("`conditions` must all be the same size", { + expect_snapshot(error = TRUE, { + vec_case_when( + list(c(TRUE, FALSE), TRUE), + list(1, 2) + ) + }) + expect_snapshot(error = TRUE, { + vec_case_when( + list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), + list(1, 2) + ) + }) +}) + +test_that("`conditions` must be logical (and aren't cast to logical!)", { + expect_snapshot(error = TRUE, { + vec_case_when(list(1), list(2)) + }) + + # Make sure input numbering is right in the error message! + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE, 3.5), list(2, 4)) + }) +}) + +test_that("`size` overrides the `conditions` sizes", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(1), size = 5) + }) + + expect_snapshot(error = TRUE, { + vec_case_when( + list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), + list(1, 2), + size = 2 + ) + }) +}) + +test_that("`ptype` overrides the `values` types", { + expect_identical( + vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = integer()), + 2L + ) + + expect_snapshot(error = TRUE, { + vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) + }) +}) + +test_that("number of `conditions` and `values` must be the same", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list()) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE, TRUE), list(1)) + }) +}) + +test_that("can't have empty inputs", { + expect_snapshot(error = TRUE, { + vec_case_when(list(), list()) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(), list(), default = 1) + }) +}) + +test_that("dots must be empty", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(1), 2) + }) +}) + +test_that("`conditions` must be a list", { + expect_snapshot(error = TRUE, { + vec_case_when(1, list(2)) + }) +}) + +test_that("`values` must be a list", { + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), 1) + }) +}) + +test_that("named inputs show up in the error message", { + expect_snapshot(error = TRUE, { + vec_case_when(list(x = 1.5), list(1)) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") + }) + + expect_snapshot(error = TRUE, { + vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") + }) + + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE, FALSE), list(1, x = "y")) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") + }) + + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(NULL)) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(x = NULL)) + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(NULL), values_arg = "foo") + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") + }) +}) From d8b4204297f0a062b4f18be79095d57fa1e8f0de Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Fri, 17 Jun 2022 11:38:21 -0400 Subject: [PATCH 02/13] Update internal usage of `case_when()` to use `.default` --- R/utils-format.r | 2 +- data-raw/starwars.R | 2 +- tests/testthat/test-across.R | 8 ++++---- tests/testthat/test-colwise-select.R | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/utils-format.r b/R/utils-format.r index 31353005b0..d03fc35d20 100644 --- a/R/utils-format.r +++ b/R/utils-format.r @@ -33,7 +33,7 @@ ruler <- function(width = getOption("width")) { y <- case_when( x %% 10 == 0 ~ as.character((x %/% 10) %% 10), x %% 5 == 0 ~ "+", - TRUE ~ "-" + .default = "-" ) cat(y, "\n", sep = "") cat(x %% 10, "\n", sep = "") diff --git a/data-raw/starwars.R b/data-raw/starwars.R index 9e34c49312..581f6a2cd1 100644 --- a/data-raw/starwars.R +++ b/data-raw/starwars.R @@ -85,7 +85,7 @@ starwars <- mutate(starwars, gender = case_when( sex == "male" ~ "masculine", sex == "female" ~ "feminine", - TRUE ~ unname(genders[name]) + .default = unname(genders[name]) ) ) diff --git a/tests/testthat/test-across.R b/tests/testthat/test-across.R index ae641efde1..ba48b05d0b 100644 --- a/tests/testthat/test-across.R +++ b/tests/testthat/test-across.R @@ -818,13 +818,13 @@ test_that("expr_subtitute() stops at lambdas (#5896)", { test_that("expr_subtitute() keeps at double-sided formula (#5894)", { expect_identical( - expr_substitute(expr(case_when(.x < 5 ~ 5, TRUE ~ .x)), quote(.x), quote(a)), - expr(case_when(a < 5 ~ 5, TRUE ~ a)) + expr_substitute(expr(case_when(.x < 5 ~ 5, .default = .x)), quote(.x), quote(a)), + expr(case_when(a < 5 ~ 5, .default = a)) ) expect_identical( - expr_substitute(expr(case_when(. < 5 ~ 5, TRUE ~ .)), quote(.), quote(a)), - expr(case_when(a < 5 ~ 5, TRUE ~ a)) + expr_substitute(expr(case_when(. < 5 ~ 5, .default = .)), quote(.), quote(a)), + expr(case_when(a < 5 ~ 5, .default = a)) ) }) diff --git a/tests/testthat/test-colwise-select.R b/tests/testthat/test-colwise-select.R index c1ed258667..a239f301bd 100644 --- a/tests/testthat/test-colwise-select.R +++ b/tests/testthat/test-colwise-select.R @@ -152,7 +152,7 @@ test_that("rename_at() handles empty selection (#4324)", { }) test_that("rename_all/at() call the function with simple character vector (#4459)", { - fun <- function(x) case_when(x == 'mpg' ~ 'fuel_efficiency', TRUE ~ x) + fun <- function(x) case_when(x == 'mpg' ~ 'fuel_efficiency', .default = x) out <- rename_all(mtcars,fun) expect_equal(names(out)[1L], 'fuel_efficiency') From dd486031d140290d25dabb8f0b8137d86ba6ee6d Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Fri, 17 Jun 2022 11:38:31 -0400 Subject: [PATCH 03/13] NEWS bullet --- NEWS.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/NEWS.md b/NEWS.md index bc4aa8531f..1cefff7c58 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,38 @@ # dplyr (development version) +* `case_when()` has been rewritten to utilize vctrs (#5106). This comes with a + number of useful improvements: + + * There is a new `.default` argument that is intended to replace usage of + `TRUE ~ default_value` as a more explicit and readable way to specify + a default value. In the future, we will deprecate the unsafe recycling of + the LHS inputs that allows `TRUE ~` to work, so we encourage you to switch + over to using `.default` instead. + + * The types of the RHS inputs no longer have to match exactly. For example, + the following no longer requires you to use `NA_character_` instead of just + `NA`. + + ``` + x <- c("little", "unknown", "small", "missing", "large") + + case_when( + x %in% c("little", "small") ~ "one", + x %in% c("big", "large") ~ "two", + x %in% c("missing", "unknown") ~ NA + ) + ``` + + * `case_when()` now supports a larger variety of value types. For example, + you can use a data frame to create multiple columns at once. + + * There are new `.ptype` and `.size` arguments which allow you to enforce + a particular output type and size. This allows you to construct a completely + type and size stable call to `case_when()`. + + * The error thrown when types or lengths were incorrect has been improved + (#6261, #6206). + * `arrange()` now uses a faster algorithm for sorting character vectors, which is heavily inspired by data.table's `forder()`. Additionally, the default locale for sorting character vectors is now the C locale, which is a breaking From 34e5f8f53cba863d4c2988058b25ca5b439e14a2 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 30 Jun 2022 10:25:25 -0400 Subject: [PATCH 04/13] Update NEWS.md Co-authored-by: Lionel Henry --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1cefff7c58..2619ce6eb2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,9 +17,9 @@ x <- c("little", "unknown", "small", "missing", "large") case_when( - x %in% c("little", "small") ~ "one", - x %in% c("big", "large") ~ "two", - x %in% c("missing", "unknown") ~ NA + x %in% c("little", "small") ~ "one", + x %in% c("big", "large") ~ "two", + x %in% c("missing", "unknown") ~ NA ) ``` From 39f43fee032f4d5f40ee295df2e2d1369b7aa1bf Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 10:31:58 -0400 Subject: [PATCH 05/13] Clarify usefulness of LHS/RHS recycling --- R/case_when.R | 6 +++--- man/case_when.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/case_when.R b/R/case_when.R index 9fd2e12f86..8c4d7763ce 100644 --- a/R/case_when.R +++ b/R/case_when.R @@ -13,9 +13,9 @@ #' The RHS inputs will be coerced to their common type. #' #' All inputs will be recycled to their common size. That said, we encourage -#' all LHS inputs to be the same size, and encourage the RHS inputs to be -#' either size 1 or the same size as the LHS inputs. This produces the most -#' predictable and stable code. +#' all LHS inputs to be the same size. Recycling is mainly useful for RHS +#' inputs, where you might supply a size 1 input that will be recycled to the +#' size of the LHS inputs. #' #' `NULL` inputs are ignored. #' diff --git a/man/case_when.Rd b/man/case_when.Rd index 5f5bd7c30f..8360eb8a60 100644 --- a/man/case_when.Rd +++ b/man/case_when.Rd @@ -16,9 +16,9 @@ The LHS inputs must evaluate to logical vectors. The RHS inputs will be coerced to their common type. All inputs will be recycled to their common size. That said, we encourage -all LHS inputs to be the same size, and encourage the RHS inputs to be -either size 1 or the same size as the LHS inputs. This produces the most -predictable and stable code. +all LHS inputs to be the same size. Recycling is mainly useful for RHS +inputs, where you might supply a size 1 input that will be recycled to the +size of the LHS inputs. \code{NULL} inputs are ignored.} From 3779c32f840fd6b3f9d0b2031413f7ccee872f47 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 10:55:59 -0400 Subject: [PATCH 06/13] Make two sided formula check a special case for better error reporting --- R/case_when.R | 15 +++++++++------ tests/testthat/_snaps/case-when.md | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/case_when.R b/R/case_when.R index 8c4d7763ce..2945992c85 100644 --- a/R/case_when.R +++ b/R/case_when.R @@ -228,16 +228,19 @@ validate_and_split_formula <- function(x, x <- quo_get_expr(x) } - if (!is_formula(x)) { + if (!is_formula(x, lhs = TRUE)) { arg <- substitute(...(), dots_env)[[i]] deparsed <- fmt_obj1(deparse_trunc(arg)) - type <- friendly_type_of(x) - message <- glue("Case {i} ({deparsed}) must be a two-sided formula, not {type}.") + + if (is_formula(x)) { + type <- "a two-sided formula" + } else { + type <- glue("a two-sided formula, not {friendly_type_of(x)}") + } + + message <- glue("Case {i} ({deparsed}) must be {type}.") abort(message, call = error_call) } - if (is_null(f_lhs(x))) { - abort("Formulas must be two-sided.", call = error_call) - } # Formula might be unevaluated, e.g. if it's been quosured env <- f_env(x) %||% default_env diff --git a/tests/testthat/_snaps/case-when.md b/tests/testthat/_snaps/case-when.md index aebf61aabf..ebb7801509 100644 --- a/tests/testthat/_snaps/case-when.md +++ b/tests/testthat/_snaps/case-when.md @@ -81,5 +81,5 @@ Output Error in `case_when()`: - ! Formulas must be two-sided. + ! Case 1 (`~1:2`) must be a two-sided formula. From 60949cbe501384c0e59cae94e771abaa04c9ce41 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 10:57:04 -0400 Subject: [PATCH 07/13] Actually generate the new snapshots in `test-vec-case-when.R` --- tests/testthat/_snaps/vec-case-when.md | 32 ++++++++++++++++++++++++++ tests/testthat/test-vec-case-when.R | 8 +++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tests/testthat/_snaps/vec-case-when.md b/tests/testthat/_snaps/vec-case-when.md index 220b8d82bd..7f5b927051 100644 --- a/tests/testthat/_snaps/vec-case-when.md +++ b/tests/testthat/_snaps/vec-case-when.md @@ -237,3 +237,35 @@ Error: ! Can't combine `foo[[1]]` and `foo$x` . +--- + + Code + vec_case_when(list(TRUE), list(NULL)) + Condition + Error: + ! `..1` must be a vector, not NULL. + +--- + + Code + vec_case_when(list(TRUE), list(x = NULL)) + Condition + Error: + ! `x` must be a vector, not NULL. + +--- + + Code + vec_case_when(list(TRUE), list(NULL), values_arg = "foo") + Condition + Error: + ! `foo[[1]]` must be a vector, not NULL. + +--- + + Code + vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") + Condition + Error: + ! `foo$x` must be a vector, not NULL. + diff --git a/tests/testthat/test-vec-case-when.R b/tests/testthat/test-vec-case-when.R index 869420a11d..21374515db 100644 --- a/tests/testthat/test-vec-case-when.R +++ b/tests/testthat/test-vec-case-when.R @@ -5,16 +5,16 @@ test_that("works with data frames", { c(FALSE, TRUE, FALSE, TRUE) ) values <- list( - data_frame(x = 1, y = 2), - data_frame(x = 3, y = 4), - data_frame(x = 3:6, y = 4:7) + vctrs::data_frame(x = 1, y = 2), + vctrs::data_frame(x = 3, y = 4), + vctrs::data_frame(x = 3:6, y = 4:7) ) out <- vec_case_when(conditions, values) expect_identical( out, - data_frame( + vctrs::data_frame( x = c(3, 1, NA, 6), y = c(4, 2, NA, 7) ) From 0779f5de6f16cd349eab166af412a3c4ad644164 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 14:17:49 -0400 Subject: [PATCH 08/13] Use `as_label()` instead of custom deparser --- R/case_when.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/case_when.R b/R/case_when.R index 2945992c85..1f8c288dc5 100644 --- a/R/case_when.R +++ b/R/case_when.R @@ -230,7 +230,7 @@ validate_and_split_formula <- function(x, if (!is_formula(x, lhs = TRUE)) { arg <- substitute(...(), dots_env)[[i]] - deparsed <- fmt_obj1(deparse_trunc(arg)) + arg <- glue::backtick(as_label(arg)) if (is_formula(x)) { type <- "a two-sided formula" @@ -238,7 +238,7 @@ validate_and_split_formula <- function(x, type <- glue("a two-sided formula, not {friendly_type_of(x)}") } - message <- glue("Case {i} ({deparsed}) must be {type}.") + message <- glue("Case {i} ({arg}) must be {type}.") abort(message, call = error_call) } From 9cc5cb9bd6e34f2edf7f3a59b0c85a3a35ea4b6a Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 16:01:00 -0400 Subject: [PATCH 09/13] Restructure `list_names()` as `names_as_error_names()` --- R/vec-case-when.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/vec-case-when.R b/R/vec-case-when.R index e2ca3526df..260a66b87f 100644 --- a/R/vec-case-when.R +++ b/R/vec-case-when.R @@ -39,8 +39,11 @@ vec_case_when <- function(conditions, abort("`default_arg` must be a string.", call = call) } - condition_args <- list_names(conditions, arg = conditions_arg) - value_args <- list_names(values, arg = values_arg) + condition_args <- names2(conditions) + condition_args <- names_as_error_names(condition_args, arg = conditions_arg) + + value_args <- names2(values) + value_args <- names_as_error_names(value_args, arg = values_arg) names(conditions) <- condition_args names(values) <- value_args @@ -192,8 +195,7 @@ vec_case_when <- function(conditions, ) } -list_names <- function(x, arg = "") { - names <- names2(x) +names_as_error_names <- function(names, arg = "") { unnamed <- names == "" if (arg == "") { From feb4deed139721e2c10fa66ad752fcf307697839 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 16:05:15 -0400 Subject: [PATCH 10/13] Use `n_processed` over ambiguous `n_used` --- R/vec-case-when.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/vec-case-when.R b/R/vec-case-when.R index 260a66b87f..d4d3c8c011 100644 --- a/R/vec-case-when.R +++ b/R/vec-case-when.R @@ -124,7 +124,7 @@ vec_case_when <- function(conditions, vec_assert(default, size = size, arg = default_arg, call = call) } - n_used <- 0L + n_processed <- 0L locs <- vector("list", n_values) # Starts as unused. Any `TRUE` value in `condition` flips it to used. @@ -147,14 +147,14 @@ vec_case_when <- function(conditions, locs[[i]] <- loc are_unused[loc] <- FALSE - n_used <- n_used + 1L + n_processed <- n_processed + 1L } - if (n_used == n_conditions && any(are_unused)) { + if (n_processed == n_conditions && any(are_unused)) { # If all of the `conditions` are used, then we check if we need `default` are_unused <- which(are_unused) - n_used <- n_used + 1L + n_processed <- n_processed + 1L n_values <- n_values + 1L locs[[n_values]] <- are_unused @@ -162,7 +162,7 @@ vec_case_when <- function(conditions, value_sizes[[n_values]] <- default_size } - for (i in seq_len(n_used)) { + for (i in seq_len(n_processed)) { loc <- locs[[i]] value <- values[[i]] value_size <- value_sizes[[i]] @@ -181,11 +181,11 @@ vec_case_when <- function(conditions, # Remove names used for error messages. We don't want them in the result. values <- unname(values) - if (n_used != n_values) { + if (n_processed != n_values) { # Trim to only what will be used to fill the result - seq_used <- seq_len(n_used) - values <- values[seq_used] - locs <- locs[seq_used] + seq_processed <- seq_len(n_processed) + values <- values[seq_processed] + locs <- locs[seq_processed] } vec_unchop( From 8690a642564a36a9f8ddd4151a4362f03f86fefa Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 16:06:40 -0400 Subject: [PATCH 11/13] Use better variable name for unused locations --- R/vec-case-when.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/vec-case-when.R b/R/vec-case-when.R index d4d3c8c011..8b1442706b 100644 --- a/R/vec-case-when.R +++ b/R/vec-case-when.R @@ -152,12 +152,12 @@ vec_case_when <- function(conditions, if (n_processed == n_conditions && any(are_unused)) { # If all of the `conditions` are used, then we check if we need `default` - are_unused <- which(are_unused) + loc_unused <- which(are_unused) n_processed <- n_processed + 1L n_values <- n_values + 1L - locs[[n_values]] <- are_unused + locs[[n_values]] <- loc_unused values[[n_values]] <- default value_sizes[[n_values]] <- default_size } From d2034979c37a4a31864c7e7c1afd3048e89d34e7 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 16:40:30 -0400 Subject: [PATCH 12/13] Allow `vec_case_when()` to own its errors since it might be user facing --- R/case_when.R | 5 +- R/vec-case-when.R | 8 +- tests/testthat/_snaps/vec-case-when.md | 139 ++++++++++++++++--------- tests/testthat/test-vec-case-when.R | 15 +++ 4 files changed, 113 insertions(+), 54 deletions(-) diff --git a/R/case_when.R b/R/case_when.R index 1f8c288dc5..b4bfafb51b 100644 --- a/R/case_when.R +++ b/R/case_when.R @@ -160,10 +160,13 @@ case_when <- function(..., vec_case_when( conditions = conditions, values = values, + conditions_arg = "", + values_arg = "", default = .default, default_arg = ".default", ptype = .ptype, - size = .size + size = .size, + call = error_call ) } diff --git a/R/vec-case-when.R b/R/vec-case-when.R index 8b1442706b..ab6b8b8e11 100644 --- a/R/vec-case-when.R +++ b/R/vec-case-when.R @@ -1,14 +1,14 @@ vec_case_when <- function(conditions, values, ..., - conditions_arg = "", - values_arg = "", + conditions_arg = "conditions", + values_arg = "values", default = NULL, default_arg = "default", ptype = NULL, size = NULL, - call = caller_env()) { - check_dots_empty0(..., call = call) + call = current_env()) { + check_dots_empty0(...) vec_check_list(conditions, arg = "conditions", call = call) vec_check_list(values, arg = "values", call = call) diff --git a/tests/testthat/_snaps/vec-case-when.md b/tests/testthat/_snaps/vec-case-when.md index 7f5b927051..b3cd9ac54a 100644 --- a/tests/testthat/_snaps/vec-case-when.md +++ b/tests/testthat/_snaps/vec-case-when.md @@ -3,31 +3,31 @@ Code vec_case_when(list(logical()), list(1:2)) Condition - Error: - ! `..1` must have size 0, not size 2. + Error in `vec_case_when()`: + ! `values[[1]]` must have size 0, not size 2. # `values` are cast to their common type Code vec_case_when(list(FALSE, TRUE), list(1, "x")) Condition - Error: - ! Can't combine `..1` and `..2` . + Error in `vec_case_when()`: + ! Can't combine `values[[1]]` and `values[[2]]` . # `values` must be size 1 or same size as the `conditions` Code vec_case_when(list(c(TRUE, FALSE, TRUE, TRUE)), list(1:3)) Condition - Error: - ! `..1` must have size 4, not size 3. + Error in `vec_case_when()`: + ! `values[[1]]` must have size 4, not size 3. # `default` must be size 1 or same size as `conditions` (exact same as any other `values` input) Code vec_case_when(list(FALSE), list(1L), default = 2:3) Condition - Error: + Error in `vec_case_when()`: ! `default` must have size 1, not size 2. # `default_arg` can be customized @@ -35,7 +35,7 @@ Code vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") Condition - Error: + Error in `vec_case_when()`: ! `foo` must have size 1, not size 2. --- @@ -43,15 +43,15 @@ Code vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") Condition - Error: - ! Can't combine `..1` and `foo` . + Error in `vec_case_when()`: + ! Can't combine `values[[1]]` and `foo` . # `conditions_arg` is validated Code vec_case_when(list(TRUE), list(1), conditions_arg = 1) Condition - Error: + Error in `vec_case_when()`: ! `conditions_arg` must be a string. # `values_arg` is validated @@ -59,7 +59,7 @@ Code vec_case_when(list(TRUE), list(1), values_arg = 1) Condition - Error: + Error in `vec_case_when()`: ! `values_arg` must be a string. # `default_arg` is validated @@ -67,7 +67,7 @@ Code vec_case_when(list(TRUE), list(1), default_arg = 1) Condition - Error: + Error in `vec_case_when()`: ! `default_arg` must be a string. # `conditions` must all be the same size @@ -75,24 +75,24 @@ Code vec_case_when(list(c(TRUE, FALSE), TRUE), list(1, 2)) Condition - Error: - ! `..2` must have size 2, not size 1. + Error in `vec_case_when()`: + ! `conditions[[2]]` must have size 2, not size 1. --- Code vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2)) Condition - Error: - ! Can't recycle `..1` (size 2) to match `..2` (size 3). + Error in `vec_case_when()`: + ! Can't recycle `conditions[[1]]` (size 2) to match `conditions[[2]]` (size 3). # `conditions` must be logical (and aren't cast to logical!) Code vec_case_when(list(1), list(2)) Condition - Error: - ! `..1` must be a vector with type . + Error in `vec_case_when()`: + ! `conditions[[1]]` must be a vector with type . Instead, it has type . --- @@ -100,8 +100,8 @@ Code vec_case_when(list(TRUE, 3.5), list(2, 4)) Condition - Error: - ! `..2` must be a vector with type . + Error in `vec_case_when()`: + ! `conditions[[2]]` must be a vector with type . Instead, it has type . # `size` overrides the `conditions` sizes @@ -109,31 +109,31 @@ Code vec_case_when(list(TRUE), list(1), size = 5) Condition - Error: - ! `..1` must have size 5, not size 1. + Error in `vec_case_when()`: + ! `conditions[[1]]` must have size 5, not size 1. --- Code vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2), size = 2) Condition - Error: - ! `..2` must have size 2, not size 3. + Error in `vec_case_when()`: + ! `conditions[[2]]` must have size 2, not size 3. # `ptype` overrides the `values` types Code vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) Condition - Error: - ! Can't convert `..1` to . + Error in `vec_case_when()`: + ! Can't convert `values[[1]]` to . # number of `conditions` and `values` must be the same Code vec_case_when(list(TRUE), list()) Condition - Error: + Error in `vec_case_when()`: ! The number of supplied conditions (1) must equal the number of supplied values (0). --- @@ -141,7 +141,7 @@ Code vec_case_when(list(TRUE, TRUE), list(1)) Condition - Error: + Error in `vec_case_when()`: ! The number of supplied conditions (2) must equal the number of supplied values (1). # can't have empty inputs @@ -149,7 +149,7 @@ Code vec_case_when(list(), list()) Condition - Error: + Error in `vec_case_when()`: ! At least one condition must be supplied. --- @@ -157,7 +157,7 @@ Code vec_case_when(list(), list(), default = 1) Condition - Error: + Error in `vec_case_when()`: ! At least one condition must be supplied. # dots must be empty @@ -165,7 +165,7 @@ Code vec_case_when(list(TRUE), list(1), 2) Condition - Error: + Error in `vec_case_when()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 @@ -176,7 +176,7 @@ Code vec_case_when(1, list(2)) Condition - Error: + Error in `vec_case_when()`: ! `conditions` must be a list, not a number. # `values` must be a list @@ -184,7 +184,7 @@ Code vec_case_when(list(TRUE), 1) Condition - Error: + Error in `vec_case_when()`: ! `values` must be a list, not a number. # named inputs show up in the error message @@ -192,8 +192,8 @@ Code vec_case_when(list(x = 1.5), list(1)) Condition - Error: - ! `x` must be a vector with type . + Error in `vec_case_when()`: + ! `conditions$x` must be a vector with type . Instead, it has type . --- @@ -201,64 +201,89 @@ Code vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") Condition - Error: + Error in `vec_case_when()`: ! `foo$x` must be a vector with type . Instead, it has type . +--- + + Code + vec_case_when(list(x = 1.5), list(1), conditions_arg = "") + Condition + Error in `vec_case_when()`: + ! `x` must be a vector with type . + Instead, it has type . + --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) Condition - Error: - ! `x` must have size 2, not size 1. + Error in `vec_case_when()`: + ! `conditions$x` must have size 2, not size 1. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") Condition - Error: + Error in `vec_case_when()`: ! `foo$x` must have size 2, not size 1. +--- + + Code + vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "") + Condition + Error in `vec_case_when()`: + ! `x` must have size 2, not size 1. + --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y")) Condition - Error: - ! Can't combine `..1` and `x` . + Error in `vec_case_when()`: + ! Can't combine `values[[1]]` and `values$x` . --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") Condition - Error: + Error in `vec_case_when()`: ! Can't combine `foo[[1]]` and `foo$x` . +--- + + Code + vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "") + Condition + Error in `vec_case_when()`: + ! Can't combine `..1` and `x` . + --- Code vec_case_when(list(TRUE), list(NULL)) Condition - Error: - ! `..1` must be a vector, not NULL. + Error in `vec_case_when()`: + ! `values[[1]]` must be a vector, not NULL. --- Code vec_case_when(list(TRUE), list(x = NULL)) Condition - Error: - ! `x` must be a vector, not NULL. + Error in `vec_case_when()`: + ! `values$x` must be a vector, not NULL. --- Code vec_case_when(list(TRUE), list(NULL), values_arg = "foo") Condition - Error: + Error in `vec_case_when()`: ! `foo[[1]]` must be a vector, not NULL. --- @@ -266,6 +291,22 @@ Code vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") Condition - Error: + Error in `vec_case_when()`: ! `foo$x` must be a vector, not NULL. +--- + + Code + vec_case_when(list(TRUE), list(NULL), values_arg = "") + Condition + Error in `vec_case_when()`: + ! `..1` must be a vector, not NULL. + +--- + + Code + vec_case_when(list(TRUE), list(x = NULL), values_arg = "") + Condition + Error in `vec_case_when()`: + ! `x` must be a vector, not NULL. + diff --git a/tests/testthat/test-vec-case-when.R b/tests/testthat/test-vec-case-when.R index 21374515db..3c7b8124d0 100644 --- a/tests/testthat/test-vec-case-when.R +++ b/tests/testthat/test-vec-case-when.R @@ -373,6 +373,9 @@ test_that("named inputs show up in the error message", { expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") }) + expect_snapshot(error = TRUE, { + vec_case_when(list(x = 1.5), list(1), conditions_arg = "") + }) expect_snapshot(error = TRUE, { vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) @@ -380,6 +383,9 @@ test_that("named inputs show up in the error message", { expect_snapshot(error = TRUE, { vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") }) + expect_snapshot(error = TRUE, { + vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "") + }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y")) @@ -387,6 +393,9 @@ test_that("named inputs show up in the error message", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "") + }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL)) @@ -400,4 +409,10 @@ test_that("named inputs show up in the error message", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(NULL), values_arg = "") + }) + expect_snapshot(error = TRUE, { + vec_case_when(list(TRUE), list(x = NULL), values_arg = "") + }) }) From 44ad4ac32a451676f51113b2dc3a462ffb3ae041 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 30 Jun 2022 16:56:10 -0400 Subject: [PATCH 13/13] Mention how to handle `NA` behavior yourself Hints at the fact that this has to be tailored to your specific usage of `case_when()`, making it hard to generalize (like with a `.missing` argument) --- R/case_when.R | 7 +++++++ man/case_when.Rd | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/R/case_when.R b/R/case_when.R index b4bfafb51b..8f53f1da98 100644 --- a/R/case_when.R +++ b/R/case_when.R @@ -28,6 +28,13 @@ #' `.default` participates in the computation of the common type with the RHS #' inputs. #' +#' `NA` values in the LHS conditions are treated like `FALSE`, meaning that +#' the result at those locations will be assigned the `.default` value. To +#' handle missing values in the conditions differently, you must explicitly +#' catch them with another condition before they fall through to the +#' `.default`. This typically involves some variation of `is.na(x) ~ value` +#' tailored to your usage of `case_when()`. +#' #' If `NULL`, the default, a missing value will be used. #' #' @param .ptype An optional prototype declaring the desired output type. If diff --git a/man/case_when.Rd b/man/case_when.Rd index 8360eb8a60..a748048ec8 100644 --- a/man/case_when.Rd +++ b/man/case_when.Rd @@ -31,6 +31,13 @@ from \code{...}. \code{.default} participates in the computation of the common type with the RHS inputs. +\code{NA} values in the LHS conditions are treated like \code{FALSE}, meaning that +the result at those locations will be assigned the \code{.default} value. To +handle missing values in the conditions differently, you must explicitly +catch them with another condition before they fall through to the +\code{.default}. This typically involves some variation of \code{is.na(x) ~ value} +tailored to your usage of \code{case_when()}. + If \code{NULL}, the default, a missing value will be used.} \item{.ptype}{An optional prototype declaring the desired output type. If