diff --git a/NEWS.md b/NEWS.md index ae047be820..b80eb35c63 100644 --- a/NEWS.md +++ b/NEWS.md @@ -91,6 +91,8 @@ 12. Typo checking in `i` available since 1.11.4 is extended to work in non-English sessions, [#4989](https://github.com/Rdatatable/data.table/issues/4989). Thanks to Michael Chirico for the PR. +13. `fifelse()` now coerces logical `NA` to other types and the `na` argument supports vectorized input, [#4277](https://github.com/Rdatatable/data.table/issues/4277) [#4286](https://github.com/Rdatatable/data.table/issues/4286) [#4287](https://github.com/Rdatatable/data.table/issues/4287). Thanks to @michaelchirico and @shrektan for reporting, and @shrektan for implementing. + ## BUG FIXES 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9f2cafb3c5..48c502581e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15859,7 +15859,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.") -test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please") +test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but 'yes' is logical. Please") test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).") test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).") test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec))) @@ -15885,7 +15885,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na) test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na)) test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na)) test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na)) -test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please") +test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but 'yes' is double. Please") test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na)) test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na)) test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na)) @@ -16322,7 +16322,7 @@ test(2100.03, fifelse(test_vec_na, TRUE, FALSE, TRUE), as.logical(out_vec_na)) test(2100.04, fifelse(test_vec_na, "1", "0","2"), as.character(out_vec_na)) test(2100.05, fifelse(test_vec_na, 1+0i, 0+0i, 2+0i), as.complex(out_vec_na)) test(2100.06, fifelse(c(TRUE,FALSE,NA), list(1:5), list(5:1), list(15:11)), list(1:5,5:1,15:11)) -test(2100.07, fifelse(test_vec_na, 1, 0, 2L), error = "'yes' is of type double but 'na' is of type integer. Please make sure that both arguments have the same type.") +test(2100.07, fifelse(test_vec_na, 1, 0, 2L), c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2)) # corece na test(2100.08, fifelse(test_vec_na, 1, 0, c(2,3)), error = "Length of 'na' is 2 but must be 1") test(2100.09, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), as.Date("2019-08-29")), as.Date(c(18139, 18138, 18138, 18138, 18138, 18137), origin = '1970-01-01')) test(2100.10, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), 18137), error = "'yes' has different class than 'na'. Please make sure that both arguments have the same class.") @@ -17600,3 +17600,21 @@ test(2186, DT[, if (TRUE) .(a=1L) else .(a=1L, b=2L)], DT, DT = setDT(replicate(getOption('width'), 1, simplify = FALSE)) test(2187, {print(DT, col.names='none'); TRUE}, notOutput="V") +# fifelse now supports vector na arguments and coerces NA to other types, PR#4289 +test(2188.01, fifelse(c(TRUE, FALSE, TRUE, NA), 1L, 2L, 1.0), c(1, 2, 1, 1)) +test(2188.02, fifelse(c(TRUE, FALSE, TRUE, NA), 1, 2, 1L), c(1, 2, 1, 1)) +test(2188.03, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, 101:104), c(1L, 12L, 3L, 104L)) +test(2188.04, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, 101:104), c(NA, 12L, NA, 104L)) +test(2188.05, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, 101:104), c(1L, NA, 3L, 104L)) +test(2188.06, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, NA), c(1L, 12L, 3L, NA)) +test(2188.07, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, NA), c(1L, NA, 3L, NA)) +test(2188.08, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA), c(NA, NA, NA, NA)) +test(2188.09, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA_character_), rep(NA_character_, 4L)) +test(2188.10, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, 101:104), c(NA, NA, NA, 104L)) +test(2188.11, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, NA), c(NA, 12L, NA, NA)) +test(2188.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), as.Date(c(NA, NA, NA, "2020-01-01"))) +test(2188.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message +test(2188.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")), error="'no' has different class than 'na'. Please") +test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different") +test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA + diff --git a/man/fifelse.Rd b/man/fifelse.Rd index 2fe355c98c..4165dd796d 100644 --- a/man/fifelse.Rd +++ b/man/fifelse.Rd @@ -11,10 +11,10 @@ \arguments{ \item{test}{ A logical vector. } \item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. } - \item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and length \code{1}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. } + \item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and its length must be either \code{1} or the same length of \code{test}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. } } \details{ -In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes. +In contrast to \code{\link[base]{ifelse}} attributes are copied from the first non-\code{NA} argument to the output. This is useful when returning \code{Date}, \code{factor} or other classes. } \value{ A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}. diff --git a/src/fifelse.c b/src/fifelse.c index 5218cf7b4f..e5d22a1eea 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -11,127 +11,182 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { const int64_t len0 = xlength(l); const int64_t len1 = xlength(a); const int64_t len2 = xlength(b); + const int64_t len3 = xlength(na); SEXPTYPE ta = TYPEOF(a); SEXPTYPE tb = TYPEOF(b); - int nprotect = 0; + SEXPTYPE tn = TYPEOF(na); + // na_a/b/n means a scalar NA (or NULL for the na argument), which is considered to be coerced into other types + bool na_a = len1==1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; + bool na_b = len2==1 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; + bool na_n = isNull(na) || (len3==1 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); - if (ta != tb) { - if (ta == INTSXP && tb == REALSXP) { - SEXP tmp = PROTECT(coerceVector(a, REALSXP)); nprotect++; - a = tmp; + if (!na_a && len1!=1 && len1!=len0) + error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0); + if (!na_b && len2!=1 && len2!=len0) + error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0); + if (!na_n && len3!=1 && len3!=len0) + error(_("Length of 'na' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len3, len0); + + int nprotect = 0; + SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP; + if (!(na_a && na_b && na_n)) { + SEXPTYPE ta0 = ta, tb0 = tb, tn0 = tn; // record the original type for error message use + if (!na_b && tans==INTSXP && tb==REALSXP) tans = tb; + if (!na_n && tans==INTSXP && tn==REALSXP) tans = tn; + if (!na_a && tans==REALSXP && ta==INTSXP) { + a = PROTECT(coerceVector(a, REALSXP)); nprotect++; ta = REALSXP; - } else if (ta == REALSXP && tb == INTSXP) { - SEXP tmp = PROTECT(coerceVector(b, REALSXP)); nprotect++; - b = tmp; + } + // it's not possible that non-NA `yes`' type will be different from `tans` + if (!na_b && tans==REALSXP && tb==INTSXP) { + b = PROTECT(coerceVector(b, REALSXP)); nprotect++; tb = REALSXP; - } else { - error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb)); } + if (!na_b && tans != tb) + error(_("'no' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tb0), tans==ta0 ? "yes" : "na", tans==ta0 ? type2char(ta0) : type2char(tn0)); + if (!na_n && tans==REALSXP && tn==INTSXP) { + na = PROTECT(coerceVector(na, REALSXP)); nprotect++; + tn = REALSXP; + } + if (!na_n && tans != tn) + error(_("'na' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tn0), tans==ta0 ? "yes" : "no", tans==ta0 ? type2char(ta0) : type2char(tb0)); } - - if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) - error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class.")); - UNPROTECT(2); - - if (isFactor(a)) { - if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0)) - error(_("'yes' and 'no' are both type factor but their levels are different.")); + + if (!na_a && !na_b) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) + error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class.")); + UNPROTECT(2); + } + if (!na_a && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) + error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class.")); + UNPROTECT(2); + } + if (!na_b && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(b,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) + error(_("'no' has different class than 'na'. Please make sure that both arguments have the same class.")); UNPROTECT(2); } + + if (isFactor(a) || isFactor(b)) { + if (!na_a && !na_b) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0)) + error(_("'yes' and 'no' are both type factor but their levels are different.")); + UNPROTECT(2); + } + if (!na_a && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) + error(_("'yes' and 'na' are both type factor but their levels are different.")); + UNPROTECT(2); + } + if (!na_b && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(b,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) + error(_("'no' and 'na' are both type factor but their levels are different.")); + UNPROTECT(2); + } + } - if (len1!=1 && len1!=len0) - error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0); - if (len2!=1 && len2!=len0) - error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0); const int64_t amask = len1>1 ? INT64_MAX : 0; // for scalar 'a' bitwise AND will reset iterator to first element: pa[i & amask] -> pa[0] const int64_t bmask = len2>1 ? INT64_MAX : 0; + const int64_t nmask = len3>1 ? INT64_MAX : 0; const int *restrict pl = LOGICAL(l); - SEXP ans = PROTECT(allocVector(ta, len0)); nprotect++; - copyMostAttrib(a, ans); - - bool nonna = !isNull(na); - if (nonna) { - if (xlength(na) != 1) - error(_("Length of 'na' is %"PRId64" but must be 1"), (int64_t)xlength(na)); - SEXPTYPE tn = TYPEOF(na); - if (tn == LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) { - nonna = false; - } else { - if (tn != ta) - error(_("'yes' is of type %s but 'na' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tn)); - if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) - error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class.")); - UNPROTECT(2); - if (isFactor(a)) { - if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) - error(_("'yes' and 'na' are both type factor but their levels are different.")); - UNPROTECT(2); - } - } - } + SEXP ans = PROTECT(allocVector(tans, len0)); nprotect++; + if (!na_a) + copyMostAttrib(a, ans); + else if (!na_b) + copyMostAttrib(b, ans); + else if (!na_n) + copyMostAttrib(na, ans); - switch(ta) { + switch(tans) { case LGLSXP: { int *restrict pans = LOGICAL(ans); - const int *restrict pa = LOGICAL(a); - const int *restrict pb = LOGICAL(b); - const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL; + const int *restrict pa; if (!na_a) pa = LOGICAL(a); + const int *restrict pb; if (!na_b) pb = LOGICAL(b); + const int *restrict pna; if (!na_n) pna = LOGICAL(na); + const int na = NA_LOGICAL; #pragma omp parallel for num_threads(getDTthreads(len0, true)) for (int64_t i=0; i