Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fifelse() coerces NA to other types and supports vectorized na argument #4289

Merged
merged 9 commits into from
May 14, 2021
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@

12. Typo checking in `i` available since 1.11.4 is extended to work in non-English sessions, [#4989](https://github.com/Rdatatable/data.table/issues/4989). Thanks to Michael Chirico for the PR.

13. `fifelse()` now coerces logical `NA` to other types and the `na` argument supports vectorized input, [#4277](https://github.com/Rdatatable/data.table/issues/4277) [#4286](https://github.com/Rdatatable/data.table/issues/4286) [#4287](https://github.com/Rdatatable/data.table/issues/4287). Thanks to @michaelchirico and @shrektan for reporting, and @shrektan for implementing.

## BUG FIXES

1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.
Expand Down
24 changes: 21 additions & 3 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15859,7 +15859,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o
test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).")
test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).")
test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.")
test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please")
test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but 'yes' is logical. Please")
test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).")
test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).")
test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec)))
Expand All @@ -15885,7 +15885,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na)
test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na))
test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na))
test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na))
test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please")
test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but 'yes' is double. Please")
test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na))
test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na))
test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na))
Expand Down Expand Up @@ -16322,7 +16322,7 @@ test(2100.03, fifelse(test_vec_na, TRUE, FALSE, TRUE), as.logical(out_vec_na))
test(2100.04, fifelse(test_vec_na, "1", "0","2"), as.character(out_vec_na))
test(2100.05, fifelse(test_vec_na, 1+0i, 0+0i, 2+0i), as.complex(out_vec_na))
test(2100.06, fifelse(c(TRUE,FALSE,NA), list(1:5), list(5:1), list(15:11)), list(1:5,5:1,15:11))
test(2100.07, fifelse(test_vec_na, 1, 0, 2L), error = "'yes' is of type double but 'na' is of type integer. Please make sure that both arguments have the same type.")
test(2100.07, fifelse(test_vec_na, 1, 0, 2L), c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2)) # corece na
test(2100.08, fifelse(test_vec_na, 1, 0, c(2,3)), error = "Length of 'na' is 2 but must be 1")
test(2100.09, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), as.Date("2019-08-29")), as.Date(c(18139, 18138, 18138, 18138, 18138, 18137), origin = '1970-01-01'))
test(2100.10, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), 18137), error = "'yes' has different class than 'na'. Please make sure that both arguments have the same class.")
Expand Down Expand Up @@ -17600,3 +17600,21 @@ test(2186, DT[, if (TRUE) .(a=1L) else .(a=1L, b=2L)], DT,
DT = setDT(replicate(getOption('width'), 1, simplify = FALSE))
test(2187, {print(DT, col.names='none'); TRUE}, notOutput="V")

# fifelse now supports vector na arguments and coerces NA to other types, PR#4289
test(2188.01, fifelse(c(TRUE, FALSE, TRUE, NA), 1L, 2L, 1.0), c(1, 2, 1, 1))
test(2188.02, fifelse(c(TRUE, FALSE, TRUE, NA), 1, 2, 1L), c(1, 2, 1, 1))
test(2188.03, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, 101:104), c(1L, 12L, 3L, 104L))
test(2188.04, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, 101:104), c(NA, 12L, NA, 104L))
test(2188.05, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, 101:104), c(1L, NA, 3L, 104L))
test(2188.06, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, NA), c(1L, 12L, 3L, NA))
test(2188.07, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, NA), c(1L, NA, 3L, NA))
test(2188.08, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA), c(NA, NA, NA, NA))
test(2188.09, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA_character_), rep(NA_character_, 4L))
test(2188.10, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, 101:104), c(NA, NA, NA, 104L))
test(2188.11, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, NA), c(NA, 12L, NA, NA))
test(2188.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), as.Date(c(NA, NA, NA, "2020-01-01")))
test(2188.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message
test(2188.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")), error="'no' has different class than 'na'. Please")
test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different")
test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA

4 changes: 2 additions & 2 deletions man/fifelse.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
\arguments{
\item{test}{ A logical vector. }
\item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. }
\item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and length \code{1}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. }
\item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and its length must be either \code{1} or the same length of \code{test}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. }
}
\details{
In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes.
In contrast to \code{\link[base]{ifelse}} attributes are copied from the first non-\code{NA} argument to the output. This is useful when returning \code{Date}, \code{factor} or other classes.
}
\value{
A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}.
Expand Down
205 changes: 130 additions & 75 deletions src/fifelse.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,127 +11,182 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) {
const int64_t len0 = xlength(l);
const int64_t len1 = xlength(a);
const int64_t len2 = xlength(b);
const int64_t len3 = xlength(na);
SEXPTYPE ta = TYPEOF(a);
SEXPTYPE tb = TYPEOF(b);
int nprotect = 0;
SEXPTYPE tn = TYPEOF(na);
// na_a/b/n means a scalar NA (or NULL for the na argument), which is considered to be coerced into other types
bool na_a = len1==1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL;
bool na_b = len2==1 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL;
bool na_n = isNull(na) || (len3==1 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL);

if (ta != tb) {
if (ta == INTSXP && tb == REALSXP) {
SEXP tmp = PROTECT(coerceVector(a, REALSXP)); nprotect++;
a = tmp;
if (!na_a && len1!=1 && len1!=len0)
error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0);
if (!na_b && len2!=1 && len2!=len0)
error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0);
if (!na_n && len3!=1 && len3!=len0)
error(_("Length of 'na' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len3, len0);

int nprotect = 0;
SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP;
if (!(na_a && na_b && na_n)) {
SEXPTYPE ta0 = ta, tb0 = tb, tn0 = tn; // record the original type for error message use
if (!na_b && tans==INTSXP && tb==REALSXP) tans = tb;
if (!na_n && tans==INTSXP && tn==REALSXP) tans = tn;
if (!na_a && tans==REALSXP && ta==INTSXP) {
a = PROTECT(coerceVector(a, REALSXP)); nprotect++;
ta = REALSXP;
} else if (ta == REALSXP && tb == INTSXP) {
SEXP tmp = PROTECT(coerceVector(b, REALSXP)); nprotect++;
b = tmp;
}
// it's not possible that non-NA `yes`' type will be different from `tans`
if (!na_b && tans==REALSXP && tb==INTSXP) {
b = PROTECT(coerceVector(b, REALSXP)); nprotect++;
tb = REALSXP;
} else {
error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb));
}
if (!na_b && tans != tb)
error(_("'no' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tb0), tans==ta0 ? "yes" : "na", tans==ta0 ? type2char(ta0) : type2char(tn0));
if (!na_n && tans==REALSXP && tn==INTSXP) {
na = PROTECT(coerceVector(na, REALSXP)); nprotect++;
tn = REALSXP;
}
if (!na_n && tans != tn)
error(_("'na' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tn0), tans==ta0 ? "yes" : "no", tans==ta0 ? type2char(ta0) : type2char(tb0));
}

if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class."));
UNPROTECT(2);

if (isFactor(a)) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0))
error(_("'yes' and 'no' are both type factor but their levels are different."));

if (!na_a && !na_b) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
}
if (!na_a && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
}
if (!na_b && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(b,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0))
error(_("'no' has different class than 'na'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
}

if (isFactor(a) || isFactor(b)) {
if (!na_a && !na_b) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0))
error(_("'yes' and 'no' are both type factor but their levels are different."));
UNPROTECT(2);
}
if (!na_a && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0))
error(_("'yes' and 'na' are both type factor but their levels are different."));
UNPROTECT(2);
}
if (!na_b && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(b,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0))
error(_("'no' and 'na' are both type factor but their levels are different."));
UNPROTECT(2);
}
}

if (len1!=1 && len1!=len0)
error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0);
if (len2!=1 && len2!=len0)
error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0);
const int64_t amask = len1>1 ? INT64_MAX : 0; // for scalar 'a' bitwise AND will reset iterator to first element: pa[i & amask] -> pa[0]
const int64_t bmask = len2>1 ? INT64_MAX : 0;
const int64_t nmask = len3>1 ? INT64_MAX : 0;

const int *restrict pl = LOGICAL(l);
SEXP ans = PROTECT(allocVector(ta, len0)); nprotect++;
copyMostAttrib(a, ans);

bool nonna = !isNull(na);
if (nonna) {
if (xlength(na) != 1)
error(_("Length of 'na' is %"PRId64" but must be 1"), (int64_t)xlength(na));
SEXPTYPE tn = TYPEOF(na);
if (tn == LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) {
nonna = false;
} else {
if (tn != ta)
error(_("'yes' is of type %s but 'na' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tn));
if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
if (isFactor(a)) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0))
error(_("'yes' and 'na' are both type factor but their levels are different."));
UNPROTECT(2);
}
}
}
SEXP ans = PROTECT(allocVector(tans, len0)); nprotect++;
if (!na_a)
copyMostAttrib(a, ans);
else if (!na_b)
copyMostAttrib(b, ans);
else if (!na_n)
copyMostAttrib(na, ans);

switch(ta) {
switch(tans) {
case LGLSXP: {
int *restrict pans = LOGICAL(ans);
const int *restrict pa = LOGICAL(a);
const int *restrict pb = LOGICAL(b);
const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL;
const int *restrict pa; if (!na_a) pa = LOGICAL(a);
const int *restrict pb; if (!na_b) pb = LOGICAL(b);
const int *restrict pna; if (!na_n) pna = LOGICAL(na);
const int na = NA_LOGICAL;
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case INTSXP: {
int *restrict pans = INTEGER(ans);
const int *restrict pa = INTEGER(a);
const int *restrict pb = INTEGER(b);
const int pna = nonna ? INTEGER(na)[0] : NA_INTEGER;
const int *restrict pa; if (!na_a) pa = INTEGER(a);
const int *restrict pb; if (!na_b) pb = INTEGER(b);
const int *restrict pna; if (!na_n) pna = INTEGER(na);
const int na = NA_INTEGER;
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case REALSXP: {
double *restrict pans = REAL(ans);
const double *restrict pa = REAL(a);
const double *restrict pb = REAL(b);
const double na_double = Rinherits(a, char_integer64) ? NA_INT64_D : NA_REAL; // Rinherits() is true for nanotime
const double pna = nonna ? REAL(na)[0] : na_double;
const double *restrict pa; if (!na_a) pa = REAL(a);
const double *restrict pb; if (!na_b) pb = REAL(b);
const double *restrict pna; if (!na_n) pna = REAL(na);
const double na = Rinherits(a, char_integer64) ? NA_INT64_D : NA_REAL; // Rinherits() is true for nanotime
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case STRSXP : {
const SEXP *restrict pa = STRING_PTR(a);
const SEXP *restrict pb = STRING_PTR(b);
const SEXP pna = nonna ? STRING_PTR(na)[0] : NA_STRING;
const SEXP *restrict pa; if (!na_a) pa = STRING_PTR(a);
const SEXP *restrict pb; if (!na_b) pb = STRING_PTR(b);
const SEXP *restrict pna; if (!na_n) pna = STRING_PTR(na);
const SEXP na = NA_STRING;
for (int64_t i=0; i<len0; ++i) {
SET_STRING_ELT(ans, i, pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna));
SET_STRING_ELT(
ans, i, pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask])
);
}
} break;
case CPLXSXP : {
Rcomplex *restrict pans = COMPLEX(ans);
const Rcomplex *restrict pa = COMPLEX(a);
const Rcomplex *restrict pb = COMPLEX(b);
const Rcomplex pna = nonna ? COMPLEX(na)[0] : NA_CPLX;
const Rcomplex *restrict pa; if (!na_a) pa = COMPLEX(a);
const Rcomplex *restrict pb; if (!na_b) pb = COMPLEX(b);
const Rcomplex *restrict pna; if (!na_n) pna = COMPLEX(na);
const Rcomplex na = NA_CPLX;
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case VECSXP : {
const SEXP *restrict pa = SEXPPTR_RO(a);
const SEXP *restrict pb = SEXPPTR_RO(b);
const SEXP *restrict pna = SEXPPTR_RO(na);
const SEXP *restrict pa; if (!na_a) pa = SEXPPTR_RO(a);
const SEXP *restrict pb; if (!na_b) pb = SEXPPTR_RO(b);
const SEXP *restrict pna; if (!na_n) pna = SEXPPTR_RO(na);
for (int64_t i=0; i<len0; ++i) {
if (pl[i]==NA_LOGICAL) {
if (nonna)
SET_VECTOR_ELT(ans, i, pna[0]);
continue; // allocVector already initialized with R_NilValue
if (pl[i] == NA_LOGICAL) {
if (!na_n) SET_VECTOR_ELT(ans, i, pna[i & nmask]);
} else if (pl[i]==0) {
if (!na_b) SET_VECTOR_ELT(ans, i, pb[i & bmask]);
} else if (pl[i]==1) {
if (!na_a) SET_VECTOR_ELT(ans, i, pa[i & amask]);
}
SET_VECTOR_ELT(ans, i, pl[i]==0 ? pb[i & bmask] : pa[i & amask]);
}
} break;
default:
Expand Down