Skip to content

Commit

Permalink
fifelse() coerces NA to other types and supports vectorized na argume…
Browse files Browse the repository at this point in the history
…nt (#4289)
  • Loading branch information
shrektan authored May 14, 2021
1 parent 9847445 commit 03dff91
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 80 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@

12. Typo checking in `i` available since 1.11.4 is extended to work in non-English sessions, [#4989](https://github.com/Rdatatable/data.table/issues/4989). Thanks to Michael Chirico for the PR.

13. `fifelse()` now coerces logical `NA` to other types and the `na` argument supports vectorized input, [#4277](https://github.com/Rdatatable/data.table/issues/4277) [#4286](https://github.com/Rdatatable/data.table/issues/4286) [#4287](https://github.com/Rdatatable/data.table/issues/4287). Thanks to @michaelchirico and @shrektan for reporting, and @shrektan for implementing.

## BUG FIXES

1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.
Expand Down
24 changes: 21 additions & 3 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15859,7 +15859,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o
test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).")
test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).")
test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.")
test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please")
test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but 'yes' is logical. Please")
test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).")
test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).")
test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec)))
Expand All @@ -15885,7 +15885,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na)
test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na))
test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na))
test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na))
test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please")
test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but 'yes' is double. Please")
test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na))
test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na))
test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na))
Expand Down Expand Up @@ -16322,7 +16322,7 @@ test(2100.03, fifelse(test_vec_na, TRUE, FALSE, TRUE), as.logical(out_vec_na))
test(2100.04, fifelse(test_vec_na, "1", "0","2"), as.character(out_vec_na))
test(2100.05, fifelse(test_vec_na, 1+0i, 0+0i, 2+0i), as.complex(out_vec_na))
test(2100.06, fifelse(c(TRUE,FALSE,NA), list(1:5), list(5:1), list(15:11)), list(1:5,5:1,15:11))
test(2100.07, fifelse(test_vec_na, 1, 0, 2L), error = "'yes' is of type double but 'na' is of type integer. Please make sure that both arguments have the same type.")
test(2100.07, fifelse(test_vec_na, 1, 0, 2L), c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2)) # corece na
test(2100.08, fifelse(test_vec_na, 1, 0, c(2,3)), error = "Length of 'na' is 2 but must be 1")
test(2100.09, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), as.Date("2019-08-29")), as.Date(c(18139, 18138, 18138, 18138, 18138, 18137), origin = '1970-01-01'))
test(2100.10, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), 18137), error = "'yes' has different class than 'na'. Please make sure that both arguments have the same class.")
Expand Down Expand Up @@ -17600,3 +17600,21 @@ test(2186, DT[, if (TRUE) .(a=1L) else .(a=1L, b=2L)], DT,
DT = setDT(replicate(getOption('width'), 1, simplify = FALSE))
test(2187, {print(DT, col.names='none'); TRUE}, notOutput="V")

# fifelse now supports vector na arguments and coerces NA to other types, PR#4289
test(2188.01, fifelse(c(TRUE, FALSE, TRUE, NA), 1L, 2L, 1.0), c(1, 2, 1, 1))
test(2188.02, fifelse(c(TRUE, FALSE, TRUE, NA), 1, 2, 1L), c(1, 2, 1, 1))
test(2188.03, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, 101:104), c(1L, 12L, 3L, 104L))
test(2188.04, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, 101:104), c(NA, 12L, NA, 104L))
test(2188.05, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, 101:104), c(1L, NA, 3L, 104L))
test(2188.06, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, NA), c(1L, 12L, 3L, NA))
test(2188.07, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, NA), c(1L, NA, 3L, NA))
test(2188.08, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA), c(NA, NA, NA, NA))
test(2188.09, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA_character_), rep(NA_character_, 4L))
test(2188.10, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, 101:104), c(NA, NA, NA, 104L))
test(2188.11, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, NA), c(NA, 12L, NA, NA))
test(2188.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), as.Date(c(NA, NA, NA, "2020-01-01")))
test(2188.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message
test(2188.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")), error="'no' has different class than 'na'. Please")
test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different")
test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA

4 changes: 2 additions & 2 deletions man/fifelse.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
\arguments{
\item{test}{ A logical vector. }
\item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. }
\item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and length \code{1}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. }
\item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and its length must be either \code{1} or the same length of \code{test}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. }
}
\details{
In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes.
In contrast to \code{\link[base]{ifelse}} attributes are copied from the first non-\code{NA} argument to the output. This is useful when returning \code{Date}, \code{factor} or other classes.
}
\value{
A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}.
Expand Down
205 changes: 130 additions & 75 deletions src/fifelse.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,127 +11,182 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) {
const int64_t len0 = xlength(l);
const int64_t len1 = xlength(a);
const int64_t len2 = xlength(b);
const int64_t len3 = xlength(na);
SEXPTYPE ta = TYPEOF(a);
SEXPTYPE tb = TYPEOF(b);
int nprotect = 0;
SEXPTYPE tn = TYPEOF(na);
// na_a/b/n means a scalar NA (or NULL for the na argument), which is considered to be coerced into other types
bool na_a = len1==1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL;
bool na_b = len2==1 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL;
bool na_n = isNull(na) || (len3==1 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL);

if (ta != tb) {
if (ta == INTSXP && tb == REALSXP) {
SEXP tmp = PROTECT(coerceVector(a, REALSXP)); nprotect++;
a = tmp;
if (!na_a && len1!=1 && len1!=len0)
error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0);
if (!na_b && len2!=1 && len2!=len0)
error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0);
if (!na_n && len3!=1 && len3!=len0)
error(_("Length of 'na' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len3, len0);

int nprotect = 0;
SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP;
if (!(na_a && na_b && na_n)) {
SEXPTYPE ta0 = ta, tb0 = tb, tn0 = tn; // record the original type for error message use
if (!na_b && tans==INTSXP && tb==REALSXP) tans = tb;
if (!na_n && tans==INTSXP && tn==REALSXP) tans = tn;
if (!na_a && tans==REALSXP && ta==INTSXP) {
a = PROTECT(coerceVector(a, REALSXP)); nprotect++;
ta = REALSXP;
} else if (ta == REALSXP && tb == INTSXP) {
SEXP tmp = PROTECT(coerceVector(b, REALSXP)); nprotect++;
b = tmp;
}
// it's not possible that non-NA `yes`' type will be different from `tans`
if (!na_b && tans==REALSXP && tb==INTSXP) {
b = PROTECT(coerceVector(b, REALSXP)); nprotect++;
tb = REALSXP;
} else {
error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb));
}
if (!na_b && tans != tb)
error(_("'no' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tb0), tans==ta0 ? "yes" : "na", tans==ta0 ? type2char(ta0) : type2char(tn0));
if (!na_n && tans==REALSXP && tn==INTSXP) {
na = PROTECT(coerceVector(na, REALSXP)); nprotect++;
tn = REALSXP;
}
if (!na_n && tans != tn)
error(_("'na' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tn0), tans==ta0 ? "yes" : "no", tans==ta0 ? type2char(ta0) : type2char(tb0));
}

if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class."));
UNPROTECT(2);

if (isFactor(a)) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0))
error(_("'yes' and 'no' are both type factor but their levels are different."));

if (!na_a && !na_b) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
}
if (!na_a && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
}
if (!na_b && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(b,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0))
error(_("'no' has different class than 'na'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
}

if (isFactor(a) || isFactor(b)) {
if (!na_a && !na_b) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0))
error(_("'yes' and 'no' are both type factor but their levels are different."));
UNPROTECT(2);
}
if (!na_a && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0))
error(_("'yes' and 'na' are both type factor but their levels are different."));
UNPROTECT(2);
}
if (!na_b && !na_n) {
if (!R_compute_identical(PROTECT(getAttrib(b,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0))
error(_("'no' and 'na' are both type factor but their levels are different."));
UNPROTECT(2);
}
}

if (len1!=1 && len1!=len0)
error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0);
if (len2!=1 && len2!=len0)
error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0);
const int64_t amask = len1>1 ? INT64_MAX : 0; // for scalar 'a' bitwise AND will reset iterator to first element: pa[i & amask] -> pa[0]
const int64_t bmask = len2>1 ? INT64_MAX : 0;
const int64_t nmask = len3>1 ? INT64_MAX : 0;

const int *restrict pl = LOGICAL(l);
SEXP ans = PROTECT(allocVector(ta, len0)); nprotect++;
copyMostAttrib(a, ans);

bool nonna = !isNull(na);
if (nonna) {
if (xlength(na) != 1)
error(_("Length of 'na' is %"PRId64" but must be 1"), (int64_t)xlength(na));
SEXPTYPE tn = TYPEOF(na);
if (tn == LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) {
nonna = false;
} else {
if (tn != ta)
error(_("'yes' is of type %s but 'na' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tn));
if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0))
error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class."));
UNPROTECT(2);
if (isFactor(a)) {
if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0))
error(_("'yes' and 'na' are both type factor but their levels are different."));
UNPROTECT(2);
}
}
}
SEXP ans = PROTECT(allocVector(tans, len0)); nprotect++;
if (!na_a)
copyMostAttrib(a, ans);
else if (!na_b)
copyMostAttrib(b, ans);
else if (!na_n)
copyMostAttrib(na, ans);

switch(ta) {
switch(tans) {
case LGLSXP: {
int *restrict pans = LOGICAL(ans);
const int *restrict pa = LOGICAL(a);
const int *restrict pb = LOGICAL(b);
const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL;
const int *restrict pa; if (!na_a) pa = LOGICAL(a);
const int *restrict pb; if (!na_b) pb = LOGICAL(b);
const int *restrict pna; if (!na_n) pna = LOGICAL(na);
const int na = NA_LOGICAL;
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case INTSXP: {
int *restrict pans = INTEGER(ans);
const int *restrict pa = INTEGER(a);
const int *restrict pb = INTEGER(b);
const int pna = nonna ? INTEGER(na)[0] : NA_INTEGER;
const int *restrict pa; if (!na_a) pa = INTEGER(a);
const int *restrict pb; if (!na_b) pb = INTEGER(b);
const int *restrict pna; if (!na_n) pna = INTEGER(na);
const int na = NA_INTEGER;
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case REALSXP: {
double *restrict pans = REAL(ans);
const double *restrict pa = REAL(a);
const double *restrict pb = REAL(b);
const double na_double = Rinherits(a, char_integer64) ? NA_INT64_D : NA_REAL; // Rinherits() is true for nanotime
const double pna = nonna ? REAL(na)[0] : na_double;
const double *restrict pa; if (!na_a) pa = REAL(a);
const double *restrict pb; if (!na_b) pb = REAL(b);
const double *restrict pna; if (!na_n) pna = REAL(na);
const double na = Rinherits(a, char_integer64) ? NA_INT64_D : NA_REAL; // Rinherits() is true for nanotime
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case STRSXP : {
const SEXP *restrict pa = STRING_PTR(a);
const SEXP *restrict pb = STRING_PTR(b);
const SEXP pna = nonna ? STRING_PTR(na)[0] : NA_STRING;
const SEXP *restrict pa; if (!na_a) pa = STRING_PTR(a);
const SEXP *restrict pb; if (!na_b) pb = STRING_PTR(b);
const SEXP *restrict pna; if (!na_n) pna = STRING_PTR(na);
const SEXP na = NA_STRING;
for (int64_t i=0; i<len0; ++i) {
SET_STRING_ELT(ans, i, pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna));
SET_STRING_ELT(
ans, i, pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask])
);
}
} break;
case CPLXSXP : {
Rcomplex *restrict pans = COMPLEX(ans);
const Rcomplex *restrict pa = COMPLEX(a);
const Rcomplex *restrict pb = COMPLEX(b);
const Rcomplex pna = nonna ? COMPLEX(na)[0] : NA_CPLX;
const Rcomplex *restrict pa; if (!na_a) pa = COMPLEX(a);
const Rcomplex *restrict pb; if (!na_b) pb = COMPLEX(b);
const Rcomplex *restrict pna; if (!na_n) pna = COMPLEX(na);
const Rcomplex na = NA_CPLX;
#pragma omp parallel for num_threads(getDTthreads(len0, true))
for (int64_t i=0; i<len0; ++i) {
pans[i] = pl[i]==0 ? pb[i & bmask] : (pl[i]==1 ? pa[i & amask] : pna);
pans[i] = pl[i]==0 ?
(na_b ? na : pb[i & bmask]) :
pl[i]==1 ?
(na_a ? na : pa[i & amask]) :
(na_n ? na : pna[i & nmask]);
}
} break;
case VECSXP : {
const SEXP *restrict pa = SEXPPTR_RO(a);
const SEXP *restrict pb = SEXPPTR_RO(b);
const SEXP *restrict pna = SEXPPTR_RO(na);
const SEXP *restrict pa; if (!na_a) pa = SEXPPTR_RO(a);
const SEXP *restrict pb; if (!na_b) pb = SEXPPTR_RO(b);
const SEXP *restrict pna; if (!na_n) pna = SEXPPTR_RO(na);
for (int64_t i=0; i<len0; ++i) {
if (pl[i]==NA_LOGICAL) {
if (nonna)
SET_VECTOR_ELT(ans, i, pna[0]);
continue; // allocVector already initialized with R_NilValue
if (pl[i] == NA_LOGICAL) {
if (!na_n) SET_VECTOR_ELT(ans, i, pna[i & nmask]);
} else if (pl[i]==0) {
if (!na_b) SET_VECTOR_ELT(ans, i, pb[i & bmask]);
} else if (pl[i]==1) {
if (!na_a) SET_VECTOR_ELT(ans, i, pa[i & amask]);
}
SET_VECTOR_ELT(ans, i, pl[i]==0 ? pb[i & bmask] : pa[i & amask]);
}
} break;
default:
Expand Down

0 comments on commit 03dff91

Please sign in to comment.