From 0a0e39b7bff5565ea77d7f55338f5638c91a587c Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 7 Mar 2020 09:44:03 +0000 Subject: [PATCH 1/8] fifelse() coerces NA to other types and supports vector na argument --- NEWS.md | 2 + inst/tests/tests.Rraw | 20 ++++- man/fifelse.Rd | 4 +- src/fifelse.c | 181 ++++++++++++++++++++++++++---------------- 4 files changed, 132 insertions(+), 75 deletions(-) diff --git a/NEWS.md b/NEWS.md index 71fd76aa65..051ba973a6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -81,6 +81,8 @@ unit = "s") 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. +15. `fifelse()` now coerces the logical `NA` to other types and the `na` argument supports vectorized input. Thanks to @shrektan for the PR. + ## BUG FIXES 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7cc6819e8f..e7447a47a9 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15704,7 +15704,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.") -test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please") +test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but expect logical") test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).") test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).") test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec))) @@ -15730,7 +15730,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na) test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na)) test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na)) test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na)) -test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please") +test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but expect double") test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na)) test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na)) test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na)) @@ -16167,7 +16167,7 @@ test(2100.03, fifelse(test_vec_na, TRUE, FALSE, TRUE), as.logical(out_vec_na)) test(2100.04, fifelse(test_vec_na, "1", "0","2"), as.character(out_vec_na)) test(2100.05, fifelse(test_vec_na, 1+0i, 0+0i, 2+0i), as.complex(out_vec_na)) test(2100.06, fifelse(c(TRUE,FALSE,NA), list(1:5), list(5:1), list(15:11)), list(1:5,5:1,15:11)) -test(2100.07, fifelse(test_vec_na, 1, 0, 2L), error = "'yes' is of type double but 'na' is of type integer. Please make sure that both arguments have the same type.") +test(2100.07, fifelse(test_vec_na, 1, 0, 2L), c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2)) # corece na test(2100.08, fifelse(test_vec_na, 1, 0, c(2,3)), error = "Length of 'na' is 2 but must be 1") test(2100.09, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), as.Date("2019-08-29")), as.Date(c(18139, 18138, 18138, 18138, 18138, 18137), origin = '1970-01-01')) test(2100.10, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), 18137), error = "'yes' has different class than 'na'. Please make sure that both arguments have the same class.") @@ -16846,3 +16846,17 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# fifelse supports vector na arguments and corece NA to other types +test(2139.01, fifelse(c(TRUE, FALSE, TRUE, NA), 1L, 2L, 1.0), c(1, 2, 1, 1)) +test(2139.02, fifelse(c(TRUE, FALSE, TRUE, NA), 1, 2, 1L), c(1, 2, 1, 1)) +test(2139.03, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, 101:104), c(1L, 12L, 3L, 104L)) +test(2139.04, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, 101:104), c(NA, 12L, NA, 104L)) +test(2139.05, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, 101:104), c(1L, NA, 3L, 104L)) +test(2139.06, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, NA), c(1L, 12L, 3L, NA)) +test(2139.07, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, NA), c(1L, NA, 3L, NA)) +test(2139.08, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA), c(NA, NA, NA, NA)) +test(2139.09, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA_character_), rep(NA_character_, 4L)) +test(2139.10, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, 101:104), c(NA, NA, NA, 104L)) +test(2139.11, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, NA), c(NA, 12L, NA, NA)) +test(2139.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), as.Date(c(NA, NA, NA, "2020-01-01"))) diff --git a/man/fifelse.Rd b/man/fifelse.Rd index 2fe355c98c..4165dd796d 100644 --- a/man/fifelse.Rd +++ b/man/fifelse.Rd @@ -11,10 +11,10 @@ \arguments{ \item{test}{ A logical vector. } \item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. } - \item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and length \code{1}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. } + \item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and its length must be either \code{1} or the same length of \code{test}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. } } \details{ -In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes. +In contrast to \code{\link[base]{ifelse}} attributes are copied from the first non-\code{NA} argument to the output. This is useful when returning \code{Date}, \code{factor} or other classes. } \value{ A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}. diff --git a/src/fifelse.c b/src/fifelse.c index 3a05fce6d3..8887b26886 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -11,127 +11,168 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { const int64_t len0 = xlength(l); const int64_t len1 = xlength(a); const int64_t len2 = xlength(b); + const int64_t len3 = xlength(na); SEXPTYPE ta = TYPEOF(a); SEXPTYPE tb = TYPEOF(b); + SEXPTYPE tn = TYPEOF(na); + bool na_a = ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; + bool na_b = tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; + bool na_n = isNull(na) || (tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); + int nprotect = 0; - - if (ta != tb) { - if (ta == INTSXP && tb == REALSXP) { - SEXP tmp = PROTECT(coerceVector(a, REALSXP)); nprotect++; - a = tmp; + SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP; + if (!(na_a && na_b && na_n)) { + if (!na_b && tans==INTSXP && tb==REALSXP) tans = tb; + if (!na_n && tans==INTSXP && tn==REALSXP) tans = tn; + if (!na_a && tans==REALSXP && ta==INTSXP) { + a = PROTECT(coerceVector(a, REALSXP)); nprotect++; ta = REALSXP; - } else if (ta == REALSXP && tb == INTSXP) { - SEXP tmp = PROTECT(coerceVector(b, REALSXP)); nprotect++; - b = tmp; + } + if (!na_a && tans != ta) + error(_("'yes' is of type %s but expect %s."), type2char(ta), type2char(tans)); + if (!na_b && tans==REALSXP && tb==INTSXP) { + b = PROTECT(coerceVector(b, REALSXP)); nprotect++; tb = REALSXP; - } else { - error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb)); } + if (!na_b && tans != tb) + error(_("'no' is of type %s but expect %s."), type2char(tb), type2char(tans)); + if (!na_n && tans==REALSXP && tn==INTSXP) { + na = PROTECT(coerceVector(na, REALSXP)); nprotect++; + tn = REALSXP; + } + if (!na_n && tans != tn) + error(_("'na' is of type %s but expect %s."), type2char(tn), type2char(tans)); } - - if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) + + if (!na_a && !na_b && !R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class.")); UNPROTECT(2); - - if (isFactor(a)) { - if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0)) + if (!na_a && !na_n && !R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) + error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class.")); + UNPROTECT(2); + if (!na_b && !na_n && !R_compute_identical(PROTECT(getAttrib(b,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) + error(_("'no' has different class than 'na'. Please make sure that both arguments have the same class.")); + UNPROTECT(2); + + if (isFactor(a) || isFactor(b)) { + if (!na_a && !na_b && !R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0)) error(_("'yes' and 'no' are both type factor but their levels are different.")); UNPROTECT(2); + if (!na_a && !na_n && !R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) + error(_("'yes' and 'na' are both type factor but their levels are different.")); + UNPROTECT(2); + if (!na_b && !na_n && !R_compute_identical(PROTECT(getAttrib(b,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) + error(_("'no' and 'na' are both type factor but their levels are different.")); + UNPROTECT(2); } - if (len1!=1 && len1!=len0) + if (!na_a && len1!=1 && len1!=len0) error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0); - if (len2!=1 && len2!=len0) + if (!na_b && len2!=1 && len2!=len0) error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0); + if (!na_n && len3!=1 && len3!=len0) + error(_("Length of 'na' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len3, len0); const int64_t amask = len1>1 ? INT64_MAX : 0; // for scalar 'a' bitwise AND will reset iterator to first element: pa[i & amask] -> pa[0] const int64_t bmask = len2>1 ? INT64_MAX : 0; + const int64_t nmask = len3>1 ? INT64_MAX : 0; const int *restrict pl = LOGICAL(l); - SEXP ans = PROTECT(allocVector(ta, len0)); nprotect++; - copyMostAttrib(a, ans); - - bool nonna = !isNull(na); - if (nonna) { - if (xlength(na) != 1) - error(_("Length of 'na' is %"PRId64" but must be 1"), (int64_t)xlength(na)); - SEXPTYPE tn = TYPEOF(na); - if (tn == LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) { - nonna = false; - } else { - if (tn != ta) - error(_("'yes' is of type %s but 'na' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tn)); - if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) - error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class.")); - UNPROTECT(2); - if (isFactor(a)) { - if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) - error(_("'yes' and 'na' are both type factor but their levels are different.")); - UNPROTECT(2); - } - } - } + SEXP ans = PROTECT(allocVector(tans, len0)); nprotect++; + if (!na_a) + copyMostAttrib(a, ans); + else if (!na_b) + copyMostAttrib(b, ans); + else if (!na_n) + copyMostAttrib(na, ans); - switch(ta) { + switch(tans) { case LGLSXP: { int *restrict pans = LOGICAL(ans); - const int *restrict pa = LOGICAL(a); - const int *restrict pb = LOGICAL(b); - const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL; + const int *restrict pa; if (!na_a) pa = LOGICAL(a); + const int *restrict pb; if (!na_b) pb = LOGICAL(b); + const int *restrict pna; if (!na_n) pna = LOGICAL(na); + const int na = NA_LOGICAL; #pragma omp parallel for num_threads(getDTthreads()) for (int64_t i=0; i Date: Sat, 7 Mar 2020 10:21:58 +0000 Subject: [PATCH 2/8] better write the na check in a seperated if --- src/fifelse.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index 8887b26886..41f10e4829 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -166,12 +166,12 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { const SEXP *restrict pb; if (!na_b) pb = SEXPPTR_RO(b); const SEXP *restrict pna; if (!na_n) pna = SEXPPTR_RO(na); for (int64_t i=0; i Date: Sat, 7 Mar 2020 23:49:07 +0800 Subject: [PATCH 3/8] fix a bug that UNPROTECT may be called invalidly --- src/fifelse.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index 41f10e4829..fb5c02dd28 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -44,27 +44,39 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { error(_("'na' is of type %s but expect %s."), type2char(tn), type2char(tans)); } - if (!na_a && !na_b && !R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) - error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class.")); - UNPROTECT(2); - if (!na_a && !na_n && !R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) - error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class.")); - UNPROTECT(2); - if (!na_b && !na_n && !R_compute_identical(PROTECT(getAttrib(b,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) - error(_("'no' has different class than 'na'. Please make sure that both arguments have the same class.")); - UNPROTECT(2); - - if (isFactor(a) || isFactor(b)) { - if (!na_a && !na_b && !R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0)) - error(_("'yes' and 'no' are both type factor but their levels are different.")); + if (!na_a && !na_b) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) + error(_("'yes' has different class than 'no'. Please make sure that both arguments have the same class.")); UNPROTECT(2); - if (!na_a && !na_n && !R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) - error(_("'yes' and 'na' are both type factor but their levels are different.")); + } + if (!na_a && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) + error(_("'yes' has different class than 'na'. Please make sure that both arguments have the same class.")); UNPROTECT(2); - if (!na_b && !na_n && !R_compute_identical(PROTECT(getAttrib(b,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) - error(_("'no' and 'na' are both type factor but their levels are different.")); + } + if (!na_b && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(b,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) + error(_("'no' has different class than 'na'. Please make sure that both arguments have the same class.")); UNPROTECT(2); } + + if (isFactor(a) || isFactor(b)) { + if (!na_a && !na_b) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(b,R_LevelsSymbol)), 0)) + error(_("'yes' and 'no' are both type factor but their levels are different.")); + UNPROTECT(2); + } + if (!na_a && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(a,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) + error(_("'yes' and 'na' are both type factor but their levels are different.")); + UNPROTECT(2); + } + if (!na_b && !na_n) { + if (!R_compute_identical(PROTECT(getAttrib(b,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) + error(_("'no' and 'na' are both type factor but their levels are different.")); + UNPROTECT(2); + } + } if (!na_a && len1!=1 && len1!=len0) error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0); From 2b69bc4c3dc3f284ea274c8bb9e41cc92f6a13e0 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sun, 8 Mar 2020 00:27:18 +0800 Subject: [PATCH 4/8] improve the error message when types of arguments are not consistent --- inst/tests/tests.Rraw | 4 ++-- src/fifelse.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e7447a47a9..75e9c353c3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15704,7 +15704,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.") -test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but expect logical") +test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but 'yes' is logical. Please") test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).") test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).") test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec))) @@ -15730,7 +15730,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na) test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na)) test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na)) test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na)) -test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but expect double") +test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but 'yes' is double. Please") test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na)) test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na)) test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na)) diff --git a/src/fifelse.c b/src/fifelse.c index fb5c02dd28..aacd6d42a8 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -22,26 +22,26 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { int nprotect = 0; SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP; if (!(na_a && na_b && na_n)) { + SEXPTYPE ta0 = ta, tb0 = tb, tn0 = tn; // record the original type for error message use if (!na_b && tans==INTSXP && tb==REALSXP) tans = tb; if (!na_n && tans==INTSXP && tn==REALSXP) tans = tn; if (!na_a && tans==REALSXP && ta==INTSXP) { a = PROTECT(coerceVector(a, REALSXP)); nprotect++; ta = REALSXP; } - if (!na_a && tans != ta) - error(_("'yes' is of type %s but expect %s."), type2char(ta), type2char(tans)); + // it's not possible that non-NA `yes`' type will be different from `tans` if (!na_b && tans==REALSXP && tb==INTSXP) { b = PROTECT(coerceVector(b, REALSXP)); nprotect++; tb = REALSXP; } if (!na_b && tans != tb) - error(_("'no' is of type %s but expect %s."), type2char(tb), type2char(tans)); + error(_("'no' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tb0), tans==ta0 ? "yes" : "na", tans==ta0 ? type2char(ta0) : type2char(tn0)); if (!na_n && tans==REALSXP && tn==INTSXP) { na = PROTECT(coerceVector(na, REALSXP)); nprotect++; tn = REALSXP; } if (!na_n && tans != tn) - error(_("'na' is of type %s but expect %s."), type2char(tn), type2char(tans)); + error(_("'na' is of type %s but '%s' is %s. Please make all arguments have the same type."), type2char(tn0), tans==ta0 ? "yes" : "no", tans==ta0 ? type2char(ta0) : type2char(tb0)); } if (!na_a && !na_b) { From 0acf214e4b6247375bbe0332d295737e9dc11d54 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 7 Mar 2020 16:39:23 +0000 Subject: [PATCH 5/8] more tests --- inst/tests/tests.Rraw | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 75e9c353c3..1dda18609b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16847,7 +16847,7 @@ test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) -# fifelse supports vector na arguments and corece NA to other types +# by @shrektan: fifelse supports vector na arguments and corece NA to other types test(2139.01, fifelse(c(TRUE, FALSE, TRUE, NA), 1L, 2L, 1.0), c(1, 2, 1, 1)) test(2139.02, fifelse(c(TRUE, FALSE, TRUE, NA), 1, 2, 1L), c(1, 2, 1, 1)) test(2139.03, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, 101:104), c(1L, 12L, 3L, 104L)) @@ -16860,3 +16860,7 @@ test(2139.09, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA_character_), rep(NA_c test(2139.10, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, 101:104), c(NA, NA, NA, 104L)) test(2139.11, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, NA), c(NA, 12L, NA, NA)) test(2139.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), as.Date(c(NA, NA, NA, "2020-01-01"))) +test(2139.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message +test(2139.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")), error="'no' has different class than 'na'. Please") +test(2139.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different") +test(2139.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA From b9f33e1cdeb5bf51aae1fc3ca4b97d0cb4757b97 Mon Sep 17 00:00:00 2001 From: shrektan Date: Fri, 14 May 2021 22:24:04 +0800 Subject: [PATCH 6/8] only check NA when the length is not zero --- src/fifelse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index c62e6f76ed..591f59db2f 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -15,9 +15,9 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { SEXPTYPE ta = TYPEOF(a); SEXPTYPE tb = TYPEOF(b); SEXPTYPE tn = TYPEOF(na); - bool na_a = ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; - bool na_b = tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; - bool na_n = isNull(na) || (tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); + bool na_a = len1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; + bool na_b = len2 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; + bool na_n = isNull(na) || (len3 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); int nprotect = 0; SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP; From 9d3a7fbf54b8409f57faeafa1f312b802a87f795 Mon Sep 17 00:00:00 2001 From: shrektan Date: Fri, 14 May 2021 22:39:57 +0800 Subject: [PATCH 7/8] na_a/b/n should stand for the scalar NA --- src/fifelse.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index 591f59db2f..b1c13b8aeb 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -15,9 +15,10 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { SEXPTYPE ta = TYPEOF(a); SEXPTYPE tb = TYPEOF(b); SEXPTYPE tn = TYPEOF(na); - bool na_a = len1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; - bool na_b = len2 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; - bool na_n = isNull(na) || (len3 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); + // na_a/b/n means a scalar NA (or NULL for the na argument), which is considered to be coerced into other types + bool na_a = len1==1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; + bool na_b = len2==1 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; + bool na_n = isNull(na) || (len3==1 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); int nprotect = 0; SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP; From 0c810d28cce01d18289abde5572177ea405cb329 Mon Sep 17 00:00:00 2001 From: shrektan Date: Fri, 14 May 2021 22:41:12 +0800 Subject: [PATCH 8/8] check the length of the arguments earlier --- src/fifelse.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index b1c13b8aeb..e5d22a1eea 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -19,6 +19,13 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { bool na_a = len1==1 && ta==LGLSXP && LOGICAL(a)[0]==NA_LOGICAL; bool na_b = len2==1 && tb==LGLSXP && LOGICAL(b)[0]==NA_LOGICAL; bool na_n = isNull(na) || (len3==1 && tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL); + + if (!na_a && len1!=1 && len1!=len0) + error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0); + if (!na_b && len2!=1 && len2!=len0) + error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0); + if (!na_n && len3!=1 && len3!=len0) + error(_("Length of 'na' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len3, len0); int nprotect = 0; SEXPTYPE tans = !na_a ? ta : !na_b ? tb : !na_n ? tn : LGLSXP; @@ -79,12 +86,6 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { } } - if (!na_a && len1!=1 && len1!=len0) - error(_("Length of 'yes' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len1, len0); - if (!na_b && len2!=1 && len2!=len0) - error(_("Length of 'no' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len2, len0); - if (!na_n && len3!=1 && len3!=len0) - error(_("Length of 'na' is %"PRId64" but must be 1 or length of 'test' (%"PRId64")."), len3, len0); const int64_t amask = len1>1 ? INT64_MAX : 0; // for scalar 'a' bitwise AND will reset iterator to first element: pa[i & amask] -> pa[0] const int64_t bmask = len2>1 ? INT64_MAX : 0; const int64_t nmask = len3>1 ? INT64_MAX : 0;