From d1aece623f24c15bde146596983141974ccbba98 Mon Sep 17 00:00:00 2001 From: 2005m Date: Fri, 1 Nov 2019 19:53:09 +0000 Subject: [PATCH 01/15] fcase --- NAMESPACE | 1 + NEWS.md | 13 +++ R/wrappers.R | 1 + inst/tests/tests.Rraw | 84 ++++++++++++++++++++ man/fcase.Rd | 59 ++++++++++++++ src/data.table.h | 4 + src/fifelse.c | 179 ++++++++++++++++++++++++++++++++++++++++++ src/init.c | 5 ++ 8 files changed, 346 insertions(+) create mode 100644 man/fcase.Rd diff --git a/NAMESPACE b/NAMESPACE index 49bd3a35d6..b0736c8b76 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(setNumericRounding, getNumericRounding) export(chmatch, "%chin%", chorder, chgroup) export(rbindlist) export(fifelse) +export(fcase) export(fread) export(fwrite) export(foverlaps) diff --git a/NEWS.md b/NEWS.md index f53b8fa6e7..e9d46d7f6b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,19 @@ ## NEW FEATURES +1. New function `fcase(.,default)` has been implemented in C, [#3823](https://github.com/Rdatatable/data.table/issues/3823). It is comparable to `dplyr::case_when`. Please see `?data.table::fcase` for more details. + +```R +# default 4 threads on a laptop with 16GB RAM and 8 logical CPU +x = sample(1L:10L, 3e8, replace=TRUE) # 1GB +microbenchmark::microbenchmark( + dplyr::case_when(x < 5L ~ 1L, x > 5L ~ 3L), + data.table::fcase((x < 5L, 1L, x > 5L, 3L)), + times = 5L, + unit = "s" +) +``` + ## BUG FIXES ## NOTES diff --git a/R/wrappers.R b/R/wrappers.R index d1ca379709..b581ea589d 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -6,6 +6,7 @@ fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE) setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) +fcase = function(...,default=NA) .External(CfcaseR, default,...) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) coerceFill = function(x) .Call(CcoerceFillR, x) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 81d9fcaebd..745eb539e6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16370,6 +16370,90 @@ test(2120.07, iDT[(i_id), order(e_date, e_time)], c(3L,4L,1L,2L)) # wrapping wi test(2120.08, tmp[iDT[(i_id), order(e_date, e_time)]], # different result with the NA data.table(i_id=c("A",NA,"B","C"), N=c(5L,NA,5L,5L))) +# fcase, #3823 +test_vec1 = -5L:5L < 0L +test_vec2 = -5L:5L > 0L +test_vec3 = -5L:5L < 5L +test_vec_na1 = c(test_vec1, NA) +test_vec_na2 = c(test_vec2, NA) +out_vec = c(1,1,1,1,1,NA,0,0,0,0,0) +out_vec_def = c(1,1,1,1,1,2,0,0,0,0,0) +out_vec_na = c(1,1,1,1,1,NA,0,0,0,0,0,NA) +out_vec_oc = c(1,1,1,1,1,NA,NA,NA,NA,NA,NA) + +test(2121.01, fcase(test_vec1, 1L, test_vec2, 0L), as.integer(out_vec)) +test(2121.02, fcase(test_vec1, 1, test_vec2, 0), out_vec) +test(2121.03, fcase(test_vec1, "1", test_vec2, "0"), as.character(out_vec)) +test(2121.04, fcase(test_vec1, TRUE, test_vec2, FALSE), as.logical(out_vec)) +test(2121.05, fcase(test_vec1, 1+0i, test_vec2, 0+0i), as.complex(out_vec)) +test(2121.06, fcase(test_vec1, list(1), test_vec2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) +test(2121.07, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14")), c(rep(as.Date("2019-10-11"),5),NA,rep(as.Date("2019-10-14"),5))) +test(2121.08, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3])), factor(c(rep("a",5),NA,rep("b",5)), levels=letters[1:3])) +test(2121.09, fcase(test_vec1, 1L, test_vec2, 0L, default=2L), as.integer(out_vec_def)) +test(2121.10, fcase(test_vec1, 1, test_vec2, 0,default=2), out_vec_def) +test(2121.11, fcase(test_vec1, "1", test_vec2, "0", default ="2"), as.character(out_vec_def)) +test(2121.12, fcase(test_vec1, TRUE, test_vec2, FALSE, default=TRUE), as.logical(out_vec_def)) +test(2121.13, fcase(test_vec1, 1+0i, test_vec2, 0+0i, default=2+0i), as.complex(out_vec_def)) +test(2121.14, fcase(test_vec1, list(1), test_vec2, list(0),default=list(2)), list(1,1,1,1,1, 2, 0, 0, 0, 0, 0)) +test(2121.15, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default=as.Date("2019-10-15")), c(rep(as.Date("2019-10-11"),5),as.Date("2019-10-15"),rep(as.Date("2019-10-14"),5))) +test(2121.16, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3]),default=factor("c", levels=letters[1:3])), factor(c(rep("a",5),"c",rep("b",5)), levels=letters[1:3])) +test(2121.17, fcase(test_vec1, as.raw(1), test_vec2, as.raw(0)), error="Type raw is not supported.") +test(2121.18, fcase(test_vec1, factor("a", levels=letters[1]), test_vec2, factor("b", levels=letters[1:3])), error="Argument #2 and argument #4 are both factor but their levels are different.") +test(2121.19, fcase(test_vec1, factor("a", levels=letters[1:2]), test_vec2, factor("b", levels=letters[1:2]),default=factor("c", levels=letters[1:3])), error="Resulting value and 'default' are both type factor but their levels are different.") +test(2121.20, fcase(test_vec1, 1L:10L, test_vec2, 3L:12L, test_vec2), error="Please supply an even number of arguments in ..., consisting of logical condition, resulting value pairs (in that order); received 6 inputs (including 'default' argument).") +test(2121.21, fcase(test_vec1, 1L, test_vec2, 3), error="Argument #4 is of type double, however argument #2 is of type integer. Please make sure all output values have the same type.") +test(2121.22, fcase(test_vec1, "FALSE", test_vec2, TRUE), error="Argument #4 is of type logical, however argument #2 is of type character. Please make sure all output values have the same type.") +test(2121.23, fcase(test_vec1, "FALSE", test_vec2, 5L), error="Argument #4 is of type integer, however argument #2 is of type character. Please make sure all output values have the same type.") +test(2121.24, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default="2019-10-15"), error="Resulting value is of type double but 'default' is of type character. Please make sure that both arguments have the same type.") +test(2121.25, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default=123), error="Resulting value has different class than 'default'. Please make sure that both arguments have the same class.") +if(test_bit64) { + i=as.integer64(1:12)+3e9 + test(2121.26, fcase(test_vec_na1, i, test_vec_na2, i+100), c(i[1L:5L], as.integer64(NA),i[7L:12L]+100)) +} +if(test_nanotime) { + n=nanotime(1:12) + test(2121.27, fcase(test_vec_na1, n, test_vec_na2, n+100), c(n[1L:5L], nanotime(NA),n[7L:12L]+100)) +} +test(2121.28, fcase(test_vec1, rep(1L,11L), test_vec2, rep(0L,11L)), as.integer(out_vec)) +test(2121.29, fcase(test_vec1, rep(1,11L), test_vec2, rep(0,11L)), out_vec) +test(2121.30, fcase(test_vec1, rep("1",11L), test_vec2, rep("0",11L)), as.character(out_vec)) +test(2121.31, fcase(test_vec1, rep(TRUE,11L), test_vec2, rep(FALSE,11L)), as.logical(out_vec)) +test(2121.32, fcase(test_vec1, rep(1+0i,11L), test_vec2, rep(0+0i,11L)), as.complex(out_vec)) +test(2121.33, fcase(test_vec1, rep(list(1),11L), test_vec2, rep(list(0),11L)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) +test(2121.34, fcase(test_vec1, rep(as.Date("2019-10-11"),11L), test_vec2, rep(as.Date("2019-10-14"),11L)), c(rep(as.Date("2019-10-11"),5),NA,rep(as.Date("2019-10-14"),5))) +test(2121.35, fcase(test_vec1, rep(factor("a", levels=letters[1:3]),11L), test_vec2, rep(factor("b", levels=letters[1:3]),11L)), factor(c(rep("a",5),NA,rep("b",5)), levels=letters[1:3])) +test(2121.36, fcase(test_vec_na1, 1L, test_vec_na2, 0L), as.integer(out_vec_na)) +test(2121.37, fcase(test_vec_na1, 1, test_vec_na2, 0), out_vec_na) +test(2121.38, fcase(test_vec_na1, "1", test_vec_na2, "0"), as.character(out_vec_na)) +test(2121.39, fcase(test_vec_na1, TRUE, test_vec_na2, FALSE), as.logical(out_vec_na)) +test(2121.40, fcase(test_vec_na1, 1+0i, test_vec_na2, 0+0i), as.complex(out_vec_na)) +test(2121.41, fcase(test_vec_na1, list(1), test_vec_na2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0,NULL)) +test(2121.42, fcase(c(TRUE,TRUE,TRUE,FALSE,FALSE),factor(NA,levels=letters[1:5]),c(FALSE,FALSE,FALSE,TRUE,TRUE),factor(letters[1:5])),factor(c(NA,NA,NA,"d","e"),levels=letters[1:5])) +test(2121.43, fcase(c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(NA,levels=letters[1:6]),c(FALSE,FALSE,FALSE,TRUE,NA,TRUE),factor(letters[1:6])),factor(c(NA,NA,NA,"d",NA,"f"),levels=letters[1:6])) +test(2121.44, fcase(c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(letters[1:6]),c(FALSE,FALSE,FALSE,TRUE,NA,TRUE),factor(NA,levels = letters[1:6])),factor(c("a","b","c",NA,NA,NA),levels=letters[1:6])) +test(2121.45, fcase(c(TRUE,NA,TRUE,FALSE,FALSE,FALSE),factor(NA),c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(NA)),factor(c(NA,NA,NA,NA,NA,NA))) +test(2121.46, fcase(TRUE, list(data.table(1:5)), FALSE, list(data.table(5:1))), list(data.table(1:5))) +test(2121.47, fcase(FALSE, list(data.table(1:5)), TRUE, list(data.table(5:1))), list(data.table(5:1))) +test(2121.48, fcase(TRUE, list(data.frame(1:5)), FALSE, list(data.frame(5:1))), list(data.frame(1:5))) +test(2121.49, fcase(FALSE, list(data.frame(1:5)), TRUE, list(data.frame(5:1))), list(data.frame(5:1))) +test(2121.50, fcase(1L,1L,TRUE,0L),error = "Argument #1 must be logical.") +test(2121.51, fcase(TRUE,1L,5L,0L),error = "Argument #3 must be logical.") +test(2121.52, fcase(test_vec1, 1L, test_vec2, 0L, test_vec3, 2L), as.integer(out_vec_def)) +test(2121.53, fcase(test_vec1, 1, test_vec2, 0, test_vec3, 2), out_vec_def) +test(2121.54, fcase(test_vec1, "1", test_vec2, "0", test_vec3, "2"), as.character(out_vec_def)) +test(2121.55, fcase(test_vec1, TRUE, test_vec2, FALSE, test_vec3, TRUE), as.logical(out_vec_def)) +test(2121.56, fcase(test_vec1, 1+0i, test_vec2, 0+0i, test_vec3, 2+0i), as.complex(out_vec_def)) +test(2121.57, fcase(test_vec1, list(1), test_vec2, list(0), test_vec3, list(2)), list(1,1,1,1,1, 2, 0, 0, 0, 0, 0)) +test(2121.58, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"), test_vec3, as.Date("2019-10-15")), c(rep(as.Date("2019-10-11"),5),as.Date("2019-10-15"),rep(as.Date("2019-10-14"),5))) +test(2121.59, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3]), test_vec3, factor("c", levels=letters[1:3])), factor(c(rep("a",5),"c",rep("b",5)), levels=letters[1:3])) +test(2121.60, fcase(test_vec1, 1L), as.integer(out_vec_oc)) +test(2121.61, fcase(test_vec1, 1), out_vec_oc) +test(2121.62, fcase(test_vec1, "1"), as.character(out_vec_oc)) +test(2121.63, fcase(test_vec1, TRUE), as.logical(out_vec_oc)) +test(2121.64, fcase(test_vec1, 1+0i), as.complex(out_vec_oc)) +test(2121.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, NULL, NULL)) +test(2121.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) +test(2121.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) ################################### # Add new tests above this line # diff --git a/man/fcase.Rd b/man/fcase.Rd new file mode 100644 index 0000000000..23167eea71 --- /dev/null +++ b/man/fcase.Rd @@ -0,0 +1,59 @@ +\name{fcase} +\alias{fcase} +\title{felse } +\description{ +\code{fcase} is fast implementation of SQL \code{CASE WHEN} statement for R. This function is a nested version of \code{fifelse}. It is comparable to \code{dplyr::case_when} and supports \code{bit64}'s \code{integer64} and \code{nanotime} classes. +} +\usage{ + fcase(..., default=NA) +} +\arguments{ +\item{...}{A sequence consisting of logical condition \code{when}, resulting value \code{value} pairs in the following order \code{when1, value1, when2,value2,...,whenN,valueN}. Logical conditions \code{when1,when2,...whenN} must all have the same length, type and attributes. The length of \code{value} may either be of length of \code{when} or 1. Please see examples section for further details.} +\item{default}{Default value is \code{NA}. This is the value to return if none of the logical condition \code{when1,when2,...whenN} is not \code{TRUE}.} +} +\value{ + It returns a value with the same length as logical conditions in \code{...} filled with corresponding resulting value from \code{...} or eventually \code{default}. Attributes of logical conditions in \code{...} are preserved. + +} +\seealso{ + \code{\link{fifelse}} +} +\examples{ +set.seed(123) +x = sample(1:10, 5, replace = TRUE) +data.table::fcase( + x < 5L, 1L, + x > 5L, 3L +) +# [1] 1 3 NA 3 3 + +data.table::fcase( + x < 5L, 1L:5L, + x > 5L, 3L:7L +) +# [1] 1 4 NA 6 7 + +# fcase preserves attributes, example with dates +data.table::case{ + x < 5L, as.Date("2019-10-11"), + x > 5L, as.Date("2019-10-14") +} +# [1] "2019-10-11" "2019-10-14" NA "2019-10-14" "2019-10-14" + +# fcase example with factor +data.table::case{ + x < 5L, factor("a", levels=letters[1:3]), + x > 5L, factor("b", levels=letters[1:3]) +} +# [1] a b b b +# Levels: a b c + +# Example of using the 'default' arguement +data.table::fcase( + x < 5L, 1L, + x > 5L, 3L, + default = 5L +) +# [1] 1 3 5 3 3 +} +\keyword{ data } diff --git a/src/data.table.h b/src/data.table.h index 1f5c9cd5ff..a9fee23e26 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -232,3 +232,7 @@ SEXP coerceUtf8IfNeeded(SEXP x); char *end(char *start); void ansMsg(ans_t *ans, int n, bool verbose, const char *func); SEXP testMsgR(SEXP status, SEXP x, SEXP k); + +//fifelse.c +SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); +SEXP fcaseR(SEXP args); diff --git a/src/fifelse.c b/src/fifelse.c index f702f1c732..cf2743fbc0 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -140,3 +140,182 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { UNPROTECT(nprotect); return ans; } + +SEXP fcaseR(SEXP args) { + int n=length(args)-2; + if (n % 2) { + error("Please supply an even number of arguments in ..., consisting of logical condition," + " resulting value pairs (in that order); received %d inputs (including 'default' argument).", n+1); + } + args = CDR(args); + SEXP na = CAR(args); args = CDR(args); + int nprotect = 0, l = 0; + int64_t len0=0, len1=0, len2=0, idx=0; + SEXP ans = R_NilValue, value0 = R_NilValue, tracker = R_NilValue; + SEXPTYPE type0; + bool nonna = !isNull(na), imask = true; + int *restrict p = NULL; + n = n/2; + for (int i=0; i1 ? INT64_MAX : 0; + switch(TYPEOF(outs)) { + case LGLSXP: { + const int *restrict pouts = LOGICAL(outs); + int *restrict pans = LOGICAL(ans); + const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL; + for (int64_t j=0; j Date: Fri, 1 Nov 2019 23:31:44 +0000 Subject: [PATCH 02/15] fix fcase Rd --- man/fcase.Rd | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/man/fcase.Rd b/man/fcase.Rd index 23167eea71..b086cb9d51 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -1,6 +1,6 @@ \name{fcase} \alias{fcase} -\title{felse } +\title{fcase} \description{ \code{fcase} is fast implementation of SQL \code{CASE WHEN} statement for R. This function is a nested version of \code{fifelse}. It is comparable to \code{dplyr::case_when} and supports \code{bit64}'s \code{integer64} and \code{nanotime} classes. } @@ -13,39 +13,37 @@ } \value{ It returns a value with the same length as logical conditions in \code{...} filled with corresponding resulting value from \code{...} or eventually \code{default}. Attributes of logical conditions in \code{...} are preserved. - } \seealso{ \code{\link{fifelse}} } \examples{ -set.seed(123) -x = sample(1:10, 5, replace = TRUE) +x = 1:10 data.table::fcase( x < 5L, 1L, x > 5L, 3L ) -# [1] 1 3 NA 3 3 +# [1] 1 1 1 1 NA 3 3 3 3 3 data.table::fcase( - x < 5L, 1L:5L, - x > 5L, 3L:7L + x < 5L, 1L:10L, + x > 5L, 3L:12L ) -# [1] 1 4 NA 6 7 +# [1] 1 2 3 4 NA 8 9 10 11 12 # fcase preserves attributes, example with dates -data.table::case{ +data.table::fcase( x < 5L, as.Date("2019-10-11"), x > 5L, as.Date("2019-10-14") -} -# [1] "2019-10-11" "2019-10-14" NA "2019-10-14" "2019-10-14" +) +# [1] "2019-10-11" "2019-10-11" "2019-10-11" "2019-10-11" NA "2019-10-14" "2019-10-14" "2019-10-14" "2019-10-14" "2019-10-14" # fcase example with factor -data.table::case{ +data.table::fcase( x < 5L, factor("a", levels=letters[1:3]), x > 5L, factor("b", levels=letters[1:3]) -} -# [1] a b b b +) +# [1] a a a a b b b b b # Levels: a b c # Example of using the 'default' arguement @@ -54,6 +52,6 @@ data.table::fcase( x > 5L, 3L, default = 5L ) -# [1] 1 3 5 3 3 +# [1] 1 1 1 1 5 3 3 3 3 3 } \keyword{ data } From e4deb09d3557a5ca8faf446306ac02088ca93504 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 2 Nov 2019 11:17:59 +0000 Subject: [PATCH 03/15] add more tests --- inst/tests/tests.Rraw | 4 ++++ src/fifelse.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 745eb539e6..25a0a00f7a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16454,6 +16454,10 @@ test(2121.64, fcase(test_vec1, 1+0i), as.complex(out_vec_oc)) test(2121.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, NULL, NULL)) test(2121.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) test(2121.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) +test(2121.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' is 2 but must be 1.") +test(2121.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 is of length 12, however argument #1 is of length 11. Please make sure all logical conditions have the same length.") +test(2121.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") +test(2121.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #3 is 2 but must 1 or length of logical condition (11).") ################################### # Add new tests above this line # diff --git a/src/fifelse.c b/src/fifelse.c index cf2743fbc0..6aa61fa373 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -170,7 +170,7 @@ SEXP fcaseR(SEXP args) { value0 = outs; if (nonna) { if (xlength(na) != 1) { - error("Length of 'default' is %lld but must be 1", xlength(na)); + error("Length of 'default' is %lld but must be 1.", xlength(na)); } SEXPTYPE tn = TYPEOF(na); if (tn == LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) { @@ -224,7 +224,7 @@ SEXP fcaseR(SEXP args) { } len1 = xlength(outs); if (len1 != len0 && len1 != 1) { - error("Length of output value #%d is %lld but must 1 or length of logical condition (%lld)", i*2+1, len1, len0); + error("Length of output value #%d is %lld but must 1 or length of logical condition (%lld).", i*2+1, len1, len0); } int64_t amask = len1>1 ? INT64_MAX : 0; switch(TYPEOF(outs)) { From f5c5b020e53e12ee85e0f5fdfe102bc6ad8b2ad0 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 2 Nov 2019 15:52:20 +0000 Subject: [PATCH 04/15] fix 32bits --- inst/tests/tests.Rraw | 10 +++++----- src/fifelse.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 25a0a00f7a..6fdf966729 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16378,8 +16378,8 @@ test_vec_na1 = c(test_vec1, NA) test_vec_na2 = c(test_vec2, NA) out_vec = c(1,1,1,1,1,NA,0,0,0,0,0) out_vec_def = c(1,1,1,1,1,2,0,0,0,0,0) -out_vec_na = c(1,1,1,1,1,NA,0,0,0,0,0,NA) -out_vec_oc = c(1,1,1,1,1,NA,NA,NA,NA,NA,NA) +out_vec_na= c(1,1,1,1,1,NA,0,0,0,0,0,NA) +out_vec_oc= c(1,1,1,1,1,NA,NA,NA,NA,NA,NA) test(2121.01, fcase(test_vec1, 1L, test_vec2, 0L), as.integer(out_vec)) test(2121.02, fcase(test_vec1, 1, test_vec2, 0), out_vec) @@ -16454,10 +16454,10 @@ test(2121.64, fcase(test_vec1, 1+0i), as.complex(out_vec_oc)) test(2121.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, NULL, NULL)) test(2121.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) test(2121.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) -test(2121.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' is 2 but must be 1.") -test(2121.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 is of length 12, however argument #1 is of length 11. Please make sure all logical conditions have the same length.") +test(2121.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.") +test(2121.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument. Please make sure all logical conditions have the same length.") test(2121.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") -test(2121.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #3 is 2 but must 1 or length of logical condition (11).") +test(2121.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #3 is must either 1 or length of logical condition.") ################################### # Add new tests above this line # diff --git a/src/fifelse.c b/src/fifelse.c index 6aa61fa373..c453a2b4c8 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -170,7 +170,7 @@ SEXP fcaseR(SEXP args) { value0 = outs; if (nonna) { if (xlength(na) != 1) { - error("Length of 'default' is %lld but must be 1.", xlength(na)); + error("Length of 'default' must be 1."); } SEXPTYPE tn = TYPEOF(na); if (tn == LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) { @@ -201,9 +201,9 @@ SEXP fcaseR(SEXP args) { imask = false; l = 0; if (xlength(cons) != len0) { - error("Argument #%d is of length %lld, however argument #1 is of length %lld. " + error("Argument #%d has a different length than argument. " "Please make sure all logical conditions have the same length.", - i*2+1, xlength(cons), len0); + i*2+1); } if (TYPEOF(outs) != type0) { error("Argument #%d is of type %s, however argument #2 is of type %s. " @@ -224,7 +224,7 @@ SEXP fcaseR(SEXP args) { } len1 = xlength(outs); if (len1 != len0 && len1 != 1) { - error("Length of output value #%d is %lld but must 1 or length of logical condition (%lld).", i*2+1, len1, len0); + error("Length of output value #%d is must either 1 or length of logical condition.", i*2+1); } int64_t amask = len1>1 ? INT64_MAX : 0; switch(TYPEOF(outs)) { From 7c4bbe0dd21977c44b2761868cdea9d673fc8011 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 2 Nov 2019 16:37:39 +0000 Subject: [PATCH 05/15] NEWS file --- NEWS.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index e9d46d7f6b..cee540bfc1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,17 +6,20 @@ ## NEW FEATURES -1. New function `fcase(.,default)` has been implemented in C, [#3823](https://github.com/Rdatatable/data.table/issues/3823). It is comparable to `dplyr::case_when`. Please see `?data.table::fcase` for more details. +1. New function `fcase(...,default)` has been implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823). It is comparable to `dplyr::case_when`. Please see `?data.table::fcase` for more details. ```R -# default 4 threads on a laptop with 16GB RAM and 8 logical CPU -x = sample(1L:10L, 3e8, replace=TRUE) # 1GB +x = sample(1L:10L, 3e7, replace=TRUE) # 114Mb microbenchmark::microbenchmark( dplyr::case_when(x < 5L ~ 1L, x > 5L ~ 3L), - data.table::fcase((x < 5L, 1L, x > 5L, 3L)), - times = 5L, + data.table::fcase(x < 5L, 1L, x > 5L, 3L), + times = 10L, unit = "s" ) +# Unit: seconds +# expr min lq mean median uq max neval +# dplyr::case_when(x < 5L ~ 1L, x > 5L ~ 3L) 4.14 4.17 4.24 4.25 4.31 4.35 10 +# data.table::fcase(x < 5L, 1L, x > 5L, 3L) 0.55 0.60 0.63 0.61 0.62 0.80 10 ``` ## BUG FIXES From f8d05316438b4dbcf2334ecb5273731dce1b6539 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 3 Nov 2019 11:26:45 +0800 Subject: [PATCH 06/15] spacing --- R/wrappers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/wrappers.R b/R/wrappers.R index b581ea589d..597e803409 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -6,7 +6,7 @@ fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE) setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) -fcase = function(...,default=NA) .External(CfcaseR, default,...) +fcase = function(..., default=NA) .External(CfcaseR, default, ...) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) coerceFill = function(x) .Call(CcoerceFillR, x) From 278142b4ef70d9b2c7baad86b6d21c7af252cc94 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 3 Nov 2019 11:29:24 +0800 Subject: [PATCH 07/15] grammar/wording --- man/fcase.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/fcase.Rd b/man/fcase.Rd index b086cb9d51..74d1794a53 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -2,7 +2,7 @@ \alias{fcase} \title{fcase} \description{ -\code{fcase} is fast implementation of SQL \code{CASE WHEN} statement for R. This function is a nested version of \code{fifelse}. It is comparable to \code{dplyr::case_when} and supports \code{bit64}'s \code{integer64} and \code{nanotime} classes. +\code{fcase} is a fast implementation of SQL \code{CASE WHEN} statement for R. Conceptually, \code{fcase} is a nested version of \code{\link{fifelse}} (with smarter implementation than manual nesting). It is comparable to \code{dplyr::case_when} and supports \code{bit64}'s \code{integer64} and \code{nanotime} classes. } \usage{ fcase(..., default=NA) From 14dc0621d05b0b9829d4c4d9c23c2943d2ce104f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 3 Nov 2019 11:35:53 +0800 Subject: [PATCH 08/15] more grammar/exposition in man page --- man/fcase.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/fcase.Rd b/man/fcase.Rd index 74d1794a53..feb163d651 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -8,11 +8,11 @@ fcase(..., default=NA) } \arguments{ -\item{...}{A sequence consisting of logical condition \code{when}, resulting value \code{value} pairs in the following order \code{when1, value1, when2,value2,...,whenN,valueN}. Logical conditions \code{when1,when2,...whenN} must all have the same length, type and attributes. The length of \code{value} may either be of length of \code{when} or 1. Please see examples section for further details.} -\item{default}{Default value is \code{NA}. This is the value to return if none of the logical condition \code{when1,when2,...whenN} is not \code{TRUE}.} +\item{...}{ A sequence consisting of logical condition (\code{when})-resulting value (\code{value}) \emph{pairs} in the following order \code{when1, value1, when2, value2, ..., whenN, valueN}. Logical conditions \code{when1, when2, ..., whenN} must all have the same length, type and attributes. Each \code{value} may either share length with \code{when} or be length 1. Please see Examples section for further details.} +\item{default}{ Default return value, \code{NA} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} for some entries. } } \value{ - It returns a value with the same length as logical conditions in \code{...} filled with corresponding resulting value from \code{...} or eventually \code{default}. Attributes of logical conditions in \code{...} are preserved. + Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with corresponding resulting value from \code{...}, or eventually \code{default}. Attributes of logical conditions in \code{...} are preserved. } \seealso{ \code{\link{fifelse}} From 9375930f12ff74364f2b513f54401c6960e40f2d Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 3 Nov 2019 09:21:17 +0000 Subject: [PATCH 09/15] some corrections --- NEWS.md | 31 +++++++++++++++++++++---------- man/fcase.Rd | 2 +- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index cee540bfc1..ebfa36b42b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,20 +6,31 @@ ## NEW FEATURES -1. New function `fcase(...,default)` has been implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823). It is comparable to `dplyr::case_when`. Please see `?data.table::fcase` for more details. +1. New function `fcase(...,default)` implemented in C, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when`. Please see `?data.table::fcase` for more details. ```R -x = sample(1L:10L, 3e7, replace=TRUE) # 114Mb +x = sample(1:100, 3e8, replace = TRUE) # 1 GB microbenchmark::microbenchmark( - dplyr::case_when(x < 5L ~ 1L, x > 5L ~ 3L), - data.table::fcase(x < 5L, 1L, x > 5L, 3L), - times = 10L, - unit = "s" +dplyr::case_when( + x < 10L ~ 0L, + x < 20L ~ 10L, + x < 30L ~ 20L, + x < 40L ~ 30L, + x < 50L ~ 40L, + x < 60L ~ 50L, + x > 60L ~ 60L ) -# Unit: seconds -# expr min lq mean median uq max neval -# dplyr::case_when(x < 5L ~ 1L, x > 5L ~ 3L) 4.14 4.17 4.24 4.25 4.31 4.35 10 -# data.table::fcase(x < 5L, 1L, x > 5L, 3L) 0.55 0.60 0.63 0.61 0.62 0.80 10 +data.table::fcase( + x < 10L, 0L, + x < 20L, 10L, + x < 30L, 20L, + x < 40L, 30L, + x < 50L, 40L, + x < 60L, 50L, + x > 60L, 60L +), +times = 5L, +unit ="s") ``` ## BUG FIXES diff --git a/man/fcase.Rd b/man/fcase.Rd index feb163d651..32818a870a 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -12,7 +12,7 @@ \item{default}{ Default return value, \code{NA} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} for some entries. } } \value{ - Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with corresponding resulting value from \code{...}, or eventually \code{default}. Attributes of logical conditions in \code{...} are preserved. + Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with corresponding resulting value from \code{...}, or eventually \code{default}. Attributes of output values \code{value1, value2, ...valueN} in \code{...} are preserved. } \seealso{ \code{\link{fifelse}} From ee033f0e232020189fb9cb2cbc35e025241fc42a Mon Sep 17 00:00:00 2001 From: 2005m Date: Thu, 12 Dec 2019 20:13:16 +0000 Subject: [PATCH 10/15] lazy --- NEWS.md | 10 +++++++--- R/wrappers.R | 2 +- inst/tests/tests.Rraw | 11 ++++++----- man/fcase.Rd | 4 +++- src/data.table.h | 2 +- src/fifelse.c | 26 ++++++++++++++------------ src/init.c | 4 ++-- 7 files changed, 34 insertions(+), 25 deletions(-) diff --git a/NEWS.md b/NEWS.md index ebfa36b42b..9d7da7d043 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,10 +6,10 @@ ## NEW FEATURES -1. New function `fcase(...,default)` implemented in C, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when`. Please see `?data.table::fcase` for more details. +1. New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when`. Please see `?data.table::fcase` for more details. ```R -x = sample(1:100, 3e8, replace = TRUE) # 1 GB +x = sample(1:100, 3e7, replace = TRUE) # 114 MB microbenchmark::microbenchmark( dplyr::case_when( x < 10L ~ 0L, @@ -19,7 +19,7 @@ dplyr::case_when( x < 50L ~ 40L, x < 60L ~ 50L, x > 60L ~ 60L -) +), data.table::fcase( x < 10L, 0L, x < 20L, 10L, @@ -31,6 +31,10 @@ data.table::fcase( ), times = 5L, unit ="s") +# Unit: seconds +# expr min lq mean median uq max neval +# dplyr::case_when 11.57 11.71 12.22 11.82 12.00 14.02 5 +# data.table::fcase 1.49 1.55 1.67 1.71 1.73 1.86 5 ``` ## BUG FIXES diff --git a/R/wrappers.R b/R/wrappers.R index 597e803409..94fd0fa895 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -6,7 +6,7 @@ fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE) setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) -fcase = function(..., default=NA) .External(CfcaseR, default, ...) +fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(...()))) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) coerceFill = function(x) .Call(CcoerceFillR, x) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6fdf966729..39e5473e05 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16400,7 +16400,7 @@ test(2121.16, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, fact test(2121.17, fcase(test_vec1, as.raw(1), test_vec2, as.raw(0)), error="Type raw is not supported.") test(2121.18, fcase(test_vec1, factor("a", levels=letters[1]), test_vec2, factor("b", levels=letters[1:3])), error="Argument #2 and argument #4 are both factor but their levels are different.") test(2121.19, fcase(test_vec1, factor("a", levels=letters[1:2]), test_vec2, factor("b", levels=letters[1:2]),default=factor("c", levels=letters[1:3])), error="Resulting value and 'default' are both type factor but their levels are different.") -test(2121.20, fcase(test_vec1, 1L:10L, test_vec2, 3L:12L, test_vec2), error="Please supply an even number of arguments in ..., consisting of logical condition, resulting value pairs (in that order); received 6 inputs (including 'default' argument).") +test(2121.20, fcase(test_vec1, 1L:10L, test_vec2, 3L:12L, test_vec2), error="Please supply an even number of arguments in ..., consisting of logical condition, resulting value pairs (in that order); received 5 inputs.") test(2121.21, fcase(test_vec1, 1L, test_vec2, 3), error="Argument #4 is of type double, however argument #2 is of type integer. Please make sure all output values have the same type.") test(2121.22, fcase(test_vec1, "FALSE", test_vec2, TRUE), error="Argument #4 is of type logical, however argument #2 is of type character. Please make sure all output values have the same type.") test(2121.23, fcase(test_vec1, "FALSE", test_vec2, 5L), error="Argument #4 is of type integer, however argument #2 is of type character. Please make sure all output values have the same type.") @@ -16436,8 +16436,8 @@ test(2121.46, fcase(TRUE, list(data.table(1:5)), FALSE, list(data.table(5:1))), test(2121.47, fcase(FALSE, list(data.table(1:5)), TRUE, list(data.table(5:1))), list(data.table(5:1))) test(2121.48, fcase(TRUE, list(data.frame(1:5)), FALSE, list(data.frame(5:1))), list(data.frame(1:5))) test(2121.49, fcase(FALSE, list(data.frame(1:5)), TRUE, list(data.frame(5:1))), list(data.frame(5:1))) -test(2121.50, fcase(1L,1L,TRUE,0L),error = "Argument #1 must be logical.") -test(2121.51, fcase(TRUE,1L,5L,0L),error = "Argument #3 must be logical.") +test(2121.50, fcase(1L,1L,TRUE,0L), error = "Argument #1 must be logical.") +test(2121.51, fcase(TRUE,1L,5L,0L), 1L) test(2121.52, fcase(test_vec1, 1L, test_vec2, 0L, test_vec3, 2L), as.integer(out_vec_def)) test(2121.53, fcase(test_vec1, 1, test_vec2, 0, test_vec3, 2), out_vec_def) test(2121.54, fcase(test_vec1, "1", test_vec2, "0", test_vec3, "2"), as.character(out_vec_def)) @@ -16455,9 +16455,10 @@ test(2121.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, test(2121.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) test(2121.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) test(2121.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.") -test(2121.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument. Please make sure all logical conditions have the same length.") +test(2121.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument #1. Please make sure all logical conditions have the same length.") test(2121.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") -test(2121.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #3 is must either 1 or length of logical condition.") +test(2121.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 must either be 1 or length of logical condition.") +test(2121.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L) ################################### # Add new tests above this line # diff --git a/man/fcase.Rd b/man/fcase.Rd index 32818a870a..8b2bc9b632 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -36,7 +36,9 @@ data.table::fcase( x < 5L, as.Date("2019-10-11"), x > 5L, as.Date("2019-10-14") ) -# [1] "2019-10-11" "2019-10-11" "2019-10-11" "2019-10-11" NA "2019-10-14" "2019-10-14" "2019-10-14" "2019-10-14" "2019-10-14" +# [1] "2019-10-11" "2019-10-11" "2019-10-11" +# [4] "2019-10-11" NA "2019-10-14" "2019-10-14" +# [8] "2019-10-14" "2019-10-14" "2019-10-14" # fcase example with factor data.table::fcase( diff --git a/src/data.table.h b/src/data.table.h index a9fee23e26..1643fe57ee 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -235,4 +235,4 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k); //fifelse.c SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); -SEXP fcaseR(SEXP args); +SEXP fcaseR(SEXP na, SEXP rho, SEXP args); diff --git a/src/fifelse.c b/src/fifelse.c index c453a2b4c8..71993b59bb 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -141,26 +141,27 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { return ans; } -SEXP fcaseR(SEXP args) { - int n=length(args)-2; +SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { + int n=length(args); if (n % 2) { error("Please supply an even number of arguments in ..., consisting of logical condition," - " resulting value pairs (in that order); received %d inputs (including 'default' argument).", n+1); + " resulting value pairs (in that order); received %d inputs.", n); } - args = CDR(args); - SEXP na = CAR(args); args = CDR(args); int nprotect = 0, l = 0; int64_t len0=0, len1=0, len2=0, idx=0; - SEXP ans = R_NilValue, value0 = R_NilValue, tracker = R_NilValue; + SEXP ans = R_NilValue, value0 = R_NilValue, tracker = R_NilValue, cons = R_NilValue, outs = R_NilValue; + PROTECT_INDEX Icons, Iouts; + PROTECT_WITH_INDEX(cons, &Icons); nprotect++; + PROTECT_WITH_INDEX(outs, &Iouts); nprotect++; SEXPTYPE type0; bool nonna = !isNull(na), imask = true; int *restrict p = NULL; n = n/2; - for (int i=0; i1 ? INT64_MAX : 0; switch(TYPEOF(outs)) { @@ -314,6 +315,7 @@ SEXP fcaseR(SEXP args) { default: error("Type %s is not supported.", type2char(TYPEOF(outs))); } + if(0 == l) break; len2 = l; } UNPROTECT(nprotect); diff --git a/src/init.c b/src/init.c index 1c645870e0..8d4b7c3d1f 100644 --- a/src/init.c +++ b/src/init.c @@ -19,6 +19,7 @@ SEXP chmatch_R(); SEXP chmatchdup_R(); SEXP chin_R(); SEXP fifelseR(); +SEXP fcaseR(); SEXP freadR(); SEXP fwriteR(); SEXP reorder(); @@ -88,7 +89,6 @@ SEXP allNAR(); // .Externals SEXP fastmean(); -SEXP fcaseR(); static const R_CallMethodDef callMethods[] = { @@ -171,6 +171,7 @@ R_CallMethodDef callMethods[] = { {"Ccj", (DL_FUNC) &cj, -1}, {"Ccoalesce", (DL_FUNC) &coalesce, -1}, {"CfifelseR", (DL_FUNC) &fifelseR, -1}, +{"CfcaseR", (DL_FUNC) &fcaseR, -1}, {"C_lock", (DL_FUNC) &lock, -1}, // _ for these 3 to avoid Clock as in time {"C_unlock", (DL_FUNC) &unlock, -1}, {"C_islocked", (DL_FUNC) &islockedR, -1}, @@ -183,7 +184,6 @@ R_CallMethodDef callMethods[] = { static const R_ExternalMethodDef externalMethods[] = { {"Cfastmean", (DL_FUNC) &fastmean, -1}, -{"CfcaseR", (DL_FUNC) &fcaseR, -1}, {NULL, NULL, 0} }; From 8b408978c074e4761e15f93395cb7f191b8474cb Mon Sep 17 00:00:00 2001 From: 2005m Date: Fri, 13 Dec 2019 06:22:36 +0000 Subject: [PATCH 11/15] corrections --- NEWS.md | 22 ++++++- inst/tests/tests.Rraw | 144 +++++++++++++++++++++--------------------- man/fcase.Rd | 17 +++-- src/fifelse.c | 18 +++--- src/init.c | 3 - 5 files changed, 109 insertions(+), 95 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9d7da7d043..99d7e43ef4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,9 +6,27 @@ ## NEW FEATURES -1. New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when`. Please see `?data.table::fcase` for more details. +1. New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when` however it evaluates its arguments in a lazy way (i.e. only when needed) as shown below. Please see `?data.table::fcase` for more details. ```R +# Lazy evaluation +x = 1:10 +data.table::fcase( + x < 5L, 1L, + x >= 5L, 3L, + x == 5L, stop("provided value is an unexpected one!") +) +# [1] 1 1 1 1 3 3 3 3 3 3 + +dplyr::case_when( + x < 5L ~ 1L, + x >= 5L ~ 3L, + x == 5L ~ stop("provided value is an unexpected one!") +) +# Error in eval_tidy(pair$rhs, env = default_env) : +# provided value is an unexpected one! + +# Benchmark x = sample(1:100, 3e7, replace = TRUE) # 114 MB microbenchmark::microbenchmark( dplyr::case_when( @@ -30,7 +48,7 @@ data.table::fcase( x > 60L, 60L ), times = 5L, -unit ="s") +unit = "s") # Unit: seconds # expr min lq mean median uq max neval # dplyr::case_when 11.57 11.71 12.22 11.82 12.00 14.02 5 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 39e5473e05..929fa75eca 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16381,84 +16381,84 @@ out_vec_def = c(1,1,1,1,1,2,0,0,0,0,0) out_vec_na= c(1,1,1,1,1,NA,0,0,0,0,0,NA) out_vec_oc= c(1,1,1,1,1,NA,NA,NA,NA,NA,NA) -test(2121.01, fcase(test_vec1, 1L, test_vec2, 0L), as.integer(out_vec)) -test(2121.02, fcase(test_vec1, 1, test_vec2, 0), out_vec) -test(2121.03, fcase(test_vec1, "1", test_vec2, "0"), as.character(out_vec)) -test(2121.04, fcase(test_vec1, TRUE, test_vec2, FALSE), as.logical(out_vec)) -test(2121.05, fcase(test_vec1, 1+0i, test_vec2, 0+0i), as.complex(out_vec)) -test(2121.06, fcase(test_vec1, list(1), test_vec2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) -test(2121.07, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14")), c(rep(as.Date("2019-10-11"),5),NA,rep(as.Date("2019-10-14"),5))) -test(2121.08, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3])), factor(c(rep("a",5),NA,rep("b",5)), levels=letters[1:3])) -test(2121.09, fcase(test_vec1, 1L, test_vec2, 0L, default=2L), as.integer(out_vec_def)) -test(2121.10, fcase(test_vec1, 1, test_vec2, 0,default=2), out_vec_def) -test(2121.11, fcase(test_vec1, "1", test_vec2, "0", default ="2"), as.character(out_vec_def)) -test(2121.12, fcase(test_vec1, TRUE, test_vec2, FALSE, default=TRUE), as.logical(out_vec_def)) -test(2121.13, fcase(test_vec1, 1+0i, test_vec2, 0+0i, default=2+0i), as.complex(out_vec_def)) -test(2121.14, fcase(test_vec1, list(1), test_vec2, list(0),default=list(2)), list(1,1,1,1,1, 2, 0, 0, 0, 0, 0)) -test(2121.15, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default=as.Date("2019-10-15")), c(rep(as.Date("2019-10-11"),5),as.Date("2019-10-15"),rep(as.Date("2019-10-14"),5))) -test(2121.16, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3]),default=factor("c", levels=letters[1:3])), factor(c(rep("a",5),"c",rep("b",5)), levels=letters[1:3])) -test(2121.17, fcase(test_vec1, as.raw(1), test_vec2, as.raw(0)), error="Type raw is not supported.") -test(2121.18, fcase(test_vec1, factor("a", levels=letters[1]), test_vec2, factor("b", levels=letters[1:3])), error="Argument #2 and argument #4 are both factor but their levels are different.") -test(2121.19, fcase(test_vec1, factor("a", levels=letters[1:2]), test_vec2, factor("b", levels=letters[1:2]),default=factor("c", levels=letters[1:3])), error="Resulting value and 'default' are both type factor but their levels are different.") -test(2121.20, fcase(test_vec1, 1L:10L, test_vec2, 3L:12L, test_vec2), error="Please supply an even number of arguments in ..., consisting of logical condition, resulting value pairs (in that order); received 5 inputs.") -test(2121.21, fcase(test_vec1, 1L, test_vec2, 3), error="Argument #4 is of type double, however argument #2 is of type integer. Please make sure all output values have the same type.") -test(2121.22, fcase(test_vec1, "FALSE", test_vec2, TRUE), error="Argument #4 is of type logical, however argument #2 is of type character. Please make sure all output values have the same type.") -test(2121.23, fcase(test_vec1, "FALSE", test_vec2, 5L), error="Argument #4 is of type integer, however argument #2 is of type character. Please make sure all output values have the same type.") -test(2121.24, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default="2019-10-15"), error="Resulting value is of type double but 'default' is of type character. Please make sure that both arguments have the same type.") -test(2121.25, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default=123), error="Resulting value has different class than 'default'. Please make sure that both arguments have the same class.") +test(2124.01, fcase(test_vec1, 1L, test_vec2, 0L), as.integer(out_vec)) +test(2124.02, fcase(test_vec1, 1, test_vec2, 0), out_vec) +test(2124.03, fcase(test_vec1, "1", test_vec2, "0"), as.character(out_vec)) +test(2124.04, fcase(test_vec1, TRUE, test_vec2, FALSE), as.logical(out_vec)) +test(2124.05, fcase(test_vec1, 1+0i, test_vec2, 0+0i), as.complex(out_vec)) +test(2124.06, fcase(test_vec1, list(1), test_vec2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) +test(2124.07, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14")), c(rep(as.Date("2019-10-11"),5),NA,rep(as.Date("2019-10-14"),5))) +test(2124.08, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3])), factor(c(rep("a",5),NA,rep("b",5)), levels=letters[1:3])) +test(2124.09, fcase(test_vec1, 1L, test_vec2, 0L, default=2L), as.integer(out_vec_def)) +test(2124.10, fcase(test_vec1, 1, test_vec2, 0,default=2), out_vec_def) +test(2124.11, fcase(test_vec1, "1", test_vec2, "0", default ="2"), as.character(out_vec_def)) +test(2124.12, fcase(test_vec1, TRUE, test_vec2, FALSE, default=TRUE), as.logical(out_vec_def)) +test(2124.13, fcase(test_vec1, 1+0i, test_vec2, 0+0i, default=2+0i), as.complex(out_vec_def)) +test(2124.14, fcase(test_vec1, list(1), test_vec2, list(0),default=list(2)), list(1,1,1,1,1, 2, 0, 0, 0, 0, 0)) +test(2124.15, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default=as.Date("2019-10-15")), c(rep(as.Date("2019-10-11"),5),as.Date("2019-10-15"),rep(as.Date("2019-10-14"),5))) +test(2124.16, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3]),default=factor("c", levels=letters[1:3])), factor(c(rep("a",5),"c",rep("b",5)), levels=letters[1:3])) +test(2124.17, fcase(test_vec1, as.raw(1), test_vec2, as.raw(0)), error="Type raw is not supported.") +test(2124.18, fcase(test_vec1, factor("a", levels=letters[1]), test_vec2, factor("b", levels=letters[1:3])), error="Argument #2 and argument #4 are both factor but their levels are different.") +test(2124.19, fcase(test_vec1, factor("a", levels=letters[1:2]), test_vec2, factor("b", levels=letters[1:2]),default=factor("c", levels=letters[1:3])), error="Resulting value and 'default' are both type factor but their levels are different.") +test(2124.20, fcase(test_vec1, 1L:10L, test_vec2, 3L:12L, test_vec2), error="Please supply an even number of arguments in ..., consisting of logical condition, resulting value pairs (in that order); received 5 inputs.") +test(2124.21, fcase(test_vec1, 1L, test_vec2, 3), error="Argument #4 is of type double, however argument #2 is of type integer. Please make sure all output values have the same type.") +test(2124.22, fcase(test_vec1, "FALSE", test_vec2, TRUE), error="Argument #4 is of type logical, however argument #2 is of type character. Please make sure all output values have the same type.") +test(2124.23, fcase(test_vec1, "FALSE", test_vec2, 5L), error="Argument #4 is of type integer, however argument #2 is of type character. Please make sure all output values have the same type.") +test(2124.24, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default="2019-10-15"), error="Resulting value is of type double but 'default' is of type character. Please make sure that both arguments have the same type.") +test(2124.25, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"),default=123), error="Resulting value has different class than 'default'. Please make sure that both arguments have the same class.") if(test_bit64) { i=as.integer64(1:12)+3e9 - test(2121.26, fcase(test_vec_na1, i, test_vec_na2, i+100), c(i[1L:5L], as.integer64(NA),i[7L:12L]+100)) + test(2124.26, fcase(test_vec_na1, i, test_vec_na2, i+100), c(i[1L:5L], as.integer64(NA),i[7L:12L]+100)) } if(test_nanotime) { n=nanotime(1:12) - test(2121.27, fcase(test_vec_na1, n, test_vec_na2, n+100), c(n[1L:5L], nanotime(NA),n[7L:12L]+100)) + test(2124.27, fcase(test_vec_na1, n, test_vec_na2, n+100), c(n[1L:5L], nanotime(NA),n[7L:12L]+100)) } -test(2121.28, fcase(test_vec1, rep(1L,11L), test_vec2, rep(0L,11L)), as.integer(out_vec)) -test(2121.29, fcase(test_vec1, rep(1,11L), test_vec2, rep(0,11L)), out_vec) -test(2121.30, fcase(test_vec1, rep("1",11L), test_vec2, rep("0",11L)), as.character(out_vec)) -test(2121.31, fcase(test_vec1, rep(TRUE,11L), test_vec2, rep(FALSE,11L)), as.logical(out_vec)) -test(2121.32, fcase(test_vec1, rep(1+0i,11L), test_vec2, rep(0+0i,11L)), as.complex(out_vec)) -test(2121.33, fcase(test_vec1, rep(list(1),11L), test_vec2, rep(list(0),11L)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) -test(2121.34, fcase(test_vec1, rep(as.Date("2019-10-11"),11L), test_vec2, rep(as.Date("2019-10-14"),11L)), c(rep(as.Date("2019-10-11"),5),NA,rep(as.Date("2019-10-14"),5))) -test(2121.35, fcase(test_vec1, rep(factor("a", levels=letters[1:3]),11L), test_vec2, rep(factor("b", levels=letters[1:3]),11L)), factor(c(rep("a",5),NA,rep("b",5)), levels=letters[1:3])) -test(2121.36, fcase(test_vec_na1, 1L, test_vec_na2, 0L), as.integer(out_vec_na)) -test(2121.37, fcase(test_vec_na1, 1, test_vec_na2, 0), out_vec_na) -test(2121.38, fcase(test_vec_na1, "1", test_vec_na2, "0"), as.character(out_vec_na)) -test(2121.39, fcase(test_vec_na1, TRUE, test_vec_na2, FALSE), as.logical(out_vec_na)) -test(2121.40, fcase(test_vec_na1, 1+0i, test_vec_na2, 0+0i), as.complex(out_vec_na)) -test(2121.41, fcase(test_vec_na1, list(1), test_vec_na2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0,NULL)) -test(2121.42, fcase(c(TRUE,TRUE,TRUE,FALSE,FALSE),factor(NA,levels=letters[1:5]),c(FALSE,FALSE,FALSE,TRUE,TRUE),factor(letters[1:5])),factor(c(NA,NA,NA,"d","e"),levels=letters[1:5])) -test(2121.43, fcase(c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(NA,levels=letters[1:6]),c(FALSE,FALSE,FALSE,TRUE,NA,TRUE),factor(letters[1:6])),factor(c(NA,NA,NA,"d",NA,"f"),levels=letters[1:6])) -test(2121.44, fcase(c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(letters[1:6]),c(FALSE,FALSE,FALSE,TRUE,NA,TRUE),factor(NA,levels = letters[1:6])),factor(c("a","b","c",NA,NA,NA),levels=letters[1:6])) -test(2121.45, fcase(c(TRUE,NA,TRUE,FALSE,FALSE,FALSE),factor(NA),c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(NA)),factor(c(NA,NA,NA,NA,NA,NA))) -test(2121.46, fcase(TRUE, list(data.table(1:5)), FALSE, list(data.table(5:1))), list(data.table(1:5))) -test(2121.47, fcase(FALSE, list(data.table(1:5)), TRUE, list(data.table(5:1))), list(data.table(5:1))) -test(2121.48, fcase(TRUE, list(data.frame(1:5)), FALSE, list(data.frame(5:1))), list(data.frame(1:5))) -test(2121.49, fcase(FALSE, list(data.frame(1:5)), TRUE, list(data.frame(5:1))), list(data.frame(5:1))) -test(2121.50, fcase(1L,1L,TRUE,0L), error = "Argument #1 must be logical.") -test(2121.51, fcase(TRUE,1L,5L,0L), 1L) -test(2121.52, fcase(test_vec1, 1L, test_vec2, 0L, test_vec3, 2L), as.integer(out_vec_def)) -test(2121.53, fcase(test_vec1, 1, test_vec2, 0, test_vec3, 2), out_vec_def) -test(2121.54, fcase(test_vec1, "1", test_vec2, "0", test_vec3, "2"), as.character(out_vec_def)) -test(2121.55, fcase(test_vec1, TRUE, test_vec2, FALSE, test_vec3, TRUE), as.logical(out_vec_def)) -test(2121.56, fcase(test_vec1, 1+0i, test_vec2, 0+0i, test_vec3, 2+0i), as.complex(out_vec_def)) -test(2121.57, fcase(test_vec1, list(1), test_vec2, list(0), test_vec3, list(2)), list(1,1,1,1,1, 2, 0, 0, 0, 0, 0)) -test(2121.58, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"), test_vec3, as.Date("2019-10-15")), c(rep(as.Date("2019-10-11"),5),as.Date("2019-10-15"),rep(as.Date("2019-10-14"),5))) -test(2121.59, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3]), test_vec3, factor("c", levels=letters[1:3])), factor(c(rep("a",5),"c",rep("b",5)), levels=letters[1:3])) -test(2121.60, fcase(test_vec1, 1L), as.integer(out_vec_oc)) -test(2121.61, fcase(test_vec1, 1), out_vec_oc) -test(2121.62, fcase(test_vec1, "1"), as.character(out_vec_oc)) -test(2121.63, fcase(test_vec1, TRUE), as.logical(out_vec_oc)) -test(2121.64, fcase(test_vec1, 1+0i), as.complex(out_vec_oc)) -test(2121.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, NULL, NULL)) -test(2121.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) -test(2121.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) -test(2121.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.") -test(2121.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument #1. Please make sure all logical conditions have the same length.") -test(2121.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") -test(2121.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 must either be 1 or length of logical condition.") -test(2121.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L) +test(2124.28, fcase(test_vec1, rep(1L,11L), test_vec2, rep(0L,11L)), as.integer(out_vec)) +test(2124.29, fcase(test_vec1, rep(1,11L), test_vec2, rep(0,11L)), out_vec) +test(2124.30, fcase(test_vec1, rep("1",11L), test_vec2, rep("0",11L)), as.character(out_vec)) +test(2124.31, fcase(test_vec1, rep(TRUE,11L), test_vec2, rep(FALSE,11L)), as.logical(out_vec)) +test(2124.32, fcase(test_vec1, rep(1+0i,11L), test_vec2, rep(0+0i,11L)), as.complex(out_vec)) +test(2124.33, fcase(test_vec1, rep(list(1),11L), test_vec2, rep(list(0),11L)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) +test(2124.34, fcase(test_vec1, rep(as.Date("2019-10-11"),11L), test_vec2, rep(as.Date("2019-10-14"),11L)), c(rep(as.Date("2019-10-11"),5),NA,rep(as.Date("2019-10-14"),5))) +test(2124.35, fcase(test_vec1, rep(factor("a", levels=letters[1:3]),11L), test_vec2, rep(factor("b", levels=letters[1:3]),11L)), factor(c(rep("a",5),NA,rep("b",5)), levels=letters[1:3])) +test(2124.36, fcase(test_vec_na1, 1L, test_vec_na2, 0L), as.integer(out_vec_na)) +test(2124.37, fcase(test_vec_na1, 1, test_vec_na2, 0), out_vec_na) +test(2124.38, fcase(test_vec_na1, "1", test_vec_na2, "0"), as.character(out_vec_na)) +test(2124.39, fcase(test_vec_na1, TRUE, test_vec_na2, FALSE), as.logical(out_vec_na)) +test(2124.40, fcase(test_vec_na1, 1+0i, test_vec_na2, 0+0i), as.complex(out_vec_na)) +test(2124.41, fcase(test_vec_na1, list(1), test_vec_na2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0,NULL)) +test(2124.42, fcase(c(TRUE,TRUE,TRUE,FALSE,FALSE),factor(NA,levels=letters[1:5]),c(FALSE,FALSE,FALSE,TRUE,TRUE),factor(letters[1:5])),factor(c(NA,NA,NA,"d","e"),levels=letters[1:5])) +test(2124.43, fcase(c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(NA,levels=letters[1:6]),c(FALSE,FALSE,FALSE,TRUE,NA,TRUE),factor(letters[1:6])),factor(c(NA,NA,NA,"d",NA,"f"),levels=letters[1:6])) +test(2124.44, fcase(c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(letters[1:6]),c(FALSE,FALSE,FALSE,TRUE,NA,TRUE),factor(NA,levels = letters[1:6])),factor(c("a","b","c",NA,NA,NA),levels=letters[1:6])) +test(2124.45, fcase(c(TRUE,NA,TRUE,FALSE,FALSE,FALSE),factor(NA),c(TRUE,TRUE,TRUE,FALSE,NA,FALSE),factor(NA)),factor(c(NA,NA,NA,NA,NA,NA))) +test(2124.46, fcase(TRUE, list(data.table(1:5)), FALSE, list(data.table(5:1))), list(data.table(1:5))) +test(2124.47, fcase(FALSE, list(data.table(1:5)), TRUE, list(data.table(5:1))), list(data.table(5:1))) +test(2124.48, fcase(TRUE, list(data.frame(1:5)), FALSE, list(data.frame(5:1))), list(data.frame(1:5))) +test(2124.49, fcase(FALSE, list(data.frame(1:5)), TRUE, list(data.frame(5:1))), list(data.frame(5:1))) +test(2124.50, fcase(1L,1L,TRUE,0L), error = "Argument #1 must be logical.") +test(2124.51, fcase(TRUE,1L,5L,0L), 1L) +test(2124.52, fcase(test_vec1, 1L, test_vec2, 0L, test_vec3, 2L), as.integer(out_vec_def)) +test(2124.53, fcase(test_vec1, 1, test_vec2, 0, test_vec3, 2), out_vec_def) +test(2124.54, fcase(test_vec1, "1", test_vec2, "0", test_vec3, "2"), as.character(out_vec_def)) +test(2124.55, fcase(test_vec1, TRUE, test_vec2, FALSE, test_vec3, TRUE), as.logical(out_vec_def)) +test(2124.56, fcase(test_vec1, 1+0i, test_vec2, 0+0i, test_vec3, 2+0i), as.complex(out_vec_def)) +test(2124.57, fcase(test_vec1, list(1), test_vec2, list(0), test_vec3, list(2)), list(1,1,1,1,1, 2, 0, 0, 0, 0, 0)) +test(2124.58, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, as.Date("2019-10-14"), test_vec3, as.Date("2019-10-15")), c(rep(as.Date("2019-10-11"),5),as.Date("2019-10-15"),rep(as.Date("2019-10-14"),5))) +test(2124.59, fcase(test_vec1, factor("a", levels=letters[1:3]), test_vec2, factor("b", levels=letters[1:3]), test_vec3, factor("c", levels=letters[1:3])), factor(c(rep("a",5),"c",rep("b",5)), levels=letters[1:3])) +test(2124.60, fcase(test_vec1, 1L), as.integer(out_vec_oc)) +test(2124.61, fcase(test_vec1, 1), out_vec_oc) +test(2124.62, fcase(test_vec1, "1"), as.character(out_vec_oc)) +test(2124.63, fcase(test_vec1, TRUE), as.logical(out_vec_oc)) +test(2124.64, fcase(test_vec1, 1+0i), as.complex(out_vec_oc)) +test(2124.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, NULL, NULL)) +test(2124.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) +test(2124.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) +test(2124.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.") +test(2124.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument #1. Please make sure all logical conditions have the same length.") +test(2124.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") +test(2124.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 must either be 1 or length of logical condition.") +test(2124.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L) ################################### # Add new tests above this line # diff --git a/man/fcase.Rd b/man/fcase.Rd index 8b2bc9b632..f866d1560c 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -18,35 +18,35 @@ \code{\link{fifelse}} } \examples{ -x = 1:10 +x = 1:10 data.table::fcase( x < 5L, 1L, x > 5L, 3L ) -# [1] 1 1 1 1 NA 3 3 3 3 3 data.table::fcase( x < 5L, 1L:10L, x > 5L, 3L:12L ) -# [1] 1 2 3 4 NA 8 9 10 11 12 + +# Lazy evaluation example +data.table::fcase( + x < 5L, 1L, + x >= 5L, 3L, + x == 5L, stop("provided value is an unexpected one!") +) # fcase preserves attributes, example with dates data.table::fcase( x < 5L, as.Date("2019-10-11"), x > 5L, as.Date("2019-10-14") ) -# [1] "2019-10-11" "2019-10-11" "2019-10-11" -# [4] "2019-10-11" NA "2019-10-14" "2019-10-14" -# [8] "2019-10-14" "2019-10-14" "2019-10-14" # fcase example with factor data.table::fcase( x < 5L, factor("a", levels=letters[1:3]), x > 5L, factor("b", levels=letters[1:3]) ) -# [1] a a a a b b b b b -# Levels: a b c # Example of using the 'default' arguement data.table::fcase( @@ -54,6 +54,5 @@ data.table::fcase( x > 5L, 3L, default = 5L ) -# [1] 1 1 1 1 5 3 3 3 3 3 } \keyword{ data } diff --git a/src/fifelse.c b/src/fifelse.c index 71993b59bb..ea701475bf 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -161,7 +161,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { REPROTECT(cons = eval(VECTOR_PTR(args)[2*i], rho), Icons); REPROTECT(outs = eval(VECTOR_PTR(args)[2*i+1], rho), Iouts); if (!isLogical(cons)) { - error("Argument #%d must be logical.",2*i+1); + error("Argument #%d must be logical.", 2*i+1); } const int *restrict pcons = LOGICAL(cons); if (i == 0) { @@ -213,7 +213,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { } if (!R_compute_identical(PROTECT(getAttrib(value0,R_ClassSymbol)), PROTECT(getAttrib(outs,R_ClassSymbol)), 0)) { error("Argument #%d has different class than argument #2, " - "Please make sure all output values have the same class.",i*2+2); + "Please make sure all output values have the same class.", i*2+2); } UNPROTECT(2); if (isFactor(value0)) { @@ -238,7 +238,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if(imask) pans[j] = pna; + if (imask) pans[j] = pna; p[l++] = idx; } } @@ -252,7 +252,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if(imask) pans[j] = pna; + if (imask) pans[j] = pna; p[l++] = idx; } } @@ -267,7 +267,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if(imask) pans[j] = pna; + if (imask) pans[j] = pna; p[l++] = idx; } } @@ -281,7 +281,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if(imask) pans[j] = pna; + if (imask) pans[j] = pna; p[l++] = idx; } } @@ -294,7 +294,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { SET_STRING_ELT(ans, idx, pouts[idx & amask]); } else { - if(imask) SET_STRING_ELT(ans, idx, pna); + if (imask) SET_STRING_ELT(ans, idx, pna); p[l++] = idx; } } @@ -307,7 +307,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { SET_VECTOR_ELT(ans, idx, pouts[idx & amask]); } else { - if(imask && nonna) SET_VECTOR_ELT(ans, idx, pna); + if (imask && nonna) SET_VECTOR_ELT(ans, idx, pna); p[l++] = idx; } } @@ -315,7 +315,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { default: error("Type %s is not supported.", type2char(TYPEOF(outs))); } - if(0 == l) break; + if (0 == l) break; len2 = l; } UNPROTECT(nprotect); diff --git a/src/init.c b/src/init.c index 8d4b7c3d1f..430f7db1fc 100644 --- a/src/init.c +++ b/src/init.c @@ -311,9 +311,6 @@ void attribute_visible R_init_datatable(DllInfo *info) initDTthreads(); avoid_openmp_hang_within_fork(); - - R_RegisterCCallable("data.table", "CfifelseR", (DL_FUNC) &fifelseR); - R_RegisterCCallable("data.table", "CfcaseR", (DL_FUNC) &fcaseR); } inline long long DtoLL(double x) { From 37c467cbf3ede0ce1e640048e276923a944291d2 Mon Sep 17 00:00:00 2001 From: 2005m Date: Fri, 13 Dec 2019 13:41:20 +0000 Subject: [PATCH 12/15] adjustments --- NEWS.md | 2 +- man/fcase.Rd | 12 ++++++------ src/fifelse.c | 28 +++++++++++++++++++++------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1c1e0b1a3f..221ec2660d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,7 @@ ## NEW FEATURES -1. New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when` however it evaluates its arguments in a lazy way (i.e. only when needed) as shown below. Please see `?data.table::fcase` for more details. +1. New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when` however it evaluates its arguments in a lazy way (i.e. only when needed) as shown below. Please see `?fcase` for more details. ```R # Lazy evaluation diff --git a/man/fcase.Rd b/man/fcase.Rd index f866d1560c..6f82161516 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -19,37 +19,37 @@ } \examples{ x = 1:10 -data.table::fcase( +fcase( x < 5L, 1L, x > 5L, 3L ) -data.table::fcase( +fcase( x < 5L, 1L:10L, x > 5L, 3L:12L ) # Lazy evaluation example -data.table::fcase( +fcase( x < 5L, 1L, x >= 5L, 3L, x == 5L, stop("provided value is an unexpected one!") ) # fcase preserves attributes, example with dates -data.table::fcase( +fcase( x < 5L, as.Date("2019-10-11"), x > 5L, as.Date("2019-10-14") ) # fcase example with factor -data.table::fcase( +fcase( x < 5L, factor("a", levels=letters[1:3]), x > 5L, factor("b", levels=letters[1:3]) ) # Example of using the 'default' arguement -data.table::fcase( +fcase( x < 5L, 1L, x > 5L, 3L, default = 5L diff --git a/src/fifelse.c b/src/fifelse.c index 14b394b60b..69f7b46b12 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -238,7 +238,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if (imask) pans[j] = pna; + if (imask) { + pans[j] = pna; + } p[l++] = idx; } } @@ -252,7 +254,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if (imask) pans[j] = pna; + if (imask) { + pans[j] = pna; + } p[l++] = idx; } } @@ -267,7 +271,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if (imask) pans[j] = pna; + if (imask) { + pans[j] = pna; + } p[l++] = idx; } } @@ -281,7 +287,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { pans[idx] = pouts[idx & amask]; } else { - if (imask) pans[j] = pna; + if (imask) { + pans[j] = pna; + } p[l++] = idx; } } @@ -294,7 +302,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { SET_STRING_ELT(ans, idx, pouts[idx & amask]); } else { - if (imask) SET_STRING_ELT(ans, idx, pna); + if (imask) { + SET_STRING_ELT(ans, idx, pna); + } p[l++] = idx; } } @@ -307,7 +317,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { if (pcons[idx]==1) { SET_VECTOR_ELT(ans, idx, pouts[idx & amask]); } else { - if (imask && nonna) SET_VECTOR_ELT(ans, idx, pna); + if (imask && nonna) { + SET_VECTOR_ELT(ans, idx, pna); + } p[l++] = idx; } } @@ -315,7 +327,9 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { default: error("Type %s is not supported.", type2char(TYPEOF(outs))); } - if (0 == l) break; + if (l==0) { + break; + } len2 = l; } UNPROTECT(nprotect); From 4f1151ed1a4ca0302151d0614daed718380c0b2d Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 13 Dec 2019 19:38:06 +0530 Subject: [PATCH 13/15] avoid using ...() --- R/wrappers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/wrappers.R b/R/wrappers.R index 94fd0fa895..5fec33a92f 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -6,7 +6,7 @@ fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE) setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) -fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(...()))) +fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) coerceFill = function(x) .Call(CcoerceFillR, x) From db51a9a4ef698a96e09eb99e90e1bd6d720a90ea Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 18 Dec 2019 19:14:10 +0800 Subject: [PATCH 14/15] Update fcase.Rd --- man/fcase.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/fcase.Rd b/man/fcase.Rd index 6f82161516..e004e52a2d 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -12,7 +12,7 @@ \item{default}{ Default return value, \code{NA} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} for some entries. } } \value{ - Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with corresponding resulting value from \code{...}, or eventually \code{default}. Attributes of output values \code{value1, value2, ...valueN} in \code{...} are preserved. + Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with the corresponding values (\code{value}) from \code{...}, or eventually \code{default}. Attributes of output values \code{value1, value2, ...valueN} in \code{...} are preserved. } \seealso{ \code{\link{fifelse}} @@ -42,13 +42,13 @@ fcase( x > 5L, as.Date("2019-10-14") ) -# fcase example with factor +# fcase example with factor; note the matching levels fcase( x < 5L, factor("a", levels=letters[1:3]), x > 5L, factor("b", levels=letters[1:3]) ) -# Example of using the 'default' arguement +# Example of using the 'default' argument fcase( x < 5L, 1L, x > 5L, 3L, From 3c3ea404a7b309469d47bf91398dc295ea29cf1f Mon Sep 17 00:00:00 2001 From: 2005m Date: Wed, 18 Dec 2019 19:26:58 +0000 Subject: [PATCH 15/15] more tests+Rd --- inst/tests/tests.Rraw | 15 ++++++++++++++- man/fcase.Rd | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cd66e75f95..0ad1df762c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16649,7 +16649,20 @@ test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a test(2127.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") test(2127.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 must either be 1 or length of logical condition.") test(2127.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L) - +test(2127.73, fcase(test_vec1, 1L, test_vec2, 0:10), as.integer(c( 1, 1, 1, 1, 1, NA, 6, 7, 8, 9, 10))) +test(2127.74, fcase(test_vec1, 0:10, test_vec2, 0L), as.integer(c( 0, 1, 2, 3, 4, NA, 0, 0, 0, 0, 0))) +test(2127.75, fcase(test_vec1, 1, test_vec2, as.numeric(0:10)), as.numeric(c( 1, 1, 1, 1, 1, NA, 6, 7, 8, 9, 10))) +test(2127.76, fcase(test_vec1, as.numeric(0:10), test_vec2, 0), as.numeric(c( 0, 1, 2, 3, 4, NA, 0, 0, 0, 0, 0))) +test(2127.77, fcase(test_vec1, "1", test_vec2, as.character(0:10)), as.character(c( 1, 1, 1, 1, 1, NA, 6, 7, 8, 9, 10))) +test(2127.78, fcase(test_vec1, as.character(0:10), test_vec2, "0"), as.character(c( 0, 1, 2, 3, 4, NA, 0, 0, 0, 0, 0))) +test(2127.79, fcase(test_vec1, TRUE, test_vec2, rep(FALSE, 11L)), as.logical(out_vec)) +test(2127.80, fcase(test_vec1, rep(TRUE, 11L), test_vec2, FALSE), as.logical(out_vec)) +test(2127.81, fcase(test_vec1, 1+0i, test_vec2, rep(0+0i, 11L)), as.complex(out_vec)) +test(2127.82, fcase(test_vec1, rep(1+0i, 11L), test_vec2, 0+0i), as.complex(out_vec)) +test(2127.83, fcase(test_vec1, list(rep(1, 11L)), test_vec2, list(0)), list(rep(1, 11L),rep(1, 11L),rep(1, 11L),rep(1, 11L),rep(1, 11L), NULL, 0, 0, 0, 0, 0)) +test(2127.84, fcase(test_vec1, list(1), test_vec2, list(rep(0,11L))), list(1,1,1,1,1, NULL, rep(0,11L), rep(0,11L), rep(0,11L), rep(0,11L), rep(0,11L))) +test(2127.85, fcase(test_vec1, list(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), test_vec2, list(0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) +test(2127.86, fcase(test_vec1, list(1), test_vec2, list(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), list(1,1,1,1,1, NULL, 0, 0, 0, 0, 0)) ################################### # Add new tests above this line # diff --git a/man/fcase.Rd b/man/fcase.Rd index e004e52a2d..82e582ca43 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -9,7 +9,7 @@ } \arguments{ \item{...}{ A sequence consisting of logical condition (\code{when})-resulting value (\code{value}) \emph{pairs} in the following order \code{when1, value1, when2, value2, ..., whenN, valueN}. Logical conditions \code{when1, when2, ..., whenN} must all have the same length, type and attributes. Each \code{value} may either share length with \code{when} or be length 1. Please see Examples section for further details.} -\item{default}{ Default return value, \code{NA} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} for some entries. } +\item{default}{ Default return value, \code{NA} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} or missing for some entries. } } \value{ Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with the corresponding values (\code{value}) from \code{...}, or eventually \code{default}. Attributes of output values \code{value1, value2, ...valueN} in \code{...} are preserved.