From 196f420b50181b92036538776956ddf2c5b7a5a1 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sun, 24 May 2020 01:15:32 +0100 Subject: [PATCH 01/37] nafill rework for more robust coerce --- R/wrappers.R | 3 +- inst/tests/nafill.Rraw | 30 +++++++++------- src/data.table.h | 4 +-- src/init.c | 2 +- src/nafill.c | 78 ++++++++++++++++++++++++++-------------- src/utils.c | 82 ++++++++++++++---------------------------- 6 files changed, 101 insertions(+), 98 deletions(-) diff --git a/R/wrappers.R b/R/wrappers.R index 5fec33a92f..5a67dfbae5 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -9,6 +9,7 @@ fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) -coerceFill = function(x) .Call(CcoerceFillR, x) testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) + +coerceAs = function(x, as) .Call(CcoerceAsR, x, as) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 99a404b4d9..56ad6585ee 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -7,7 +7,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { test = data.table:::test INT = data.table:::INT colnamesInt = data.table:::colnamesInt - coerceFill = data.table:::coerceFill + coerceAs = data.table:::coerceAs } sugg = c( @@ -120,7 +120,7 @@ test(3.03, setnafill(x, "locf"), error="in-place update is supported only for li test(3.04, nafill(letters[1:5], fill=0), error="must be numeric type, or list/data.table") test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be numeric type, or list/data.table") test(3.06, nafill(x, fill=1:2), error="fill must be a vector of length 1") -test(3.07, nafill(x, fill="asd"), error="fill argument must be numeric") +#test(3.07, nafill(x, fill="asd"), error="fill argument must be numeric") # colnamesInt helper dt = data.table(a=1, b=2, d=3) @@ -160,32 +160,33 @@ if (test_bit64) { } options(old) -# coerceFill +# coerceAs if (test_bit64) { - test(6.01, coerceFill(1:2), error="fill argument must be length 1") - test(6.02, coerceFill("a"), error="fill argument must be numeric") + coerceFill = function(x) lapply(list(1L, 1.0, as.integer64(1)), coerceAs, x=x) + #test(6.01, coerceFill(1:2), error="fill argument must be length 1") + #test(6.02, coerceFill("a"), error="fill argument must be numeric") test(6.11, identical(coerceFill(NA), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.21, identical(coerceFill(3L), list(3L, 3, as.integer64(3)))) test(6.22, identical(coerceFill(0L), list(0L, 0, as.integer64(0)))) test(6.23, identical(coerceFill(NA_integer_), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.31, identical(coerceFill(as.integer64(3)), list(3L, 3, as.integer64(3)))) - test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003")))) + test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003"))), warning="memrecycle raises warning.*out-of-range") test(6.33, identical(coerceFill(as.integer64(0)), list(0L, 0, as.integer64(0)))) test(6.34, identical(coerceFill(as.integer64(NA)), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.41, identical(coerceFill(3), list(3L, 3, as.integer64(3)))) test(6.42, identical(coerceFill(0), list(0L, 0, as.integer64(0)))) test(6.43, identical(coerceFill(NA_real_), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.44, identical(coerceFill(NaN), list(NA_integer_, NaN, as.integer64(NA)))) - test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA)))) - test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA)))) - test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904")))) - test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA)))) + test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) + test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) + test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning=c("precision lost","precision lost")) + test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x))) - test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x))) - test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x))) + test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="memrecycle raises warning.*out-of-range") + test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="memrecycle raises warning.*out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) - test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649")))) + test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) } # nan argument to treat NaN as NA in nafill, #4020 @@ -203,3 +204,6 @@ test(7.07, setnafill(DT, fill=0, cols=1L), copy(DT)[ , a := ans1]) test(7.08, setnafill(DT, fill=0, nan=NaN), copy(DT)[ , c('a', 'b') := .(ans1, ans2)]) test(7.09, nafill(x, fill=0, nan=c(NA, NaN)), error="Argument 'nan' must be length 1") test(7.10, nafill(x, fill=0, nan=Inf), error="Argument 'nan' must be NA or NaN") + +# new tests for fill list +#TODO diff --git a/src/data.table.h b/src/data.table.h index 9be142086b..857ce08fc4 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -217,8 +217,6 @@ bool isRealReallyInt(SEXP x); SEXP isReallyReal(SEXP x); bool allNA(SEXP x, bool errorForBadType); SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); -void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill); -SEXP coerceFillR(SEXP fill); bool INHERITS(SEXP x, SEXP char_); bool Rinherits(SEXP x, SEXP char_); SEXP copyAsPlain(SEXP x); @@ -229,6 +227,8 @@ bool islocked(SEXP x); SEXP islockedR(SEXP x); bool need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); +void coerceAs(SEXP x, SEXP as, SEXP out); +SEXP coerceAsR(SEXP x, SEXP as); // types.c char *end(char *start); diff --git a/src/init.c b/src/init.c index aed2da3dbd..2fbf8fc174 100644 --- a/src/init.c +++ b/src/init.c @@ -199,7 +199,6 @@ R_CallMethodDef callMethods[] = { {"CdllVersion", (DL_FUNC) &dllVersion, -1}, {"CnafillR", (DL_FUNC) &nafillR, -1}, {"CcolnamesInt", (DL_FUNC) &colnamesInt, -1}, -{"CcoerceFillR", (DL_FUNC) &coerceFillR, -1}, {"CinitLastUpdated", (DL_FUNC) &initLastUpdated, -1}, {"Ccj", (DL_FUNC) &cj, -1}, {"Ccoalesce", (DL_FUNC) &coalesce, -1}, @@ -211,6 +210,7 @@ R_CallMethodDef callMethods[] = { {"CfrollapplyR", (DL_FUNC) &frollapplyR, -1}, {"CtestMsgR", (DL_FUNC) &testMsgR, -1}, {"C_allNAR", (DL_FUNC) &allNAR, -1}, +{"CcoerceAsR", (DL_FUNC) &coerceAsR, -1}, {NULL, NULL, 0} }; diff --git a/src/nafill.c b/src/nafill.c index eb4e5c0e20..d3df00af73 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -92,7 +92,15 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S if (!xlength(obj)) return(obj); + double tic=0.0; + if (verbose) + tic = omp_get_wtime(); + bool binplace = LOGICAL(inplace)[0]; + if (!IS_TRUE_OR_FALSE(nan_is_na_arg)) + error("nan_is_na must be TRUE or FALSE"); // # nocov + bool nan_is_na = LOGICAL(nan_is_na_arg)[0]; + SEXP x = R_NilValue; if (isVectorAtomic(obj)) { if (binplace) @@ -123,7 +131,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S for (R_len_t i=0; i1) num_threads(getDTthreads()) for (R_len_t i=0; iINT32_MAX || rfill<=INT32_MIN) ? NA_INTEGER : (int32_t)rfill; - dfill[0] = (double)rfill; - i64fill[0] = rfill; - } - } else { - double rfill = REAL(fill)[0]; - if (ISNAN(rfill)) { - // NA -> NA, NaN -> NaN - ifill[0] = NA_INTEGER; dfill[0] = rfill; i64fill[0] = NA_INTEGER64; - } else { - ifill[0] = (!R_FINITE(rfill) || rfill>INT32_MAX || rfill<=INT32_MIN) ? NA_INTEGER : (int32_t)rfill; - dfill[0] = rfill; - i64fill[0] = (!R_FINITE(rfill) || rfill>(double)INT64_MAX || rfill<=(double)INT64_MIN) ? NA_INTEGER64 : (int64_t)rfill; - } - } - } else if (isLogical(fill) && LOGICAL(fill)[0]==NA_LOGICAL) { - ifill[0] = NA_INTEGER; dfill[0] = NA_REAL; i64fill[0] = NA_INTEGER64; - } else { - error(_("%s: fill argument must be numeric"), __func__); - } -} -SEXP coerceFillR(SEXP fill) { - int protecti=0; - double dfill=NA_REAL; - int32_t ifill=NA_INTEGER; - int64_t i64fill=NA_INTEGER64; - coerceFill(fill, &dfill, &ifill, &i64fill); - SEXP ans = PROTECT(allocVector(VECSXP, 3)); protecti++; - SET_VECTOR_ELT(ans, 0, allocVector(INTSXP, 1)); - SET_VECTOR_ELT(ans, 1, allocVector(REALSXP, 1)); - SET_VECTOR_ELT(ans, 2, allocVector(REALSXP, 1)); - INTEGER(VECTOR_ELT(ans, 0))[0] = ifill; - REAL(VECTOR_ELT(ans, 1))[0] = dfill; - ((int64_t *)REAL(VECTOR_ELT(ans, 2)))[0] = i64fill; - setAttrib(VECTOR_ELT(ans, 2), R_ClassSymbol, ScalarString(char_integer64)); - UNPROTECT(protecti); - return ans; -} - inline bool INHERITS(SEXP x, SEXP char_) { // Thread safe inherits() by pre-calling install() in init.c and then // passing those char_* in here for simple and fast non-API pointer compare. @@ -363,3 +308,30 @@ SEXP coerceUtf8IfNeeded(SEXP x) { return(ans); } +void coerceAs(SEXP x, SEXP as, SEXP out) { + if (!isVectorAtomic(x)) + error("'x' is not atomic"); + if (!isVectorAtomic(as)) + error("'as' is not atomic"); + if (TYPEOF(out)!=TYPEOF(as)) + error("type of 'out' is not the same as type of 'as'"); + if (LENGTH(out)!=LENGTH(x)) + error("length of 'out' is not the same as length of 'x'"); + const char *ret = memrecycle(/*target=*/out, /*where=*/R_NilValue, /*start=*/0, /*len=*/LENGTH(x), /*source=*/x, /*sourceStart=*/0, /*sourceLen=*/-1, /*colnum=*/-1, /*colname=*/"not applicable"); + if (ret) warning(_("memrecycle raises warning: %s"), ret); +} + +SEXP coerceAsR(SEXP x, SEXP as) { + if (!isVectorAtomic(x)) + error("'x' is not atomic"); + if (!isVectorAtomic(as)) + error("'as' is not atomic"); + if (isNewList(as)) + error("'as' is a list"); + int protecti=0; + int len = LENGTH(x); + SEXP ans = PROTECT(allocNAVectorLike(as, len)); protecti++; + coerceAs(x, as, ans); + UNPROTECT(protecti); + return ans; +} From 1fa20695064f94f441491e5ac241d7d26f2b1a68 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sun, 24 May 2020 11:36:32 +0100 Subject: [PATCH 02/37] initial change for #4101 --- src/fifelse.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index 3a05fce6d3..cf81cded0a 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -16,14 +16,12 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { int nprotect = 0; if (ta != tb) { - if (ta == INTSXP && tb == REALSXP) { - SEXP tmp = PROTECT(coerceVector(a, REALSXP)); nprotect++; - a = tmp; - ta = REALSXP; - } else if (ta == REALSXP && tb == INTSXP) { - SEXP tmp = PROTECT(coerceVector(b, REALSXP)); nprotect++; - b = tmp; - tb = REALSXP; + if (TYPEORDER(ta)TYPEORDER(tb)) { + b = PROTECT(coerceAsR(b, a)); nprotect++; + tb = ta; } else { error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb)); } From 22f98f7f52a1a5ca87fa01fdb177703bf93f639f Mon Sep 17 00:00:00 2001 From: jangorecki Date: Mon, 25 May 2020 21:26:29 +0100 Subject: [PATCH 03/37] nafill simple fill coerce --- inst/tests/nafill.Rraw | 2 +- src/nafill.c | 29 ++++++----------------------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 56ad6585ee..0c30a656a9 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -28,7 +28,7 @@ test(1.04, nafill(x, fill=5), INT(5,5,3,4,5,5,7,8,5,5)) test(1.05, nafill(x, fill=NA_integer_), x) test(1.06, nafill(x, fill=NA), x) test(1.07, nafill(x, fill=NA_real_), x) -test(1.08, nafill(x, fill=Inf), x) +test(1.08, nafill(x, fill=Inf), x, warning="memrecycle raises warning: inf") test(1.09, nafill(x, fill=NaN), x) y = x/2 test(1.11, nafill(y, "locf"), c(NA,NA,3,4,4,4,7,8,8,8)/2) diff --git a/src/nafill.c b/src/nafill.c index d3df00af73..94a9725a45 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -169,15 +169,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S bool *isInt64 = (bool *)R_alloc(nx, sizeof(bool)); for (R_len_t i=0; i1) num_threads(getDTthreads()) @@ -208,13 +191,13 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S switch (TYPEOF(VECTOR_ELT(x, i))) { case REALSXP : { if (isInt64[i]) { - nafillInteger64(i64x[i], inx[i], itype, fi64[i], &vans[i], verbose); + nafillInteger64(i64x[i], inx[i], itype, itype==0 ? ((int64_t *)fillp[i])[0] : (int64_t) 0, &vans[i], verbose); } else { - nafillDouble(dx[i], inx[i], itype, fd[i], nan_is_na, &vans[i], verbose); + nafillDouble(dx[i], inx[i], itype, itype==0 ? ((double *)fillp[i])[0] : (double) 0, nan_is_na, &vans[i], verbose); } } break; case INTSXP : { - nafillInteger(ix[i], inx[i], itype, fi32[i], &vans[i], verbose); + nafillInteger(ix[i], inx[i], itype, itype==0 ? ((int32_t *)fillp[i])[0] : (int32_t) 0, &vans[i], verbose); } break; } } From 7747c3dc24f7ab65828a0dc16992399416c4b018 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Mon, 25 May 2020 22:06:01 +0100 Subject: [PATCH 04/37] nafill const works for locf and nocb as well, closes #3594 --- R/shift.R | 4 ---- inst/tests/nafill.Rraw | 26 ++++++++++++++++++++++++-- man/nafill.Rd | 2 +- src/nafill.c | 23 +++++++++++++---------- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/R/shift.R b/R/shift.R index 63a1cdec42..c73d8b0840 100644 --- a/R/shift.R +++ b/R/shift.R @@ -26,14 +26,10 @@ shift = function(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FA nafill = function(x, type=c("const","locf","nocb"), fill=NA, nan=NA) { type = match.arg(type) - if (type!="const" && !missing(fill)) - warning("argument 'fill' ignored, only make sense for type='const'") .Call(CnafillR, x, type, fill, nan_is_na(nan), FALSE, NULL) } setnafill = function(x, type=c("const","locf","nocb"), fill=NA, nan=NA, cols=seq_along(x)) { type = match.arg(type) - if (type!="const" && !missing(fill)) - warning("argument 'fill' ignored, only make sense for type='const'") invisible(.Call(CnafillR, x, type, fill, nan_is_na(nan), TRUE, cols)) } diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 0c30a656a9..9382f37467 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -114,8 +114,8 @@ test(2.08, unname(l), list(c(1:2,18052L,4:5), structure(c(1,2,18052,4,5), class= # exceptions test coverage x = 1:10 -test(3.01, nafill(x, "locf", fill=0L), nafill(x, "locf"), warning="argument 'fill' ignored") -test(3.02, setnafill(list(copy(x)), "locf", fill=0L), setnafill(list(copy(x)), "locf"), warning="argument 'fill' ignored") +test(3.01, nafill(x, "locf", fill=0L), x) +test(3.02, setnafill(list(copy(x)), "locf", fill=0L), list(x)) test(3.03, setnafill(x, "locf"), error="in-place update is supported only for list") test(3.04, nafill(letters[1:5], fill=0), error="must be numeric type, or list/data.table") test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be numeric type, or list/data.table") @@ -207,3 +207,25 @@ test(7.10, nafill(x, fill=0, nan=Inf), error="Argument 'nan' must be NA or NaN") # new tests for fill list #TODO + +# Extend functionality of nafill to use 'fill' argument for all types #3594 +test(9.01, nafill(c(NA,1,NA,NA,5,3,NA,0), type="locf", fill=-1), `[<-`(nafill(c(NA,1,NA,NA,5,3,NA,0), type="locf"), 1L, -1)) +x = xx = c(rep(NA,2),3:4,rep(NA,2)) +test(9.11, nafill(x, "locf", 0), `[<-`(nafill(x, "locf"), 1:2, 0L)) +test(9.12, nafill(x, "nocb", 0), `[<-`(nafill(x, "nocb"), 5:6, 0L)) +test(9.13, nafill(x, "locf", -1), `[<-`(nafill(x, "locf"), 1:2, -1L)) +test(9.14, nafill(x, "nocb", -1), `[<-`(nafill(x, "nocb"), 5:6, -1L)) +x = as.double(xx) +test(9.21, nafill(x, "locf", 0), `[<-`(nafill(x, "locf"), 1:2, 0)) +test(9.22, nafill(x, "nocb", 0), `[<-`(nafill(x, "nocb"), 5:6, 0)) +test(9.23, nafill(x, "locf", -1), `[<-`(nafill(x, "locf"), 1:2, -1)) +test(9.24, nafill(x, "nocb", -1), `[<-`(nafill(x, "nocb"), 5:6, -1)) +if (test_bit64) { + x = as.integer64(xx) + # `[<-.integer64` does not work + seti64 = function(x, i, value) {x[i] = value; x} + test(9.31, nafill(x, "locf", 0), seti64(nafill(x, "locf"), 1:2, as.integer64(0))) + test(9.32, nafill(x, "nocb", 0), seti64(nafill(x, "nocb"), 5:6, as.integer64(0))) + test(9.33, nafill(x, "locf", -1), seti64(nafill(x, "locf"), 1:2, as.integer64(-1))) + test(9.34, nafill(x, "nocb", -1), seti64(nafill(x, "nocb"), 5:6, as.integer64(-1))) +} diff --git a/man/nafill.Rd b/man/nafill.Rd index f8afb1dcfa..480f6ae118 100644 --- a/man/nafill.Rd +++ b/man/nafill.Rd @@ -16,7 +16,7 @@ setnafill(x, type=c("const","locf","nocb"), fill=NA, nan=NA, cols=seq_along(x)) \arguments{ \item{x}{ vector, list, data.frame or data.table of numeric columns. } \item{type}{ character, one of \emph{"const"}, \emph{"locf"} or \emph{"nocb"}. Defaults to \code{"const"}. } - \item{fill}{ numeric or integer, value to be used to fill when \code{type=="const"}. } + \item{fill}{ numeric or integer, value to be used to fill. } \item{nan}{ (numeric \code{x} only) Either \code{NaN} or \code{NA}; if the former, \code{NaN} is treated as distinct from \code{NA}, otherwise, they are treated the same during replacement? } \item{cols}{ numeric or character vector specifying columns to be updated. } } diff --git a/src/nafill.c b/src/nafill.c index 94a9725a45..ed46c54ed0 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -15,23 +15,25 @@ void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, b } } } else if (type==1) { // locf - ans->dbl_v[0] = x[0]; if (nan_is_na) { + ans->dbl_v[0] = ISNAN(x[0]) ? fill : x[0]; for (uint_fast64_t i=1; idbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i-1] : x[i]; } } else { + ans->dbl_v[0] = ISNA(x[0]) ? fill : x[0]; for (uint_fast64_t i=1; idbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i-1] : x[i]; } } } else if (type==2) { // nocb - ans->dbl_v[nx-1] = x[nx-1]; if (nan_is_na) { + ans->dbl_v[nx-1] = ISNAN(x[nx-1]) ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->dbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i+1] : x[i]; } } else { + ans->dbl_v[nx-1] = ISNA(x[nx-1]) ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->dbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i+1] : x[i]; } @@ -49,12 +51,12 @@ void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill ans->int_v[i] = x[i]==NA_INTEGER ? fill : x[i]; } } else if (type==1) { // locf - ans->int_v[0] = x[0]; + ans->int_v[0] = x[0]==NA_INTEGER ? fill : x[0]; for (uint_fast64_t i=1; iint_v[i] = x[i]==NA_INTEGER ? ans->int_v[i-1] : x[i]; } } else if (type==2) { // nocb - ans->int_v[nx-1] = x[nx-1]; + ans->int_v[nx-1] = x[nx-1]==NA_INTEGER ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->int_v[i] = x[i]==NA_INTEGER ? ans->int_v[i+1] : x[i]; } @@ -71,12 +73,12 @@ void nafillInteger64(int64_t *x, uint_fast64_t nx, unsigned int type, int64_t fi ans->int64_v[i] = x[i]==NA_INTEGER64 ? fill : x[i]; } } else if (type==1) { // locf - ans->int64_v[0] = x[0]; + ans->int64_v[0] = x[0]==NA_INTEGER64 ? fill : x[0]; for (uint_fast64_t i=1; iint64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i-1] : x[i]; } } else if (type==2) { // nocb - ans->int64_v[nx-1] = x[nx-1]; + ans->int64_v[nx-1] = x[nx-1]==NA_INTEGER64 ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->int64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i+1] : x[i]; } @@ -166,11 +168,12 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S else error(_("Internal error: invalid type argument in nafillR function, should have been caught before. Please report to data.table issue tracker.")); // # nocov + bool hasFill = !isLogical(fill) || LOGICAL(fill)[0]!=NA_LOGICAL; bool *isInt64 = (bool *)R_alloc(nx, sizeof(bool)); for (R_len_t i=0; i Date: Mon, 25 May 2020 22:17:33 +0100 Subject: [PATCH 05/37] fix tests for #4101 --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed17470383..79b1d2e64b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15711,7 +15711,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.") -test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please") +test(2072.013, fifelse(test_vec, TRUE, "FALSE"), as.character(fifelse(test_vec, TRUE, FALSE))) # no error anymore #4101 test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).") test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).") test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec))) @@ -15737,7 +15737,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na) test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na)) test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na)) test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na)) -test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please") +test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), fifelse(test_vec, as.Date("2011-01-01"), as.Date(as.double(FALSE), origin="1970-01-01"))) # no error anymore #4101 test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na)) test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na)) test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na)) From 4dfa3074083508bafbaeca430dace8c1e2bef048 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Mon, 25 May 2020 23:17:44 +0100 Subject: [PATCH 06/37] coverage --- src/fifelse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index cf81cded0a..7df8a54eb0 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -22,9 +22,9 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { } else if (TYPEORDER(ta)>TYPEORDER(tb)) { b = PROTECT(coerceAsR(b, a)); nprotect++; tb = ta; - } else { - error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb)); } + if (TYPEOF(a)!=TYPEOF(b)) + error(_("internal error: 'yes' is of type %s but 'no' is of type %s. Type should have been coerced already."), type2char(TYPEOF(a)), type2char(TYPEOF(b))); // # nocov } if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) From 5c1fa6b0fb5956784e8ab2ccfc2d0686a2f753fe Mon Sep 17 00:00:00 2001 From: jangorecki Date: Mon, 25 May 2020 23:24:41 +0100 Subject: [PATCH 07/37] placeholder for nafill #3992 --- inst/tests/nafill.Rraw | 12 +++++++++++- src/types.h | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 9382f37467..6c8ff0d142 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -28,7 +28,7 @@ test(1.04, nafill(x, fill=5), INT(5,5,3,4,5,5,7,8,5,5)) test(1.05, nafill(x, fill=NA_integer_), x) test(1.06, nafill(x, fill=NA), x) test(1.07, nafill(x, fill=NA_real_), x) -test(1.08, nafill(x, fill=Inf), x, warning="memrecycle raises warning: inf") +test(1.08, nafill(x, fill=Inf), x, warning="precision lost") test(1.09, nafill(x, fill=NaN), x) y = x/2 test(1.11, nafill(y, "locf"), c(NA,NA,3,4,4,4,7,8,8,8)/2) @@ -229,3 +229,13 @@ if (test_bit64) { test(9.33, nafill(x, "locf", -1), seti64(nafill(x, "locf"), 1:2, as.integer64(-1))) test(9.34, nafill(x, "nocb", -1), seti64(nafill(x, "nocb"), 5:6, as.integer64(-1))) } + +# nafill, setnafill for character, factor and other types #3992 +## logical +## character +## factor +## Date +## POSIXct +## IDate +## ITime +## nanotime diff --git a/src/types.h b/src/types.h index 05f6c2247a..d1b9e78b5c 100644 --- a/src/types.h +++ b/src/types.h @@ -9,7 +9,8 @@ typedef struct ans_t { int32_t *int_v; // used in nafill double *dbl_v; // used in froll, nafill - int64_t *int64_v; // not used yet + int64_t *int64_v; // used in nafill + //void *char_v; // to be used in nafill but then must escape parallelism uint8_t status; // 0:ok, 1:message, 2:warning, 3:error; unix return signal: {0,1,2}=0, {3}=1 char message[4][ANS_MSG_SIZE]; // STDOUT: output, STDERR: message, warning, error // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586 From 7e8331d586075e07ceb104f4000b9a83fa6f444c Mon Sep 17 00:00:00 2001 From: jangorecki Date: Mon, 25 May 2020 23:25:11 +0100 Subject: [PATCH 08/37] use coerceAs in froll --- inst/tests/froll.Rraw | 12 ++++---- man/froll.Rd | 2 +- src/assign.c | 6 ++-- src/data.table.h | 2 +- src/dogroups.c | 14 ++++----- src/frollR.c | 67 ++++++++++++------------------------------- src/rbindlist.c | 2 +- src/utils.c | 2 +- 8 files changed, 39 insertions(+), 68 deletions(-) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 62c16801ca..9f2ecc1c89 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -71,15 +71,15 @@ test(6000.011, frollmean(x, n, adaptive=TRUE), list(c(NA, 1, 1.25), c(NA, 1, 1.2 #### error on unsupported type dx = data.table(real=1:10/2, char=letters[1:10]) -test(6000.012, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric or logical types") +test(6000.012, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") dx = data.table(real=1:10/2, fact=factor(letters[1:10])) -test(6000.013, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric or logical types") +test(6000.013, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") #dx = data.table(real=1:10/2, logi=logical(10)) #test(6000.014, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric types") # commented out as support added in #3749, tested in .009 dx = data.table(real=1:10/2, list=rep(list(NA), 10)) -test(6000.015, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric or logical types") +test(6000.015, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") x = letters[1:10] -test(6000.016, frollmean(x, 3), error="x must be of type numeric or logical") +test(6000.016, frollmean(x, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") x = 1:10/2 test(6000.017, frollmean(x, "a"), error="n must be integer") test(6000.018, frollmean(x, factor("a")), error="n must be integer") @@ -348,8 +348,8 @@ test(6000.074, frollmean(1:3, 2, fill=0L), c(0, 1.5, 2.5)) test(6000.075, frollmean(1:3, 2, fill=NA_integer_), c(NA_real_, 1.5, 2.5)) test(6000.076, frollmean(1:3, 2, fill=1:2), error="fill must be a vector of length 1") test(6000.077, frollmean(1:3, 2, fill=NA), c(NA_real_, 1.5, 2.5)) -test(6000.078, frollmean(1:3, 2, fill=TRUE), error="fill must be numeric") -test(6000.079, frollmean(1:3, 2, fill=FALSE), error="fill must be numeric") +test(6000.078, frollmean(1:3, 2, fill=TRUE), frollmean(1:3, 2, fill=1)) #error="fill must be numeric") # fill already coerced, as 'x' arg +test(6000.079, frollmean(1:3, 2, fill=FALSE), frollmean(1:3, 2, fill=0)) #error="fill must be numeric") test(6000.080, frollmean(1:3, 2, fill="a"), error="fill must be numeric") test(6000.081, frollmean(1:3, 2, fill=factor("a")), error="fill must be numeric") test(6000.082, frollmean(1:3, 2, fill=list(NA)), error="fill must be numeric") diff --git a/man/froll.Rd b/man/froll.Rd index 070d28696d..388c47c485 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -25,7 +25,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) \item{x}{ vector, list, data.frame or data.table of numeric or logical columns. } \item{n}{ integer vector, for adaptive rolling function also list of integer vectors, rolling window size. } - \item{fill}{ numeric, value to pad by. Defaults to \code{NA}. } + \item{fill}{ numeric or logical, value to pad by. Defaults to \code{NA}. } \item{algo}{ character, default \code{"fast"}. When set to \code{"exact"}, then slower algorithm is used. It suffers less from floating point rounding error, performs extra pass to adjust rounding error diff --git a/src/assign.c b/src/assign.c index 1392079e72..c67004949f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -502,7 +502,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } else { // existing column targetcol = VECTOR_ELT(dt,coln); } - const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln))); + const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln)), /*quiet=*/false); if (ret) warning(ret); } @@ -668,7 +668,7 @@ static bool anyNamed(SEXP x) { #define MSGSIZE 1000 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects -const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname) +const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname, bool quiet) // like memcpy but recycles single-item source // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer() // assigns to target[start:start+len-1] or target[where[start:start+len-1]] where start is 0-based @@ -844,7 +844,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con type2char(TYPEOF(target)), colnum, colname); source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { - if (GetVerbose()) { + if (!quiet && GetVerbose()) { // only take the (small) cost of GetVerbose() (search of options() list) when types don't match Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"), sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), diff --git a/src/data.table.h b/src/data.table.h index 857ce08fc4..bb439f9b9a 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -154,7 +154,7 @@ SEXP dt_na(SEXP x, SEXP cols); // assign.c SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose); -const char *memrecycle(const SEXP target, const SEXP where, const int r, const int len, SEXP source, const int sourceStart, const int sourceLen, const int coln, const char *colname); +const char *memrecycle(const SEXP target, const SEXP where, const int r, const int len, SEXP source, const int sourceStart, const int sourceLen, const int coln, const char *colname, bool quiet); SEXP shallowwrapper(SEXP dt, SEXP cols); SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, diff --git a/src/dogroups.c b/src/dogroups.c index e07057b325..65b441e7da 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -126,7 +126,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } for (int j=0; j=0 && nrowgroups) for (int j=0; j=0) { for (int j=0; j1 && thislen!=maxn && grpn>0) { // grpn>0 for grouping empty tables; test 1986 error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn); } - memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, ""); + memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, "", /*quiet=*/false); } } ansloc += maxn; diff --git a/src/frollR.c b/src/frollR.c index 46ddea0dfa..67dafc4dee 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -4,27 +4,21 @@ SEXP coerceToRealListR(SEXP obj) { // accept atomic/list of integer/logical/real returns list of real int protecti = 0; - SEXP x = R_NilValue; if (isVectorAtomic(obj)) { - x = PROTECT(allocVector(VECSXP, 1)); protecti++; - if (isReal(obj)) { - SET_VECTOR_ELT(x, 0, obj); - } else if (isInteger(obj) || isLogical(obj)) { - SET_VECTOR_ELT(x, 0, coerceVector(obj, REALSXP)); + SEXP obj1 = obj; + obj = PROTECT(allocVector(VECSXP, 1)); protecti++; + SET_VECTOR_ELT(obj, 0, obj1); + } + R_len_t nobj = length(obj); + SEXP x = PROTECT(allocVector(VECSXP, nobj)); protecti++; + for (R_len_t i=0; i Date: Mon, 25 May 2020 23:29:59 +0100 Subject: [PATCH 09/37] enable disabled test --- inst/tests/nafill.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 6c8ff0d142..dd821a2fcf 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -120,7 +120,7 @@ test(3.03, setnafill(x, "locf"), error="in-place update is supported only for li test(3.04, nafill(letters[1:5], fill=0), error="must be numeric type, or list/data.table") test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be numeric type, or list/data.table") test(3.06, nafill(x, fill=1:2), error="fill must be a vector of length 1") -#test(3.07, nafill(x, fill="asd"), error="fill argument must be numeric") +test(3.07, nafill(x, fill="asd"), x, warning=c("Coercing 'character' RHS to 'integer'","NAs introduced by coercion")) # colnamesInt helper dt = data.table(a=1, b=2, d=3) From 0bf15646278cc4ddd92c6b859c7a38a475c33af4 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Tue, 26 May 2020 01:31:03 +0100 Subject: [PATCH 10/37] nafill retain names, tests for fill list --- NEWS.md | 2 ++ inst/tests/nafill.Rraw | 60 ++++++++++++++++++++++++------------------ src/nafill.c | 36 +++++++++++++++---------- 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7ecd4bc5af..a323334f14 100644 --- a/NEWS.md +++ b/NEWS.md @@ -81,6 +81,8 @@ unit = "s") 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. +15. In `nafill` function it is now possible to use `type="locf|nocb"` together with `fill` argument, [#3594](https://github.com/Rdatatable/data.table/issues/3594). Thanks to @ben519 for feature request. It will also now return named object based on the input names. + ## BUG FIXES 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index dd821a2fcf..1e7611c92b 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -47,31 +47,31 @@ z[9L] = -Inf test(1.21, nafill(z, "locf"), c(NA,Inf,3,4,4,4,7,8,-Inf,-Inf)/2) test(1.22, nafill(z, "nocb"), c(Inf,Inf,3,4,7,7,7,8,-Inf,NA)/2) dt = data.table(x, y, z) -test(1.31, nafill(dt, "locf"), unname(lapply(dt, nafill, "locf"))) -test(1.32, nafill(dt, "nocb"), unname(lapply(dt, nafill, "nocb"))) -test(1.33, nafill(dt, fill=0), unname(lapply(dt, nafill, fill=0))) +test(1.31, nafill(dt, "locf"), lapply(dt, nafill, "locf")) +test(1.32, nafill(dt, "nocb"), lapply(dt, nafill, "nocb")) +test(1.33, nafill(dt, fill=0), lapply(dt, nafill, fill=0)) l = list(x, y[1:8], z[1:6]) test(1.41, nafill(l, "locf"), lapply(l, nafill, "locf")) test(1.42, nafill(l, "nocb"), lapply(l, nafill, "nocb")) test(1.43, nafill(l, fill=0), lapply(l, nafill, fill=0)) l = list(a=c(1:2,NA,4:5), b=as.Date(c(1:2,NA,4:5), origin="1970-01-01"), d=c(NA,2L,NA,4L,NA), e=as.Date(c(NA,2L,NA,4L,NA), origin="1970-01-01")) # Date retain class #3617 -test(1.44, nafill(l, "locf"), list(c(1:2,2L,4:5), structure(c(1,2,2,4,5), class="Date"), c(NA,2L,2L,4L,4L), structure(c(NA,2,2,4,4), class="Date"))) -test(1.45, nafill(l, "nocb"), list(c(1:2,4L,4:5), structure(c(1,2,4,4,5), class="Date"), c(2L,2L,4L,4L,NA), structure(c(2,2,4,4,NA), class="Date"))) -test(1.46, nafill(l, fill=0), list(c(1:2,0L,4:5), structure(c(1,2,0,4,5), class="Date"), c(0L,2L,0L,4L,0L), structure(c(0,2,0,4,0), class="Date"))) -test(1.47, nafill(l, fill=as.Date(0, origin="1970-01-01")), list(c(1:2,0L,4:5), structure(c(1,2,0,4,5), class="Date"), c(0L,2L,0L,4L,0L), structure(c(0,2,0,4,0), class="Date"))) -test(1.48, nafill(l, fill=as.Date("2019-06-05")), list(c(1:2,18052L,4:5), structure(c(1,2,18052,4,5), class="Date"), c(18052L,2L,18052L,4L,18052L), structure(c(18052,2,18052,4,18052), class="Date"))) +test(1.44, nafill(l, "locf"), list(a=c(1:2,2L,4:5), b=structure(c(1,2,2,4,5), class="Date"), d=c(NA,2L,2L,4L,4L), e=structure(c(NA,2,2,4,4), class="Date"))) +test(1.45, nafill(l, "nocb"), list(a=c(1:2,4L,4:5), b=structure(c(1,2,4,4,5), class="Date"), d=c(2L,2L,4L,4L,NA), e=structure(c(2,2,4,4,NA), class="Date"))) +test(1.46, nafill(l, fill=0), list(a=c(1:2,0L,4:5), b=structure(c(1,2,0,4,5), class="Date"), d=c(0L,2L,0L,4L,0L), e=structure(c(0,2,0,4,0), class="Date"))) +test(1.47, nafill(l, fill=as.Date(0, origin="1970-01-01")), list(a=c(1:2,0L,4:5), b=structure(c(1,2,0,4,5), class="Date"), d=c(0L,2L,0L,4L,0L), e=structure(c(0,2,0,4,0), class="Date"))) +test(1.48, nafill(l, fill=as.Date("2019-06-05")), list(a=c(1:2,18052L,4:5), b=structure(c(1,2,18052,4,5), class="Date"), d=c(18052L,2L,18052L,4L,18052L), e=structure(c(18052,2,18052,4,18052), class="Date"))) test(1.49, nafill(numeric()), numeric()) if (test_bit64) { l = list(a=as.integer64(c(1:2,NA,4:5)), b=as.integer64(c(NA,2L,NA,4L,NA))) - test(1.61, lapply(nafill(l, "locf"), as.character), lapply(list(c(1:2,2L,4:5), c(NA,2L,2L,4L,4L)), as.character)) - test(1.62, lapply(nafill(l, "nocb"), as.character), lapply(list(c(1:2,4L,4:5), c(2L,2L,4L,4L,NA)), as.character)) - test(1.63, lapply(nafill(l, fill=0), as.character), lapply(list(c(1:2,0L,4:5), c(0L,2L,0L,4L,0L)), as.character)) - test(1.64, lapply(nafill(l, fill=as.integer64(0)), as.character), lapply(list(c(1:2,0L,4:5), c(0L,2L,0L,4L,0L)), as.character)) - test(1.65, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(c("1","2","3000000000","4","5"), c("3000000000","2","3000000000","4","3000000000"))) + test(1.61, lapply(nafill(l, "locf"), as.character), lapply(list(a=c(1:2,2L,4:5), b=c(NA,2L,2L,4L,4L)), as.character)) + test(1.62, lapply(nafill(l, "nocb"), as.character), lapply(list(a=c(1:2,4L,4:5), b=c(2L,2L,4L,4L,NA)), as.character)) + test(1.63, lapply(nafill(l, fill=0), as.character), lapply(list(a=c(1:2,0L,4:5), b=c(0L,2L,0L,4L,0L)), as.character)) + test(1.64, lapply(nafill(l, fill=as.integer64(0)), as.character), lapply(list(a=c(1:2,0L,4:5), b=c(0L,2L,0L,4L,0L)), as.character)) + test(1.65, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(a=c("1","2","3000000000","4","5"), b=c("3000000000","2","3000000000","4","3000000000"))) l = lapply(l, `+`, as.integer64("3000000000")) - test(1.66, lapply(nafill(l, "locf"), as.character), list(c("3000000001","3000000002","3000000002","3000000004","3000000005"), c(NA_character_,"3000000002","3000000002","3000000004","3000000004"))) - test(1.67, lapply(nafill(l, "nocb"), as.character), list(c("3000000001","3000000002","3000000004","3000000004","3000000005"), c("3000000002","3000000002","3000000004","3000000004",NA_character_))) - test(1.68, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(c("3000000001","3000000002","3000000000","3000000004","3000000005"), c("3000000000","3000000002","3000000000","3000000004","3000000000"))) + test(1.66, lapply(nafill(l, "locf"), as.character), list(a=c("3000000001","3000000002","3000000002","3000000004","3000000005"), b=c(NA_character_,"3000000002","3000000002","3000000004","3000000004"))) + test(1.67, lapply(nafill(l, "nocb"), as.character), list(a=c("3000000001","3000000002","3000000004","3000000004","3000000005"), b=c("3000000002","3000000002","3000000004","3000000004",NA_character_))) + test(1.68, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(a=c("3000000001","3000000002","3000000000","3000000004","3000000005"), b=c("3000000000","3000000002","3000000000","3000000004","3000000000"))) test(1.69, nafill(c(1L,2L,NA,4L), fill=as.integer64(3L)), 1:4) test(1.70, nafill(c(1L,2L,NA,4L), fill=as.integer64(NA)), c(1:2,NA,4L)) test(1.71, nafill(c(1,2,NA,4), fill=as.integer64(3)), c(1,2,3,4)) @@ -84,10 +84,10 @@ if (test_bit64) { } if (test_nanotime) { l = list(a=nanotime(c(1:2,NA,4:5)), b=nanotime(c(NA,2L,NA,4L,NA))) - test(1.91, lapply(nafill(l, "locf"), as.character), lapply(list(nanotime(c(1:2,2L,4:5)), nanotime(c(NA,2L,2L,4L,4L))), as.character)) - test(1.92, lapply(nafill(l, "nocb"), as.character), lapply(list(nanotime(c(1:2,4L,4:5)), nanotime(c(2L,2L,4L,4L,NA))), as.character)) - test(1.93, lapply(nafill(l, fill=0), as.character), lapply(list(nanotime(c(1:2,0L,4:5)), nanotime(c(0L,2L,0L,4L,0L))), as.character)) - test(1.94, lapply(nafill(l, fill=nanotime(0)), as.character), lapply(list(nanotime(c(1:2,0L,4:5)), nanotime(c(0L,2L,0L,4L,0L))), as.character)) + test(1.91, lapply(nafill(l, "locf"), as.character), lapply(list(a=nanotime(c(1:2,2L,4:5)), b=nanotime(c(NA,2L,2L,4L,4L))), as.character)) + test(1.92, lapply(nafill(l, "nocb"), as.character), lapply(list(a=nanotime(c(1:2,4L,4:5)), b=nanotime(c(2L,2L,4L,4L,NA))), as.character)) + test(1.93, lapply(nafill(l, fill=0), as.character), lapply(list(a=nanotime(c(1:2,0L,4:5)), b=nanotime(c(0L,2L,0L,4L,0L))), as.character)) + test(1.94, lapply(nafill(l, fill=nanotime(0)), as.character), lapply(list(a=nanotime(c(1:2,0L,4:5)), b=nanotime(c(0L,2L,0L,4L,0L))), as.character)) } # setnafill @@ -162,7 +162,7 @@ options(old) # coerceAs if (test_bit64) { - coerceFill = function(x) lapply(list(1L, 1.0, as.integer64(1)), coerceAs, x=x) + coerceFill = function(x) lapply(list(1L, 1.0, as.integer64(1)), coerceAs, x=x) # old function used before #4491 #test(6.01, coerceFill(1:2), error="fill argument must be length 1") #test(6.02, coerceFill("a"), error="fill argument must be numeric") test(6.11, identical(coerceFill(NA), list(NA_integer_, NA_real_, as.integer64(NA)))) @@ -170,7 +170,7 @@ if (test_bit64) { test(6.22, identical(coerceFill(0L), list(0L, 0, as.integer64(0)))) test(6.23, identical(coerceFill(NA_integer_), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.31, identical(coerceFill(as.integer64(3)), list(3L, 3, as.integer64(3)))) - test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003"))), warning="memrecycle raises warning.*out-of-range") + test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003"))), warning="out-of-range") test(6.33, identical(coerceFill(as.integer64(0)), list(0L, 0, as.integer64(0)))) test(6.34, identical(coerceFill(as.integer64(NA)), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.41, identical(coerceFill(3), list(3L, 3, as.integer64(3)))) @@ -182,8 +182,8 @@ if (test_bit64) { test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning=c("precision lost","precision lost")) test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x))) - test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="memrecycle raises warning.*out-of-range") - test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="memrecycle raises warning.*out-of-range") + test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") + test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) @@ -206,7 +206,17 @@ test(7.09, nafill(x, fill=0, nan=c(NA, NaN)), error="Argument 'nan' must be leng test(7.10, nafill(x, fill=0, nan=Inf), error="Argument 'nan' must be NA or NaN") # new tests for fill list -#TODO +d = data.table(x = c(1:2,NA,4L), y = c(1,2,NA,4)) +test(8.01, nafill(d, fill=3), list(x=1:4, y=c(1,2,3,4))) +test(8.02, nafill(d, fill=3L), list(x=1:4, y=c(1,2,3,4))) +test(8.03, nafill(d, fill=list(3L,3)), list(x=1:4, y=c(1,2,3,4))) +test(8.04, nafill(d, fill=list(3,3L)), list(x=1:4, y=c(1,2,3,4))) +test(8.05, nafill(d, fill=list(3,NA)), list(x=1:4, y=c(1,2,NA,4))) +test(8.06, nafill(d, fill=list(1,9L)), list(x=c(1:2,1L,4L), y=c(1,2,9,4))) +d = as.data.table(setNames(as.list(seq_along(letters)), letters)) ## test names and scalar returned +test(8.11, names(nafill(d, fill=3)), letters) +test(8.12, nafill(c(1:2,NA,4L), "locf"), c(1:2,2L,4L)) +test(8.13, nafill(list(x=c(1:2,NA,4L)), "locf"), list(x=c(1:2,2L,4L))) # Extend functionality of nafill to use 'fill' argument for all types #3594 test(9.01, nafill(c(NA,1,NA,NA,5,3,NA,0), type="locf", fill=-1), `[<-`(nafill(c(NA,1,NA,NA,5,3,NA,0), type="locf"), 1L, -1)) diff --git a/src/nafill.c b/src/nafill.c index ed46c54ed0..ebe327b52c 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -104,23 +104,24 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S bool nan_is_na = LOGICAL(nan_is_na_arg)[0]; SEXP x = R_NilValue; - if (isVectorAtomic(obj)) { + bool obj_scalar = isVectorAtomic(obj); + if (obj_scalar) { if (binplace) error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table")); else if (!isReal(obj) && !isInteger(obj)) error(_("'x' argument must be numeric type, or list/data.table of numeric types")); - x = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list - SET_VECTOR_ELT(x, 0, obj); - } else { - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj) - x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; - int *icols = INTEGER(ricols); - for (int i=0; i Date: Tue, 26 May 2020 02:22:00 +0100 Subject: [PATCH 11/37] coerceAs gets copyArg so now can return its input --- R/wrappers.R | 2 +- src/data.table.h | 10 ++++++++-- src/fifelse.c | 4 ++-- src/frollR.c | 12 ++++-------- src/init.c | 16 ++++++++++++++- src/nafill.c | 2 +- src/utils.c | 51 +++++++++++++++++++++++++++++++++++------------- 7 files changed, 68 insertions(+), 29 deletions(-) diff --git a/R/wrappers.R b/R/wrappers.R index 5a67dfbae5..0c226b9f30 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -12,4 +12,4 @@ colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, c testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) -coerceAs = function(x, as) .Call(CcoerceAsR, x, as) +coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) diff --git a/src/data.table.h b/src/data.table.h index bb439f9b9a..7dc9105fda 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -79,6 +79,13 @@ extern SEXP char_ordered; extern SEXP char_datatable; extern SEXP char_dataframe; extern SEXP char_NULL; +extern SEXP char_logical; +extern SEXP char_integer; +extern SEXP char_double; +extern SEXP char_character; +extern SEXP char_complex; +extern SEXP char_list; +extern SEXP char_raw; extern SEXP sym_sorted; extern SEXP sym_index; extern SEXP sym_BY; @@ -227,8 +234,7 @@ bool islocked(SEXP x); SEXP islockedR(SEXP x); bool need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); -void coerceAs(SEXP x, SEXP as, SEXP out); -SEXP coerceAsR(SEXP x, SEXP as); +SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg); // types.c char *end(char *start); diff --git a/src/fifelse.c b/src/fifelse.c index 7df8a54eb0..c1941893fe 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -17,10 +17,10 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { if (ta != tb) { if (TYPEORDER(ta)TYPEORDER(tb)) { - b = PROTECT(coerceAsR(b, a)); nprotect++; + b = PROTECT(coerceAs(b, a, ScalarLogical(TRUE))); nprotect++; tb = ta; } if (TYPEOF(a)!=TYPEOF(b)) diff --git a/src/frollR.c b/src/frollR.c index 67dafc4dee..18a6d3807d 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -13,13 +13,9 @@ SEXP coerceToRealListR(SEXP obj) { SEXP x = PROTECT(allocVector(VECSXP, nobj)); protecti++; for (R_len_t i=0; i Date: Tue, 26 May 2020 18:46:36 +0100 Subject: [PATCH 12/37] better control of verbose, and better find class for coerceAs --- R/data.table.R | 3 +- inst/tests/tests.Rraw | 2 +- src/assign.c | 6 ++-- src/data.table.h | 4 +-- src/dogroups.c | 14 +++++----- src/init.c | 4 +-- src/rbindlist.c | 2 +- src/utils.c | 64 ++++++++++++++++++++----------------------- 8 files changed, 47 insertions(+), 52 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 98651d6e0b..55fa1a5fca 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -141,7 +141,8 @@ replace_dot_alias = function(e) { return(ans) } if (!missing(verbose)) { - stopifnot(isTRUEorFALSE(verbose)) + if (!is.integer(verbose) && !is.logical(verbose)) stop("verbose must be logical or integer") + if (length(verbose)!=1 || anyNA(verbose)) stop("verbose must be length 1 non-NA") # set the global verbose option because that is fetched from C code without having to pass it through oldverbose = options(datatable.verbose=verbose) on.exit(options(oldverbose)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 79b1d2e64b..93ba46677c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14269,7 +14269,7 @@ test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") test(2005.12, set(DT, 1L, "f", expression(x+2)), error="type 'expression' cannot be coerced to 'complex'") -test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=TRUE]$c, as.raw(INT(7,1,0)), +test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=3L]$c, as.raw(INT(7,1,0)), ## note verbose=3L for more deeper verbose output due to memrecycle messages when it is being re-used internally #4491 output="Zero-copy coerce when assigning 'logical' to 'raw' column 3 named 'c'") test(2005.31, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) test(2005.32, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0)), diff --git a/src/assign.c b/src/assign.c index c67004949f..2d4caaf4cf 100644 --- a/src/assign.c +++ b/src/assign.c @@ -502,7 +502,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } else { // existing column targetcol = VECTOR_ELT(dt,coln); } - const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln)), /*quiet=*/false); + const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln))); if (ret) warning(ret); } @@ -668,7 +668,7 @@ static bool anyNamed(SEXP x) { #define MSGSIZE 1000 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects -const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname, bool quiet) +const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname) // like memcpy but recycles single-item source // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer() // assigns to target[start:start+len-1] or target[where[start:start+len-1]] where start is 0-based @@ -844,7 +844,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con type2char(TYPEOF(target)), colnum, colname); source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { - if (!quiet && GetVerbose()) { + if (GetVerbose()>=3) { // only take the (small) cost of GetVerbose() (search of options() list) when types don't match Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"), sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), diff --git a/src/data.table.h b/src/data.table.h index 7dc9105fda..7c15512444 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -104,7 +104,7 @@ extern size_t typeorder[100]; long long DtoLL(double x); double LLtoD(long long x); -bool GetVerbose(); +int GetVerbose(); // cj.c SEXP cj(SEXP base_list); @@ -161,7 +161,7 @@ SEXP dt_na(SEXP x, SEXP cols); // assign.c SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose); -const char *memrecycle(const SEXP target, const SEXP where, const int r, const int len, SEXP source, const int sourceStart, const int sourceLen, const int coln, const char *colname, bool quiet); +const char *memrecycle(const SEXP target, const SEXP where, const int r, const int len, SEXP source, const int sourceStart, const int sourceLen, const int coln, const char *colname); SEXP shallowwrapper(SEXP dt, SEXP cols); SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, diff --git a/src/dogroups.c b/src/dogroups.c index 65b441e7da..e07057b325 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -126,7 +126,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } for (int j=0; j=0 && nrowgroups) for (int j=0; j=0) { for (int j=0; j1 && thislen!=maxn && grpn>0) { // grpn>0 for grouping empty tables; test 1986 error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn); } - memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, "", /*quiet=*/false); + memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, ""); } } ansloc += maxn; diff --git a/src/init.c b/src/init.c index c7997c4202..005b2da0b7 100644 --- a/src/init.c +++ b/src/init.c @@ -385,10 +385,10 @@ inline double LLtoD(long long x) { return u.d; } -bool GetVerbose() { +int GetVerbose() { // don't call repetitively; save first in that case SEXP opt = GetOption(sym_verbose, R_NilValue); - return isLogical(opt) && LENGTH(opt)==1 && LOGICAL(opt)[0]==1; + return (isLogical(opt) || isInteger(opt)) && LENGTH(opt)==1 && INTEGER(opt)[0]; } // # nocov start diff --git a/src/rbindlist.c b/src/rbindlist.c index fc86fe2087..f39b63f0a7 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -519,7 +519,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); nprotect++; } // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) - const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName, /*quiet=*/false); + const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn diff --git a/src/utils.c b/src/utils.c index b8e542e472..ec458feeea 100644 --- a/src/utils.c +++ b/src/utils.c @@ -308,37 +308,33 @@ SEXP coerceUtf8IfNeeded(SEXP x) { return(ans); } -static inline SEXP CLASSOF(SEXP x) { - switch(TYPEOF(x)) { - case NILSXP: { - return(ScalarString(char_NULL)); - } break; - case LGLSXP: { - return(ScalarString(char_logical)); - } break; - case INTSXP: { - return(ScalarString(INHERITS(x, char_ITime) ? char_ITime : (INHERITS(x, char_IDate) ? char_IDate : (INHERITS(x, char_ordered) ? char_ordered : (INHERITS(x, char_factor) ? char_factor : char_integer))))); - } break; - case REALSXP: { - return(ScalarString(Rinherits(x, char_nanotime) ? char_nanotime : (Rinherits(x, char_integer64) ? char_integer64 : (INHERITS(x, char_POSIXct) ? char_POSIXct : (INHERITS(x, char_Date) ? char_Date : char_double))))); - } break; - case STRSXP : { - return(ScalarString(char_character)); - } break; - case CPLXSXP : { - return(ScalarString(char_complex)); - } break; - case RAWSXP : { - return(ScalarString(char_raw)); - } break; - case VECSXP : { - return(ScalarString(INHERITS(x, char_datatable) ? char_datatable : (INHERITS(x, char_dataframe) ? char_dataframe : char_list))); - } break; +SEXP class1(SEXP x) { + SEXP cl = getAttrib(x, R_ClassSymbol); + if (length(cl)) + return(STRING_ELT(cl, 0)); + SEXP d = getAttrib(x, R_DimSymbol); + int nd = length(d); + if (nd) { + if (nd==2) + return(mkChar("matrix")); + else + return(mkChar("array")); + } + SEXPTYPE t = TYPEOF(x); + switch(t) { + case CLOSXP: case SPECIALSXP: case BUILTINSXP: + return(mkChar("function")); + case REALSXP: + return(mkChar("numeric")); + case SYMSXP: + return(mkChar("name")); + case LANGSXP: + return(mkChar("call")); default: - error(_("Type %s is not supported."), type2char(TYPEOF(x))); // # nocov + return(type2str(t)); } - return(ScalarString(NA_STRING)); // # nocov } + SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg) { // copyArg does not update in place, but only IF an object is of the same type-class as class to be coerced, it will return with no copy if (!isVectorAtomic(x)) @@ -347,14 +343,12 @@ SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg) { error("'as' is not atomic"); if (isNewList(as)) error("'as' is a list"); - int protecti=0; - bool copy = LOGICAL(copyArg)[0]; - if (!copy && TYPEOF(x)==TYPEOF(as) && CLASSOF(x)==CLASSOF(as)) + if (!LOGICAL(copyArg)[0] && TYPEOF(x)==TYPEOF(as) && class1(x)==class1(as)) return(x); int len = LENGTH(x); - SEXP ans = PROTECT(allocNAVectorLike(as, len)); protecti++; - const char *ret = memrecycle(/*target=*/ans, /*where=*/R_NilValue, /*start=*/0, /*len=*/LENGTH(x), /*source=*/x, /*sourceStart=*/0, /*sourceLen=*/-1, /*colnum=*/0, /*colname=*/"", /*quiet=*/true); - if (ret) warning(_("memrecycle raises warning: %s"), ret); - UNPROTECT(protecti); + SEXP ans = PROTECT(allocNAVectorLike(as, len)); + const char *ret = memrecycle(/*target=*/ans, /*where=*/R_NilValue, /*start=*/0, /*len=*/LENGTH(x), /*source=*/x, /*sourceStart=*/0, /*sourceLen=*/-1, /*colnum=*/0, /*colname=*/""); + if (ret) warning(_("%s"), ret); + UNPROTECT(1); return ans; } From be8347bfb5e7c32ae4fe4a751c70a0f69e2c2f74 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 11:01:21 +0100 Subject: [PATCH 13/37] proper verbose to int --- src/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index 005b2da0b7..ec4f7deaf9 100644 --- a/src/init.c +++ b/src/init.c @@ -388,7 +388,9 @@ inline double LLtoD(long long x) { int GetVerbose() { // don't call repetitively; save first in that case SEXP opt = GetOption(sym_verbose, R_NilValue); - return (isLogical(opt) || isInteger(opt)) && LENGTH(opt)==1 && INTEGER(opt)[0]; + if (!(isLogical(opt) || isInteger(opt)) && LENGTH(opt)==1 && INTEGER(opt)[0]!=NA_INTEGER) + error("invalid verbose argument"); + return INTEGER(opt)[0]; } // # nocov start From b6486615f158cb8b862012e15b38441845eb0f5f Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 11:49:57 +0100 Subject: [PATCH 14/37] verbose changes coverage --- inst/tests/nafill.Rraw | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 1e7611c92b..2271e503b0 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -249,3 +249,9 @@ if (test_bit64) { ## IDate ## ITime ## nanotime + +# related to !is.integer(verbose) +test(99.1, data.table(a=1,b=2)[1,1, verbose=1], error="verbose must be logical or integer") +test(99.2, data.table(a=1,b=2)[1,1, verbose=1:2], error="verbose must be length 1 non-NA") +test(99.3, data.table(a=1,b=2)[1,1, verbose=NA], error="verbose must be length 1 non-NA") + From 008b37ccabcecc09e865ef92611c887be39b9f6f Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 11:53:45 +0100 Subject: [PATCH 15/37] rm unused anymore --- src/data.table.h | 7 ------- src/init.c | 14 -------------- 2 files changed, 21 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 7c15512444..f2bc4a9668 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -79,13 +79,6 @@ extern SEXP char_ordered; extern SEXP char_datatable; extern SEXP char_dataframe; extern SEXP char_NULL; -extern SEXP char_logical; -extern SEXP char_integer; -extern SEXP char_double; -extern SEXP char_character; -extern SEXP char_complex; -extern SEXP char_list; -extern SEXP char_raw; extern SEXP sym_sorted; extern SEXP sym_index; extern SEXP sym_BY; diff --git a/src/init.c b/src/init.c index ec4f7deaf9..2f73f34bd7 100644 --- a/src/init.c +++ b/src/init.c @@ -20,13 +20,6 @@ SEXP char_ordered; SEXP char_datatable; SEXP char_dataframe; SEXP char_NULL; -SEXP char_logical; -SEXP char_integer; -SEXP char_double; -SEXP char_character; -SEXP char_complex; -SEXP char_list; -SEXP char_raw; SEXP sym_sorted; SEXP sym_index; SEXP sym_BY; @@ -329,13 +322,6 @@ void attribute_visible R_init_datatable(DllInfo *info) char_datatable = PRINTNAME(install("data.table")); char_dataframe = PRINTNAME(install("data.frame")); char_NULL = PRINTNAME(install("NULL")); - char_logical = PRINTNAME(install("logical")); - char_integer = PRINTNAME(install("integer")); - char_double = PRINTNAME(install("double")); - char_character = PRINTNAME(install("character")); - char_complex = PRINTNAME(install("complex")); - char_list = PRINTNAME(install("list")); - char_raw = PRINTNAME(install("raw")); if (TYPEOF(char_integer64) != CHARSXP) { // checking one is enough in case of any R-devel changes From 07c9d2dc5d77719d563d98fa613b9cd2649486ee Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 11:56:15 +0100 Subject: [PATCH 16/37] more precise verbose arg check --- inst/tests/nafill.Rraw | 3 +++ src/init.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 2271e503b0..8f5a3e1bc8 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -254,4 +254,7 @@ if (test_bit64) { test(99.1, data.table(a=1,b=2)[1,1, verbose=1], error="verbose must be logical or integer") test(99.2, data.table(a=1,b=2)[1,1, verbose=1:2], error="verbose must be length 1 non-NA") test(99.3, data.table(a=1,b=2)[1,1, verbose=NA], error="verbose must be length 1 non-NA") +options(datatable.verbose=1) +test(99.4, coerceAs(1, 2L), error="verbose option must be length 1 non-NA logical or integer") +options(datatable.verbose=FALSE) diff --git a/src/init.c b/src/init.c index 2f73f34bd7..128ce18617 100644 --- a/src/init.c +++ b/src/init.c @@ -374,8 +374,8 @@ inline double LLtoD(long long x) { int GetVerbose() { // don't call repetitively; save first in that case SEXP opt = GetOption(sym_verbose, R_NilValue); - if (!(isLogical(opt) || isInteger(opt)) && LENGTH(opt)==1 && INTEGER(opt)[0]!=NA_INTEGER) - error("invalid verbose argument"); + if ((!isLogical(opt) && !isInteger(opt)) || LENGTH(opt)!=1 || INTEGER(opt)[0]==NA_INTEGER) + error("verbose option must be length 1 non-NA logical or integer"); return INTEGER(opt)[0]; } From 9467ff5bf539f18eccf075aed9e3bebd7b32625d Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 12:32:44 +0100 Subject: [PATCH 17/37] memrecycle escape warnings and simple verbose for numcol==0 --- src/assign.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/assign.c b/src/assign.c index 2d4caaf4cf..3ae9ee3f62 100644 --- a/src/assign.c +++ b/src/assign.c @@ -690,6 +690,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // for 5647 this used to limit slen to len, but no longer if (colname==NULL) error(_("Internal error: memrecycle has received NULL colname")); // # nocov + bool nocol = colnum==0; *memrecycle_message = '\0'; int protecti=0; if (isNewList(source)) { @@ -705,7 +706,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // duplicate unnecessarily, hence checking for named rather than duplicating always. // See #481, #1270 and tests 1341.* fail without this copy. // ********** This might go away now that we copy properly in dogroups.c ********** - if (anyNamed(source)) { + if (anyNamed(source)) { // this is always true since 4.0.0? see #4424 source = PROTECT(copyAsPlain(source)); protecti++; } } @@ -729,7 +730,10 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con for (int i=0; inlevel) { - error(_("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]"), colnum, colname, val, nlevel); + if (nocol) + error(_("Assigning factor numbers. But %d is outside the level range [1,%d]"), val, nlevel); + else + error(_("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]"), colnum, colname, val, nlevel); } } } else { @@ -737,7 +741,10 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con for (int i=0; inlevel)) { - error(_("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number."), colnum, colname, val, nlevel); + if (nocol) + error(_("Assigning factor numbers. But %f is outside the level range [1,%d], or is not a whole number."), val, nlevel); + else + error(_("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number."), colnum, colname, val, nlevel); } } } @@ -829,27 +836,37 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } } } else if (isString(source) && !isString(target) && !isNewList(target)) { - warning(_("Coercing 'character' RHS to '%s' to match the type of the target column (column %d named '%s')."), - type2char(TYPEOF(target)), colnum, colname); + if (!nocol) + warning(_("Coercing 'character' RHS to '%s' to match the type of the target column (column %d named '%s')."), + type2char(TYPEOF(target)), colnum, colname); // this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion' source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if (isNewList(source) && !isNewList(target)) { if (targetIsI64) { - error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s')."), colnum, colname); + if (nocol) + error(_("Cannot coerce 'list' RHS to 'integer64' to match the target type.")); + else + error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s')."), colnum, colname); // because R's coerceVector doesn't know about integer64 } // as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3)) // relied on by NNS, simstudy and table.express; tests 1294.* - warning(_("Coercing 'list' RHS to '%s' to match the type of the target column (column %d named '%s')."), - type2char(TYPEOF(target)), colnum, colname); + if (!nocol) + warning(_("Coercing 'list' RHS to '%s' to match the type of the target column (column %d named '%s')."), + type2char(TYPEOF(target)), colnum, colname); source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { if (GetVerbose()>=3) { // only take the (small) cost of GetVerbose() (search of options() list) when types don't match - Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"), - sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), - targetIsI64 ? "integer64" : type2char(TYPEOF(target)), - colnum, colname); + if (nocol) + Rprintf(_("Zero-copy coerce when assigning '%s' to '%s'.\n"), + sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), + targetIsI64 ? "integer64" : type2char(TYPEOF(target))); + else + Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"), + sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), + targetIsI64 ? "integer64" : type2char(TYPEOF(target)), + colnum, colname); } // The following checks are up front here, otherwise we'd need them twice in the two branches // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. @@ -1061,7 +1078,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con break; } if (sourceIsI64) - error(_("To assign integer64 to a character column, please use as.character() for clarity.")); + error(_("To assign integer64 to a character, please use as.character() for clarity.")); // TODO: handle that here as well source = PROTECT(coerceVector(source, STRSXP)); protecti++; } BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) From 1edae4ffb0933f2b086443f11c4570b98036d490 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 12:40:52 +0100 Subject: [PATCH 18/37] coerceAs does not emit extra warning anymore --- inst/tests/nafill.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 8f5a3e1bc8..63939291c0 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -120,7 +120,7 @@ test(3.03, setnafill(x, "locf"), error="in-place update is supported only for li test(3.04, nafill(letters[1:5], fill=0), error="must be numeric type, or list/data.table") test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be numeric type, or list/data.table") test(3.06, nafill(x, fill=1:2), error="fill must be a vector of length 1") -test(3.07, nafill(x, fill="asd"), x, warning=c("Coercing 'character' RHS to 'integer'","NAs introduced by coercion")) +test(3.07, nafill(x, fill="asd"), x, warning="NAs introduced by coercion") # colnamesInt helper dt = data.table(a=1, b=2, d=3) From 71ab119c13dae44988fa3ee254631a92af26cdfb Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 17:17:17 +0100 Subject: [PATCH 19/37] coerceAs verbose, more tests --- inst/tests/nafill.Rraw | 56 +++++++++++++++++++++++++++++++++++++++++- src/assign.c | 2 +- src/utils.c | 12 +++++++-- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 63939291c0..80e601b544 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -160,7 +160,7 @@ if (test_bit64) { } options(old) -# coerceAs +# coerceAs int/numeric/int64 as used in nafill if (test_bit64) { coerceFill = function(x) lapply(list(1L, 1.0, as.integer64(1)), coerceAs, x=x) # old function used before #4491 #test(6.01, coerceFill(1:2), error="fill argument must be length 1") @@ -240,6 +240,60 @@ if (test_bit64) { test(9.34, nafill(x, "nocb", -1), seti64(nafill(x, "nocb"), 5:6, as.integer64(-1))) } +# coerceAs verbose +options(datatable.verbose=2L) +input = 1 +test(10.01, ans<-coerceAs(input, 1), 1, output="double[numeric] into double[numeric]") +test(10.02, address(input)!=address(ans)) +test(10.03, ans<-coerceAs(input, 1, copy=FALSE), 1, output="copy=false and input already of expected type and class double[numeric]") +test(10.04, address(input), address(ans)) +test(10.05, ans<-coerceAs(input, 1L), 1L, output="double[numeric] into integer[integer]") +test(10.06, address(input)!=address(ans)) +test(10.07, ans<-coerceAs(input, 1L, copy=FALSE), 1L, output="double[numeric] into integer[integer]", notOutput="copy=false") +test(10.08, address(input)!=address(ans)) +test(10.09, coerceAs("1", 1L), 1L, output="character[character] into integer[integer]") +test(10.10, coerceAs("1", 1), 1, output="character[character] into double[numeric]") +test(10.11, coerceAs("a", factor("x")), factor("a", levels=c("x","a")), output="character[character] into integer[factor]") ## levels of 'as' are retained! +test(10.12, coerceAs("a", factor()), factor("a"), output="character[character] into integer[factor]") +test(10.13, coerceAs(1, factor("x")), factor("x"), output="double[numeric] into integer[factor]") +test(10.14, coerceAs(1, factor("x", levels=c("x","y"))), factor("x", levels=c("x","y")), output="double[numeric] into integer[factor]") +test(10.15, coerceAs(2, factor("x", levels=c("x","y"))), factor("y", levels=c("x","y")), output="double[numeric] into integer[factor]") +test(10.16, coerceAs(1:2, factor(c("x","y"))), factor(c("x","y")), output="integer[integer] into integer[factor]") +test(10.17, coerceAs(1:3, factor(c("x","y"))), output="integer[integer] into integer[factor]", error="factor numbers.*3 is outside the level range") +test(10.18, coerceAs(factor("x"), factor(c("x","y"))), factor("x", levels=c("x","y"))) +test(10.19, coerceAs(factor("x"), factor(c("x","y")), copy=FALSE), factor("x", levels=c("x","y"))) ## copy=F has copyMostAttrib +a = structure("a", class="a") +b = structure("b", class="b") +test(10.21, coerceAs(a, b), structure("a", class="b"), output="character[a] into character[b]") +a = structure(1L, class="a") +b = structure(2L, class="b") +test(10.22, coerceAs(a, b), structure(1L, class="b"), output="integer[a] into integer[b]") +a = structure(1, class="a") +b = structure(2, class="b") +test(10.23, coerceAs(a, b), structure(1, class="b"), output="double[a] into double[b]") +a = structure(1, class="a") +b = structure(2L, class="b") +test(10.24, coerceAs(a, b), structure(1L, class="b"), output="double[a] into integer[b]") +if (test_bit64) { + x = as.integer64(1L) + test(10.81, coerceAs(x, 1), 1, output="double[integer64] into double[numeric]") + test(10.82, coerceAs(x, 1L), 1L, output="double[integer64] into integer[integer]") + test(10.83, coerceAs(x, "1"), error="please use as.character") #output="double[integer64] into character[character]") # not yet implemented + test(10.84, coerceAs(1, x), x, output="double[numeric] into double[integer64]") + test(10.85, coerceAs(1L, x), x, output="integer[integer] into double[integer64]") + test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]") +} +if (test_nanotime) { + x = as.nanotime(1L) + test(10.91, coerceAs(x, 1), 1, output="double[nanotime] into double[numeric]") + test(10.92, coerceAs(x, 1L), 1L, output="double[nanotime] into integer[integer]") + test(10.93, coerceAs(x, "1"), error="please use as.character") #output="double[nanotime] into character[character]") # not yet implemented + test(10.94, coerceAs(1, x), x, output="double[numeric] into double[nanotime]") + test(10.95, coerceAs(1L, x), x, output="integer[integer] into double[nanotime]") + test(10.96, coerceAs("1", x), x, output="character[character] into double[nanotime]") +} +options(datatable.verbose=FALSE) + # nafill, setnafill for character, factor and other types #3992 ## logical ## character diff --git a/src/assign.c b/src/assign.c index 3ae9ee3f62..11063d4e3e 100644 --- a/src/assign.c +++ b/src/assign.c @@ -881,7 +881,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \ int n = snprintf(memrecycle_message, MSGSIZE, \ "%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s'", val, sType, i+1, tType); \ - if (colnum>0 && n>0 && n0 && n=2; // verbose level 2 required + if (!LOGICAL(copyArg)[0] && TYPEOF(x)==TYPEOF(as) && class1(x)==class1(as)) { + if (verbose) + Rprintf("copy=false and input already of expected type and class %s[%s]\n", type2char(TYPEOF(x)), CHAR(class1(x))); + copyMostAttrib(as, x); // factor levels are same for copy=T|F return(x); + } int len = LENGTH(x); SEXP ans = PROTECT(allocNAVectorLike(as, len)); + if (verbose) + Rprintf("Coercing %s[%s] into %s[%s]\n", type2char(TYPEOF(x)), CHAR(class1(x)), type2char(TYPEOF(as)), CHAR(class1(as))); const char *ret = memrecycle(/*target=*/ans, /*where=*/R_NilValue, /*start=*/0, /*len=*/LENGTH(x), /*source=*/x, /*sourceStart=*/0, /*sourceLen=*/-1, /*colnum=*/0, /*colname=*/""); - if (ret) warning(_("%s"), ret); + if (ret) + warning(_("%s"), ret); UNPROTECT(1); return ans; } From 858d7a72f4ebfe6d70b17e458e8debadf94bcece Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 18:11:18 +0100 Subject: [PATCH 20/37] use older nanotime api --- inst/tests/nafill.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 80e601b544..1df377f3ec 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -284,7 +284,7 @@ if (test_bit64) { test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]") } if (test_nanotime) { - x = as.nanotime(1L) + x = nanotime(1L) test(10.91, coerceAs(x, 1), 1, output="double[nanotime] into double[numeric]") test(10.92, coerceAs(x, 1L), 1L, output="double[nanotime] into integer[integer]") test(10.93, coerceAs(x, "1"), error="please use as.character") #output="double[nanotime] into character[character]") # not yet implemented From a2f9adcd4d0fdce9f32a9ee20852afc106e0a40c Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 18:36:54 +0100 Subject: [PATCH 21/37] update error msg --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 93ba46677c..d97231e76b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14305,7 +14305,7 @@ if (test_bit64) { warning="-1.*integer64.*position 1 taken as 0 when assigning.*raw.*column 3 named 'c'") test(2005.66, DT[2:3, f:=as.integer64(c(NA,"2147483648"))]$f, as.complex(c(-42,NA,2147483648))) DT[,h:=LETTERS[1:3]] - test(2005.67, DT[2:3, h:=as.integer64(1:2)], error="To assign integer64 to a character column, please use as.character.") + test(2005.67, DT[2:3, h:=as.integer64(1:2)], error="To assign integer64 to a character, please use as.character.") } # rbindlist raw type, #2819 From df758a67ce73ce30c5f8756d3088c28d3d9676c1 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 27 May 2020 20:04:08 +0100 Subject: [PATCH 22/37] coverage --- inst/tests/nafill.Rraw | 9 +++++++-- src/assign.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 1df377f3ec..e302281736 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -260,8 +260,9 @@ test(10.14, coerceAs(1, factor("x", levels=c("x","y"))), factor("x", levels=c("x test(10.15, coerceAs(2, factor("x", levels=c("x","y"))), factor("y", levels=c("x","y")), output="double[numeric] into integer[factor]") test(10.16, coerceAs(1:2, factor(c("x","y"))), factor(c("x","y")), output="integer[integer] into integer[factor]") test(10.17, coerceAs(1:3, factor(c("x","y"))), output="integer[integer] into integer[factor]", error="factor numbers.*3 is outside the level range") -test(10.18, coerceAs(factor("x"), factor(c("x","y"))), factor("x", levels=c("x","y"))) -test(10.19, coerceAs(factor("x"), factor(c("x","y")), copy=FALSE), factor("x", levels=c("x","y"))) ## copy=F has copyMostAttrib +test(10.18, coerceAs(c(1,2,3), factor(c("x","y"))), output="double[numeric] into integer[factor]", error="factor numbers.*3.000000 is outside the level range") +test(10.19, coerceAs(factor("x"), factor(c("x","y"))), factor("x", levels=c("x","y"))) +test(10.20, coerceAs(factor("x"), factor(c("x","y")), copy=FALSE), factor("x", levels=c("x","y"))) ## copy=F has copyMostAttrib a = structure("a", class="a") b = structure("b", class="b") test(10.21, coerceAs(a, b), structure("a", class="b"), output="character[a] into character[b]") @@ -282,6 +283,10 @@ if (test_bit64) { test(10.84, coerceAs(1, x), x, output="double[numeric] into double[integer64]") test(10.85, coerceAs(1L, x), x, output="integer[integer] into double[integer64]") test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]") + options(datatable.verbose=3L) + test(10.87, coerceAs(x, 1L), 1L, output=c("double[integer64] into integer[integer]","Zero-copy coerce when assigning 'integer64' to 'integer'")) + test(10.88, coerceAs(1L, x), x, output=c("integer[integer] into double[integer64]","Zero-copy coerce when assigning 'integer' to 'integer64'")) + options(datatable.verbose=2L) } if (test_nanotime) { x = nanotime(1L) diff --git a/src/assign.c b/src/assign.c index 11063d4e3e..f979f924f2 100644 --- a/src/assign.c +++ b/src/assign.c @@ -844,7 +844,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } else if (isNewList(source) && !isNewList(target)) { if (targetIsI64) { if (nocol) - error(_("Cannot coerce 'list' RHS to 'integer64' to match the target type.")); + error(_("Cannot coerce 'list' RHS to 'integer64' to match the target type.")); // # nocov because coerceAs does not accept lists else error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s')."), colnum, colname); // because R's coerceVector doesn't know about integer64 From 45e26dbd4e31543fa28ee5499cd1cc82b5277999 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sat, 30 May 2020 17:33:15 +0100 Subject: [PATCH 23/37] codecov of coerceAs --- inst/tests/nafill.Rraw | 11 +++++++++++ src/utils.c | 14 ++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index e302281736..2adbb884b6 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -298,6 +298,17 @@ if (test_nanotime) { test(10.96, coerceAs("1", x), x, output="character[character] into double[nanotime]") } options(datatable.verbose=FALSE) +test(11.01, coerceAs(list(a=1), 1), error="is not atomic") +test(11.02, coerceAs(1, list(a=1)), error="is not atomic") +test(11.03, coerceAs(sum, 1), error="is not atomic") +test(11.04, coerceAs(quote(1+1), 1), error="is not atomic") +test(11.05, coerceAs(as.name("x"), 1), error="is not atomic") +m = matrix(1:4, 2, 2) +a = array(1:8, c(2,2,2)) +test(11.06, coerceAs(m, 1L), error="must not be matrix or array") +test(11.07, coerceAs(1L, m), error="must not be matrix or array") +test(11.08, coerceAs(a, 1L), error="must not be matrix or array") +test(11.09, coerceAs(1L, a), error="must not be matrix or array") # nafill, setnafill for character, factor and other types #3992 ## logical diff --git a/src/utils.c b/src/utils.c index ce0d86ee2e..e799a744aa 100644 --- a/src/utils.c +++ b/src/utils.c @@ -323,13 +323,13 @@ SEXP class1(SEXP x) { SEXPTYPE t = TYPEOF(x); switch(t) { case CLOSXP: case SPECIALSXP: case BUILTINSXP: - return(mkChar("function")); + return(mkChar("function")); // # nocov case REALSXP: return(mkChar("numeric")); case SYMSXP: - return(mkChar("name")); + return(mkChar("name")); // # nocov case LANGSXP: - return(mkChar("call")); + return(mkChar("call")); // # nocov default: return(type2str(t)); } @@ -341,13 +341,15 @@ SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg) { error("'x' is not atomic"); if (!isVectorAtomic(as)) error("'as' is not atomic"); - if (isNewList(as)) - error("'as' is a list"); + if (!isNull(getAttrib(x, R_DimSymbol))) + error("'x' must not be matrix or array"); + if (!isNull(getAttrib(as, R_DimSymbol))) + error("'as' must not be matrix or array"); bool verbose = GetVerbose()>=2; // verbose level 2 required if (!LOGICAL(copyArg)[0] && TYPEOF(x)==TYPEOF(as) && class1(x)==class1(as)) { if (verbose) Rprintf("copy=false and input already of expected type and class %s[%s]\n", type2char(TYPEOF(x)), CHAR(class1(x))); - copyMostAttrib(as, x); // factor levels are same for copy=T|F + copyMostAttrib(as, x); // so attrs like factor levels are same for copy=T|F return(x); } int len = LENGTH(x); From 98f7cd37de91928752d7566953bf0590e8fa8de1 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sat, 30 May 2020 18:03:07 +0100 Subject: [PATCH 24/37] catch all verbose --- inst/tests/nafill.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 2adbb884b6..634861239f 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -261,8 +261,8 @@ test(10.15, coerceAs(2, factor("x", levels=c("x","y"))), factor("y", levels=c("x test(10.16, coerceAs(1:2, factor(c("x","y"))), factor(c("x","y")), output="integer[integer] into integer[factor]") test(10.17, coerceAs(1:3, factor(c("x","y"))), output="integer[integer] into integer[factor]", error="factor numbers.*3 is outside the level range") test(10.18, coerceAs(c(1,2,3), factor(c("x","y"))), output="double[numeric] into integer[factor]", error="factor numbers.*3.000000 is outside the level range") -test(10.19, coerceAs(factor("x"), factor(c("x","y"))), factor("x", levels=c("x","y"))) -test(10.20, coerceAs(factor("x"), factor(c("x","y")), copy=FALSE), factor("x", levels=c("x","y"))) ## copy=F has copyMostAttrib +test(10.19, coerceAs(factor("x"), factor(c("x","y"))), factor("x", levels=c("x","y")), output="integer[factor] into integer[factor]") +test(10.20, coerceAs(factor("x"), factor(c("x","y")), copy=FALSE), factor("x", levels=c("x","y")), output="input already of expected type and class") ## copy=F has copyMostAttrib a = structure("a", class="a") b = structure("b", class="b") test(10.21, coerceAs(a, b), structure("a", class="b"), output="character[a] into character[b]") @@ -279,7 +279,7 @@ if (test_bit64) { x = as.integer64(1L) test(10.81, coerceAs(x, 1), 1, output="double[integer64] into double[numeric]") test(10.82, coerceAs(x, 1L), 1L, output="double[integer64] into integer[integer]") - test(10.83, coerceAs(x, "1"), error="please use as.character") #output="double[integer64] into character[character]") # not yet implemented + test(10.83, coerceAs(x, "1"), error="please use as.character", output="double[integer64] into character[character]") # not yet implemented test(10.84, coerceAs(1, x), x, output="double[numeric] into double[integer64]") test(10.85, coerceAs(1L, x), x, output="integer[integer] into double[integer64]") test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]") @@ -292,7 +292,7 @@ if (test_nanotime) { x = nanotime(1L) test(10.91, coerceAs(x, 1), 1, output="double[nanotime] into double[numeric]") test(10.92, coerceAs(x, 1L), 1L, output="double[nanotime] into integer[integer]") - test(10.93, coerceAs(x, "1"), error="please use as.character") #output="double[nanotime] into character[character]") # not yet implemented + test(10.93, coerceAs(x, "1"), error="please use as.character", output="double[nanotime] into character[character]") # not yet implemented test(10.94, coerceAs(1, x), x, output="double[numeric] into double[nanotime]") test(10.95, coerceAs(1L, x), x, output="integer[integer] into double[nanotime]") test(10.96, coerceAs("1", x), x, output="character[character] into double[nanotime]") From 834afba0369550278838b397047b09e801ad43c7 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sat, 30 May 2020 18:07:40 +0100 Subject: [PATCH 25/37] Revert "initial change for #4101" This reverts commit 1fa20695064f94f441491e5ac241d7d26f2b1a68. --- src/fifelse.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index c1941893fe..3a05fce6d3 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -16,15 +16,17 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { int nprotect = 0; if (ta != tb) { - if (TYPEORDER(ta)TYPEORDER(tb)) { - b = PROTECT(coerceAs(b, a, ScalarLogical(TRUE))); nprotect++; - tb = ta; + if (ta == INTSXP && tb == REALSXP) { + SEXP tmp = PROTECT(coerceVector(a, REALSXP)); nprotect++; + a = tmp; + ta = REALSXP; + } else if (ta == REALSXP && tb == INTSXP) { + SEXP tmp = PROTECT(coerceVector(b, REALSXP)); nprotect++; + b = tmp; + tb = REALSXP; + } else { + error(_("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type."), type2char(ta), type2char(tb)); } - if (TYPEOF(a)!=TYPEOF(b)) - error(_("internal error: 'yes' is of type %s but 'no' is of type %s. Type should have been coerced already."), type2char(TYPEOF(a)), type2char(TYPEOF(b))); // # nocov } if (!R_compute_identical(PROTECT(getAttrib(a,R_ClassSymbol)), PROTECT(getAttrib(b,R_ClassSymbol)), 0)) From f2ef98003bcc564566f36cfb7beecd352dc53a95 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sat, 30 May 2020 18:28:41 +0100 Subject: [PATCH 26/37] Revert "fix tests for #4101" This reverts commit cc2cc0e094a92c7b4ddaece251b470f4d01fe566. --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d97231e76b..13f4bcdcaf 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15711,7 +15711,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.") -test(2072.013, fifelse(test_vec, TRUE, "FALSE"), as.character(fifelse(test_vec, TRUE, FALSE))) # no error anymore #4101 +test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please") test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).") test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).") test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec))) @@ -15737,7 +15737,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na) test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na)) test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na)) test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na)) -test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), fifelse(test_vec, as.Date("2011-01-01"), as.Date(as.double(FALSE), origin="1970-01-01"))) # no error anymore #4101 +test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please") test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na)) test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na)) test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na)) From db95b084ba9488b09ae6d65f611dd0693c6bbefd Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 1 Jul 2020 11:01:49 +0800 Subject: [PATCH 27/37] use coerceAs in fcast.c --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 12 +++++++++++ src/fcast.c | 50 +++++++++++++++++++++++-------------------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index a323334f14..35b3a25c55 100644 --- a/NEWS.md +++ b/NEWS.md @@ -111,6 +111,8 @@ unit = "s") 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). +14. `dcast` handles coercion of `fill` to `integer64` correctly, [#4561](https://github.com/Rdatatable/data.table/issues/4561). Thanks to @emallickhossain for the bug report. + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 13f4bcdcaf..b695e500b3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16853,3 +16853,15 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# fill converts to int64 in dcast, #4561 +if (test_bit64) { + i64v = as.integer64(c(12345678901234, 70, 20, NA)) + apple = data.table( + id = c("a", "b", "b"), + time = c(1L, 1L, 2L), + y = i64v[1:3] + ) + test(2139, dcast(apple, id ~ time, value.var = "y"), + data.table(id = c('a', 'b'), `1` = i64v[1:2], `2` = i64v[4:3], key='id')) +} diff --git a/src/fcast.c b/src/fcast.c index b8d6fa4c2a..08bc002727 100644 --- a/src/fcast.c +++ b/src/fcast.c @@ -15,67 +15,71 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil SET_VECTOR_ELT(ans, i, VECTOR_ELT(lhs, i)); } // get val cols + bool is_agg_bool = LOGICAL(is_agg)[0]; for (int i=0; i Date: Mon, 19 Feb 2024 23:45:21 -0800 Subject: [PATCH 28/37] restore actual fix --- src/fcast.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/fcast.c b/src/fcast.c index a4e90bd504..16dc1bcd3c 100644 --- a/src/fcast.c +++ b/src/fcast.c @@ -20,16 +20,13 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil const SEXP thiscol = VECTOR_ELT(val, i); const SEXPTYPE thistype = TYPEOF(thiscol); SEXP thisfill = fill; - SEXPTYPE thistype = TYPEOF(thiscol); int nprotect = 0; if (isNull(fill)) { if (LOGICAL(is_agg)[0]) { thisfill = PROTECT(allocNAVector(thistype, 1)); nprotect++; } else thisfill = VECTOR_ELT(fill_d, i); } - if (TYPEOF(thisfill) != thistype) { - thisfill = PROTECT(coerceVector(thisfill, thistype)); nprotect++; - } + thisfill = PROTECT(coerceAs(thisfill, thiscol, /*copyArg=*/ScalarLogical(false))); nprotect++; switch (thistype) { case INTSXP: case LGLSXP: { @@ -71,7 +68,8 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil } } } break; - case STRSXP: + case STRSXP: { + const SEXP sfill = STRING_ELT(thisfill, 0); for (int j=0; j Date: Mon, 19 Feb 2024 23:46:32 -0800 Subject: [PATCH 29/37] ws --- NEWS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 350fb0854a..32aa6a16c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,7 +20,6 @@ 1. `dcast` handles coercion of `fill` to `integer64` correctly, [#4561](https://github.com/Rdatatable/data.table/issues/4561). Thanks to @emallickhossain for the bug report and @MichaelChirico for the fix. - ## NOTES 1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1. From e4ae1d74e70ac41a041ee69f40292b5ef8d45c08 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 19 Feb 2024 23:47:25 -0800 Subject: [PATCH 30/37] incomplete merge --- inst/tests/nafill.Rraw | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 806ec3cbd6..b72c0b5063 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -181,23 +181,14 @@ if (test_bit64) { test(6.44, identical(coerceFill(NaN), list(NA_integer_, NaN, as.integer64(NA)))) test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) -<<<<<<< HEAD - test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning=c("precision lost","precision lost")) -======= test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning="precision lost") ->>>>>>> master test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x))) test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) -<<<<<<< HEAD - test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) - test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) -======= test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning="precision lost") test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning="precision lost") ->>>>>>> master } # nan argument to treat NaN as NA in nafill, #4020 @@ -262,13 +253,8 @@ test(10.05, ans<-coerceAs(input, 1L), 1L, output="double[numeric] into integer[i test(10.06, address(input)!=address(ans)) test(10.07, ans<-coerceAs(input, 1L, copy=FALSE), 1L, output="double[numeric] into integer[integer]", notOutput="copy=false") test(10.08, address(input)!=address(ans)) -<<<<<<< HEAD -test(10.09, coerceAs("1", 1L), 1L, output="character[character] into integer[integer]") -test(10.10, coerceAs("1", 1), 1, output="character[character] into double[numeric]") -======= test(10.09, coerceAs("1", 1L), 1L, output="character[character] into integer[integer]", warning="Coercing.*character.*integer") test(10.10, coerceAs("1", 1), 1, output="character[character] into double[numeric]", warning="Coercing.*character.*double") ->>>>>>> master test(10.11, coerceAs("a", factor("x")), factor("a", levels=c("x","a")), output="character[character] into integer[factor]") ## levels of 'as' are retained! test(10.12, coerceAs("a", factor()), factor("a"), output="character[character] into integer[factor]") test(10.13, coerceAs(1, factor("x")), factor("x"), output="double[numeric] into integer[factor]") @@ -295,40 +281,20 @@ if (test_bit64) { x = as.integer64(1L) test(10.81, coerceAs(x, 1), 1, output="double[integer64] into double[numeric]") test(10.82, coerceAs(x, 1L), 1L, output="double[integer64] into integer[integer]") -<<<<<<< HEAD - test(10.83, coerceAs(x, "1"), error="please use as.character", output="double[integer64] into character[character]") # not yet implemented - test(10.84, coerceAs(1, x), x, output="double[numeric] into double[integer64]") - test(10.85, coerceAs(1L, x), x, output="integer[integer] into double[integer64]") - test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]") -======= test(10.83, coerceAs(x, "1"), "1", output="double[integer64] into character[character]") test(10.84, coerceAs(1, x), x, output="double[numeric] into double[integer64]") test(10.85, coerceAs(1L, x), x, output="integer[integer] into double[integer64]") test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]", warning="Coercing.*character") ->>>>>>> master options(datatable.verbose=3L) test(10.87, coerceAs(x, 1L), 1L, output=c("double[integer64] into integer[integer]","Zero-copy coerce when assigning 'integer64' to 'integer'")) test(10.88, coerceAs(1L, x), x, output=c("integer[integer] into double[integer64]","Zero-copy coerce when assigning 'integer' to 'integer64'")) options(datatable.verbose=2L) -<<<<<<< HEAD -======= test(10.89, coerceAs(-2147483649, x), as.integer64(-2147483649), output="double[numeric] into double[integer64]") ->>>>>>> master } if (test_nanotime) { x = nanotime(1L) test(10.91, coerceAs(x, 1), 1, output="double[nanotime] into double[numeric]") test(10.92, coerceAs(x, 1L), 1L, output="double[nanotime] into integer[integer]") -<<<<<<< HEAD - test(10.93, coerceAs(x, "1"), error="please use as.character", output="double[nanotime] into character[character]") # not yet implemented - test(10.94, coerceAs(1, x), x, output="double[numeric] into double[nanotime]") - test(10.95, coerceAs(1L, x), x, output="integer[integer] into double[nanotime]") - test(10.96, coerceAs("1", x), x, output="character[character] into double[nanotime]") -} -options(datatable.verbose=FALSE) -test(11.01, coerceAs(list(a=1), 1), error="is not atomic") -test(11.02, coerceAs(1, list(a=1)), error="is not atomic") -======= test(10.93, substring(coerceAs(x, "1"),1,11) %in% c("1","1970-01-01T"), output="double[nanotime] into character[character]") # ^ https://github.com/eddelbuettel/nanotime/issues/92; %in% so as not to break if nanotime adds as.character method test(10.94, coerceAs(1, x), x, output="double[numeric] into double[nanotime]") @@ -338,7 +304,6 @@ test(11.02, coerceAs(1, list(a=1)), error="is not atomic") options(datatable.verbose=FALSE) test(11.01, coerceAs(list(a=1), 1), error="is not atomic") test(11.02, coerceAs(1, list(a=1)), list(1)) ->>>>>>> master test(11.03, coerceAs(sum, 1), error="is not atomic") test(11.04, coerceAs(quote(1+1), 1), error="is not atomic") test(11.05, coerceAs(as.name("x"), 1), error="is not atomic") From 7167f18526e0852583ca562759573af216d08f59 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 19 Feb 2024 23:48:05 -0800 Subject: [PATCH 31/37] vestigial bad merge --- src/assign.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/assign.c b/src/assign.c index 9c0830553e..ef49fd230b 100644 --- a/src/assign.c +++ b/src/assign.c @@ -713,7 +713,6 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // for 5647 this used to limit slen to len, but no longer if (colname==NULL) error(_("Internal error: memrecycle has received NULL colname")); // # nocov - bool nocol = colnum==0; *memrecycle_message = '\0'; int protecti=0; const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); From e3e1e63d82e10ed259fa8b06fd23ccaa0ed2ea5a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 19 Feb 2024 23:51:51 -0800 Subject: [PATCH 32/37] minimize diff --- src/fcast.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/fcast.c b/src/fcast.c index 16dc1bcd3c..65f509ba1c 100644 --- a/src/fcast.c +++ b/src/fcast.c @@ -15,11 +15,10 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil SET_VECTOR_ELT(ans, i, VECTOR_ELT(lhs, i)); } // get val cols - bool is_agg_bool = LOGICAL(is_agg)[0]; for (int i=0; i Date: Tue, 20 Feb 2024 12:03:56 -0800 Subject: [PATCH 33/37] coerceAs throws warning for string->double, and errors on list --- inst/tests/tests.Rraw | 2 +- src/fcast.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fca309ec62..6dc616986f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15653,7 +15653,7 @@ test(2071.09, any_na(data.table(expression(1))), error="Unsupported column type test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l'), data.table(a=1, `1`=list(list(1)), key='a')) test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'), - data.table(a=1, `2`=3, key='a')) + data.table(a=1, `2`=3, key='a'), warning="Coercing 'character' RHS to 'double'") # fifelse, #3657 test_vec = -5L:5L < 0L diff --git a/src/fcast.c b/src/fcast.c index 65f509ba1c..0421f85b8c 100644 --- a/src/fcast.c +++ b/src/fcast.c @@ -25,7 +25,9 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil thisfill = PROTECT(allocNAVector(thistype, 1)); nprotect++; } else thisfill = VECTOR_ELT(fill_d, i); } - thisfill = PROTECT(coerceAs(thisfill, thiscol, /*copyArg=*/ScalarLogical(false))); nprotect++; + if (isVectorAtomic(thiscol)) { + thisfill = PROTECT(coerceAs(thisfill, thiscol, /*copyArg=*/ScalarLogical(false))); nprotect++; + } switch (thistype) { case INTSXP: case LGLSXP: { From b9d739a68d5250acb383743ccb667a2a3287d14d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Feb 2024 15:58:52 -0800 Subject: [PATCH 34/37] comment --- src/fcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fcast.c b/src/fcast.c index 0421f85b8c..8c49c6fe28 100644 --- a/src/fcast.c +++ b/src/fcast.c @@ -25,7 +25,7 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil thisfill = PROTECT(allocNAVector(thistype, 1)); nprotect++; } else thisfill = VECTOR_ELT(fill_d, i); } - if (isVectorAtomic(thiscol)) { + if (isVectorAtomic(thiscol)) { // defer error handling to below, but also skip on list thisfill = PROTECT(coerceAs(thisfill, thiscol, /*copyArg=*/ScalarLogical(false))); nprotect++; } switch (thistype) { From b8c53d9a0c8edd363a7fe536e6060f28b749f1fe Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 1 Mar 2024 14:10:50 -0500 Subject: [PATCH 35/37] example without warning --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6dc616986f..b350f9a206 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15652,8 +15652,8 @@ test(2071.08, any_na(data.table(c(1+1i, NA)))) test(2071.09, any_na(data.table(expression(1))), error="Unsupported column type 'expression'") test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l'), data.table(a=1, `1`=list(list(1)), key='a')) -test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'), - data.table(a=1, `2`=3, key='a'), warning="Coercing 'character' RHS to 'double'") +test(2071.11, dcast(data.table(a = c(1, 1, 2), b = c(1, 2, 1), c = 3), a ~ b, value.var='c', fill='4'), + data.table(a=c(1, 2), `1`=c(3, 3), `2`=c(3, 4), key='a')) # fifelse, #3657 test_vec = -5L:5L < 0L From cf0ed8affd16f484feac1c657e1458d13d5c44a1 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 1 Mar 2024 19:20:37 +0000 Subject: [PATCH 36/37] bad NEWS merge --- NEWS.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 64ffa368b6..e32a5dbd11 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,15 +18,13 @@ 3. `split.data.table` also accepts a formula for `f`, [#5392](https://github.com/Rdatatable/data.table/issues/5392), mirroring the same in `base::split.data.frame` since R 4.1.0 (May 2021). Thanks to @XiangyunHuang for the request, and @ben-schwen for the PR. -3. Namespace-qualifying `data.table::shift()`, `data.table::first()`, or `data.table::last()` will not deactivate GForce, [#5942](https://github.com/Rdatatable/data.table/issues/5942). Thanks @MichaelChirico for the proposal and fix. Namespace-qualifying other calls like `stats::sum()`, `base::prod()`, etc., continue to work as an escape valve to avoid GForce, e.g. to ensure S3 method dispatch. +4. Namespace-qualifying `data.table::shift()`, `data.table::first()`, or `data.table::last()` will not deactivate GForce, [#5942](https://github.com/Rdatatable/data.table/issues/5942). Thanks @MichaelChirico for the proposal and fix. Namespace-qualifying other calls like `stats::sum()`, `base::prod()`, etc., continue to work as an escape valve to avoid GForce, e.g. to ensure S3 method dispatch. ## BUG FIXES 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix. -## BUG FIXES - -1. `dcast` handles coercion of `fill` to `integer64` correctly, [#4561](https://github.com/Rdatatable/data.table/issues/4561). Thanks to @emallickhossain for the bug report and @MichaelChirico for the fix. +2. `dcast` handles coercion of `fill` to `integer64` correctly, [#4561](https://github.com/Rdatatable/data.table/issues/4561). Thanks to @emallickhossain for the bug report and @MichaelChirico for the fix. ## NOTES From 9d7b196845ac07b8cd60d7bada652c4c8e1da59c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 1 Mar 2024 12:08:07 -0800 Subject: [PATCH 37/37] warning is still needed --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3f5ab8e4bd..af337deddb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15658,7 +15658,8 @@ test(2071.09, any_na(data.table(expression(1))), error="Unsupported column type test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l'), data.table(a=1, `1`=list(list(1)), key='a')) test(2071.11, dcast(data.table(a = c(1, 1, 2), b = c(1, 2, 1), c = 3), a ~ b, value.var='c', fill='4'), - data.table(a=c(1, 2), `1`=c(3, 3), `2`=c(3, 4), key='a')) + data.table(a=c(1, 2), `1`=c(3, 3), `2`=c(3, 4), key='a'), + warning="Coercing 'character' RHS to 'double'") # fifelse, #3657 test_vec = -5L:5L < 0L