Rdatatable · MichaelChirico · Mar 14, 2024 · Nov 30, 2022 · Nov 30, 2022 · Nov 30, 2022
@@ -3720,6 +3720,12 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2,
   DT = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE), z=sample(letters[1:2],20,TRUE), d1=runif(20), d2=1L)
   test(1102.38, names(dcast(DT, x ~ y + z, fun.aggregate=length, value.var = "d2", sep=".")),
                 c("x", "1.a", "1.b", "2.a", "2.b"))
+
+  # test for #5512, only compute default fill if needed.
+  DT = data.table(chr=c("a","b","b"), int=1:3)
+  test(1102.39, dcast(DT, int ~ chr, min, value.var="int"), data.table(int=1:3, a=c(1L,NA,NA), b=c(NA,2L,3L), key="int"), warning="NAs introduced by coercion to integer range")
+  test(1102.40, dcast(DT, . ~ chr, min, value.var="int"), data.table(.=".",a=1L,b=2L,key="."))
+
 }
 
 # test for freading commands

@@ -22,7 +22,7 @@
   \item{\dots}{Any other arguments that may be passed to the aggregating function.}
   \item{margins}{Not implemented yet. Should take variable names to compute margins on. A value of \code{TRUE} would compute all margins.}
   \item{subset}{Specified if casting should be done on a subset of the data. Ex: \code{subset = .(col1 <= 5)} or \code{subset = .(variable != "January")}.}
-  \item{fill}{Value with which to fill missing cells. If \code{fun.aggregate} is present, takes the value by applying the function on a 0-length vector.}
+  \item{fill}{Value with which to fill missing cells. If \code{fill=NULL} and missing cells are present, then \code{fun.aggregate} is used on a 0-length vector to obtain a fill value.}
   \item{drop}{\code{FALSE} will cast by including all missing combinations.
 
   \code{c(FALSE, TRUE)} will only include all missing combinations of formula \code{LHS}; \code{c(TRUE, FALSE)} will only include all missing combinations of formula RHS. See Examples.}

@@ -3,6 +3,13 @@
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 
+bool any_NA_int(int N_data, int *idx){
+  for (int data_i=0; data_i<N_data; ++data_i) {
+    if(idx[data_i] == NA_INTEGER)return true;
+  }
+  return false;
+}  
+
 // TO DO: margins
 SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fill, SEXP fill_d, SEXP is_agg) {
   int nrows=INTEGER(nrowArg)[0], ncols=INTEGER(ncolArg)[0];
@@ -15,57 +22,54 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil
     SET_VECTOR_ELT(ans, i, VECTOR_ELT(lhs, i));
   }
   // get val cols
+  bool some_fill = any_NA_int(nrows*ncols, idx);
   for (int i=0; i<nval; ++i) {
     SEXP thiscol = VECTOR_ELT(val, i);
     SEXP thisfill = fill;
     SEXPTYPE thistype = TYPEOF(thiscol);
     int nprotect = 0;
-    if (isNull(fill)) {
-      if (LOGICAL(is_agg)[0]) {
-        thisfill = PROTECT(allocNAVector(thistype, 1)); nprotect++;
-      } else thisfill = VECTOR_ELT(fill_d, i);
-    }
-    if (TYPEOF(thisfill) != thistype) {
-      thisfill = PROTECT(coerceVector(thisfill, thistype)); nprotect++;
+    if(some_fill){
+      if (isNull(fill)) {
+	if (LOGICAL(is_agg)[0]) {
+	  thisfill = PROTECT(allocNAVector(thistype, 1)); nprotect++;
+	} else thisfill = VECTOR_ELT(fill_d, i);
+      }
+      if (TYPEOF(thisfill) != thistype) {
+	thisfill = PROTECT(coerceVector(thisfill, thistype)); nprotect++;
+      }
     }
     switch (thistype) {
     case INTSXP:
     case LGLSXP: {
-      const int *ithiscol = INTEGER(thiscol);
-      const int *ithisfill = INTEGER(thisfill);
       for (int j=0; j<ncols; ++j) {
         SET_VECTOR_ELT(ans, nlhs+j+i*ncols, target=allocVector(thistype, nrows) );
         int *itarget = INTEGER(target);
         copyMostAttrib(thiscol, target);
         for (int k=0; k<nrows; ++k) {
           int thisidx = idx[k*ncols + j];
-          itarget[k] = (thisidx == NA_INTEGER) ? ithisfill[0] : ithiscol[thisidx-1];
+          itarget[k] = (thisidx == NA_INTEGER) ? INTEGER(thisfill)[0] : INTEGER(thiscol)[thisidx-1];
         }
       }
     } break;
     case REALSXP: {
-      const double *dthiscol = REAL(thiscol);
-      const double *dthisfill = REAL(thisfill);
       for (int j=0; j<ncols; ++j) {
         SET_VECTOR_ELT(ans, nlhs+j+i*ncols, target=allocVector(thistype, nrows) );
         double *dtarget = REAL(target);
         copyMostAttrib(thiscol, target);
         for (int k=0; k<nrows; ++k) {
           int thisidx = idx[k*ncols + j];
-          dtarget[k] = (thisidx == NA_INTEGER) ? dthisfill[0] : dthiscol[thisidx-1];
+          dtarget[k] = (thisidx == NA_INTEGER) ? REAL(thisfill)[0] : REAL(thiscol)[thisidx-1];
         }
       }
     } break;
     case CPLXSXP: {
-      const Rcomplex *zthiscol = COMPLEX(thiscol);
-      const Rcomplex *zthisfill = COMPLEX(thisfill);
       for (int j=0; j<ncols; ++j) {
         SET_VECTOR_ELT(ans, nlhs+j+i*ncols, target=allocVector(thistype, nrows) );
         Rcomplex *ztarget = COMPLEX(target);
         copyMostAttrib(thiscol, target);
         for (int k=0; k<nrows; ++k) {
           int thisidx = idx[k*ncols + j];
-          ztarget[k] = (thisidx == NA_INTEGER) ? zthisfill[0] : zthiscol[thisidx-1];
+          ztarget[k] = (thisidx == NA_INTEGER) ? COMPLEX(thisfill)[0] : COMPLEX(thiscol)[thisidx-1];
         }
       }
     } break;
@@ -96,92 +100,3 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil
   UNPROTECT(1);
   return(ans);
 }
-
-// commenting all unused functions, but not deleting it, just in case
-
-// // internal functions that are not used anymore..
-
-// // # nocov start
-// // Note: all these functions below are internal functions and are designed specific to fcast.
-// SEXP zero_init(R_len_t n) {
-//   SEXP ans;
-//   if (n < 0) error(_("Input argument 'n' to 'zero_init' must be >= 0"));
-//   ans = PROTECT(allocVector(INTSXP, n));
-//   for (int i=0; i<n; ++i) INTEGER(ans)[i] = 0;
-//   UNPROTECT(1);
-//   return(ans);
-// }
-
-// SEXP cast_order(SEXP v, SEXP env) {
-//   R_len_t len;
-//   SEXP call, ans;
-//   if (TYPEOF(env) != ENVSXP) error(_("Argument 'env' to (data.table internals) 'cast_order' must be an environment"));
-//   if (TYPEOF(v) == VECSXP) len = length(VECTOR_ELT(v, 0));
-//   else len = length(v);
-//   PROTECT(call = lang2(install("forder"), v)); // TODO: save the 'eval' by calling directly the C-function.
-//   ans = PROTECT(eval(call, env));
-//   if (length(ans) == 0) { // forder returns integer(0) if already sorted
-//     UNPROTECT(1); // ans
-//     ans = PROTECT(seq_int(len, 1));
-//   }
-//   UNPROTECT(2);
-//   return(ans);
-// }
-
-// SEXP cross_join(SEXP s, SEXP env) {
-//   // Calling CJ is faster and don't have to worry about sorting or setting key.
-//   SEXP call, r;
-//   if (!isNewList(s) || isNull(s)) error(_("Argument 's' to 'cross_join' must be a list of length > 0"));
-//   PROTECT(call = lang3(install("do.call"), install("CJ"), s));
-//   r = eval(call, env);
-//   UNPROTECT(1);
-//   return(r);
-// }
-
-// SEXP diff_int(SEXP x, R_len_t n) {
-//   SEXP ans;
-//   if (TYPEOF(x) != INTSXP) error(_("Argument 'x' to 'diff_int' must be an integer vector"));
-//   ans = PROTECT(allocVector(INTSXP, length(x)));
-//   for (int i=1; i<length(x); ++i)
-//     INTEGER(ans)[i-1] = INTEGER(x)[i] - INTEGER(x)[i-1];
-//   INTEGER(ans)[length(x)-1] = n - INTEGER(x)[length(x)-1] + 1;
-//   UNPROTECT(1);
-//   return(ans);
-// }
-
-// SEXP intrep(SEXP x, SEXP len) {
-//   R_len_t l=0, k=0;
-//   SEXP ans;
-//   if (TYPEOF(x) != INTSXP || TYPEOF(len) != INTSXP) error(_("Arguments 'x' and 'len' to 'intrep' should both be integer vectors"));
-//   if (length(x) != length(len)) error(_("'x' and 'len' must be of same length"));
-//   // assuming both are of length >= 1
-//   for (int i=0; i<length(len); ++i)
-//     l += INTEGER(len)[i]; // assuming positive values for len. internal use - can't bother to check.
-//   ans = PROTECT(allocVector(INTSXP, l));
-//   for (int i=0; i<length(len); ++i) {
-//     for (int j=0; j<INTEGER(len)[i]; ++j) {
-//       INTEGER(ans)[k++] = INTEGER(x)[i];
-//     }
-//   }
-//   UNPROTECT(1); // ans
-//   return(ans);
-// }
-
-// // taken match_transform() from base:::unique.c and modified
-// SEXP coerce_to_char(SEXP s, SEXP env)
-// {
-//   if (OBJECT(s)) {
-//     if (inherits(s, "factor")) return asCharacterFactor(s);
-//     else if(getAttrib(s, R_ClassSymbol) != R_NilValue) {
-//       SEXP call, r;
-//       PROTECT(call = lang2(install("as.character"), s));
-//       r = eval(call, env);
-//       UNPROTECT(1);
-//       return r;
-//     }
-//   }
-//   /* else */
-//   return coerceVector(s, STRSXP);
-// }
-
-// // # nocov end