From cca5b7912aeee1826792d983f531f36736c9fb91 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 18 Aug 2022 21:57:34 +0200 Subject: [PATCH 01/20] frollmax exact, buggy fast, no fast adaptive --- NAMESPACE | 1 + R/froll.R | 3 + src/data.table.h | 6 ++ src/froll.c | 231 ++++++++++++++++++++++++++++++++++++++++++++ src/frollR.c | 11 ++- src/frolladaptive.c | 163 +++++++++++++++++++++++++++++++ 6 files changed, 414 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index c22782440a..dec4beee18 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ S3method(cube, data.table) S3method(rollup, data.table) export(frollmean) export(frollsum) +export(frollmax) export(frollapply) export(nafill) export(setnafill) diff --git a/R/froll.R b/R/froll.R index df901f0b84..41a816be04 100644 --- a/R/froll.R +++ b/R/froll.R @@ -12,6 +12,9 @@ frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "l frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive) } +frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { + froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive) +} frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center")) { FUN = match.fun(FUN) align = match.arg(align) diff --git a/src/data.table.h b/src/data.table.h index b966e86c08..4109c52e9a 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -203,6 +203,9 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); +void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); +void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); +void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose); // frolladaptive.c @@ -212,6 +215,9 @@ void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double f void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // frollR.c SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive); diff --git a/src/froll.c b/src/froll.c index 3ab7bd927a..e2d4c802d9 100644 --- a/src/froll.c +++ b/src/froll.c @@ -397,6 +397,237 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool } } +/* fast rolling max */ +void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { + if (nx < k) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__); + for (int i=0; idbl_v[i] = fill; + } + return; + } + double tic = 0; + if (verbose) + tic = omp_get_wtime(); + if (algo==0) { + frollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); + } else if (algo==1) { + frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); + } + if (ans->status < 3 && align < 1) { + int k_ = align==-1 ? k-1 : floor(k/2); + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_); + memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double)); + for (uint64_t i=nx-k_; idbl_v[i] = fill; + } + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); +} +void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { + /* + * fast online algorithm do single pass over elements keeping track of recent max and its index + * if index of max is within progressing window then it keep running single pass + * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop + * new max is used to continue outer single pass + * + * should scale well for bigger window size, may carry overhead for small window, needs benchmarking + */ + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm); + double w = R_NegInf; + uint64_t iw = 0; + bool truehasna = hasna>0; + if (!truehasna) { + int i; + for (i=0; i= w) { // >= rather than > because we track most recent maximum using iw + iw = i; + w = x[iw]; + } + ans->dbl_v[i] = fill; + } + if (x[i] >= w) { + iw = i; + w = x[iw]; + } else { + // ensure iw ok, case when all window was -Inf or NA? + } + ans->dbl_v[i] = w; + if (R_FINITE(w)) { + for (uint64_t i=k; i= w) { + iw = i; + w = x[iw]; + } else { + if (iw > i-k) { // max is still within window + // w and iw are still ok + } else { // max left the window, need to find max in this window + for (uint64_t ii=0, w=R_NegInf; i= w) { // what if that is never satisfied? test! + iw = i+ii-k; + w = x[iw]; + } + } + } + } + ans->dbl_v[i] = w; + } + if (!R_FINITE(w)) { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + w = 0.0; + truehasna = true; + } + } else { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__); + w = 0.0; + truehasna = true; + } + } + if (truehasna) { + int nc = 0; + int i; + double w = R_NegInf; // could be NA from !truehasna section above? + for (i=0; i= w) { + iw = i; + w = x[iw]; + } + } else { + nc++; + } + ans->dbl_v[i] = fill; + } + if (R_FINITE(x[i])) { + if (x[i] >= w) { + iw = i; + w = x[iw]; + } + } else { + nc++; + } + if (nc == 0) { + ans->dbl_v[i] = w; + } else if (nc == k) { + ans->dbl_v[i] = narm ? R_NegInf : NA_REAL; + } else { + ans->dbl_v[i] = narm ? w : NA_REAL; + } + for (uint64_t i=k; i= w) { + iw = i; + w = x[iw]; + } else { + if (iw > i-k) { // max is still within window + // w and iw are still ok + } else { // max left the window, need to find max in this window + for (uint64_t ii=0, w=R_NegInf; i= w) { // what if that is never satisfied? test! + iw = i+ii-k; + w = x[iw]; + } + } else { + nc++; // this needs heavy testing + } + } + } + } + ans->dbl_v[i] = w; + } else { + nc++; + } + if (!R_FINITE(x[i-k])) { + nc--; + } + if (nc == 0) { + ans->dbl_v[i] = w; + } else if (nc == k) { + ans->dbl_v[i] = narm ? R_NegInf : NA_REAL; + } else { + ans->dbl_v[i] = narm ? w : NA_REAL; + } + } + } +} +void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxExact", (uint64_t)nx, k, hasna, (int)narm); + for (int i=0; idbl_v[i] = fill; + } + bool truehasna = hasna>0; + if (!truehasna || !narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=k-1; i w) + w = x[i+j]; + } + if (R_FINITE(w)) { + ans->dbl_v[i] = w; + } else { + if (!narm) { + ans->dbl_v[i] = w; + } + truehasna = true; + } + } + if (truehasna) { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) { + if (narm) { + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + } else { + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__); + } + } + } + } + if (truehasna && narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=k-1; i w) { + w = x[i+j]; + } + } + if (nc < k) { + ans->dbl_v[i] = w; + } else { + ans->dbl_v[i] = R_NegInf; + } + } + } +} + /* fast rolling any R function * not plain C, not thread safe * R eval() allocates diff --git a/src/frollR.c b/src/frollR.c index 74cc7dd4ef..3d70b0aa4b 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -132,11 +132,13 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX dx[i] = REAL(VECTOR_ELT(x, i)); // assign source columns to C pointers } - enum {MEAN, SUM} sfun; + enum {MEAN, SUM, MAX} sfun; if (!strcmp(CHAR(STRING_ELT(fun, 0)), "mean")) { sfun = MEAN; } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "sum")) { sfun = SUM; + } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "max")) { + sfun = MAX; } else { error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "fun", "rolling"); // # nocov } @@ -161,6 +163,7 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX ialgo = 1; // exact = 1 else error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "algo", "rolling"); // # nocov + if (badaptive && ialgo == 0 && sfun==MAX) error("frollmax adaptive algo fast not yet implemented"); int* iik = NULL; if (!badaptive) { @@ -193,6 +196,12 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX else fadaptiverollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose); break; + case MAX : + if (!badaptive) + frollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose); + else + fadaptiverollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose); + break; default: error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov } diff --git a/src/frolladaptive.c b/src/frolladaptive.c index e005cef693..46d24acd19 100644 --- a/src/frolladaptive.c +++ b/src/frolladaptive.c @@ -364,3 +364,166 @@ void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi } } } + +/* fast adaptive rolling sum */ +void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { + double tic = 0; + if (verbose) + tic = omp_get_wtime(); + if (algo==0) { + fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); + } else if (algo==1) { + fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); +} +void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { + // TODO + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumFast", (uint64_t)nx, hasna, (int) narm); + bool truehasna = hasna>0; + long double w = 0.0; + double *cs = malloc(nx*sizeof(double)); + if (!cs) { // # nocov start + ans->status = 3; + snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cumsum"), __func__); + free(cs); + return; + } // # nocov end + if (!truehasna) { + for (uint64_t i=0; idbl_v[i] = cs[i]; + } else if (i+1 > k[i]) { + ans->dbl_v[i] = cs[i]-cs[i-k[i]]; + } else { + ans->dbl_v[i] = fill; + } + } + } else { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + w = 0.0; + truehasna = true; + } + } + if (truehasna) { + uint64_t nc = 0; + uint64_t *cn = malloc(nx*sizeof(uint64_t)); + if (!cn) { // # nocov start + ans->status = 3; + snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cum NA counter"), __func__); + free(cs); + free(cn); + return; + } // # nocov end + for (uint64_t i=0; idbl_v[i] = fill; + } else if (!narm) { + if (i+1 == k[i]) { + ans->dbl_v[i] = cn[i]>0 ? NA_REAL : cs[i]; + } else if (i+1 > k[i]) { + ans->dbl_v[i] = (cn[i] - cn[i-k[i]])>0 ? NA_REAL : cs[i]-cs[i-k[i]]; + } + } else if (i+1 == k[i]) { + int thisk = k[i] - ((int) cn[i]); + ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i]; + } else if (i+1 > k[i]) { + int thisk = k[i] - ((int) (cn[i] - cn[i-k[i]])); + ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i]-cs[i-k[i]]; + } + } + free(cn); + } + free(cs); +} +void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm); + bool truehasna = hasna>0; + if (!truehasna || !narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; + } else { + double w = R_NegInf; + for (int j=-k[i]+1; j<=0; j++) { + // should be used with setDTthreads(1) + //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w); + if (x[i+j] > w) + w = x[i+j]; + } + if (R_FINITE(w)) { + ans->dbl_v[i] = w; + } else { + if (!narm) { + ans->dbl_v[i] = w; + } + truehasna = true; + } + } + } + if (truehasna) { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) { + if (narm) { + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + } else { + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__); + } + } + } + } + if (truehasna && narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; + } else { + double w = R_NegInf; + int nc = 0; + for (int j=-k[i]+1; j<=0; j++) { + if (ISNAN(x[i+j])) { + nc++; + } else if (x[i+j] > w) { + w = x[i+j]; + } + } + if (nc < k[i]) { + ans->dbl_v[i] = w; + } else { + ans->dbl_v[i] = R_NegInf; + } + } + } + } +} From 14555b2b3ef3498242110bfd64c3fc8570f4f7aa Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 19 Aug 2022 12:37:39 +0200 Subject: [PATCH 02/20] frollmax fast fixing bugs --- src/froll.c | 97 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 29 deletions(-) diff --git a/src/froll.c b/src/froll.c index e2d4c802d9..39e7194c32 100644 --- a/src/froll.c +++ b/src/froll.c @@ -427,6 +427,30 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int if (verbose) snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); } +inline void windowmax(double *x, uint64_t o, int k, double *w, uint64_t *iw) { + for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); + if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test! + iw[0] = o+i-k+1; + w[0] = x[iw[0]]; + } + } +} +inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, double *w, uint64_t *iw) { + for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); + if (R_FINITE(x[o+i-k+1])) { + if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test! + iw[0] = o+i-k+1; + w[0] = x[iw[0]]; + } + } else if (narm) { + nc[0]++; + } else { + w[0] = NA_REAL; + } + } +} void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { /* * fast online algorithm do single pass over elements keeping track of recent max and its index @@ -438,52 +462,71 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n */ if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm); - double w = R_NegInf; - uint64_t iw = 0; + double w = R_NegInf; // window max + uint64_t cmax = 0; // counter of nested loops for verbose + uint64_t iw = 0; // index of window max bool truehasna = hasna>0; if (!truehasna) { + bool hadna = false; int i; for (i=0; i= w) { // >= rather than > because we track most recent maximum using iw iw = i; w = x[iw]; } ans->dbl_v[i] = fill; } - if (x[i] >= w) { - iw = i; - w = x[iw]; + if (!hadna && !R_FINITE(x[i])) { + if (x[i] >= w) { + iw = i; + w = x[iw]; + } else { + // ensure iw ok, case when all window was -Inf or NA? + } + ans->dbl_v[i] = w; } else { - // ensure iw ok, case when all window was -Inf or NA? + hadna = true; } - ans->dbl_v[i] = w; - if (R_FINITE(w)) { + if (!hadna) { for (uint64_t i=k; i= w) { + //Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw); iw = i; w = x[iw]; } else { if (iw > i-k) { // max is still within window // w and iw are still ok + //Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]); } else { // max left the window, need to find max in this window - for (uint64_t ii=0, w=R_NegInf; i= w) { // what if that is never satisfied? test! - iw = i+ii-k; - w = x[iw]; - } - } + //Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k); + w = R_NegInf; + iw = i-k; + windowmax(x, i, k, &w, &iw); + //Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw); + cmax++; } } ans->dbl_v[i] = w; } - if (!R_FINITE(w)) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax); + if (hadna) { if (hasna==-1) { ans->status = 2; snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); } if (verbose) snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); - w = 0.0; + w = R_NegInf; + cmax = 0; truehasna = true; } } else { @@ -493,14 +536,14 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n } if (verbose) snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__); - w = 0.0; + w = R_NegInf; + cmax = 0; truehasna = true; } } if (truehasna) { int nc = 0; int i; - double w = R_NegInf; // could be NA from !truehasna section above? for (i=0; i= w) { @@ -536,17 +579,11 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n if (iw > i-k) { // max is still within window // w and iw are still ok } else { // max left the window, need to find max in this window - for (uint64_t ii=0, w=R_NegInf; i= w) { // what if that is never satisfied? test! - iw = i+ii-k; - w = x[iw]; - } - } else { - nc++; // this needs heavy testing - } - } + w = R_NegInf; + iw = i-k; + windowmaxnarm(x, i, k, narm, &nc, &w, &iw); + //Rprintf("iteration %d, windowmaxnarm found new max %f at %d\n", i, w, iw); + cmax++; } } ans->dbl_v[i] = w; @@ -564,6 +601,8 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n ans->dbl_v[i] = narm ? w : NA_REAL; } } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: windowmaxnarm called %"PRIu64" time(s)\n"), __func__, cmax); } } void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { From 437b928801f3509e5c02949caf545d63ef68e4a2 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 19 Aug 2022 12:39:38 +0200 Subject: [PATCH 03/20] frollmax man to fix CRAN check --- man/froll.Rd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/man/froll.Rd b/man/froll.Rd index 090b397a90..143c585c1d 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -7,7 +7,9 @@ \alias{rollmean} \alias{frollmean} \alias{rollsum} +\alias{rollmax} \alias{frollsum} +\alias{frollmax} \alias{rollapply} \alias{frollapply} \title{Rolling functions} @@ -19,6 +21,8 @@ frollmean(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) frollsum(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) +frollmax(x, n, fill=NA, algo=c("fast","exact"), + align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) } \arguments{ From 2fd0faf12b9c6e08caf0099c67e559177292f08d Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 19 Aug 2022 15:05:41 +0200 Subject: [PATCH 04/20] frollmax fast adaptive non NA, dev --- src/froll.c | 15 +++--- src/frollR.c | 1 - src/frolladaptive.c | 121 +++++++++++++++++++++----------------------- 3 files changed, 65 insertions(+), 72 deletions(-) diff --git a/src/froll.c b/src/froll.c index 39e7194c32..3b900077f8 100644 --- a/src/froll.c +++ b/src/froll.c @@ -467,11 +467,10 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n uint64_t iw = 0; // index of window max bool truehasna = hasna>0; if (!truehasna) { - bool hadna = false; int i; for (i=0; i= w) { // >= rather than > because we track most recent maximum using iw @@ -480,7 +479,7 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n } ans->dbl_v[i] = fill; } - if (!hadna && !R_FINITE(x[i])) { + if (!truehasna && !R_FINITE(x[i])) { if (x[i] >= w) { iw = i; w = x[iw]; @@ -489,12 +488,12 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n } ans->dbl_v[i] = w; } else { - hadna = true; + truehasna = true; } - if (!hadna) { + if (!truehasna) { for (uint64_t i=k; i= w) { @@ -518,7 +517,7 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n } if (verbose) snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax); - if (hadna) { + if (truehasna) { if (hasna==-1) { ans->status = 2; snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); @@ -527,7 +526,6 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); w = R_NegInf; cmax = 0; - truehasna = true; } } else { if (hasna==-1) { @@ -538,7 +536,6 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__); w = R_NegInf; cmax = 0; - truehasna = true; } } if (truehasna) { diff --git a/src/frollR.c b/src/frollR.c index 3d70b0aa4b..91e988c434 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -163,7 +163,6 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX ialgo = 1; // exact = 1 else error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "algo", "rolling"); // # nocov - if (badaptive && ialgo == 0 && sfun==MAX) error("frollmax adaptive algo fast not yet implemented"); int* iik = NULL; if (!badaptive) { diff --git a/src/frolladaptive.c b/src/frolladaptive.c index 46d24acd19..eb07dbfc74 100644 --- a/src/frolladaptive.c +++ b/src/frolladaptive.c @@ -365,7 +365,7 @@ void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi } } -/* fast adaptive rolling sum */ +/* fast adaptive rolling max */ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { double tic = 0; if (verbose) @@ -378,86 +378,83 @@ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int if (verbose) snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); } +inline void windowadaptivemax(double *x, uint64_t o, int *k, double *w, uint64_t *iw) { + for (int i=0; i= w[0]: x[%d+%d-%d+1] >= w[0]: %f >= %f: %d\n", + i, o, x[o], o, i, k[o+i], x[o+i-k[o+i]+1], w[0], x[o+i-k[o+i]+1] >= w[0]); + if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test! + iw[0] = o+i-k[o+i]+1; + w[0] = x[iw[0]]; + } + } +} +inline void windowadaptivemaxnarm(double *x, uint64_t o, int *k, bool narm, int *nc, double *w, uint64_t *iw) { + for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); + if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test! + iw[0] = o+i-k[o+i]+1; + w[0] = x[iw[0]]; + } + } +} void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { - // TODO if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumFast", (uint64_t)nx, hasna, (int) narm); + snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxFast", (uint64_t)nx, hasna, (int) narm); + double w = R_NegInf; // window max + uint64_t cmax = 0; // counter of nested loops for verbose + uint64_t iw = 0; // index of window max bool truehasna = hasna>0; - long double w = 0.0; - double *cs = malloc(nx*sizeof(double)); - if (!cs) { // # nocov start - ans->status = 3; - snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cumsum"), __func__); - free(cs); - return; - } // # nocov end if (!truehasna) { for (uint64_t i=0; idbl_v[i] = cs[i]; - } else if (i+1 > k[i]) { - ans->dbl_v[i] = cs[i]-cs[i-k[i]]; + if (!R_FINITE(x[i])) { + truehasna = true; + } else { + if (x[i] >= w) { + Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw); + iw = i; + w = x[iw]; } else { + //Rprintf("iteration %d, test if (iw > i-k[i]): (%d > %d-%d): %d\n",i, iw, i, k[i], iw > i-k[i]); + //Rprintf("i-k[i] < 0: %d-k[%d] < 0: %d-%d < 0: %d < 0: %d\n", i, i, i, k[i], i-k[i], i-k[i] < 0); + //Rprintf("i < k[i]: %d\n", i < k[i]); + if (i < k[i] || iw > i-k[i]) { // max is still within window + Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]); + } else { // max left the window, need to find max in this window + Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k[i]); + w = R_NegInf; + iw = i-k[i]; + windowadaptivemax(x, i, k, &w, &iw); + Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw); + cmax++; + } + } + if (i+1 < k[i]) { + Rprintf("iteration %d, window size %d too big, skip\n", i, k[i]); ans->dbl_v[i] = fill; + } else { + ans->dbl_v[i] = w; } } - } else { + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax); + if (truehasna) { if (hasna==-1) { ans->status = 2; snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); } if (verbose) snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); - w = 0.0; - truehasna = true; + w = R_NegInf; + cmax = 0; } } if (truehasna) { - uint64_t nc = 0; - uint64_t *cn = malloc(nx*sizeof(uint64_t)); - if (!cn) { // # nocov start - ans->status = 3; - snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cum NA counter"), __func__); - free(cs); - free(cn); - return; - } // # nocov end - for (uint64_t i=0; idbl_v[i] = fill; - } else if (!narm) { - if (i+1 == k[i]) { - ans->dbl_v[i] = cn[i]>0 ? NA_REAL : cs[i]; - } else if (i+1 > k[i]) { - ans->dbl_v[i] = (cn[i] - cn[i-k[i]])>0 ? NA_REAL : cs[i]-cs[i-k[i]]; - } - } else if (i+1 == k[i]) { - int thisk = k[i] - ((int) cn[i]); - ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i]; - } else if (i+1 > k[i]) { - int thisk = k[i] - ((int) (cn[i] - cn[i-k[i]])); - ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i]-cs[i-k[i]]; - } - } - free(cn); + ans->status = 3; + snprintf(ans->message[3], 500, _("%s: frollmax adaptive algo='fast' having NAs not yet implemented, use algo='exact'"), __func__); + return; + // TODO } - free(cs); } void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) From 6c3201af5eee80203184421a6abb7f938fa512c8 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sun, 21 Aug 2022 11:26:55 +0200 Subject: [PATCH 05/20] froll docs, adaptive left --- R/froll.R | 23 ++++++++++- inst/tests/froll.Rraw | 4 +- man/froll.Rd | 89 +++++++++++++++++++++++++++++-------------- src/froll.c | 19 +++++---- src/frollR.c | 4 +- 5 files changed, 96 insertions(+), 43 deletions(-) diff --git a/R/froll.R b/R/froll.R index 41a816be04..be69c6f457 100644 --- a/R/froll.R +++ b/R/froll.R @@ -2,8 +2,25 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", " stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun)) algo = match.arg(algo) align = match.arg(align) + leftadaptive = isTRUE(adaptive) && align=="left" ## support for left added in #5441 + if (leftadaptive) { + rev2 = function(x) if (is.list(x)) sapply(x, rev, simplify=FALSE) else rev(x) + verbose = getOption("datatable.verbose") + if (verbose) + cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n") + ## TODO test atomic x but list of lists n (multiple windows)! + x = rev2(x) + n = rev2(n) + align = "right" + } ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, hasNA, adaptive) - ans + if (!leftadaptive) + ans + else { + if (verbose) + cat("froll: adaptive=TRUE && align='left' post-processing from align='right'\n") + rev2(ans) + } } frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { @@ -15,9 +32,11 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "lef frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive) } -frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center")) { +frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive) { FUN = match.fun(FUN) align = match.arg(align) + if (!missing(adaptive)) + stopf("frollapply does not support 'adaptive' argument") rho = new.env() ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho) ans diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index f6a4f96a80..54c67e5628 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -563,8 +563,8 @@ if (FALSE) { #### adaptive limitations test(6000.145, frollmean(1:2, 1:2, adaptive=TRUE, align="right"), c(1, 1.5)) -test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align argument different than 'right' is not implemented") -test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented") +test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align 'center' is not implemented") +##test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented") ## support added in #5441, TODO add tests test(6000.148, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately") #### adaptive exact diff --git a/man/froll.Rd b/man/froll.Rd index 143c585c1d..800709a408 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -23,7 +23,7 @@ frollsum(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) frollmax(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) -frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) +frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adaptive) } \arguments{ \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. } @@ -49,36 +49,65 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) conveniently within \code{data.table} syntax. Argument \code{n} allows multiple values to apply rolling functions on - multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list. - Each list element must be integer vector of window sizes corresponding - to every single observation in each column; see Examples. + multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list, + see \emph{Adaptive rolling functions} section below for details. - When \code{algo="fast"} an \emph{"on-line"} algorithm is used, and - all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. - Setting \code{algo="exact"} will make rolling functions to use a more - computationally-intensive algorithm that suffers less from floating point - rounding error (the same consideration applies to \code{\link[base]{mean}}). - \code{algo="exact"} also handles \code{NaN, +Inf, -Inf} consistently to - base R. In case of some functions (like \emph{mean}), it will additionally + When multiple columns or multiple windows width are provided, then they + are run in parallel. The exception is for \code{algo="exact"}, which runs in + parallel already. +} +\section{\code{hasNA} argument}{ + \code{hasNA} can be used to speed up processing in cases when it is known + if \code{x} contains \code{NA, NaN, +Inf, -Inf}. + \itemize{ + \item{ Default \code{hasNA=NA} will use faster NA inaware implementation, + but when NAs are detected it will re-run NA aware implementation. } + \item{ \code{hasNA=TRUE} will use NA aware implementation straightaway. } + \item{ \code{hasNA=FALSE} will use faster NA inaware implementation. + Then depending on the rolling function it will either + \itemize{ + \item{ (\emph{mean, sum}) detect NAs, raise warning, re-run NA aware. } + \item{ (\emph{max}) not detect NAs and may silently produce incorrect + answer. }} + Therefore \code{hasNA=FALSE} should be used with care. + } + } +} +\section{Implementation}{ + \itemize{ + \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and + all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. } + \item{ \code{algo="exact"} will make rolling functions to use a more + computationally-intensive algorithm. For each observation from input vector + it will compute a function on a window from scratch (complexity \eqn{O(n^2)}). + Depeneding on the function, this algorithm may suffers less from + floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}). + Algorithm also handles \code{NaN, +Inf, -Inf} consistently to + base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA}is \code{FALSE} + but NAs are present. In case of some functions (like \emph{mean}), it will additionally make extra pass to perform floating point error correction. Error corrections might not be truly exact on some platforms (like Windows) - when using multiple threads. - + when using multiple threads. } + } +} +\section{Adaptive rolling functions}{ Adaptive rolling functions are a special case where each - observation has its own corresponding rolling window width. Due to the logic - of adaptive rolling functions, the following restrictions apply: + observation has its own corresponding rolling window width. \code{n} + argument must be a list, then each list element must be an integer vector + of window sizes corresponding to every single observation in each column; + see Examples. Due to the logic or implementation of adaptive rolling + functions, the following restrictions apply: \itemize{ - \item{ \code{align} only \code{"right"}. } + \item{ \code{align} does not support \code{"center"}. } \item{ if list of vectors is passed to \code{x}, then all vectors within it must have equal length. } + \item{ functionality in not suported in \code{frollapply}. } } - - When multiple columns or multiple windows width are provided, then they - are run in parallel. The exception is for \code{algo="exact"}, which runs in - parallel already. - +} +\section{\code{frollapply}}{ \code{frollapply} computes rolling aggregate on arbitrary R functions. - The input \code{x} (first argument) to the function \code{FUN} + \code{adaptive} argument is not supported. The input + \code{x} (first argument) to the function \code{FUN} is coerced to \emph{numeric} beforehand and \code{FUN} has to return a scalar \emph{numeric} value. Checks for that are made only during the first iteration when \code{FUN} is evaluated. Edge cases can be @@ -88,11 +117,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) because there is no thread-safe API to R's C \code{eval}. Nevertheless we've seen the computation speed up vis-a-vis versions implemented in base R. } -\value{ - A list except when the input is a \code{vector} and - \code{length(n)==1} in which case a \code{vector} is returned. -} -\note{ +\section{\code{zoo} package users notice}{ Users coming from most popular package for rolling functions \code{zoo} might expect following differences in \code{data.table} implementation. @@ -111,9 +136,15 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of length equal to \code{nrow(x)}, or list of such vectors. } \item{ \code{partial} window feature is not supported, although it can - be accomplished by using \code{adaptive=TRUE}, see examples. \code{NA} is always returned for incomplete windows. } + be accomplished by using \code{adaptive=TRUE}, see examples. + \code{NA} is always returned for incomplete windows. } } - +} +\value{ + A list except when the input is a \code{vector} and + \code{length(n)==1} in which case a \code{vector} is returned. +} +\note{ Be aware that rolling functions operates on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, one has to ensure that there are no gaps in diff --git a/src/froll.c b/src/froll.c index 3b900077f8..cc4836c06b 100644 --- a/src/froll.c +++ b/src/froll.c @@ -451,15 +451,14 @@ inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, doub } } } +/* fast rolling max - fast + * fast online algorithm do single pass over elements keeping track of recent max and its index + * if index of max is within progressing window then it keep running single pass + * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop + * new max is used to continue outer single pass + * should scale well for bigger window size, may carry overhead for small window, needs benchmarking + */ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { - /* - * fast online algorithm do single pass over elements keeping track of recent max and its index - * if index of max is within progressing window then it keep running single pass - * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop - * new max is used to continue outer single pass - * - * should scale well for bigger window size, may carry overhead for small window, needs benchmarking - */ if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm); double w = R_NegInf; // window max @@ -602,6 +601,10 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n snprintf(end(ans->message[0]), 500, _("%s: windowmaxnarm called %"PRIu64" time(s)\n"), __func__, cmax); } } +/* fast rolling max - exact + * for each observation in x compute max in window from scratch + * no proper support for NAs yet + */ void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxExact", (uint64_t)nx, k, hasna, (int)narm); diff --git a/src/frollR.c b/src/frollR.c index 91e988c434..ac138f220c 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -108,8 +108,8 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX else error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "align", "rolling"); // # nocov - if (badaptive && ialign!=1) - error(_("using adaptive TRUE and align argument different than 'right' is not implemented")); + if (badaptive && ialign==0) // support for left added in #5441 + error(_("using adaptive TRUE and align 'center' is not implemented")); SEXP ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++; // allocate list to keep results if (verbose) From 4a6f06364f7e6d29f26cd054a31411178bb790f1 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sun, 21 Aug 2022 11:48:57 +0200 Subject: [PATCH 06/20] no frollmax fast adaptive --- man/froll.Rd | 9 +++-- src/data.table.h | 2 +- src/frolladaptive.c | 87 +++------------------------------------------ 3 files changed, 13 insertions(+), 85 deletions(-) diff --git a/man/froll.Rd b/man/froll.Rd index 800709a408..be78b26907 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -54,7 +54,8 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti When multiple columns or multiple windows width are provided, then they are run in parallel. The exception is for \code{algo="exact"}, which runs in - parallel already. + parallel already. By default data.table uses only half of available CPUs, + see \code{\link{setDTthreads}} for details. } \section{\code{hasNA} argument}{ \code{hasNA} can be used to speed up processing in cases when it is known @@ -76,7 +77,11 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti \section{Implementation}{ \itemize{ \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and - all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. } + all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. Not all functions + have \emph{fast} implementation available. As of now + \emph{max} and \code{adaptive=TRUE} does not have, therefore it will + automatically fall back to \emph{exact} implementation. \code{datatable.verbose} + option can be used to check that. } \item{ \code{algo="exact"} will make rolling functions to use a more computationally-intensive algorithm. For each observation from input vector it will compute a function on a window from scratch (complexity \eqn{O(n^2)}). diff --git a/src/data.table.h b/src/data.table.h index 4109c52e9a..4f74f1e198 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -216,7 +216,7 @@ void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); -void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // frollR.c diff --git a/src/frolladaptive.c b/src/frolladaptive.c index eb07dbfc74..e4e1983f94 100644 --- a/src/frolladaptive.c +++ b/src/frolladaptive.c @@ -370,92 +370,15 @@ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int double tic = 0; if (verbose) tic = omp_get_wtime(); - if (algo==0) { - fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); - } else if (algo==1) { - fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); + if (algo==0 && verbose) { + //fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); // fadaptiverollmaxFast does not exists as of now + snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); } + fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); if (verbose) snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); } -inline void windowadaptivemax(double *x, uint64_t o, int *k, double *w, uint64_t *iw) { - for (int i=0; i= w[0]: x[%d+%d-%d+1] >= w[0]: %f >= %f: %d\n", - i, o, x[o], o, i, k[o+i], x[o+i-k[o+i]+1], w[0], x[o+i-k[o+i]+1] >= w[0]); - if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test! - iw[0] = o+i-k[o+i]+1; - w[0] = x[iw[0]]; - } - } -} -inline void windowadaptivemaxnarm(double *x, uint64_t o, int *k, bool narm, int *nc, double *w, uint64_t *iw) { - for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); - if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test! - iw[0] = o+i-k[o+i]+1; - w[0] = x[iw[0]]; - } - } -} -void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxFast", (uint64_t)nx, hasna, (int) narm); - double w = R_NegInf; // window max - uint64_t cmax = 0; // counter of nested loops for verbose - uint64_t iw = 0; // index of window max - bool truehasna = hasna>0; - if (!truehasna) { - for (uint64_t i=0; i= w) { - Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw); - iw = i; - w = x[iw]; - } else { - //Rprintf("iteration %d, test if (iw > i-k[i]): (%d > %d-%d): %d\n",i, iw, i, k[i], iw > i-k[i]); - //Rprintf("i-k[i] < 0: %d-k[%d] < 0: %d-%d < 0: %d < 0: %d\n", i, i, i, k[i], i-k[i], i-k[i] < 0); - //Rprintf("i < k[i]: %d\n", i < k[i]); - if (i < k[i] || iw > i-k[i]) { // max is still within window - Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]); - } else { // max left the window, need to find max in this window - Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k[i]); - w = R_NegInf; - iw = i-k[i]; - windowadaptivemax(x, i, k, &w, &iw); - Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw); - cmax++; - } - } - if (i+1 < k[i]) { - Rprintf("iteration %d, window size %d too big, skip\n", i, k[i]); - ans->dbl_v[i] = fill; - } else { - ans->dbl_v[i] = w; - } - } - } - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax); - if (truehasna) { - if (hasna==-1) { - ans->status = 2; - snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); - } - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); - w = R_NegInf; - cmax = 0; - } - } - if (truehasna) { - ans->status = 3; - snprintf(ans->message[3], 500, _("%s: frollmax adaptive algo='fast' having NAs not yet implemented, use algo='exact'"), __func__); - return; - // TODO - } -} +//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm); From feef63d923831c9f6874267d0c47da478c8b35be Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sun, 21 Aug 2022 18:41:45 +0200 Subject: [PATCH 07/20] frollmax adaptive exact NAs handling --- src/froll.c | 7 ++-- src/frollR.c | 2 +- src/frolladaptive.c | 82 +++++++++++++++++++++------------------------ 3 files changed, 43 insertions(+), 48 deletions(-) diff --git a/src/froll.c b/src/froll.c index cc4836c06b..8484b09d43 100644 --- a/src/froll.c +++ b/src/froll.c @@ -453,10 +453,11 @@ inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, doub } /* fast rolling max - fast * fast online algorithm do single pass over elements keeping track of recent max and its index - * if index of max is within progressing window then it keep running single pass - * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop - * new max is used to continue outer single pass + * if index of max is within progressing window then it keeps running single pass + * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop on current complete window + * new max is used to continue outer single pass as long as new max index is not leaving the running window * should scale well for bigger window size, may carry overhead for small window, needs benchmarking + * TODO NA handling */ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { if (verbose) diff --git a/src/frollR.c b/src/frollR.c index ac138f220c..ee4d7fd391 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -154,7 +154,7 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX int ihasna = // plain C tri-state boolean as integer LOGICAL(hasna)[0]==NA_LOGICAL ? 0 : // hasna NA, default, no info about NA LOGICAL(hasna)[0]==TRUE ? 1 : // hasna TRUE, might be some NAs - -1; // hasna FALSE, there should be no NAs + -1; // hasna FALSE, there should be no NAs // or there must be no NAs for rollmax #5441 unsigned int ialgo; // decode algo to integer if (!strcmp(CHAR(STRING_ELT(algo, 0)), "fast")) diff --git a/src/frolladaptive.c b/src/frolladaptive.c index e4e1983f94..f0a40fb9ca 100644 --- a/src/frolladaptive.c +++ b/src/frolladaptive.c @@ -379,69 +379,63 @@ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); } //void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now +/* fast adaptive rolling max - exact + * for hasNA=FALSE it will not detect if any NAs were in the input, therefore could produce incorrect result, well documented + */ void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm); - bool truehasna = hasna>0; - if (!truehasna || !narm) { + if (hasna==-1) { // fastest we can get for adaptive max as there is no algo='fast', therefore we drop any NA checks when hasNA=FALSE #pragma omp parallel for num_threads(getDTthreads(nx, true)) for (uint64_t i=0; idbl_v[i] = fill; } else { double w = R_NegInf; - for (int j=-k[i]+1; j<=0; j++) { - // should be used with setDTthreads(1) - //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w); + for (int j=-k[i]+1; j<=0; j++) { //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w); if (x[i+j] > w) w = x[i+j]; } - if (R_FINITE(w)) { - ans->dbl_v[i] = w; - } else { - if (!narm) { - ans->dbl_v[i] = w; - } - truehasna = true; - } + ans->dbl_v[i] = w; } } - if (truehasna) { - if (hasna==-1) { - ans->status = 2; - snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); - } - if (verbose) { - if (narm) { - snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + } else { + if (narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; } else { - snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__); + int nc = 0; + double w = R_NegInf; + for (int j=-k[i]+1; j<=0; j++) { + if (ISNAN(x[i+j])) + nc++; + else if (x[i+j] > w) + w = x[i+j]; + } + if (nc < k[i]) + ans->dbl_v[i] = w; + else + ans->dbl_v[i] = R_NegInf; } } - } - } - if (truehasna && narm) { - #pragma omp parallel for num_threads(getDTthreads(nx, true)) - for (uint64_t i=0; idbl_v[i] = fill; - } else { - double w = R_NegInf; - int nc = 0; - for (int j=-k[i]+1; j<=0; j++) { - if (ISNAN(x[i+j])) { - nc++; - } else if (x[i+j] > w) { - w = x[i+j]; + } else { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; + } else { + double w = R_NegInf; + for (int j=-k[i]+1; j<=0; j++) { + if (ISNAN(x[i+j])) { + w = NA_REAL; + break; + } else if (x[i+j] > w) { + w = x[i+j]; + } } - } - if (nc < k[i]) { ans->dbl_v[i] = w; - } else { - ans->dbl_v[i] = R_NegInf; } } } From 63ea485ed24eb3c7ec75f3e57f0f0b4681b76165 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sun, 21 Aug 2022 19:40:06 +0200 Subject: [PATCH 08/20] PR summary in news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 4f4a2f417a..3fe0bb3b81 100644 --- a/NEWS.md +++ b/NEWS.md @@ -296,6 +296,8 @@ 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. +42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needed 1left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has observed to be up to 50 time faster than second fastest solution using `max` and grouping `by=.EACHI`. + ## BUG FIXES 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. From ae15520cb5820ba9a9228551ae25632ded1f3d1b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 8 Jan 2024 08:12:52 -0800 Subject: [PATCH 09/20] copy-edit changes from reviews Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- NEWS.md | 2 +- R/froll.R | 4 ++-- man/froll.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3fe0bb3b81..3af6d43b89 100644 --- a/NEWS.md +++ b/NEWS.md @@ -296,7 +296,7 @@ 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. -42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needed 1left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has observed to be up to 50 time faster than second fastest solution using `max` and grouping `by=.EACHI`. +42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has been observed to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`. ## BUG FIXES diff --git a/R/froll.R b/R/froll.R index be69c6f457..27085f99f8 100644 --- a/R/froll.R +++ b/R/froll.R @@ -7,7 +7,7 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", " rev2 = function(x) if (is.list(x)) sapply(x, rev, simplify=FALSE) else rev(x) verbose = getOption("datatable.verbose") if (verbose) - cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n") + catf("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n") ## TODO test atomic x but list of lists n (multiple windows)! x = rev2(x) n = rev2(n) @@ -18,7 +18,7 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", " ans else { if (verbose) - cat("froll: adaptive=TRUE && align='left' post-processing from align='right'\n") + catf("froll: adaptive=TRUE && align='left' post-processing from align='right'\n") rev2(ans) } } diff --git a/man/froll.Rd b/man/froll.Rd index be78b26907..6de62a9f24 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -52,7 +52,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list, see \emph{Adaptive rolling functions} section below for details. - When multiple columns or multiple windows width are provided, then they + When multiple columns or multiple window widths are provided, then they are run in parallel. The exception is for \code{algo="exact"}, which runs in parallel already. By default data.table uses only half of available CPUs, see \code{\link{setDTthreads}} for details. From c46a6fa2b694b93881335ac53227448cc6c918dd Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 9 Jan 2024 10:55:19 +0100 Subject: [PATCH 10/20] Apply suggestions from code review Co-authored-by: Michael Chirico Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- NEWS.md | 2 +- src/froll.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3af6d43b89..9497a69ef8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -296,7 +296,7 @@ 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. -42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has been observed to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`. +42. New window function `frollmax` computes the rolling maximum. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore we added this feature as well for all adaptive rolling functions. We measure adaptive `frollmax` to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`. ## BUG FIXES diff --git a/src/froll.c b/src/froll.c index 8484b09d43..550def9aec 100644 --- a/src/froll.c +++ b/src/froll.c @@ -401,7 +401,7 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { if (nx < k) { if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__); + snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all %f vector\n"), __func__, fill); for (int i=0; idbl_v[i] = fill; } @@ -427,7 +427,7 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int if (verbose) snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); } -inline void windowmax(double *x, uint64_t o, int k, double *w, uint64_t *iw) { +inline void windowmax(double *x, uint64_t o, int k, /* max of current window */ double *w, /* index of w within window */ uint64_t *iw) { for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test! @@ -436,7 +436,7 @@ inline void windowmax(double *x, uint64_t o, int k, double *w, uint64_t *iw) { } } } -inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, double *w, uint64_t *iw) { +inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, /* NA counter */ int *nc, double *w, uint64_t *iw) { for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); if (R_FINITE(x[o+i-k+1])) { From 72d452460aab7fe7465ea2a78fde636d116156ec Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 9 Jan 2024 10:57:45 +0100 Subject: [PATCH 11/20] comment requested by Michael --- src/froll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/froll.c b/src/froll.c index 550def9aec..48d36340c9 100644 --- a/src/froll.c +++ b/src/froll.c @@ -412,7 +412,7 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int tic = omp_get_wtime(); if (algo==0) { frollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); - } else if (algo==1) { + } else { // algo==1 frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); } if (ans->status < 3 && align < 1) { From 3a26a9945255a56eaedaf2bc06462228e4536add Mon Sep 17 00:00:00 2001 From: jangorecki Date: Tue, 9 Jan 2024 11:05:05 +0100 Subject: [PATCH 12/20] update NEWS file --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 772200053d..b2ee2ea71e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,8 @@ # 2: ``` +2. New window function `frollmax` computes the rolling maximum. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore we added this feature as well for all adaptive rolling functions. We measure adaptive `frollmax` to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`. + ## NOTES 1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1. @@ -313,8 +315,6 @@ 41. `tables()` is faster by default by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). `mb=` now accepts any function which accepts a `data.table` and returns a higher and better estimate of its size in bytes, albeit more slowly; e.g. `mb = utils::object.size`. -42. New window function `frollmax` computes the rolling maximum. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore we added this feature as well for all adaptive rolling functions. We measure adaptive `frollmax` to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`. - ## BUG FIXES 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. From 5eae9aae55f226930c7b7582656f3c59eb2cf833 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 9 Jan 2024 12:15:02 +0100 Subject: [PATCH 13/20] Apply suggestions from code review Co-authored-by: Michael Chirico --- man/froll.Rd | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/man/froll.Rd b/man/froll.Rd index 77f040710e..c969026304 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -54,21 +54,20 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti When multiple columns or multiple window widths are provided, then they are run in parallel. The exception is for \code{algo="exact"}, which runs in - parallel already. By default data.table uses only half of available CPUs, - see \code{\link{setDTthreads}} for details. + parallel already. See \code{\link{setDTthreads}} for defaults and further details on parallelism in data.table. } \section{\code{hasNA} argument}{ \code{hasNA} can be used to speed up processing in cases when it is known - if \code{x} contains \code{NA, NaN, +Inf, -Inf}. + whether \code{x} contains infinite values \code{NA, NaN, +Inf, -Inf}. \itemize{ - \item{ Default \code{hasNA=NA} will use faster NA inaware implementation, - but when NAs are detected it will re-run NA aware implementation. } - \item{ \code{hasNA=TRUE} will use NA aware implementation straightaway. } - \item{ \code{hasNA=FALSE} will use faster NA inaware implementation. + \item{ Default \code{hasNA=NA} will use faster \code{NA} agnostic implementation, + but when \code{NA}s are detected it will re-run \code{NA} aware implementation. } + \item{ \code{hasNA=TRUE} will use \code{NA} aware implementation straightaway. } + \item{ \code{hasNA=FALSE} will use faster \code{NA} agnostic implementation. Then depending on the rolling function it will either \itemize{ - \item{ (\emph{mean, sum}) detect NAs, raise warning, re-run NA aware. } - \item{ (\emph{max}) not detect NAs and may silently produce incorrect + \item{ (\emph{mean, sum}) detect \code{NA}s, raise warning, re-run \code{NA} aware. } + \item{ (\emph{max}) not detect \code{NA}s and may silently produce an incorrect answer. }} Therefore \code{hasNA=FALSE} should be used with care. } From 0773c44edac01b326e06ecbaffe2914f4103851d Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 9 Jan 2024 12:18:11 +0100 Subject: [PATCH 14/20] Apply suggestions from code review Co-authored-by: Michael Chirico --- man/froll.Rd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/man/froll.Rd b/man/froll.Rd index c969026304..c718bafa0b 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -78,16 +78,16 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. Not all functions have \emph{fast} implementation available. As of now - \emph{max} and \code{adaptive=TRUE} does not have, therefore it will + \emph{max} and \code{adaptive=TRUE} do not have it, therefore it will automatically fall back to \emph{exact} implementation. \code{datatable.verbose} option can be used to check that. } - \item{ \code{algo="exact"} will make rolling functions to use a more + \item{ \code{algo="exact"} will make rolling functions use a more computationally-intensive algorithm. For each observation from input vector it will compute a function on a window from scratch (complexity \eqn{O(n^2)}). - Depeneding on the function, this algorithm may suffers less from + Depending on the function, this algorithm may suffers less from floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}). Algorithm also handles \code{NaN, +Inf, -Inf} consistently to - base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA}is \code{FALSE} + base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA} is \code{FALSE} but NAs are present. In case of some functions (like \emph{mean}), it will additionally make extra pass to perform floating point error correction. Error corrections might not be truly exact on some platforms (like Windows) @@ -105,7 +105,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti \item \code{align} does not support \code{"center"}. \item if list of vectors is passed to \code{x}, then all vectors within it must have equal length. - \item functionality in not suported in \code{frollapply}. + \item functionality is not suported in \code{frollapply}. } } \section{\code{frollapply}}{ @@ -126,7 +126,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti \code{zoo} might expect following differences in \code{data.table} implementation. \itemize{ - \item rolling function will always return result of the same length + \item rolling functions will always return result of the same length as input. \item \code{fill} defaults to \code{NA}. \item \code{fill} accepts only constant values. It does not support From 45904a7c7d4b49775b4ef8f6b1c2d7b0967793f7 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 11 Jan 2024 14:47:14 +0100 Subject: [PATCH 15/20] add comment requested by Michael --- src/froll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/froll.c b/src/froll.c index 48d36340c9..1f5960f83d 100644 --- a/src/froll.c +++ b/src/froll.c @@ -587,7 +587,7 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n } else { nc++; } - if (!R_FINITE(x[i-k])) { + if (!R_FINITE(x[i-k])) { // value leaving rolling window is non-finite, decrement NF counter nc--; } if (nc == 0) { From 623f42b0f37986e57819f5933318b6a0c44372ad Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 11 Jan 2024 17:01:09 +0100 Subject: [PATCH 16/20] add comment about int iterator for loop over k-1 obs --- src/froll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/froll.c b/src/froll.c index 1f5960f83d..01a77936e2 100644 --- a/src/froll.c +++ b/src/froll.c @@ -49,7 +49,7 @@ void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool long double w = 0.0; // sliding window aggregate bool truehasna = hasna>0; // flag to re-run with NA support if NAs detected if (!truehasna) { - int i; // iterator declared here because it is being used after for loop + int i; // iterator declared here because it is being used after for loop, it is int32 rather than int64 because first loop over k-1 observations cannot exceed int32 range for (i=0; idbl_v[i] = fill; // answers are fill for partial window From 03af0e30f1a6a9e75f82b5827c1078f42db48e45 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 11 Jan 2024 17:06:36 +0100 Subject: [PATCH 17/20] extra comments --- src/froll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/froll.c b/src/froll.c index 01a77936e2..a6f80aa706 100644 --- a/src/froll.c +++ b/src/froll.c @@ -415,7 +415,7 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int } else { // algo==1 frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); } - if (ans->status < 3 && align < 1) { + if (ans->status < 3 && align < 1) { // non-error && align!="right" // see comment in frollmean function int k_ = align==-1 ? k-1 : floor(k/2); if (verbose) snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_); From 673ddc59fce3d73edf6ff444477b9c167a76db03 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 12 Jan 2024 14:16:43 +0100 Subject: [PATCH 18/20] Revert "extra comments" This reverts commit 03af0e30f1a6a9e75f82b5827c1078f42db48e45. --- src/froll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/froll.c b/src/froll.c index a6f80aa706..01a77936e2 100644 --- a/src/froll.c +++ b/src/froll.c @@ -415,7 +415,7 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int } else { // algo==1 frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); } - if (ans->status < 3 && align < 1) { // non-error && align!="right" // see comment in frollmean function + if (ans->status < 3 && align < 1) { int k_ = align==-1 ? k-1 : floor(k/2); if (verbose) snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_); From 393c8d13bfd3c4e8f6767d524c3849f039653004 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 12 Jan 2024 14:18:57 +0100 Subject: [PATCH 19/20] add comments to frollmax and frollsum --- src/froll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/froll.c b/src/froll.c index 01a77936e2..851942bf41 100644 --- a/src/froll.c +++ b/src/froll.c @@ -217,6 +217,7 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool } /* fast rolling sum */ +// see frollmean code for comments describing online implementations for rolling functions void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { if (nx < k) { if (verbose) @@ -398,6 +399,7 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool } /* fast rolling max */ +// ssee frollmean code for comments describing online implementations for rolling functions void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { if (nx < k) { if (verbose) From fb67e699dd8af64fddb7fa6fc1c3b288c0aab284 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 12 Jan 2024 14:21:10 +0100 Subject: [PATCH 20/20] typo fix --- src/froll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/froll.c b/src/froll.c index 851942bf41..1c0fa25371 100644 --- a/src/froll.c +++ b/src/froll.c @@ -399,7 +399,7 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool } /* fast rolling max */ -// ssee frollmean code for comments describing online implementations for rolling functions +// see frollmean code for comments describing online implementations for rolling functions void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { if (nx < k) { if (verbose)