Rdatatable · jangorecki · Jan 12, 2024 · Aug 18, 2022 · Aug 19, 2022 · Aug 19, 2022
diff --git a/NAMESPACE b/NAMESPACE
@@ -51,6 +51,7 @@ S3method(cube, data.table)
 S3method(rollup, data.table)
 export(frollmean)
 export(frollsum)
+export(frollmax)
 export(frollapply)
 export(nafill)
 export(setnafill)

diff --git a/NEWS.md b/NEWS.md
@@ -14,6 +14,8 @@
     # 2:
     ```
 
+2. New window function `frollmax` computes the rolling maximum. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore we added this feature as well for all adaptive rolling functions. We measure adaptive `frollmax` to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`.
+
 ## NOTES
 
 1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1.

diff --git a/R/froll.R b/R/froll.R
@@ -2,8 +2,25 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
   stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
   algo = match.arg(algo)
   align = match.arg(align)
+  leftadaptive = isTRUE(adaptive) && align=="left"  ## support for left added in #5441
+  if (leftadaptive) {
+    rev2 = function(x) if (is.list(x)) sapply(x, rev, simplify=FALSE) else rev(x)
+    verbose = getOption("datatable.verbose")
+    if (verbose)
+      catf("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
+    ## TODO test atomic x but list of lists n (multiple windows)!
+    x = rev2(x)
+    n = rev2(n)
+    align = "right"
+  }
   ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, hasNA, adaptive)
-  ans
+  if (!leftadaptive)
+    ans
+  else {
+    if (verbose)
+      catf("froll: adaptive=TRUE && align='left' post-processing from align='right'\n")
+    rev2(ans)
+  }
 }
 
 frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
@@ -12,9 +29,14 @@ frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "l
 frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
   froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive)
 }
-frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center")) {
+frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
+  froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive)
+}
+frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive) {
   FUN = match.fun(FUN)
   align = match.arg(align)
+  if (!missing(adaptive))
+    stopf("frollapply does not support 'adaptive' argument")
   rho = new.env()
   ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho)
   ans

diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
@@ -563,8 +563,8 @@ if (FALSE) {
 
 #### adaptive limitations
 test(6000.145, frollmean(1:2, 1:2, adaptive=TRUE, align="right"), c(1, 1.5))
-test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align argument different than 'right' is not implemented")
-test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented")
+test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align 'center' is not implemented")
+##test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented") ## support added in #5441, TODO add tests
 test(6000.148, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately")
 
 #### adaptive exact

diff --git a/man/froll.Rd b/man/froll.Rd
@@ -7,7 +7,9 @@
 \alias{rollmean}
 \alias{frollmean}
 \alias{rollsum}
+\alias{rollmax}
 \alias{frollsum}
+\alias{frollmax}
 \alias{rollapply}
 \alias{frollapply}
 \title{Rolling functions}
@@ -19,7 +21,9 @@ frollmean(x, n, fill=NA, algo=c("fast", "exact"),
           align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
 frollsum(x, n, fill=NA, algo=c("fast","exact"),
          align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
-frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
+frollmax(x, n, fill=NA, algo=c("fast","exact"),
+         align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
+frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adaptive)
 }
 \arguments{
   \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. }
@@ -45,36 +49,69 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
   conveniently within \code{data.table} syntax.
 
   Argument \code{n} allows multiple values to apply rolling functions on
-  multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list.
-  Each list element must be integer vector of window sizes corresponding
-  to every single observation in each column; see Examples.
+  multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list,
+  see \emph{Adaptive rolling functions} section below for details.
 
-  When \code{algo="fast"} an \emph{"on-line"} algorithm is used, and
-  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}.
-  Setting \code{algo="exact"} will make rolling functions to use a more
-  computationally-intensive algorithm that suffers less from floating point
-  rounding error (the same consideration applies to \code{\link[base]{mean}}).
-  \code{algo="exact"} also handles \code{NaN, +Inf, -Inf} consistently to
-  base R. In case of some functions (like \emph{mean}), it will additionally
+  When multiple columns or multiple window widths are provided, then they
+  are run in parallel. The exception is for \code{algo="exact"}, which runs in
+  parallel already. See \code{\link{setDTthreads}} for defaults and further details on parallelism in data.table.
+}
+\section{\code{hasNA} argument}{
+  \code{hasNA} can be used to speed up processing in cases when it is known
+  whether \code{x} contains non-finite values \code{NA, NaN, +Inf, -Inf}.
+  \itemize{
+    \item{ Default \code{hasNA=NA} will use faster \code{NA} agnostic implementation,
+    but when \code{NA}s are detected it will re-run \code{NA} aware implementation. }
+    \item{ \code{hasNA=TRUE} will use \code{NA} aware implementation straightaway. }
+    \item{ \code{hasNA=FALSE} will use faster \code{NA} agnostic implementation.
+    Then depending on the rolling function it will either
+    \itemize{
+      \item{ (\emph{mean, sum}) detect \code{NA}s, raise warning, re-run \code{NA} aware. }
+      \item{ (\emph{max}) not detect \code{NA}s and may silently produce an incorrect
+      answer. }}
+    Therefore \code{hasNA=FALSE} should be used with care.
+    }
+  }
+}
+\section{Implementation}{
+  \itemize{
+    \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and
+  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. Not all functions
+  have \emph{fast} implementation available. As of now
+  \emph{max} combined with \code{adaptive=TRUE} does not have it, therefore it will
+  automatically fall back to \emph{exact} implementation. \code{datatable.verbose}
+  option can be used to check that. }
+    \item{ \code{algo="exact"} will make rolling functions use a more
+  computationally-intensive algorithm. For each observation from input vector
+  it will compute a function on a window from scratch (complexity \eqn{O(n^2)}).
+  Depending on the function, this algorithm may suffer less from
+  floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}).
+  Algorithm also handles \code{NaN, +Inf, -Inf} consistently with
+  base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA} is \code{FALSE}
+  but NAs are present. In case of some functions (like \emph{mean}), it will additionally
   make extra pass to perform floating point error correction. Error
   corrections might not be truly exact on some platforms (like Windows)
-  when using multiple threads.
-
+  when using multiple threads. }
+  }
+}
+\section{Adaptive rolling functions}{
   Adaptive rolling functions are a special case where each
-  observation has its own corresponding rolling window width. Due to the logic
-  of adaptive rolling functions, the following restrictions apply:
+  observation has its own corresponding rolling window width. The \code{n}
+  argument must be a list, and each list element must be an integer vector
+  of window sizes corresponding to every single observation in each column;
+  see Examples. Due to the logic or implementation of adaptive rolling
+  functions, the following restrictions apply:
   \itemize{
-    \item \code{align} only \code{"right"}. 
+    \item \code{align} does not support \code{"center"}.
     \item if list of vectors is passed to \code{x}, then all
       vectors within it must have equal length.
+    \item functionality is not supported in \code{frollapply}.
   }
-
-  When multiple columns or multiple windows width are provided, then they
-  are run in parallel. The exception is for \code{algo="exact"}, which runs in
-  parallel already.
-
+}
+\section{\code{frollapply}}{
   \code{frollapply} computes rolling aggregate on arbitrary R functions.
-  The input \code{x} (first argument) to the function \code{FUN}
+  \code{adaptive} argument is not supported. The input
+  \code{x} (first argument) to the function \code{FUN}
   is coerced to \emph{numeric} beforehand and \code{FUN}
   has to return a scalar \emph{numeric} value. Checks for that are made only
   during the first iteration when \code{FUN} is evaluated. Edge cases can be
@@ -84,32 +121,34 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
   because there is no thread-safe API to R's C \code{eval}. Nevertheless we've
   seen the computation speed up vis-a-vis versions implemented in base R.
 }
-\value{
-  A list except when the input is a \code{vector} and
-  \code{length(n)==1} in which case a \code{vector} is returned.
-}
-\note{
+\section{\code{zoo} package users notice}{
   Users coming from most popular package for rolling functions
   \code{zoo} might expect following differences in \code{data.table}
   implementation.
   \itemize{
-    \item rolling function will always return result of the same length as input.
-    \item \code{fill} defaults to \code{NA}. 
+    \item rolling functions will always return result of the same length
+      as input.
+    \item \code{fill} defaults to \code{NA}.
     \item \code{fill} accepts only constant values. It does not support
       for \emph{na.locf} or other functions.
-    \item \code{align} defaults to \code{"right"}. 
+    \item \code{align} defaults to \code{"right"}.
     \item \code{na.rm} is respected, and other functions are not needed
       when input contains \code{NA}.
-    \item integers and logical are always coerced to double. 
+    \item integers and logical are always coerced to double.
     \item when \code{adaptive=FALSE} (default), then \code{n} must be a
       numeric vector. List is not accepted.
     \item when \code{adaptive=TRUE}, then \code{n} must be vector of
       length equal to \code{nrow(x)}, or list of such vectors.
     \item \code{partial} window feature is not supported, although it can
-      be accomplished by using \code{adaptive=TRUE}, see
-      examples. \code{NA} is always returned for incomplete windows. 
+      be accomplished by using \code{adaptive=TRUE}, see examples.
+      \code{NA} is always returned for incomplete windows.
   }
-
+}
+\value{
+  A list except when the input is a \code{vector} and
+  \code{length(n)==1} in which case a \code{vector} is returned.
+}
+\note{
   Be aware that rolling functions operates on the physical order of input.
   If the intent is to roll values in a vector by a logical window, for
   example an hour, or a day, one has to ensure that there are no gaps in

diff --git a/src/data.table.h b/src/data.table.h
@@ -206,6 +206,9 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
 void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
+void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
+void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
+void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
 
 // frolladaptive.c
@@ -215,6 +218,9 @@ void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double f
 void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exist as of now
+void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 
 // frollR.c
 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,8 @@ @@
         # 2:
         ```
+2. New window function `frollmax` computes the rolling maximum. Request came from @gpierard who needs left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore we added this feature as well for all adaptive rolling functions. We measure adaptive `frollmax` to be up to 50 times faster than the next fastest solution using `max` and grouping `by=.EACHI`.
     ## NOTES
 1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1.
@@ Expand Down @@