Merge branch 'master' into coerce-list

Rdatatable · Apr 10, 2024 · 658255b · 658255b
2 parents 243d173 + 585ec52
commit 658255b
Show file tree

Hide file tree

Showing 13 changed files with 428 additions and 283 deletions.
diff --git a/.dev/cc.R b/.dev/cc.R
@@ -31,7 +31,27 @@ sourceDir = function(path=getwd(), trace = TRUE, ...) {
   if(trace) cat("\n")
 }
 
-cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH"), CC="gcc", quiet=FALSE) {
+# NB: since we only import from default packages, this is rarely needed, but useful for truly minimal dev environments (#6056)
+sourceImports = function(path=getwd(), quiet=FALSE) {
+  nsFile = file.path(path, "NAMESPACE")
+  if (!file.exists(nsFile)) {
+    if (!quiet) warning("No NAMESPACE file found, required to guarantee imports resolve correctly")
+    return(invisible())
+  }
+  nsParsedImports = parseNamespaceFile(basename(path), "..")$imports # weird signature to this function
+  if (!quiet && length(nsParsedImports)) cat(sprintf("Ensuring objects from %d import entries in NAMESPACE resolve correctly\n", length(nsParsedImports)))
+  for (ii in seq_along(nsParsedImports)) {
+    entry = nsParsedImports[[ii]]
+    # getNamespaceExports includes weird objects but that's intentional, consider evalq(.__C__VIRTUAL, asNamespace("Rcpp")) due to import(methods) in that NAMESPACE
+    imported = if (length(entry) == 1L) getNamespaceExports(entry) else entry[[2L]]
+    # Assign directly to better imitate actual namespace imports. Earlier tried to require(include.only=) these objects, but R doesn't allow multiple such require, meaning we can't add more objects later in tests, see:
+    #   https://stat.ethz.ch/pipermail/r-devel/2024-April/083319.html
+    for (import in imported) assign(import, getExportedValue(entry[[1L]], import), .GlobalEnv)
+  }
+  return(invisible())
+}
+
+cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH", unset="."), CC="gcc", quiet=FALSE) {
   if (!missing(cc_dir)) {
     warning("'cc_dir' arg is deprecated, use 'path' argument or 'PROJ_PATH' env var instead")
     path = cc_dir
@@ -81,6 +101,7 @@ cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys
     .GlobalEnv[[Call$name]] = Call$address
   for (Extern in xx$.External)
     .GlobalEnv[[Extern$name]] = Extern$address
+  sourceImports(path, quiet=quiet)
   sourceDir(file.path(path, "R"), trace=!quiet)
   if (base::getRversion()<"4.0.0") rm(list=c("rbind.data.table", "cbind.data.table"), envir=.GlobalEnv) # 3968 follow up
   .GlobalEnv$testDir = function(x) file.path(path,"inst/tests",x)

diff --git a/CODEOWNERS b/CODEOWNERS
@@ -1,9 +1,12 @@
 # https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
 * @jangorecki @michaelchirico
 
-# melt
+# reshaping
+/R/fcast.R @tdhock
 /R/fmelt.R @tdhock
+/src/fcast.c @tdhock
 /src/fmelt.c @tdhock
+/man/dcast.data.table.Rd @tdhock
 /man/melt.data.table.Rd @tdhock
 /vignettes/datatable-reshape.Rmd @tdhock
 

diff --git a/NEWS.md b/NEWS.md
@@ -30,6 +30,8 @@
 
 7. `fread`'s `fill` argument now also accepts an `integer` in addition to boolean values. `fread` always guesses the number of columns based on reading a sample of rows in the file. When `fill=TRUE`, `fread` stops reading and ignores subsequent rows when this estimate winds up too low, e.g. when the sampled rows happen to exclude some rows that are even wider, [#2727](https://github.com/Rdatatable/data.table/issues/2727) [#2691](https://github.com/Rdatatable/data.table/issues/2691) [#4130](https://github.com/Rdatatable/data.table/issues/4130) [#3436](https://github.com/Rdatatable/data.table/issues/3436). Providing an `integer` as argument for `fill` allows for a manual estimate of the number of columns instead, [#1812](https://github.com/Rdatatable/data.table/issues/1812) [#5378](https://github.com/Rdatatable/data.table/issues/5378). Thanks to @jangorecki, @christellacaze, @Yiguan, @alexdthomas, @ibombonato, @Befrancesco, @TobiasGold for reporting/requesting, and Benjamin Schwendinger for the PR.
 
+8. Computations in `j` can return a matrix or array _if it is one-dimensional_, e.g. a row or column vector, when `j` is a list of columns during grouping, [#783](https://github.com/Rdatatable/data.table/issues/783). Previously a matrix could be provided `DT[, expr, by]` form, but not `DT[, list(expr), by]` form; this resolves that inconsistency. It is still an error to return a "true" array, e.g. a `2x3` matrix.
+
 ## BUG FIXES
 
 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.
@@ -42,7 +44,9 @@
 
 5. `fwrite(x, row.names=TRUE)` with `x` a `matrix` writes `row.names` when present, not row numbers, [#5315](https://github.com/Rdatatable/data.table/issues/5315). Thanks to @Liripo for the report, and @ben-schwen for the fix.
 
-3. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation.
+6. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation.
+
+7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. Any users who were relying on this behavior can change `measure.vars=list("col_name")` (output `variable` was column name, now is column index/integer) to `measure.vars="col_name"` (`variable` still is column name).
 
 ## NOTES
 
@@ -64,6 +68,10 @@
 
 9. `print.data.table` now handles combination multibyte characters correctly when truncating wide string entries, [#5096](https://github.com/Rdatatable/data.table/issues/5096). Thanks to @MichaelChirico for the report and @joshhwuu for the fix.
 
+10. `test.data.table()` runs correctly in more sessions, in particular those where the `digits` or `warn` settings are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
+
+11. Using `print.data.table` when truncation is needed with `row.names = FALSE` prints the indicator `---` in every value column instead of adding a blank column where the `rownames` would have been just to include `---`, [#4083](https://github.com/Rdatatable/data.table/issues/4083). Thanks @MichaelChirico for the report and @joshhwuu for the fix.
+
 # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29)  (30 Jan 2024)
 
 ## BREAKING CHANGE

diff --git a/R/AllS4.R b/R/AllS4.R
@@ -3,21 +3,21 @@ if ("package:data.table" %in% search()) stopf("data.table package loaded. When d
 
 ## Allows data.table to be defined as an object of an S4 class,
 ## or even have data.table be a super class of an S4 class.
-setOldClass(c('data.frame'))
-setOldClass(c('data.table', 'data.frame'))
+methods::setOldClass(c('data.frame'))
+methods::setOldClass(c('data.table', 'data.frame'))
 
 ## as(some.data.frame, "data.table")
-setAs("data.frame", "data.table", function(from) {
+methods::setAs("data.frame", "data.table", function(from) {
   as.data.table(from)
 })
 
 ## as(some.data.table, "data.frame")
-setAs("data.table", "data.frame", function(from) {
+methods::setAs("data.table", "data.frame", function(from) {
   as.data.frame(from)
 })
 
-setOldClass("IDate")
-setOldClass("ITime")
+methods::setOldClass("IDate")
+methods::setOldClass("ITime")
 
-setAs("character", "IDate", function(from) as.IDate(from))
-setAs("character", "ITime", function(from) as.ITime(from))
+methods::setAs("character", "IDate", function(from) as.IDate(from))
+methods::setAs("character", "ITime", function(from) as.ITime(from))
diff --git a/R/data.table.R b/R/data.table.R
@@ -12,7 +12,7 @@ dim.data.table = function(x)
 }
 
 .global = new.env()  # thanks to: http://stackoverflow.com/a/12605694/403310
-setPackageName("data.table",.global)
+methods::setPackageName("data.table",.global)
 .global$print = ""
 
 # NB: if adding to/editing this list, be sure to do the following:
@@ -1022,8 +1022,13 @@ replace_dot_alias = function(e) {
             .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame())
           } else {
             if (colsub %iscall% 'patterns') {
-              # each pattern gives a new filter condition, intersect the end result
-              .SDcols = Reduce(intersect, eval_with_cols(colsub, names_x))
+              patterns_list_or_vector = eval_with_cols(colsub, names_x)
+              .SDcols = if (is.list(patterns_list_or_vector)) {
+                # each pattern gives a new filter condition, intersect the end result
+                Reduce(intersect, patterns_list_or_vector)
+              } else {
+                patterns_list_or_vector
+              }
             } else {
               .SDcols = eval(colsub, parent.frame(), parent.frame())
               # allow filtering via function in .SDcols, #3950

diff --git a/R/fmelt.R b/R/fmelt.R
@@ -30,6 +30,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe
   # replace with lengths when R 3.2.0 dependency arrives
   if (length(idx <- which(sapply(matched, length) == 0L)))
     stopf('Pattern(s) not found: [%s]', brackify(p[idx]))
+  if (length(matched) == 1L) return(matched[[1L]])
   matched
 }
 

diff --git a/R/print.data.table.R b/R/print.data.table.R
@@ -111,7 +111,11 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
     toprint = toprint_subset(toprint, cols_to_print)
   }
   if (printdots) {
-    toprint = rbind(head(toprint, topn + isTRUE(class)), "---"="", tail(toprint, topn))
+    if (isFALSE(row.names)) {
+      toprint = rbind(head(toprint, topn + isTRUE(class)), "---", tail(toprint, topn)) # 4083
+    } else {
+      toprint = rbind(head(toprint, topn + isTRUE(class)), "---"="", tail(toprint, topn))
+    }
     rownames(toprint) = format(rownames(toprint), justify="right")
     if (col.names == "none") {
       cut_colnames(print(toprint, right=TRUE, quote=quote))

diff --git a/R/test.data.table.R b/R/test.data.table.R
@@ -98,6 +98,8 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     datatable.print.trunc.cols = FALSE, #4552
     datatable.rbindlist.check = NULL,
     datatable.integer64 = "integer64",
+    digits = 7L, # ensure printing rounds to the expected number of digits in all sessions, #5285
+    warn = 0L,   # ensure signals are emitted as they are in the code, #5285
     warnPartialMatchArgs = base::getRversion()>="3.6.0", # ensure we don't rely on partial argument matching in internal code, #3664; >=3.6.0 for #3865
     warnPartialMatchAttr = TRUE,
     warnPartialMatchDollar = TRUE,
@@ -416,7 +418,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
     }
   }
   if (!fail && !length(error) && (!length(output) || !missing(y))) {   # TODO test y when output=, too
-    y = try(y,TRUE)
+    capture.output(y <- try(y, silent=TRUE)) # y might produce verbose output, just toss it
     if (identical(x,y)) return(invisible(TRUE))
     all.equal.result = TRUE
     if (is.data.frame(x) && is.data.frame(y)) {

diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw
@@ -0,0 +1,104 @@
+search_order <- match(c("package:data.table", "package:methods"), search(), 0L)
+if (diff(search_order) < 0L) {
+  cat("'methods' must be attached before 'data.table' for dispatch to register correctly; quitting\n")
+  q("no")
+}
+library(methods)
+
+if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
+  if ((tt<-compiler::enableJIT(-1))>0)
+    cat("This is dev mode and JIT is enabled (level ", tt, ") so there will be a brief pause around the first test.\n", sep="")
+} else {
+  library(data.table)
+
+  is_utc = data.table:::is_utc
+  shallow = data.table:::shallow
+  test = data.table:::test
+}
+
+tt = Sys.getenv("TZ", unset=NA)
+TZnotUTC = !identical(tt,"") && !is_utc(tt)
+
+suppressWarnings({
+  setClass("Data.Table", contains="data.table")  # suppress "Created a package name, '2018-05-26 06:14:43.444', when none found"
+  setClass("S4Composition", representation(data="data.table"))
+})
+# data.table can be a parent class
+ids <- sample(letters[1:3], 10, replace=TRUE)
+scores <- stats::rnorm(10)
+dt <- data.table(id=ids, score=scores)
+dt.s4 <- new("Data.Table", data.table(id=ids, score=scores))
+test(1.01, isS4(dt.s4))
+test(1.02, inherits(dt.s4, 'data.table'))
+# Test possible regression. shallow() needs to preserve the S4 bit to support S4 classes that contain data.table
+test(1.03, isS4(shallow(dt.s4)))
+## pull out data from S4 as.list, and compare to list from dt
+dt.s4.list <- dt.s4@.Data
+names(dt.s4.list) <- names(dt.s4)
+test(1.04, dt.s4.list, as.list(dt))  # Underlying data not identical
+# simple S4 conversion-isms work
+df = data.frame(a=sample(letters, 10), b=1:10)
+dt = as.data.table(df)
+test(1.05, identical(methods::as(df, 'data.table'), dt))
+test(1.06, identical(methods::as(dt, 'data.frame'), df))
+# data.table can be used in an S4 slot
+dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=stats::rnorm(10))
+dt.comp <- new("S4Composition", data=dt)
+test(1.07, dt.comp@data, dt)
+# S4 methods dispatch properly on data.table slots"
+dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=stats::rnorm(10))
+dt.comp <- new("S4Composition", data=dt)
+setGeneric("dtGet", function(x, what) standardGeneric("dtGet"))
+setMethod("dtGet", c(x="S4Composition", what="missing"), function(x, what){x@data})
+setMethod("dtGet", c(x="S4Composition", what="ANY"), function(x, what) {x@data[[what]]})
+test(1.08, dtGet(dt.comp), dt)  # actually
+test(1.09, identical(dtGet(dt.comp, 1), dt[[1]]))
+test(1.10, identical(dtGet(dt.comp, 'b'), dt$b))
+removeClass("Data.Table")   # so that test 1914.2 passes on the second run of cc() in dev
+removeClass("S4Composition")
+# END port of old testthat tests
+
+# miscellaneous missing tests uncovered by CodeCov difference in the process of PR #2573 [S4 portion, c.f. 1872.* in tests.Rraw]
+## data.table cannot recycle complicated types
+short_s4_col = getClass("MethodDefinition")
+test(2, data.table(a = 1:4, short_s4_col), error="attempt to replicate an object of type 'S4'")
+
+# print dims in list-columns, #3671, c.f. 2130.* in tests.Rraw
+s4class = setClass("ex_class", slots = list(x="integer", y="character", z="numeric"))
+DT = data.table(
+  x = 1:2,
+  y = list(s4class(x=1L, y=c("yes", "no"), z=2.5),
+           s4class(x=2L, y="yes", z=1)))
+test(3, print(DT), output=c("   x             y", "1: 1 <ex_class[3]>",     "2: 2 <ex_class[3]>"))
+
+# S4 object not supported in fifelse and fcase, #4135
+class4 = setClass("class4", slots=list(x="numeric"))
+s1 = class4(x=20191231)
+s2 = class4(x=20191230)
+test(4.1, fifelse(TRUE, s1, s2),      error = "S4 class objects (except nanotime) are not supported.")
+test(4.2, fifelse(TRUE, 1, s2),       error = "S4 class objects (except nanotime) are not supported.")
+test(4.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. Please see")
+test(4.4, fcase(FALSE, 1, TRUE, s1),  error = "S4 class objects (except nanotime) are not supported. Please see")
+rm(s1, s2, class4)
+
+# native reading of timestamp strings in fread
+# NB: S4 required for methods::as()
+test(5.1, options=c(datatable.old.fread.datetime.character=TRUE),
+     fread("a,b,c\n2015-01-01,2015-01-02,2015-01-03 01:02:03", colClasses=c("Date","IDate","POSIXct")),
+     ans<-data.table(a=as.Date("2015-01-01"), b=as.IDate("2015-01-02"), c=as.POSIXct("2015-01-03 01:02:03")))
+ans_print = utils::capture.output(print(ans))
+if (TZnotUTC) {
+  test(5.2, options=list(datatable.old.fread.datetime.character=NULL),
+       fread("a,b,c\n2015-01-01,2015-01-02,2015-01-03 01:02:03", colClasses=c("Date","IDate","POSIXct"), tz=""),
+       ans, output=ans_print)
+  test(5.3, options=list(datatable.old.fread.datetime.character=NULL),
+       fread("a,b,c\n2015-01-01,2015-01-02,2015-01-03 01:02:03", colClasses=c("Date",NA,NA), tz=""),
+       data.table(a=as.Date("2015-01-01"), b=as.IDate("2015-01-02"), c="2015-01-03 01:02:03"), output=ans_print)
+} else {
+  test(5.4, options=list(datatable.old.fread.datetime.character=NULL),
+       fread("a,b,c\n2015-01-01,2015-01-02,2015-01-03 01:02:03", colClasses=c("Date","IDate","POSIXct")),
+       ans<-data.table(a=as.Date("2015-01-01"), b=as.IDate("2015-01-02"), c=as.POSIXct("2015-01-03 01:02:03", tz="UTC")), output=ans_print)
+  test(5.5, options=list(datatable.old.fread.datetime.character=NULL),
+       fread("a,b,c\n2015-01-01,2015-01-02,2015-01-03 01:02:03", colClasses=c("Date",NA,NA)),
+       ans, output=ans_print)
+}