Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for test.data.table() in foreign mode #6808

Merged
merged 12 commits into from
Feb 11, 2025
25 changes: 16 additions & 9 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -336,10 +336,6 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
Sys.unsetenv(names(old)[!is_preset])
}, add=TRUE)
}
if (!is.null(options)) {
old_options <- do.call(base::options, as.list(options)) # as.list(): allow passing named character vector for convenience
on.exit(base::options(old_options), add=TRUE)
}
# Usage:
# i) tests that x equals y when both x and y are supplied, the most common usage
# ii) tests that x is TRUE when y isn't supplied
Expand Down Expand Up @@ -425,13 +421,23 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
actual$message <- c(actual$message, conditionMessage(m))
m
}
if (!is.null(options)) {
old_options <- do.call(base::options, as.list(options)) # as.list(): allow passing named character vector for convenience
on.exit(base::options(old_options), add=TRUE)
}
if (is.null(output) && is.null(notOutput)) {
x = suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))
# save the overhead of capture.output() since there are a lot of tests, often called in loops
# Thanks to tryCatch2 by Jan here : https://github.com/jangorecki/logR/blob/master/R/logR.R#L21
} else {
out = capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))))
}
if (!is.null(options)) {
# some of the options passed to test() may break internal data.table use below (e.g. invalid datatable.alloccol), so undo them ASAP
base::options(old_options)
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
# this is still registered for on.exit(), keep empty
old_options <- list()
}
fail = FALSE
if (.test.data.table && num>0.0) {
if (num<prevtest+0.0000005) {
Expand All @@ -451,15 +457,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
stopifnot(is.character(ignore.warning), !anyNA(ignore.warning), nchar(ignore.warning)>=1L)
for (msg in ignore.warning) observed = grep(msg, observed, value=TRUE, invert=TRUE) # allow multiple for translated messages rather than relying on '|' to always work
}
if (length(expected) != length(observed)) {
if (length(expected) != length(observed) && (!foreign || is.null(ignore.warning))) {
# nocov start
catf("Test %s produced %d %ss but expected %d\n%s\n%s\n", numStr, length(observed), type, length(expected), paste("Expected:", expected), paste("Observed:", observed, collapse = "\n"))
fail = TRUE
# nocov end
} else {
} else if (!foreign) {
# the expected type occurred and, if more than 1 of that type, in the expected order
for (i in seq_along(expected)) {
if (!foreign && !string_match(expected[i], observed[i])) {
if (!string_match(expected[i], observed[i])) {
# nocov start
catf("Test %s didn't produce the correct %s:\nExpected: %s\nObserved: %s\n", numStr, type, expected[i], observed[i])
fail = TRUE
Expand All @@ -478,7 +484,8 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
if (out[length(out)] == "NULL") out = out[-length(out)]
out = paste(out, collapse="\n")
output = paste(output, collapse="\n") # so that output= can be either a \n separated string, or a vector of strings.
if (length(output) && !string_match(output, out)) {
# it also happens to turn off the 'y' checking branch below
if (length(output) && !foreign && !string_match(output, out)) {
# nocov start
catf("Test %s did not produce correct output:\n", numStr)
catf("Expected: <<%s>>\n", encodeString(output)) # \n printed as '\\n' so the two lines of output can be compared vertically
Expand All @@ -490,7 +497,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
fail = TRUE
# nocov end
}
if (length(notOutput) && string_match(notOutput, out, ignore.case=TRUE)) {
if (length(notOutput) && !foreign && string_match(notOutput, out, ignore.case=TRUE)) {
# nocov start
catf("Test %s produced output but should not have:\n", numStr)
catf("Expected absent (case insensitive): <<%s>>\n", encodeString(notOutput))
Expand Down
166 changes: 71 additions & 95 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1361,43 +1361,20 @@ if (test_bit64) {
test(431.5, DT[5,1:=as.integer64(NA)], data.table(a=factor(c(NA,NA,NA,NA,NA), levels=LETTERS[1:3]), b=1:5))
}

old = getOption("datatable.alloccol") # Test that unsetting datatable.alloccol is caught, #2014
options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1.
# This is why getOption is called first rather than just using the result of option() like elsewhere in this test file.
# TODO: simplify this test if/when R dependency >= 3.1.1
err1 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol="1024")
err2 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol=c(10L,20L))
err3 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol=NA_integer_)
err4 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol=-1)
err5 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error
test(432.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
test(432.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
test(432.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
test(432.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
test(432.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
# Test that unsetting datatable.alloccol is caught, #2014
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great fix & modernization!

test(432.1, data.table(a=1:3), options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?")
test(432.2, data.table(a=1:3), options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.")
test(432.3, data.table(a=1:3), options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.")
test(432.4, data.table(a=1:3), options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.")
test(432.5, data.table(a=1:3), options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.")

# Repeat the tests but this time with subsetting, to ensure the validity check on option happens for those too
DT = data.table(a=1:3, b=4:6)
options(datatable.alloccol=NULL)
err1 = try(DT[2,], silent=TRUE)
options(datatable.alloccol="1024")
err2 = try(DT[,2], silent=TRUE)
options(datatable.alloccol=c(10L,20L))
err3 = try(DT[a>1], silent=TRUE)
options(datatable.alloccol=NA_integer_)
err4 = try(DT[,"b"], silent=TRUE)
options(datatable.alloccol=-1)
err5 = try(DT[2,"b"], silent=TRUE)
options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error
test(433.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
test(433.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
test(433.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
test(433.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
test(433.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
test(433.1, DT[2,], options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?")
test(433.2, DT[,2], options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.")
test(433.3, DT[a>1], options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.")
test(433.4, DT[,"b"], options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.")
test(433.5, DT[2,"b"], options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.")

# simple realloc test
DT = data.table(a=1:3,b=4:6)
Expand Down Expand Up @@ -8712,17 +8689,17 @@ test(1613.21, all.equal(DT2, DT1, ignore.row.order = TRUE), "Dataset 'current' h
# test attributes: key
DT1 <- data.table(a = 1:4, b = letters[1:4], key = "a")
DT2 <- data.table(a = 1:4, b = letters[1:4])
test(1613.22, all.equal(DT1, DT2), "Datasets have different keys. 'target': [a]. 'current': has no key.")
test(1613.22, all.equal(DT1, DT2), output="Datasets have different keys. 'target': [a]. 'current': has no key.")
test(1613.23, all.equal(DT1, DT2, check.attributes = FALSE), TRUE)
test(1613.24, all.equal(DT1, setkeyv(DT2, "a"), check.attributes = TRUE), TRUE)
# test attributes: index
DT1 <- data.table(a = 1:4, b = letters[1:4])
DT2 <- data.table(a = 1:4, b = letters[1:4])
setindexv(DT1, "b")
test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.")
test(1613.25, all.equal(DT1, DT2), output="Datasets have different indices. 'target': [b]. 'current': has no index.")
test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE)
test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].")
test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 'current': [a, b].")
test(1613.27, all.equal(DT1, setindexv(DT2, "a")), output="Datasets have different indices. 'target': [b]. 'current': [a].")
test(1613.28, all.equal(DT1, setindexv(DT2, "b")), output="Datasets have different indices. 'target': [b]. 'current': [a, b].")
test(1613.29, all.equal(DT1, setindexv(setindexv(DT2, NULL), "b")), TRUE)
# test custom attribute
DT1 <- data.table(a = 1:4, b = letters[1:4])
Expand Down Expand Up @@ -11810,15 +11787,15 @@ test(1775.1, capture.output(print(DT1, print.keys = TRUE)),
c("Key: <a>", " a", "1: 1", "2: 2", "3: 3"))
DT2 <- data.table(a = 1:3, b = 4:6)
setindexv(DT2, c("b","a"))
test(1775.2, capture.output(print(DT2, print.keys = TRUE)),
c("Index: <b__a>", " a b", "1: 1 4", "2: 2 5", "3: 3 6"))
test(1775.2, print(DT2, print.keys = TRUE),
output=c("Index: <b__a>", " a b", "1: 1 4", "2: 2 5", "3: 3 6"))
setindexv(DT2, "b")
test(1775.3, capture.output(print(DT2, print.keys = TRUE)),
c("Indices: <b__a>, <b>", " a b", "1: 1 4", "2: 2 5", "3: 3 6"))
test(1775.3, print(DT2, print.keys = TRUE),
output=c("Indices: <b__a>, <b>", " a b", "1: 1 4", "2: 2 5", "3: 3 6"))
setkey(DT2, a)
setindexv(DT2, c("b","a"))
test(1775.4, capture.output(print(DT2, print.keys = TRUE)),
c("Key: <a>", "Indices: <b__a>, <b>", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it
test(1775.4, print(DT2, print.keys = TRUE),
output=c("Key: <a>", "Indices: <b__a>, <b>", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it

# dev regression #2285
cat("A B C\n1 2 3\n4 5 6", file=f<-tempfile())
Expand Down Expand Up @@ -12142,8 +12119,7 @@ test(1831.4, fread(paste0("A\n", "1.", src2)), data.table(A=1.1234567890098766))
DT = as.data.table(matrix(5L, nrow=10, ncol=10))
test(1832.1, fwrite(DT, f<-tempfile(), verbose=TRUE), output="Column writers")
DT = as.data.table(matrix(5L, nrow=10, ncol=60))
# Using capture.output directly to look for the "..." because test(,output=) intercepts [] for convenience elsewhere
test(1832.2, any(grepl("^Column writers.* [.][.][.] ", capture.output(fwrite(DT, f, verbose=TRUE)))))
test(1832.2, fwrite(DT, f, verbose=TRUE), output = "\nColumn writers.* [.][.][.] ")
unlink(f)

# ensure explicitly setting select to default value doesn't error, #2007
Expand Down Expand Up @@ -16568,69 +16544,69 @@ DT = data.table(a = vector("integer", 102L),
b = "bbbbbbbbbbbbb",
c = "ccccccccccccc",
d = c("ddddddddddddd", "d"))
test(2125.02, capture.output(print(DT, trunc.cols=TRUE)),
c(" a b c",
" 1: 0 bbbbbbbbbbbbb ccccccccccccc",
" 2: 0 bbbbbbbbbbbbb ccccccccccccc",
" 3: 0 bbbbbbbbbbbbb ccccccccccccc",
" 4: 0 bbbbbbbbbbbbb ccccccccccccc",
" 5: 0 bbbbbbbbbbbbb ccccccccccccc",
" --- ",
" 98: 0 bbbbbbbbbbbbb ccccccccccccc",
" 99: 0 bbbbbbbbbbbbb ccccccccccccc",
"100: 0 bbbbbbbbbbbbb ccccccccccccc",
"101: 0 bbbbbbbbbbbbb ccccccccccccc",
"102: 0 bbbbbbbbbbbbb ccccccccccccc",
"1 variable not shown: [d]"))
test(2125.03, capture.output(print(DT, trunc.cols=TRUE, row.names=FALSE)),
c(" a b c",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" --- --- ---",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
"1 variable not shown: [d]" ))
test(2125.02, print(DT, trunc.cols=TRUE),
output=c(" a b c",
" 1: 0 bbbbbbbbbbbbb ccccccccccccc",
" 2: 0 bbbbbbbbbbbbb ccccccccccccc",
" 3: 0 bbbbbbbbbbbbb ccccccccccccc",
" 4: 0 bbbbbbbbbbbbb ccccccccccccc",
" 5: 0 bbbbbbbbbbbbb ccccccccccccc",
" --- ",
" 98: 0 bbbbbbbbbbbbb ccccccccccccc",
" 99: 0 bbbbbbbbbbbbb ccccccccccccc",
"100: 0 bbbbbbbbbbbbb ccccccccccccc",
"101: 0 bbbbbbbbbbbbb ccccccccccccc",
"102: 0 bbbbbbbbbbbbb ccccccccccccc",
"1 variable not shown: [d]"))
test(2125.03, print(DT, trunc.cols=TRUE, row.names=FALSE),
output=c(" a b c",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" --- --- ---",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
" 0 bbbbbbbbbbbbb ccccccccccccc",
"1 variable not shown: [d]" ))
# also testing #4266 -- getting width of row #s register right
# TODO: understand why 2 variables truncated here. a,b,c combined have width
# _exactly_ 40, but still wraps. If we set options(width=41) it won't truncate.
# seems to be an issue with print.default.
test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14L],
"2 variables not shown: [c <char>, d <char>]")
test(2125.05, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE))[c(1,14)],
c(" a b c",
"1 variable not shown: [d <char>]" ))
test(2125.06, capture.output(print(DT, trunc.cols=TRUE, col.names="none"))[c(1,12)],
c(" 1: 0 bbbbbbbbbbbbb ccccccccccccc",
"1 variable not shown: [d]" ))
test(2125.07, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"))[c(1,13)],
c(" 1: 0 bbbbbbbbbbbbb",
"2 variables not shown: [c, d]" ),
test(2125.04, print(DT, trunc.cols=TRUE, class=TRUE),
output="2 variables not shown: [c <char>, d <char>]")
test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE),
output=c("^ a b c", ".*",
"1 variable not shown: \\[d <char>\\]"))
test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"),
output=c("^ 1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*",
"1 variable not shown: \\[d\\]", ""))
test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"),
output=c("^ 1: 0 bbbbbbbbbbbbb", ".*",
"2 variables not shown: \\[c, d\\]", ""),
warning = "Column classes will be suppressed when col.names is 'none'")
options("width" = 20)
DT = data.table(a = vector("integer", 2),
b = "bbbbbbbbbbbbb",
c = "ccccccccccccc",
d = "ddddddddddddd")
test(2125.08, capture.output(print(DT, trunc.cols=TRUE)),
c(" a b",
"1: 0 bbbbbbbbbbbbb",
"2: 0 bbbbbbbbbbbbb",
"2 variables not shown: [c, d]"))
test(2125.08, print(DT, trunc.cols=TRUE),
output=c(" a b",
"1: 0 bbbbbbbbbbbbb",
"2: 0 bbbbbbbbbbbbb",
"2 variables not shown: [c, d]"))
options("width" = 10)
DT = data.table(a = "aaaaaaaaaaaaa",
b = "bbbbbbbbbbbbb",
c = "ccccccccccccc",
d = "ddddddddddddd")
test(2125.09, capture.output(print(DT, trunc.cols=TRUE)),
"4 variables not shown: [a, b, c, d]")
test(2125.10, capture.output(print(DT, trunc.cols=TRUE, class=TRUE)),
"4 variables not shown: [a <char>, b <char>, c <char>, d <char>]")
test(2125.09, print(DT, trunc.cols=TRUE),
output="4 variables not shown: [a, b, c, d]")
test(2125.10, print(DT, trunc.cols=TRUE, class=TRUE),
output="4 variables not shown: [a <char>, b <char>, c <char>, d <char>]")
options(old_width)

# segfault when i is NULL or zero-column, #4060
Expand Down
Loading