Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #5510: undefined behaviour #5832

Merged
merged 22 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0471c4d
Fix UB via int conversion (#5510)
HughParsonage Dec 17, 2023
e3c6a78
Ensure nan is not coerced. Closes #5510
HughParsonage Dec 17, 2023
90b167e
Fix '1' mistakenly removed from earlier commit
HughParsonage Dec 17, 2023
47c87fd
Re #5834
HughParsonage Dec 18, 2023
772906d
Rename function more sensibly
HughParsonage Dec 18, 2023
c1bf61c
Incorporate finite checks into the logic
HughParsonage Dec 18, 2023
2d76efd
Suspend int64 coerce tests subject to #5834
HughParsonage Dec 18, 2023
f0238b4
Update CODEOWNERS
HughParsonage Dec 19, 2023
7696ea3
Merge branch 'master' into hp-int-undefined-behaviour
HughParsonage Dec 19, 2023
61ce9dd
Revert CODEOWNERS
HughParsonage Dec 19, 2023
e9f9ba1
Merge branch 'hp-int-undefined-behaviour' of https://github.com/Rdata…
HughParsonage Dec 19, 2023
e00a553
Update CODEOWNERS
HughParsonage Dec 19, 2023
9d05feb
Simplify logic for integer check
HughParsonage Dec 19, 2023
a27ccf2
Merge branch 'hp-int-undefined-behaviour' of https://github.com/Rdata…
HughParsonage Dec 19, 2023
50056ef
Check for NA_INTEGER superfluous
HughParsonage Dec 19, 2023
cd73555
Test 6.53 should emit warning on -2^31
HughParsonage Dec 19, 2023
d32470e
Merge branch 'master' into hp-int-undefined-behaviour
MichaelChirico Dec 22, 2023
46cf79c
C++ toolkit included by default
MichaelChirico Dec 22, 2023
3e2f1c3
first pass at fix
MichaelChirico Dec 22, 2023
7701738
Simplify/unite with within_int64_repres()
MichaelChirico Dec 22, 2023
b269b39
weak inequality
MichaelChirico Dec 23, 2023
6f2769f
Merge branch 'master' into hp-int-undefined-behaviour
HughParsonage Dec 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion inst/tests/nafill.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ if (test_bit64) {
test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range")
test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range")
test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647"))))
test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))))
test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning=c("out-of-range", "precision lost"))
test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost"))
}

Expand Down
12 changes: 6 additions & 6 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
year = data.table::year # lubridate
yearmon = data.table::yearmon # zoo
yearqtr = data.table::yearqtr # zoo

rm_all = function(env=parent.frame()) {
tt = setdiff(ls(envir=env), .do_not_rm)
rm(list=tt, envir=env)
Expand Down Expand Up @@ -3886,7 +3886,7 @@ test(1133.75, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5,
# on a new column with warning on 2nd assign
DT[,new:=NULL]
test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(7,7,7,7,7,3,3)),
warning="Group 2 column 'new': 3.4.*double.*at RHS position 1 truncated.*precision lost.*integer")
warning="Group 2 column 'new': 3.4.*double.*at RHS position.*truncated.*precision lost.*integer")

# Fix for FR #2496 - catch `{` in `:=` expression in `j`:
DT <- data.table(x=c("A", "A", "B", "B"), val =1:4)
Expand Down Expand Up @@ -5071,7 +5071,7 @@ dt <- data.table(a=1:3, b=c(7,8,9), c=c(TRUE, NA, FALSE), d=as.list(4:6), e=c("a

test(1294.01, dt[, a := 1]$a, rep(1L, 3L))
test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L),
warning="1.5.*double.*position 1 truncated.*integer.*column 1 named 'a'")
warning="1.5.*double.*position 1.*truncated.*integer.*column 1 named 'a'")
test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L))
test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L),
warning=c("Coercing 'character' RHS to 'integer'.*column 1 named 'a'",
Expand Down Expand Up @@ -9668,7 +9668,7 @@ nqjoin_test <- function(x, y, k=1L, test_no, mult="all") {
gc() # no longer needed but left in place just in case, no harm
}

dt1 = nq_fun(100L) # 400 reduced to 100, #5517
dt1 = nq_fun(100L) # 400 reduced to 100, #5517
dt2 = nq_fun(50L)
x = na.omit(dt1)
y = na.omit(dt2)
Expand Down Expand Up @@ -14365,7 +14365,7 @@ DT[,foo:=factor(c("a","b","c"))]
test(2005.05, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)")
test(2005.06, DT[2, a:=9, verbose=TRUE], notOutput="Coerced")
test(2005.07, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced")
test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1 truncated.*integer.*column 1 named 'a'")
test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1.*truncated.*integer.*column 1 named 'a'")
test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot be coerced to 'raw'")
test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'")
test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'")
Expand Down Expand Up @@ -18004,7 +18004,7 @@ test(2233.34, copy(DT)[, same_value:=value[1], by=.(by1, by2), verbose=TRUE],
test(2233.35, copy(DT)[, same_value:=value[1], by=.(by2, by1), verbose=TRUE], ans, output=out)
test(2233.36, copy(DT)[, same_value:=value[1], keyby=.(by2, by1), verbose=TRUE], setkey(ans,by2,by1), output=out)
# similar to #5307 using integer
DT = data.table(A=INT(2,1,2,1), B=6:3, v=11:14)
DT = data.table(A=INT(2,1,2,1), B=6:3, v=11:14)
test(2233.37, copy(DT)[, val:=v[1L], by=.(A,B), verbose=TRUE], copy(DT)[, val:=11:14], output=out)
test(2233.38, copy(DT)[, val:=v[1L], keyby=.(A,B), verbose=TRUE], data.table(A=INT(1,1,2,2), B=INT(3,5,4,6), v=INT(14,12,13,11), val=INT(14,12,13,11), key=c("A","B")), output=out)
# test from #5326 but with n=100 rather than n=100000; confirmed that n=100 fails tests 2233.403-405 before fix
Expand Down
14 changes: 7 additions & 7 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ int checkOverAlloc(SEXP x)
error(_("getOption('datatable.alloccol') should be a number, by default 1024. But its type is '%s'."), type2char(TYPEOF(x)));
if (LENGTH(x) != 1)
error(_("getOption('datatable.alloc') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x));
int ans = isInteger(x) ? INTEGER(x)[0] : (int)REAL(x)[0];
int ans = asInteger(x);
if (ans<0)
error(_("getOption('datatable.alloc')==%d. It must be >=0 and not NA."), ans);
return ans;
Expand Down Expand Up @@ -742,7 +742,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
const double *sd = REAL(source);
for (int i=0; i<slen; ++i) {
const double val = sd[i+soff];
if (!ISNAN(val) && (!R_FINITE(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) {
if (!ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) {
error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel);
}
}
Expand Down Expand Up @@ -897,14 +897,14 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
switch (TYPEOF(source)) {
case REALSXP: if (sourceIsI64)
CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val)
else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)", val)
else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val)
case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) &&
(ISNAN(val.r) || (R_FINITE(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r)
(ISNAN(val.r) || (R_FINITE(val.r) && inside_int32_range(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r)
} break;
case REALSXP:
switch (TYPEOF(source)) {
case REALSXP: if (targetIsI64 && !sourceIsI64)
CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)", val)
CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || (int)val!=val), "f", "truncated (precision lost)", val)
break;
case CPLXSXP: if (targetIsI64)
CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) &&
Expand Down Expand Up @@ -1004,8 +1004,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
case REALSXP:
if (sourceIsI64)
BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval)
else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval)
case CPLXSXP: BODY(Rcomplex, COMPLEX, int, ISNAN(val.r) ? NA_INTEGER : (int)val.r, td[i]=cval)
else BODY(double, REAL, int, (ISNAN(val) || !inside_int32_range(val)) ? NA_INTEGER : (int)val, td[i]=cval)
case CPLXSXP: BODY(Rcomplex, COMPLEX, int, (ISNAN(val.r) || !inside_int32_range(val.r)) ? NA_INTEGER : (int)val.r, td[i]=cval)
default: COERCE_ERROR("integer"); // test 2005.4
}
} break;
Expand Down
1 change: 1 addition & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP
SEXP coalesce(SEXP x, SEXP inplace);

// utils.c
bool inside_int32_range(double x);
bool isRealReallyInt(SEXP x);
SEXP isRealReallyIntR(SEXP x);
SEXP isReallyReal(SEXP x);
Expand Down
2 changes: 1 addition & 1 deletion src/gsumm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ SEXP gprod(SEXP x, SEXP narmArg) {
if (INHERITS(x, char_integer64)) {
int64_t *ansd = (int64_t *)REAL(ans);
for (int i=0; i<ngrp; ++i) {
ansd[i] = (s[i]>INT64_MAX || s[i]<=INT64_MIN) ? NA_INTEGER64 : (int64_t)s[i];
ansd[i] = (ISNAN(s[i]) || s[i]>INT64_MAX || s[i]<=INT64_MIN) ? NA_INTEGER64 : (int64_t)s[i];
}
} else {
double *ansd = REAL(ans);
Expand Down
7 changes: 6 additions & 1 deletion src/utils.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#include "data.table.h"

bool inside_int32_range(double x) {
// N.B. if x = 2147483647.99 then (int)2147483647.99 is not undefined behaviour
return x < 2147483648 && x > -2147483648;
}

static R_xlen_t firstNonInt(SEXP x) {
R_xlen_t n=xlength(x), i=0;
const double *dx = REAL(x);
while (i<n &&
( ISNA(dx[i]) ||
( R_FINITE(dx[i]) && dx[i]==(int)(dx[i]) && (int)(dx[i])!=NA_INTEGER))) { // NA_INTEGER == INT_MIN == -2147483648
( R_FINITE(dx[i]) && inside_int32_range(dx[i]) && dx[i]==(int)(dx[i]) && (int)(dx[i])!=NA_INTEGER))) { // NA_INTEGER == INT_MIN == -2147483648
i++;
}
return i==n ? 0 : i+1;
Expand Down