Skip to content

Commit

Permalink
Normalizing verbose messages in bmerge.R (#6728)
Browse files Browse the repository at this point in the history
* move verbosity to coerce_col, update tests

* do not match.fun as.integer64, pull 'factor' cond from cast_with_atts to coerce_col

* delint

* delint

* simplify coerce_col and cast_with_attrs further

* style+readability changes; attempt to fix possible mistaken use of from_detail=

* remove ()? in for join msgs

* fix arg name

* fully normalize useFancyQuotes

* Revert "fully normalize useFancyQuotes"

This reverts commit 8d50c3a.

* one-off handling of useFancyQuotes instead

---------

Co-authored-by: Michael Chirico <chiricom@google.com>
Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
  • Loading branch information
3 people authored Jan 30, 2025
1 parent 3aeaedf commit b739ab2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 32 deletions.
45 changes: 23 additions & 22 deletions R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ mergeType = function(x) {
ans
}

cast_with_atts = function(x, as.f) {
ans = as.f(x)
if (!is.null(attributes(x))) attributes(ans) = attributes(x)
cast_with_attrs = function(x, cast_fun) {
ans = cast_fun(x)
# do not copy attributes when coercing factor (to character)
if (!is.factor(x) && !is.null(attributes(x))) attributes(ans) = attributes(x)
ans
}

coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg=NULL) {
if (!is.null(verbose_msg)) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL)
set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type))))
coerce_col = function(dt, col, from_type, to_type, from_name, to_name, from_detail="", to_detail="", verbose) {
if (verbose) catf(
"Coercing %s column %s%s to type %s to match type of %s%s.\n",
from_type, from_name, from_detail, to_type, to_name, to_detail
)
cast_fun = switch(to_type, integer64 = bit64::as.integer64, match.fun(paste0("as.", to_type)))
set(dt, j=col, value=cast_with_attrs(dt[[col]], cast_fun))
}

bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbose)
Expand Down Expand Up @@ -68,9 +73,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
next
} else {
if (x_merge_type=="character") {
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
set(i, j=icol, value=val<-as.character(i[[icol]]))
set(callersi, j=icol, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
set(callersi, j=icol, value=i[[icol]]) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
next
} else if (i_merge_type=="character") {
if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
Expand All @@ -89,13 +93,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
}
cfl = c("character", "logical", "factor")
if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) {
msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL
if (anyNA(i[[icol]]) && allNA(i[[icol]])) {
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, msg)
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, from_detail=gettext(" (all-NA)"), verbose=verbose)
next
}
if (anyNA(x[[xcol]]) && allNA(x[[xcol]])) {
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, msg)
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
next
}
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
Expand All @@ -104,8 +107,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
nm = c(iname, xname)
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which has integer64 representation, e.g. no fractions)" else "", nm[2L])
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
} else {
# just integer and double left
Expand All @@ -126,28 +129,26 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
}
}
if (coerce_x) {
msg = if (verbose) gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") else NULL
coerce_col(i, icol, "double", "integer", iname, xname, msg)
from_detail = gettext(" (which contains no fractions)")
coerce_col(i, icol, "double", "integer", iname, xname, from_detail, verbose=verbose)
set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too.
if (length(ic_idx)>1L) {
xc_idx = xcols[ic_idx]
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg)
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, from_detail, verbose=verbose)
}
}
}
}
if (!coerce_x) {
msg = if (verbose) gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") else NULL
coerce_col(x, xcol, "integer", "double", xname, iname, msg)
coerce_col(x, xcol, "integer", "double", xname, iname, to_detail=gettext(" (which contains fractions)"), verbose=verbose)
}
} else {
msg = if (verbose) gettext("Coercing %s column %s to type %s for join to match type of %s.\n") else NULL
coerce_col(i, icol, "integer", "double", iname, xname, msg)
coerce_col(i, icol, "integer", "double", iname, xname, from_detail=gettext(" (for join)"), verbose=verbose)
if (length(ic_idx)>1L) {
xc_idx = xcols[ic_idx]
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "integer")]) {
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg)
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, verbose=verbose)
}
}
}
Expand Down
21 changes: 11 additions & 10 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -10994,7 +10994,8 @@ DT = data.table(
D = as.POSIXct(dt<-paste(d,t), tz="UTC"),
E = as.POSIXct(paste0(dt,c(".999",".0",".5",".111112",".123456",".023",".0",".999999",".99",".0009")), tz="UTC"))

test(1740.1, fwrite(DT,dateTimeAs="iso"), error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
test(1740.1, options=c(useFancyQuotes=FALSE), fwrite(DT,dateTimeAs="iso"),
error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
test(1740.2, fwrite(DT,dateTimeAs=c("ISO","squash")), error=base_messages$match_arg_length)
test(1740.3, capture.output(fwrite(DT,dateTimeAs="ISO")), c(
"A,B,C,D,E",
Expand Down Expand Up @@ -15165,13 +15166,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE],
test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE], # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int)
data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA),
i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions")
output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.")
test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coercing integer column i.int to type double for join to match type of x.doubleInt")
output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt")
test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coercing integer column i.int to type double for join to match type of x.realDouble")
output="Coercing integer column i.int .for join. to type double to match type of x.realDouble")
cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
Expand Down Expand Up @@ -15206,15 +15207,15 @@ if (test_bit64) {
dt1 = data.table(a=1, b=NA_character_)
dt2 = data.table(a=2L, b=NA)
test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L),
output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a")
output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a")
test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()),
output=msg)
test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b")
output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b")
test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
output=msg)
test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA),
output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a")
output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a")
test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()),
output=msg)

Expand Down Expand Up @@ -15641,7 +15642,7 @@ i = data.table(date = dbl_date, key = 'date')
test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date")
test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
output="Coercing integer column i.date to type double for join to match type of x.date")
output="Coercing integer column i.date .for join. to type double to match type of x.date")

# complex values in grouping, #3639
set.seed(42)
Expand Down Expand Up @@ -20688,8 +20689,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%')
# fix coercing integer/double for joins on multiple columns, #6602
x = data.table(a=1L)
y = data.table(c=1L, d=1)
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
x = data.table(a=1)
y = data.table(c=1, d=1L)
test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")
Expand Down

0 comments on commit b739ab2

Please sign in to comment.