-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
check for index in setkey #3582
Changes from 10 commits
572b64f
c8ddc62
bfefa71
0c64f51
d46358f
057938f
7503aea
3bce910
3b0d3f5
3986117
153ae6c
f2934a2
1809782
a6aa862
9288121
b500253
230e5d1
97d64a5
70b5760
fcb0c5b
bf59b63
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,12 +86,30 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU | |
if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.") | ||
} | ||
if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov | ||
if (verbose) { | ||
tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE))) # system.time does a gc, so we don't want this always on, until refcnt is on by default in R | ||
# suppress needed for tests 644 and 645 in verbose mode | ||
cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n") | ||
|
||
# get existing index name if any | ||
found_index = NULL | ||
if(!is.null(indices(x))) found_index <- names(attributes(attributes(x)$index)) | ||
new_possible_index = paste0("__", cols, collapse="") | ||
|
||
# forder only if index is not present | ||
if(!any(new_possible_index == found_index)){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then this |
||
if (verbose) { | ||
tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE))) # system.time does a gc, so we don't want this always on, until refcnt is on by default in R | ||
# suppress needed for tests 644 and 645 in verbose mode | ||
cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n") | ||
} else { | ||
o <- forderv(x, cols, sort=TRUE, retGrp=FALSE) | ||
} | ||
} else { | ||
o = forderv(x, cols, sort=TRUE, retGrp=FALSE) | ||
# find the matching index | ||
ix = found_index[which(found_index == new_possible_index)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then this line can be removed. |
||
if (verbose){ | ||
cat("using existing index for", gsub("^__","", ix), "\n") | ||
o <- attr(attributes(x)$index, which=ix, exact = TRUE) | ||
} else { | ||
o <- attr(attributes(x)$index, which=ix, exact = TRUE) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And these two There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mattdowle I've made the changes. Correct me if I am wrong, I think we need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great - thanks again. Almost there! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Thanks for your patience. |
||
} | ||
} | ||
if (!physical) { | ||
if (is.null(attr(x,"index",exact=TRUE))) setattr(x, "index", integer()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6031,6 +6031,19 @@ thisDT <- copy(DT)[, c("aaa", "b") := 2] | |
test(1419.58, indices(thisDT), c("a", "ab")) | ||
test(1419.59, allIndicesValid(thisDT), TRUE) | ||
|
||
## setkey on same col as index before | ||
DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2), | ||
aaa = c(1,1,2,2,2,1,1,2,2,2)) | ||
setindex(DT, a) | ||
test(1419.60, allIndicesValid(DT), TRUE) | ||
test(1419.61, setkey(DT, a, verbose=TRUE), output="using existing index for a") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This check on |
||
DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2), | ||
aaa = c(1,1,2,2,2,1,1,2,2,2), | ||
bbb = c(1,1,2,0,1,1,1,0,1,1)) | ||
setindex(DT, a) | ||
setindex(DT, aaa) | ||
test(1419.62, allIndicesValid(DT), TRUE) | ||
test(1419.63, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same request here as above for 1419.61. This test needs to check the result of this setkey() command has done the correct thing (e.g. changed the physical row order properly) as well as doing that for the right reason ( |
||
|
||
# setnames updates secondary key | ||
DT = data.table(a=1:5,b=10:6) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These 3 lines would be simpler as just one line:
index = paste0(cols, collapse="__")
See the next 3 comments in combination below ...