Skip to content

Commit

Permalink
Improve default filtering method for data tables
Browse files Browse the repository at this point in the history
Now uses common code for all 3 cases, and names index variable to avoid clash with existing columns. Closes #615
  • Loading branch information
hadley committed Nov 19, 2014
1 parent 84f656e commit 225f01b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 21 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dplyr 0.3.1.9000

* Filtering data tables no longer fails when the table has a variable called
"V1" (#615).

* Implemented slice methods for data tables (#717).

* In joins, you can now name only those variables that are different between
Expand Down
28 changes: 10 additions & 18 deletions R/manip-dt.r
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,25 @@ and_expr <- function(exprs) {

#' @export
filter_.grouped_dt <- function(.data, ..., .dots) {
dots <- lazyeval::all_dots(.dots, ...)

# http://stackoverflow.com/questions/16573995/subset-by-group-with-data-table
expr <- lapply(dots, `[[`, "expr")
call <- substitute(dt[, .I[expr], by = vars], list(expr = and_expr(expr)))

env <- dt_env(.data, lazyeval::common_env(dots))
indices <- eval(call, env)$V1
out <- .data[indices[!is.na(indices)]]
grouped_dt(NextMethod(), groups(.data), copy = FALSE)
}

grouped_dt(out, groups(.data), copy = FALSE)
#' @export
filter_.tbl_dt <- function(.data, ..., .dots) {
tbl_dt(NextMethod(), copy = FALSE)
}

#' @export
filter_.data.table <- function(.data, ..., .dots) {
dots <- lazyeval::all_dots(.dots, ...)
env <- lazyeval::common_env(.dots)

# http://stackoverflow.com/questions/16573995/subset-by-group-with-data-table
expr <- lapply(dots, `[[`, "expr")
call <- substitute(dt[expr, ], list(expr = and_expr(expr)))

env <- dt_env(.data, lazyeval::common_env(dots))
eval(call, env)
}
j <- substitute(list(`_row` = .I[expr]), list(expr = and_expr(expr)))
indices <- dt_subset(.data, , j, env)$`_row`

#' @export
filter_.tbl_dt <- function(.data, ..., .dots) {
tbl_dt(NextMethod(), copy = FALSE)
.data[indices[!is.na(indices)]]
}

# Summarise --------------------------------------------------------------------
Expand Down
15 changes: 12 additions & 3 deletions tests/testthat/test-filter.r
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,13 @@ test_that( "filter handles complex vectors (#436)", {

test_that("%in% works as expected (#126)", {
df <- data_frame( a = c("a", "b", "ab"), g = c(1,1,2) )

res <- df %>% filter( a %in% letters )
expect_equal(nrow(res), 2L)

res <- df %>% group_by(g) %>% filter( a %in% letters )
expect_equal(nrow(res), 2L)

})

test_that( "filter handles data.frames as columns (#602)", {
Expand Down Expand Up @@ -238,3 +238,12 @@ test_that("row_number does not segfault with example from #781", {
expect_equal( nrow(res), 0L )
})


# data.table --------------------------------------------------------------

test_that("filter succeeds even if column called V1 (#615)", {
dt <- data.table(x = 1:10 ,V1 = 0)
out <- dt %>% group_by(V1) %>% filter(x > 5)

expect_equal(nrow(out), 5)
})

0 comments on commit 225f01b

Please sign in to comment.