From 225f01ba710719766092b806c2311827fe9d7ebc Mon Sep 17 00:00:00 2001 From: hadley Date: Wed, 19 Nov 2014 15:04:01 -0600 Subject: [PATCH] Improve default filtering method for data tables Now uses common code for all 3 cases, and names index variable to avoid clash with existing columns. Closes #615 --- NEWS.md | 3 +++ R/manip-dt.r | 28 ++++++++++------------------ tests/testthat/test-filter.r | 15 ++++++++++++--- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4ae8683c90..9b45fa2fb4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # dplyr 0.3.1.9000 +* Filtering data tables no longer fails when the table has a variable called + "V1" (#615). + * Implemented slice methods for data tables (#717). * In joins, you can now name only those variables that are different between diff --git a/R/manip-dt.r b/R/manip-dt.r index 21d24d29aa..ed2a62cec0 100644 --- a/R/manip-dt.r +++ b/R/manip-dt.r @@ -14,33 +14,25 @@ and_expr <- function(exprs) { #' @export filter_.grouped_dt <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) - - # http://stackoverflow.com/questions/16573995/subset-by-group-with-data-table - expr <- lapply(dots, `[[`, "expr") - call <- substitute(dt[, .I[expr], by = vars], list(expr = and_expr(expr))) - - env <- dt_env(.data, lazyeval::common_env(dots)) - indices <- eval(call, env)$V1 - out <- .data[indices[!is.na(indices)]] + grouped_dt(NextMethod(), groups(.data), copy = FALSE) +} - grouped_dt(out, groups(.data), copy = FALSE) +#' @export +filter_.tbl_dt <- function(.data, ..., .dots) { + tbl_dt(NextMethod(), copy = FALSE) } #' @export filter_.data.table <- function(.data, ..., .dots) { dots <- lazyeval::all_dots(.dots, ...) + env <- lazyeval::common_env(.dots) + # http://stackoverflow.com/questions/16573995/subset-by-group-with-data-table expr <- lapply(dots, `[[`, "expr") - call <- substitute(dt[expr, ], list(expr = and_expr(expr))) - - env <- dt_env(.data, lazyeval::common_env(dots)) - eval(call, env) -} + j <- substitute(list(`_row` = .I[expr]), list(expr = and_expr(expr))) + indices <- dt_subset(.data, , j, env)$`_row` -#' @export -filter_.tbl_dt <- function(.data, ..., .dots) { - tbl_dt(NextMethod(), copy = FALSE) + .data[indices[!is.na(indices)]] } # Summarise -------------------------------------------------------------------- diff --git a/tests/testthat/test-filter.r b/tests/testthat/test-filter.r index 78490c9871..142c7a072e 100644 --- a/tests/testthat/test-filter.r +++ b/tests/testthat/test-filter.r @@ -199,13 +199,13 @@ test_that( "filter handles complex vectors (#436)", { test_that("%in% works as expected (#126)", { df <- data_frame( a = c("a", "b", "ab"), g = c(1,1,2) ) - + res <- df %>% filter( a %in% letters ) expect_equal(nrow(res), 2L) - + res <- df %>% group_by(g) %>% filter( a %in% letters ) expect_equal(nrow(res), 2L) - + }) test_that( "filter handles data.frames as columns (#602)", { @@ -238,3 +238,12 @@ test_that("row_number does not segfault with example from #781", { expect_equal( nrow(res), 0L ) }) + +# data.table -------------------------------------------------------------- + +test_that("filter succeeds even if column called V1 (#615)", { + dt <- data.table(x = 1:10 ,V1 = 0) + out <- dt %>% group_by(V1) %>% filter(x > 5) + + expect_equal(nrow(out), 5) +})