From 62d2db8730b79110ab3b9a308c9d5de2a86b1d60 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 Sep 2022 22:04:32 +0000 Subject: [PATCH 1/5] New boolean_arithmetic_linter --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 6 +- R/boolean_arithmetic_linter.R | 71 +++++++++++++++++++ inst/lintr/linters.csv | 1 + man/best_practices_linters.Rd | 1 + man/boolean_arithmetic_linter.Rd | 18 +++++ man/efficiency_linters.Rd | 1 + man/linters.Rd | 7 +- man/readability_linters.Rd | 1 + .../testthat/test-boolean_arithmetic_linter.R | 49 +++++++++++++ 11 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 R/boolean_arithmetic_linter.R create mode 100644 man/boolean_arithmetic_linter.Rd create mode 100644 tests/testthat/test-boolean_arithmetic_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index b271a9f93..b4d9ac49d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -63,6 +63,7 @@ Collate: 'any_is_na_linter.R' 'assignment_linter.R' 'backport_linter.R' + 'boolean_arithmetic_linter.R' 'brace_linter.R' 'cache.R' 'class_equals_linter.R' diff --git a/NAMESPACE b/NAMESPACE index 7c37237bb..0b4a6ec5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(assignment_linter) export(available_linters) export(available_tags) export(backport_linter) +export(boolean_arithmetic_linter) export(brace_linter) export(checkstyle_output) export(class_equals_linter) diff --git a/NEWS.md b/NEWS.md index 80374cb3f..1e8dcaf96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,12 +26,16 @@ * `unnecessary_lambda_linter()`: detect unnecessary lambdas (anonymous functions), e.g. `lapply(x, function(xi) sum(xi))` can be `lapply(x, sum)` and `purrr::map(x, ~quantile(.x, 0.75, na.rm = TRUE))` - can be `purrr::map(x, quantile, 0.75, na.rm = TRUE)`. Naming `probs = 0.75` can further improve readability. + can be `purrr::map(x, quantile, 0.75, na.rm = TRUE)`. Naming `probs = 0.75` can further improve readability (#1531, @MichaelChirico). + * `redundant_equals_linter()` for redundant comparisons to `TRUE` or `FALSE` like `is_treatment == TRUE` (#1500, @MichaelChirico) * `function_return_linter()` for handling issues in function `return()` statements. Currently handles assignments within the `return()` clause, e.g. `return(x <- foo())` (@MichaelChirico) +* `boolean_arithmetic_linter()` for identifying places where logical aggregations are more appropriate, e.g. + `length(which(x == y)) == 0` is the same as `!any(x == y)` or even `all(x != y)` (@MichaelChirico) + # lintr 3.0.1 * Skip multi-byte tests in non UTF-8 locales (#1504) diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R new file mode 100644 index 000000000..713b02324 --- /dev/null +++ b/R/boolean_arithmetic_linter.R @@ -0,0 +1,71 @@ +#' Require usage of boolean operators over equivalent arithmetic +#' +#' `length(which(x == y)) == 0` is the same as `!any(x == y)`, but the latter +#' is more readable and more efficient. +#' +#' #' @evalRd rd_tags("boolean_arithmetic_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +boolean_arithmetic_linter <- function() { + # TODO(michaelchirico): consider sum(x %in% y) + # TODO(michaelchirico): consider sum(A & B), sum(A | B), and sum(!A) + # TODO(michaelchirico): consider is.na, is.nan, is.finite, is.infinite, is.element + # TODO(michaelchirico): stringr equivalents to regex functions + zero_expr <- + "(EQ or NE or GT or LE) and expr[NUM_CONST[text() = '0' or text() = '0L']]" + one_expr <- + "(LT or GE) and expr[NUM_CONST[text() = '1' or text() = '1L']]" + length_xpath <- glue::glue(" + //SYMBOL_FUNCTION_CALL[text() = 'which' or text() = 'grep'] + /parent::expr + /parent::expr + /parent::expr[ + expr[SYMBOL_FUNCTION_CALL[text() = 'length']] + and parent::expr[ ({zero_expr}) or ({one_expr})] + ] + ") + sum_xpath <- glue::glue(" + //SYMBOL_FUNCTION_CALL[text() = 'sum'] + /parent::expr + /parent::expr[ + expr[ + expr[SYMBOL_FUNCTION_CALL[text() = 'grepl']] + or (EQ or NE or GT or LT or GE or LE) + ] and parent::expr[ ({zero_expr}) or ({one_expr})] + ] + ") + any_xpath <- paste(length_xpath, "|", sum_xpath) + + Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + any_expr <- xml2::xml_find_all(xml, any_xpath) + + # TODO(michaelchirico): extend to include all()-alike expressions like + # sum(x == y) == length(x) --> all(x == y). The issues are (1) we can't + # just test for length() on the RHS (e.g. `sum(x == y) == length(z)` could + # be totally different); (2) looking for sum(x == y) == length(x == y) + # will only find a particularly silly manifestation of the issue; and + # (3) there is a bit of a combinatorial explosion if we have to look for + # _any_ symbol found inside the logical expression on the RHS + # (e.g. sum(x == y) == [length(x) _or_ length(y)]). + # A first pass would be to check how many hits there are for the naive + # version (i.e., condition 1), to see how worth investing in this + # complicated logic is to begin with. + + xml_nodes_to_lints( + any_expr, + source_expression = source_expression, + # TODO(michaelchirico): customize this? + lint_message = paste( + "Use any() to express logical aggregations.", + "For example, replace length(which(x == y)) == 0 with !any(x == y)." + ), + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index ef06f5e57..92ee160d1 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -4,6 +4,7 @@ any_duplicated_linter,efficiency best_practices any_is_na_linter,efficiency best_practices assignment_linter,style consistency default backport_linter,robustness configurable package_development +boolean_arithmetic_linter,efficiency best_practices readability brace_linter,style readability default configurable class_equals_linter,best_practices robustness consistency closed_curly_linter,style readability deprecated configurable diff --git a/man/best_practices_linters.Rd b/man/best_practices_linters.Rd index 7b53d924e..22f899dcd 100644 --- a/man/best_practices_linters.Rd +++ b/man/best_practices_linters.Rd @@ -15,6 +15,7 @@ The following linters are tagged with 'best_practices': \item{\code{\link{absolute_path_linter}}} \item{\code{\link{any_duplicated_linter}}} \item{\code{\link{any_is_na_linter}}} +\item{\code{\link{boolean_arithmetic_linter}}} \item{\code{\link{class_equals_linter}}} \item{\code{\link{commented_code_linter}}} \item{\code{\link{condition_message_linter}}} diff --git a/man/boolean_arithmetic_linter.Rd b/man/boolean_arithmetic_linter.Rd new file mode 100644 index 000000000..26c71ef2c --- /dev/null +++ b/man/boolean_arithmetic_linter.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/boolean_arithmetic_linter.R +\name{boolean_arithmetic_linter} +\alias{boolean_arithmetic_linter} +\title{Require usage of boolean operators over equivalent arithmetic} +\usage{ +boolean_arithmetic_linter() +} +\description{ +\code{length(which(x == y)) == 0} is the same as \code{!any(x == y)}, but the latter +is more readable and more efficient. +} +\details{ +#' @evalRd rd_tags("boolean_arithmetic_linter") +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index 67028e2b9..6ac847afa 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -14,6 +14,7 @@ The following linters are tagged with 'efficiency': \itemize{ \item{\code{\link{any_duplicated_linter}}} \item{\code{\link{any_is_na_linter}}} +\item{\code{\link{boolean_arithmetic_linter}}} \item{\code{\link{fixed_regex_linter}}} \item{\code{\link{ifelse_censor_linter}}} \item{\code{\link{inner_combine_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index 14db766c0..546f723a5 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -17,17 +17,17 @@ see also \code{\link[=available_tags]{available_tags()}}. \section{Tags}{ The following tags exist: \itemize{ -\item{\link[=best_practices_linters]{best_practices} (40 linters)} +\item{\link[=best_practices_linters]{best_practices} (41 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (7 linters)} \item{\link[=configurable_linters]{configurable} (20 linters)} \item{\link[=consistency_linters]{consistency} (17 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (24 linters)} \item{\link[=deprecated_linters]{deprecated} (4 linters)} -\item{\link[=efficiency_linters]{efficiency} (18 linters)} +\item{\link[=efficiency_linters]{efficiency} (19 linters)} \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} -\item{\link[=readability_linters]{readability} (40 linters)} +\item{\link[=readability_linters]{readability} (41 linters)} \item{\link[=robustness_linters]{robustness} (12 linters)} \item{\link[=style_linters]{style} (36 linters)} } @@ -40,6 +40,7 @@ The following linters exist: \item{\code{\link{any_is_na_linter}} (tags: best_practices, efficiency)} \item{\code{\link{assignment_linter}} (tags: consistency, default, style)} \item{\code{\link{backport_linter}} (tags: configurable, package_development, robustness)} +\item{\code{\link{boolean_arithmetic_linter}} (tags: best_practices, efficiency, readability)} \item{\code{\link{brace_linter}} (tags: configurable, default, readability, style)} \item{\code{\link{class_equals_linter}} (tags: best_practices, consistency, robustness)} \item{\code{\link{closed_curly_linter}} (tags: configurable, deprecated, readability, style)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index 14a06d6e0..4ca13753b 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -12,6 +12,7 @@ Linters highlighting readability issues, such as missing whitespace. \section{Linters}{ The following linters are tagged with 'readability': \itemize{ +\item{\code{\link{boolean_arithmetic_linter}}} \item{\code{\link{brace_linter}}} \item{\code{\link{closed_curly_linter}}} \item{\code{\link{commas_linter}}} diff --git a/tests/testthat/test-boolean_arithmetic_linter.R b/tests/testthat/test-boolean_arithmetic_linter.R new file mode 100644 index 000000000..9b82b69c7 --- /dev/null +++ b/tests/testthat/test-boolean_arithmetic_linter.R @@ -0,0 +1,49 @@ +test_that("boolean_arithmetic_linter requires use of any() or !any()", { + linter <- boolean_arithmetic_linter() + lint_msg <- rex::rex("Use any() to express logical aggregations.") + + expect_lint("length(which(x == y)) == 0", lint_msg, linter) + # anything passed to which() can be assumed to be logical + expect_lint("length(which(is_treatment)) == 0L", lint_msg, linter) + # regex version + expect_lint("length(grep(pattern, x)) == 0", lint_msg, linter) + # sum version + expect_lint("sum(x == y) == 0L", lint_msg, linter) + expect_lint("sum(grepl(pattern, x)) == 0", lint_msg, linter) + + # non-== comparisons + expect_lint("length(which(x == y)) > 0L", lint_msg, linter) + expect_lint("length(which(is_treatment)) < 1", lint_msg, linter) + expect_lint("length(grep(pattern, x)) >= 1L", lint_msg, linter) + expect_lint("sum(x == y) != 0", lint_msg, linter) + expect_lint("sum(grepl(pattern, x)) > 0L", lint_msg, linter) +}) + +# TODO(michaelchirico): activate these; see code comments explaining the complication. +# test_that("boolean_arithmetic_linter blocks comparisons to the object length", { +# expect_lint( +# "length(which(condition)) < length(condition)", +# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", +# boolean_arithmetic_linter +# ) +# expect_lint( +# "length(which(is_treatment)) == length(is_treatment)", +# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", +# boolean_arithmetic_linter +# ) +# expect_lint( +# "length(grep(pattern, x)) < length(x)", +# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", +# boolean_arithmetic_linter +# ) +# expect_lint( +# "sum(x == y) < length(x)", +# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", +# boolean_arithmetic_linter +# ) +# expect_lint( +# "sum(grepl(pattern, x)) == length(x)", +# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", +# boolean_arithmetic_linter +# ) +# }) From 7c64b48fea7543a933cbe406aa8295f38f430ba8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 Sep 2022 22:11:04 +0000 Subject: [PATCH 2/5] follow-up #1580 --- R/boolean_arithmetic_linter.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R index 713b02324..1e805186e 100644 --- a/R/boolean_arithmetic_linter.R +++ b/R/boolean_arithmetic_linter.R @@ -7,9 +7,7 @@ #' @seealso [linters] for a complete list of linters available in lintr. #' @export boolean_arithmetic_linter <- function() { - # TODO(michaelchirico): consider sum(x %in% y) - # TODO(michaelchirico): consider sum(A & B), sum(A | B), and sum(!A) - # TODO(michaelchirico): consider is.na, is.nan, is.finite, is.infinite, is.element + # TODO(#1580): sum() cases x %in% y, A [&|] B, !A, is.na/is.nan/is.finite/is.infinite/is.element # TODO(michaelchirico): stringr equivalents to regex functions zero_expr <- "(EQ or NE or GT or LE) and expr[NUM_CONST[text() = '0' or text() = '0L']]" From 7e122ba2c0cb6a35835ece3c168a2921707b1676 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 Sep 2022 22:17:13 +0000 Subject: [PATCH 3/5] Follow-up #1581 --- R/boolean_arithmetic_linter.R | 14 +-------- .../testthat/test-boolean_arithmetic_linter.R | 29 ------------------- 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R index 1e805186e..26d5f10e7 100644 --- a/R/boolean_arithmetic_linter.R +++ b/R/boolean_arithmetic_linter.R @@ -8,7 +8,7 @@ #' @export boolean_arithmetic_linter <- function() { # TODO(#1580): sum() cases x %in% y, A [&|] B, !A, is.na/is.nan/is.finite/is.infinite/is.element - # TODO(michaelchirico): stringr equivalents to regex functions + # TODO(#1581): extend to include all()-alike expressions zero_expr <- "(EQ or NE or GT or LE) and expr[NUM_CONST[text() = '0' or text() = '0L']]" one_expr <- @@ -43,18 +43,6 @@ boolean_arithmetic_linter <- function() { any_expr <- xml2::xml_find_all(xml, any_xpath) - # TODO(michaelchirico): extend to include all()-alike expressions like - # sum(x == y) == length(x) --> all(x == y). The issues are (1) we can't - # just test for length() on the RHS (e.g. `sum(x == y) == length(z)` could - # be totally different); (2) looking for sum(x == y) == length(x == y) - # will only find a particularly silly manifestation of the issue; and - # (3) there is a bit of a combinatorial explosion if we have to look for - # _any_ symbol found inside the logical expression on the RHS - # (e.g. sum(x == y) == [length(x) _or_ length(y)]). - # A first pass would be to check how many hits there are for the naive - # version (i.e., condition 1), to see how worth investing in this - # complicated logic is to begin with. - xml_nodes_to_lints( any_expr, source_expression = source_expression, diff --git a/tests/testthat/test-boolean_arithmetic_linter.R b/tests/testthat/test-boolean_arithmetic_linter.R index 9b82b69c7..0e6750f50 100644 --- a/tests/testthat/test-boolean_arithmetic_linter.R +++ b/tests/testthat/test-boolean_arithmetic_linter.R @@ -18,32 +18,3 @@ test_that("boolean_arithmetic_linter requires use of any() or !any()", { expect_lint("sum(x == y) != 0", lint_msg, linter) expect_lint("sum(grepl(pattern, x)) > 0L", lint_msg, linter) }) - -# TODO(michaelchirico): activate these; see code comments explaining the complication. -# test_that("boolean_arithmetic_linter blocks comparisons to the object length", { -# expect_lint( -# "length(which(condition)) < length(condition)", -# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", -# boolean_arithmetic_linter -# ) -# expect_lint( -# "length(which(is_treatment)) == length(is_treatment)", -# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", -# boolean_arithmetic_linter -# ) -# expect_lint( -# "length(grep(pattern, x)) < length(x)", -# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", -# boolean_arithmetic_linter -# ) -# expect_lint( -# "sum(x == y) < length(x)", -# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", -# boolean_arithmetic_linter -# ) -# expect_lint( -# "sum(grepl(pattern, x)) == length(x)", -# "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.", -# boolean_arithmetic_linter -# ) -# }) From 0c38d513962c2ca8111e0bebca2b8153425a213d Mon Sep 17 00:00:00 2001 From: Indrajeet Patil Date: Sun, 2 Oct 2022 09:46:59 +0200 Subject: [PATCH 4/5] Update R/boolean_arithmetic_linter.R --- R/boolean_arithmetic_linter.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R index 26d5f10e7..17500537e 100644 --- a/R/boolean_arithmetic_linter.R +++ b/R/boolean_arithmetic_linter.R @@ -3,7 +3,7 @@ #' `length(which(x == y)) == 0` is the same as `!any(x == y)`, but the latter #' is more readable and more efficient. #' -#' #' @evalRd rd_tags("boolean_arithmetic_linter") +#' @evalRd rd_tags("boolean_arithmetic_linter") #' @seealso [linters] for a complete list of linters available in lintr. #' @export boolean_arithmetic_linter <- function() { From 2ac1ec877e16dae6f414d2c66d6d5199e260af75 Mon Sep 17 00:00:00 2001 From: Indrajeet Patil Date: Sun, 2 Oct 2022 09:56:01 +0200 Subject: [PATCH 5/5] update docs and linter counts --- man/boolean_arithmetic_linter.Rd | 6 +++--- man/linters.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/man/boolean_arithmetic_linter.Rd b/man/boolean_arithmetic_linter.Rd index 26c71ef2c..9ed4db1d2 100644 --- a/man/boolean_arithmetic_linter.Rd +++ b/man/boolean_arithmetic_linter.Rd @@ -10,9 +10,9 @@ boolean_arithmetic_linter() \code{length(which(x == y)) == 0} is the same as \code{!any(x == y)}, but the latter is more readable and more efficient. } -\details{ -#' @evalRd rd_tags("boolean_arithmetic_linter") -} \seealso{ \link{linters} for a complete list of linters available in lintr. } +\section{Tags}{ +\link[=best_practices_linters]{best_practices}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} +} diff --git a/man/linters.Rd b/man/linters.Rd index 4ab371488..f92caf059 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -17,17 +17,17 @@ see also \code{\link[=available_tags]{available_tags()}}. \section{Tags}{ The following tags exist: \itemize{ -\item{\link[=best_practices_linters]{best_practices} (41 linters)} +\item{\link[=best_practices_linters]{best_practices} (42 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (7 linters)} \item{\link[=configurable_linters]{configurable} (20 linters)} \item{\link[=consistency_linters]{consistency} (17 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (24 linters)} \item{\link[=deprecated_linters]{deprecated} (4 linters)} -\item{\link[=efficiency_linters]{efficiency} (19 linters)} +\item{\link[=efficiency_linters]{efficiency} (20 linters)} \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} -\item{\link[=readability_linters]{readability} (41 linters)} +\item{\link[=readability_linters]{readability} (42 linters)} \item{\link[=robustness_linters]{robustness} (12 linters)} \item{\link[=style_linters]{style} (36 linters)} }