From 62d2db8730b79110ab3b9a308c9d5de2a86b1d60 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Fri, 30 Sep 2022 22:04:32 +0000
Subject: [PATCH 1/5] New boolean_arithmetic_linter

---
 DESCRIPTION                                   |  1 +
 NAMESPACE                                     |  1 +
 NEWS.md                                       |  6 +-
 R/boolean_arithmetic_linter.R                 | 71 +++++++++++++++++++
 inst/lintr/linters.csv                        |  1 +
 man/best_practices_linters.Rd                 |  1 +
 man/boolean_arithmetic_linter.Rd              | 18 +++++
 man/efficiency_linters.Rd                     |  1 +
 man/linters.Rd                                |  7 +-
 man/readability_linters.Rd                    |  1 +
 .../testthat/test-boolean_arithmetic_linter.R | 49 +++++++++++++
 11 files changed, 153 insertions(+), 4 deletions(-)
 create mode 100644 R/boolean_arithmetic_linter.R
 create mode 100644 man/boolean_arithmetic_linter.Rd
 create mode 100644 tests/testthat/test-boolean_arithmetic_linter.R

diff --git a/DESCRIPTION b/DESCRIPTION
index b271a9f93..b4d9ac49d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -63,6 +63,7 @@ Collate:
     'any_is_na_linter.R'
     'assignment_linter.R'
     'backport_linter.R'
+    'boolean_arithmetic_linter.R'
     'brace_linter.R'
     'cache.R'
     'class_equals_linter.R'
diff --git a/NAMESPACE b/NAMESPACE
index 7c37237bb..0b4a6ec5c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,6 +25,7 @@ export(assignment_linter)
 export(available_linters)
 export(available_tags)
 export(backport_linter)
+export(boolean_arithmetic_linter)
 export(brace_linter)
 export(checkstyle_output)
 export(class_equals_linter)
diff --git a/NEWS.md b/NEWS.md
index 80374cb3f..1e8dcaf96 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -26,12 +26,16 @@
 
 * `unnecessary_lambda_linter()`: detect unnecessary lambdas (anonymous functions), e.g.
   `lapply(x, function(xi) sum(xi))` can be `lapply(x, sum)` and `purrr::map(x, ~quantile(.x, 0.75, na.rm = TRUE))`
-  can be `purrr::map(x, quantile, 0.75, na.rm = TRUE)`. Naming `probs = 0.75` can further improve readability.
+  can be `purrr::map(x, quantile, 0.75, na.rm = TRUE)`. Naming `probs = 0.75` can further improve readability (#1531, @MichaelChirico).
+
 * `redundant_equals_linter()` for redundant comparisons to `TRUE` or `FALSE` like `is_treatment == TRUE` (#1500, @MichaelChirico)
 
 * `function_return_linter()` for handling issues in function `return()` statements. Currently handles assignments within the `return()`
   clause, e.g. `return(x <- foo())` (@MichaelChirico)
 
+* `boolean_arithmetic_linter()` for identifying places where logical aggregations are more appropriate, e.g.
+  `length(which(x == y)) == 0` is the same as `!any(x == y)` or even `all(x != y)` (@MichaelChirico)
+
 # lintr 3.0.1
 
 * Skip multi-byte tests in non UTF-8 locales (#1504)
diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R
new file mode 100644
index 000000000..713b02324
--- /dev/null
+++ b/R/boolean_arithmetic_linter.R
@@ -0,0 +1,71 @@
+#' Require usage of boolean operators over equivalent arithmetic
+#'
+#' `length(which(x == y)) == 0` is the same as `!any(x == y)`, but the latter
+#'   is more readable and more efficient.
+#'
+#' #' @evalRd rd_tags("boolean_arithmetic_linter")
+#' @seealso [linters] for a complete list of linters available in lintr.
+#' @export
+boolean_arithmetic_linter <- function() {
+  # TODO(michaelchirico): consider sum(x %in% y)
+  # TODO(michaelchirico): consider sum(A & B), sum(A | B), and sum(!A)
+  # TODO(michaelchirico): consider is.na, is.nan, is.finite, is.infinite, is.element
+  # TODO(michaelchirico): stringr equivalents to regex functions
+  zero_expr <-
+    "(EQ or NE or GT or LE) and expr[NUM_CONST[text() = '0' or text() = '0L']]"
+  one_expr <-
+    "(LT or GE) and expr[NUM_CONST[text() = '1' or text() = '1L']]"
+  length_xpath <- glue::glue("
+    //SYMBOL_FUNCTION_CALL[text() = 'which' or text() = 'grep']
+    /parent::expr
+    /parent::expr
+    /parent::expr[
+      expr[SYMBOL_FUNCTION_CALL[text() = 'length']]
+      and parent::expr[ ({zero_expr}) or ({one_expr})]
+    ]
+  ")
+  sum_xpath <- glue::glue("
+    //SYMBOL_FUNCTION_CALL[text() = 'sum']
+    /parent::expr
+    /parent::expr[
+      expr[
+        expr[SYMBOL_FUNCTION_CALL[text() = 'grepl']]
+        or (EQ or NE or GT or LT or GE or LE)
+      ] and parent::expr[ ({zero_expr}) or ({one_expr})]
+    ]
+  ")
+  any_xpath <- paste(length_xpath, "|", sum_xpath)
+
+  Linter(function(source_expression) {
+    if (!is_lint_level(source_expression, "expression")) {
+      return(list())
+    }
+
+    xml <- source_expression$xml_parsed_content
+
+    any_expr <- xml2::xml_find_all(xml, any_xpath)
+
+    # TODO(michaelchirico): extend to include all()-alike expressions like
+    #   sum(x == y) == length(x) --> all(x == y). The issues are (1) we can't
+    #   just test for length() on the RHS (e.g. `sum(x == y) == length(z)` could
+    #   be totally different); (2) looking for sum(x == y) == length(x == y)
+    #   will only find a particularly silly manifestation of the issue; and
+    #   (3) there is a bit of a combinatorial explosion if we have to look for
+    #   _any_ symbol found inside the logical expression on the RHS
+    #   (e.g. sum(x == y) == [length(x) _or_ length(y)]).
+    # A first pass would be to check how many hits there are for the naive
+    #   version (i.e., condition 1), to see how worth investing in this
+    #   complicated logic is to begin with.
+
+    xml_nodes_to_lints(
+      any_expr,
+      source_expression = source_expression,
+      # TODO(michaelchirico): customize this?
+      lint_message = paste(
+        "Use any() to express logical aggregations.",
+        "For example, replace length(which(x == y)) == 0 with !any(x == y)."
+      ),
+      type = "warning"
+    )
+  })
+}
diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv
index ef06f5e57..92ee160d1 100644
--- a/inst/lintr/linters.csv
+++ b/inst/lintr/linters.csv
@@ -4,6 +4,7 @@ any_duplicated_linter,efficiency best_practices
 any_is_na_linter,efficiency best_practices
 assignment_linter,style consistency default
 backport_linter,robustness configurable package_development
+boolean_arithmetic_linter,efficiency best_practices readability
 brace_linter,style readability default configurable
 class_equals_linter,best_practices robustness consistency
 closed_curly_linter,style readability deprecated configurable
diff --git a/man/best_practices_linters.Rd b/man/best_practices_linters.Rd
index 7b53d924e..22f899dcd 100644
--- a/man/best_practices_linters.Rd
+++ b/man/best_practices_linters.Rd
@@ -15,6 +15,7 @@ The following linters are tagged with 'best_practices':
 \item{\code{\link{absolute_path_linter}}}
 \item{\code{\link{any_duplicated_linter}}}
 \item{\code{\link{any_is_na_linter}}}
+\item{\code{\link{boolean_arithmetic_linter}}}
 \item{\code{\link{class_equals_linter}}}
 \item{\code{\link{commented_code_linter}}}
 \item{\code{\link{condition_message_linter}}}
diff --git a/man/boolean_arithmetic_linter.Rd b/man/boolean_arithmetic_linter.Rd
new file mode 100644
index 000000000..26c71ef2c
--- /dev/null
+++ b/man/boolean_arithmetic_linter.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/boolean_arithmetic_linter.R
+\name{boolean_arithmetic_linter}
+\alias{boolean_arithmetic_linter}
+\title{Require usage of boolean operators over equivalent arithmetic}
+\usage{
+boolean_arithmetic_linter()
+}
+\description{
+\code{length(which(x == y)) == 0} is the same as \code{!any(x == y)}, but the latter
+is more readable and more efficient.
+}
+\details{
+#' @evalRd rd_tags("boolean_arithmetic_linter")
+}
+\seealso{
+\link{linters} for a complete list of linters available in lintr.
+}
diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd
index 67028e2b9..6ac847afa 100644
--- a/man/efficiency_linters.Rd
+++ b/man/efficiency_linters.Rd
@@ -14,6 +14,7 @@ The following linters are tagged with 'efficiency':
 \itemize{
 \item{\code{\link{any_duplicated_linter}}}
 \item{\code{\link{any_is_na_linter}}}
+\item{\code{\link{boolean_arithmetic_linter}}}
 \item{\code{\link{fixed_regex_linter}}}
 \item{\code{\link{ifelse_censor_linter}}}
 \item{\code{\link{inner_combine_linter}}}
diff --git a/man/linters.Rd b/man/linters.Rd
index 14db766c0..546f723a5 100644
--- a/man/linters.Rd
+++ b/man/linters.Rd
@@ -17,17 +17,17 @@ see also \code{\link[=available_tags]{available_tags()}}.
 \section{Tags}{
 The following tags exist:
 \itemize{
-\item{\link[=best_practices_linters]{best_practices} (40 linters)}
+\item{\link[=best_practices_linters]{best_practices} (41 linters)}
 \item{\link[=common_mistakes_linters]{common_mistakes} (7 linters)}
 \item{\link[=configurable_linters]{configurable} (20 linters)}
 \item{\link[=consistency_linters]{consistency} (17 linters)}
 \item{\link[=correctness_linters]{correctness} (7 linters)}
 \item{\link[=default_linters]{default} (24 linters)}
 \item{\link[=deprecated_linters]{deprecated} (4 linters)}
-\item{\link[=efficiency_linters]{efficiency} (18 linters)}
+\item{\link[=efficiency_linters]{efficiency} (19 linters)}
 \item{\link[=executing_linters]{executing} (5 linters)}
 \item{\link[=package_development_linters]{package_development} (14 linters)}
-\item{\link[=readability_linters]{readability} (40 linters)}
+\item{\link[=readability_linters]{readability} (41 linters)}
 \item{\link[=robustness_linters]{robustness} (12 linters)}
 \item{\link[=style_linters]{style} (36 linters)}
 }
@@ -40,6 +40,7 @@ The following linters exist:
 \item{\code{\link{any_is_na_linter}} (tags: best_practices, efficiency)}
 \item{\code{\link{assignment_linter}} (tags: consistency, default, style)}
 \item{\code{\link{backport_linter}} (tags: configurable, package_development, robustness)}
+\item{\code{\link{boolean_arithmetic_linter}} (tags: best_practices, efficiency, readability)}
 \item{\code{\link{brace_linter}} (tags: configurable, default, readability, style)}
 \item{\code{\link{class_equals_linter}} (tags: best_practices, consistency, robustness)}
 \item{\code{\link{closed_curly_linter}} (tags: configurable, deprecated, readability, style)}
diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd
index 14a06d6e0..4ca13753b 100644
--- a/man/readability_linters.Rd
+++ b/man/readability_linters.Rd
@@ -12,6 +12,7 @@ Linters highlighting readability issues, such as missing whitespace.
 \section{Linters}{
 The following linters are tagged with 'readability':
 \itemize{
+\item{\code{\link{boolean_arithmetic_linter}}}
 \item{\code{\link{brace_linter}}}
 \item{\code{\link{closed_curly_linter}}}
 \item{\code{\link{commas_linter}}}
diff --git a/tests/testthat/test-boolean_arithmetic_linter.R b/tests/testthat/test-boolean_arithmetic_linter.R
new file mode 100644
index 000000000..9b82b69c7
--- /dev/null
+++ b/tests/testthat/test-boolean_arithmetic_linter.R
@@ -0,0 +1,49 @@
+test_that("boolean_arithmetic_linter requires use of any() or !any()", {
+  linter <- boolean_arithmetic_linter()
+  lint_msg <- rex::rex("Use any() to express logical aggregations.")
+
+  expect_lint("length(which(x == y)) == 0", lint_msg, linter)
+  # anything passed to which() can be assumed to be logical
+  expect_lint("length(which(is_treatment)) == 0L", lint_msg, linter)
+  # regex version
+  expect_lint("length(grep(pattern, x)) == 0", lint_msg, linter)
+  # sum version
+  expect_lint("sum(x == y) == 0L", lint_msg, linter)
+  expect_lint("sum(grepl(pattern, x)) == 0", lint_msg, linter)
+
+  # non-== comparisons
+  expect_lint("length(which(x == y)) > 0L", lint_msg, linter)
+  expect_lint("length(which(is_treatment)) < 1", lint_msg, linter)
+  expect_lint("length(grep(pattern, x)) >= 1L", lint_msg, linter)
+  expect_lint("sum(x == y) != 0", lint_msg, linter)
+  expect_lint("sum(grepl(pattern, x)) > 0L", lint_msg, linter)
+})
+
+# TODO(michaelchirico): activate these; see code comments explaining the complication.
+# test_that("boolean_arithmetic_linter blocks comparisons to the object length", {
+#   expect_lint(
+#     "length(which(condition)) < length(condition)",
+#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
+#     boolean_arithmetic_linter
+#   )
+#   expect_lint(
+#     "length(which(is_treatment)) == length(is_treatment)",
+#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
+#     boolean_arithmetic_linter
+#   )
+#   expect_lint(
+#     "length(grep(pattern, x)) < length(x)",
+#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
+#     boolean_arithmetic_linter
+#   )
+#   expect_lint(
+#     "sum(x == y) < length(x)",
+#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
+#     boolean_arithmetic_linter
+#   )
+#   expect_lint(
+#     "sum(grepl(pattern, x)) == length(x)",
+#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
+#     boolean_arithmetic_linter
+#   )
+# })

From 7c64b48fea7543a933cbe406aa8295f38f430ba8 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Fri, 30 Sep 2022 22:11:04 +0000
Subject: [PATCH 2/5] follow-up #1580

---
 R/boolean_arithmetic_linter.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R
index 713b02324..1e805186e 100644
--- a/R/boolean_arithmetic_linter.R
+++ b/R/boolean_arithmetic_linter.R
@@ -7,9 +7,7 @@
 #' @seealso [linters] for a complete list of linters available in lintr.
 #' @export
 boolean_arithmetic_linter <- function() {
-  # TODO(michaelchirico): consider sum(x %in% y)
-  # TODO(michaelchirico): consider sum(A & B), sum(A | B), and sum(!A)
-  # TODO(michaelchirico): consider is.na, is.nan, is.finite, is.infinite, is.element
+  # TODO(#1580): sum() cases x %in% y, A [&|] B, !A, is.na/is.nan/is.finite/is.infinite/is.element
   # TODO(michaelchirico): stringr equivalents to regex functions
   zero_expr <-
     "(EQ or NE or GT or LE) and expr[NUM_CONST[text() = '0' or text() = '0L']]"

From 7e122ba2c0cb6a35835ece3c168a2921707b1676 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Fri, 30 Sep 2022 22:17:13 +0000
Subject: [PATCH 3/5] Follow-up #1581

---
 R/boolean_arithmetic_linter.R                 | 14 +--------
 .../testthat/test-boolean_arithmetic_linter.R | 29 -------------------
 2 files changed, 1 insertion(+), 42 deletions(-)

diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R
index 1e805186e..26d5f10e7 100644
--- a/R/boolean_arithmetic_linter.R
+++ b/R/boolean_arithmetic_linter.R
@@ -8,7 +8,7 @@
 #' @export
 boolean_arithmetic_linter <- function() {
   # TODO(#1580): sum() cases x %in% y, A [&|] B, !A, is.na/is.nan/is.finite/is.infinite/is.element
-  # TODO(michaelchirico): stringr equivalents to regex functions
+  # TODO(#1581): extend to include all()-alike expressions
   zero_expr <-
     "(EQ or NE or GT or LE) and expr[NUM_CONST[text() = '0' or text() = '0L']]"
   one_expr <-
@@ -43,18 +43,6 @@ boolean_arithmetic_linter <- function() {
 
     any_expr <- xml2::xml_find_all(xml, any_xpath)
 
-    # TODO(michaelchirico): extend to include all()-alike expressions like
-    #   sum(x == y) == length(x) --> all(x == y). The issues are (1) we can't
-    #   just test for length() on the RHS (e.g. `sum(x == y) == length(z)` could
-    #   be totally different); (2) looking for sum(x == y) == length(x == y)
-    #   will only find a particularly silly manifestation of the issue; and
-    #   (3) there is a bit of a combinatorial explosion if we have to look for
-    #   _any_ symbol found inside the logical expression on the RHS
-    #   (e.g. sum(x == y) == [length(x) _or_ length(y)]).
-    # A first pass would be to check how many hits there are for the naive
-    #   version (i.e., condition 1), to see how worth investing in this
-    #   complicated logic is to begin with.
-
     xml_nodes_to_lints(
       any_expr,
       source_expression = source_expression,
diff --git a/tests/testthat/test-boolean_arithmetic_linter.R b/tests/testthat/test-boolean_arithmetic_linter.R
index 9b82b69c7..0e6750f50 100644
--- a/tests/testthat/test-boolean_arithmetic_linter.R
+++ b/tests/testthat/test-boolean_arithmetic_linter.R
@@ -18,32 +18,3 @@ test_that("boolean_arithmetic_linter requires use of any() or !any()", {
   expect_lint("sum(x == y) != 0", lint_msg, linter)
   expect_lint("sum(grepl(pattern, x)) > 0L", lint_msg, linter)
 })
-
-# TODO(michaelchirico): activate these; see code comments explaining the complication.
-# test_that("boolean_arithmetic_linter blocks comparisons to the object length", {
-#   expect_lint(
-#     "length(which(condition)) < length(condition)",
-#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
-#     boolean_arithmetic_linter
-#   )
-#   expect_lint(
-#     "length(which(is_treatment)) == length(is_treatment)",
-#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
-#     boolean_arithmetic_linter
-#   )
-#   expect_lint(
-#     "length(grep(pattern, x)) < length(x)",
-#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
-#     boolean_arithmetic_linter
-#   )
-#   expect_lint(
-#     "sum(x == y) < length(x)",
-#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
-#     boolean_arithmetic_linter
-#   )
-#   expect_lint(
-#     "sum(grepl(pattern, x)) == length(x)",
-#     "grep\\(pattern, x\\) is better than which\\(grepl\\(pattern, x\\)\\)\\.",
-#     boolean_arithmetic_linter
-#   )
-# })

From 0c38d513962c2ca8111e0bebca2b8153425a213d Mon Sep 17 00:00:00 2001
From: Indrajeet Patil <patilindrajeet.science@gmail.com>
Date: Sun, 2 Oct 2022 09:46:59 +0200
Subject: [PATCH 4/5] Update R/boolean_arithmetic_linter.R

---
 R/boolean_arithmetic_linter.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/boolean_arithmetic_linter.R b/R/boolean_arithmetic_linter.R
index 26d5f10e7..17500537e 100644
--- a/R/boolean_arithmetic_linter.R
+++ b/R/boolean_arithmetic_linter.R
@@ -3,7 +3,7 @@
 #' `length(which(x == y)) == 0` is the same as `!any(x == y)`, but the latter
 #'   is more readable and more efficient.
 #'
-#' #' @evalRd rd_tags("boolean_arithmetic_linter")
+#' @evalRd rd_tags("boolean_arithmetic_linter")
 #' @seealso [linters] for a complete list of linters available in lintr.
 #' @export
 boolean_arithmetic_linter <- function() {

From 2ac1ec877e16dae6f414d2c66d6d5199e260af75 Mon Sep 17 00:00:00 2001
From: Indrajeet Patil <patilindrajeet.science@gmail.com>
Date: Sun, 2 Oct 2022 09:56:01 +0200
Subject: [PATCH 5/5] update docs and linter counts

---
 man/boolean_arithmetic_linter.Rd | 6 +++---
 man/linters.Rd                   | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/man/boolean_arithmetic_linter.Rd b/man/boolean_arithmetic_linter.Rd
index 26c71ef2c..9ed4db1d2 100644
--- a/man/boolean_arithmetic_linter.Rd
+++ b/man/boolean_arithmetic_linter.Rd
@@ -10,9 +10,9 @@ boolean_arithmetic_linter()
 \code{length(which(x == y)) == 0} is the same as \code{!any(x == y)}, but the latter
 is more readable and more efficient.
 }
-\details{
-#' @evalRd rd_tags("boolean_arithmetic_linter")
-}
 \seealso{
 \link{linters} for a complete list of linters available in lintr.
 }
+\section{Tags}{
+\link[=best_practices_linters]{best_practices}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability}
+}
diff --git a/man/linters.Rd b/man/linters.Rd
index 4ab371488..f92caf059 100644
--- a/man/linters.Rd
+++ b/man/linters.Rd
@@ -17,17 +17,17 @@ see also \code{\link[=available_tags]{available_tags()}}.
 \section{Tags}{
 The following tags exist:
 \itemize{
-\item{\link[=best_practices_linters]{best_practices} (41 linters)}
+\item{\link[=best_practices_linters]{best_practices} (42 linters)}
 \item{\link[=common_mistakes_linters]{common_mistakes} (7 linters)}
 \item{\link[=configurable_linters]{configurable} (20 linters)}
 \item{\link[=consistency_linters]{consistency} (17 linters)}
 \item{\link[=correctness_linters]{correctness} (7 linters)}
 \item{\link[=default_linters]{default} (24 linters)}
 \item{\link[=deprecated_linters]{deprecated} (4 linters)}
-\item{\link[=efficiency_linters]{efficiency} (19 linters)}
+\item{\link[=efficiency_linters]{efficiency} (20 linters)}
 \item{\link[=executing_linters]{executing} (5 linters)}
 \item{\link[=package_development_linters]{package_development} (14 linters)}
-\item{\link[=readability_linters]{readability} (41 linters)}
+\item{\link[=readability_linters]{readability} (42 linters)}
 \item{\link[=robustness_linters]{robustness} (12 linters)}
 \item{\link[=style_linters]{style} (36 linters)}
 }