diff --git a/DESCRIPTION b/DESCRIPTION index cbc7a1937..7e3899917 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -150,6 +150,7 @@ Collate: 'redundant_ifelse_linter.R' 'regex_subset_linter.R' 'routine_registration_linter.R' + 'scalar_in_linter.R' 'semicolon_linter.R' 'seq_linter.R' 'settings.R' diff --git a/NAMESPACE b/NAMESPACE index 75cba1df0..6c63fae68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -115,6 +115,7 @@ export(redundant_ifelse_linter) export(regex_subset_linter) export(routine_registration_linter) export(sarif_output) +export(scalar_in_linter) export(semicolon_linter) export(semicolon_terminator_linter) export(seq_linter) diff --git a/NEWS.md b/NEWS.md index f5e678d70..b5459598b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -39,6 +39,7 @@ * `library_call_linter()` can detect if all library/require calls are not at the top of your script (#2027 and #2043, @nicholas-masel and @MichaelChirico). * `keyword_quote_linter()` for finding unnecessary or discouraged quoting of symbols in assignment, function arguments, or extraction (part of #884, @MichaelChirico). Quoting is unnecessary when the target is a valid R name, e.g. `c("a" = 1)` can be `c(a = 1)`. The same goes to assignment (`"a" <- 1`) and extraction (`x$"a"`). Where quoting is necessary, the linter encourages doing so with backticks (e.g. `` x$`a b` `` instead of `x$"a b"`). * `length_levels_linter()` for using the specific function `nlevels()` instead of checking `length(levels(x))` (part of #884, @MichaelChirico). +* `scalar_in_linter()` for discouraging `%in%` when the right-hand side is a scalar, e.g. `x %in% 1` (part of #884, @MichaelChirico). * `if_not_else_linter()` for encouraging `if` statements to be structured as `if (A) x else y` instead of `if (!A) y else x` (part of #884, @MichaelChirico). * `commas_linter()` gains an option `allow_trailing` (default `FALSE`) to allow trailing commas while indexing. (#2104, @MEO265) diff --git a/R/scalar_in_linter.R b/R/scalar_in_linter.R new file mode 100644 index 000000000..ea64db002 --- /dev/null +++ b/R/scalar_in_linter.R @@ -0,0 +1,41 @@ +#' Block usage like x %in% "a" +#' +#' `vector %in% set` is appropriate for matching a vector to a set, but if +#' that set has size 1, `==` is more appropriate. `%chin%` from `data.table` +#' is matched as well. +#' +#' `scalar %in% vector` is OK, because the alternative (`any(vector == scalar)`) +#' is more circuitous & potentially less clear. +#' +#' @evalRd rd_tags("scalar_in_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +scalar_in_linter <- function() { + # TODO(#2085): Extend to include other cases where the RHS is clearly a scalar + # NB: all of logical, integer, double, hex, complex are parsed as NUM_CONST + xpath <- " + //SPECIAL[text() = '%in%' or text() = '%chin%'] + /following-sibling::expr[NUM_CONST[not(starts-with(text(), 'NA'))] or STR_CONST] + /parent::expr + " + + return(Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + bad_expr <- xml_find_all(xml, xpath) + in_op <- xml_find_chr(bad_expr, "string(SPECIAL)") + lint_msg <- + paste0("Use == to match length-1 scalars, not ", in_op, ". Note that == preserves NA where ", in_op, " does not.") + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = lint_msg, + type = "warning" + ) + })) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index d49834aba..39521e81a 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -72,6 +72,7 @@ redundant_equals_linter,best_practices readability efficiency common_mistakes redundant_ifelse_linter,best_practices efficiency consistency configurable regex_subset_linter,best_practices efficiency routine_registration_linter,best_practices efficiency robustness +scalar_in_linter,readability consistency best_practices efficiency semicolon_linter,style readability default configurable semicolon_terminator_linter,style readability deprecated configurable seq_linter,robustness efficiency consistency best_practices default diff --git a/man/best_practices_linters.Rd b/man/best_practices_linters.Rd index 99b27e23f..6016a5f66 100644 --- a/man/best_practices_linters.Rd +++ b/man/best_practices_linters.Rd @@ -51,6 +51,7 @@ The following linters are tagged with 'best_practices': \item{\code{\link{redundant_ifelse_linter}}} \item{\code{\link{regex_subset_linter}}} \item{\code{\link{routine_registration_linter}}} +\item{\code{\link{scalar_in_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{sort_linter}}} \item{\code{\link{system_file_linter}}} diff --git a/man/consistency_linters.Rd b/man/consistency_linters.Rd index e084d2c6f..536c8b53d 100644 --- a/man/consistency_linters.Rd +++ b/man/consistency_linters.Rd @@ -30,6 +30,7 @@ The following linters are tagged with 'consistency': \item{\code{\link{paste_linter}}} \item{\code{\link{quotes_linter}}} \item{\code{\link{redundant_ifelse_linter}}} +\item{\code{\link{scalar_in_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{system_file_linter}}} \item{\code{\link{T_and_F_symbol_linter}}} diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index f2f27d3dc..cc5931d4b 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -27,6 +27,7 @@ The following linters are tagged with 'efficiency': \item{\code{\link{redundant_ifelse_linter}}} \item{\code{\link{regex_subset_linter}}} \item{\code{\link{routine_registration_linter}}} +\item{\code{\link{scalar_in_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{sort_linter}}} \item{\code{\link{string_boundary_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index 60f7ae87b..4b96359e2 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -17,18 +17,18 @@ see also \code{\link[=available_tags]{available_tags()}}. \section{Tags}{ The following tags exist: \itemize{ -\item{\link[=best_practices_linters]{best_practices} (52 linters)} +\item{\link[=best_practices_linters]{best_practices} (53 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (7 linters)} \item{\link[=configurable_linters]{configurable} (32 linters)} -\item{\link[=consistency_linters]{consistency} (21 linters)} +\item{\link[=consistency_linters]{consistency} (22 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (8 linters)} -\item{\link[=efficiency_linters]{efficiency} (23 linters)} +\item{\link[=efficiency_linters]{efficiency} (24 linters)} \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (51 linters)} +\item{\link[=readability_linters]{readability} (52 linters)} \item{\link[=robustness_linters]{robustness} (14 linters)} \item{\link[=style_linters]{style} (36 linters)} } @@ -104,6 +104,7 @@ The following linters exist: \item{\code{\link{redundant_ifelse_linter}} (tags: best_practices, configurable, consistency, efficiency)} \item{\code{\link{regex_subset_linter}} (tags: best_practices, efficiency)} \item{\code{\link{routine_registration_linter}} (tags: best_practices, efficiency, robustness)} +\item{\code{\link{scalar_in_linter}} (tags: best_practices, consistency, efficiency, readability)} \item{\code{\link{semicolon_linter}} (tags: configurable, default, readability, style)} \item{\code{\link{seq_linter}} (tags: best_practices, consistency, default, efficiency, robustness)} \item{\code{\link{sort_linter}} (tags: best_practices, efficiency, readability)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index ddc74a30b..a3ff8f846 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -50,6 +50,7 @@ The following linters are tagged with 'readability': \item{\code{\link{pipe_continuation_linter}}} \item{\code{\link{quotes_linter}}} \item{\code{\link{redundant_equals_linter}}} +\item{\code{\link{scalar_in_linter}}} \item{\code{\link{semicolon_linter}}} \item{\code{\link{sort_linter}}} \item{\code{\link{spaces_inside_linter}}} diff --git a/man/scalar_in_linter.Rd b/man/scalar_in_linter.Rd new file mode 100644 index 000000000..3108ca8c5 --- /dev/null +++ b/man/scalar_in_linter.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scalar_in_linter.R +\name{scalar_in_linter} +\alias{scalar_in_linter} +\title{Block usage like x \%in\% "a"} +\usage{ +scalar_in_linter() +} +\description{ +\code{vector \%in\% set} is appropriate for matching a vector to a set, but if +that set has size 1, \code{==} is more appropriate. \verb{\%chin\%} from \code{data.table} +is matched as well. +} +\details{ +\code{scalar \%in\% vector} is OK, because the alternative (\code{any(vector == scalar)}) +is more circuitous & potentially less clear. +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} +\section{Tags}{ +\link[=best_practices_linters]{best_practices}, \link[=consistency_linters]{consistency}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} +} diff --git a/tests/testthat/test-scalar_in_linter.R b/tests/testthat/test-scalar_in_linter.R new file mode 100644 index 000000000..215639251 --- /dev/null +++ b/tests/testthat/test-scalar_in_linter.R @@ -0,0 +1,37 @@ +test_that("scalar_in_linter skips allowed usages", { + linter <- scalar_in_linter() + + expect_lint("x %in% y", NULL, linter) + expect_lint("y %in% c('a', 'b')", NULL, linter) + expect_lint("c('a', 'b') %chin% x", NULL, linter) + expect_lint("z %in% 1:3", NULL, linter) + # scalars on LHS are fine (often used as `"col" %in% names(DF)`) + expect_lint("3L %in% x", NULL, linter) + + # this should be is.na(x), but it more directly uses the "always TRUE/FALSE, _not_ NA" + # aspect of %in%, so we delegate linting here to equals_na_linter() + expect_lint("x %in% NA", NULL, linter) + expect_lint("x %in% NA_character_", NULL, linter) +}) + +test_that("scalar_in_linter blocks simple disallowed usages", { + linter <- scalar_in_linter() + lint_in_msg <- rex::rex("Use == to match length-1 scalars, not %in%.") + lint_chin_msg <- rex::rex("Use == to match length-1 scalars, not %chin%.") + + expect_lint("x %in% 1", lint_in_msg, linter) + expect_lint("x %chin% 'a'", lint_chin_msg, linter) +}) + +test_that("multiple lints are generated correctly", { + linter <- scalar_in_linter() + + expect_lint( + trim_some('{ + x %in% 1 + y %chin% "a" + }'), + list("%in%", "%chin%"), + linter + ) +})