From 7f528d876e51a608b805fdb3c9df9eb1f3bc87f0 Mon Sep 17 00:00:00 2001 From: Indrajeet Patil Date: Mon, 3 Oct 2022 05:46:15 +0200 Subject: [PATCH] Update docs for `extraction_operator_linter()` (#1592) Closes #1584 Part of #1492 Co-authored-by: Michael Chirico --- R/extraction_operator_linter.R | 50 +++++++++++++++++++++++++++++-- R/redundant_equals_linter.R | 10 +++---- man/extraction_operator_linter.Rd | 50 +++++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 9 deletions(-) diff --git a/R/extraction_operator_linter.R b/R/extraction_operator_linter.R index daffeed32..8ca615b01 100644 --- a/R/extraction_operator_linter.R +++ b/R/extraction_operator_linter.R @@ -1,7 +1,53 @@ #' Extraction operator linter #' -#' Check that the `[[` operator is used when extracting a single element from an object, not `[` (subsetting) nor `$` -#' (interactive use). +#' Check that the `[[` operator is used when extracting a single element from an object, +#' not `[` (subsetting) nor `$` (interactive use). +#' +#' @details +#' +#' There are three subsetting operators in R (`[[`, `[`, and `$`) and they interact differently +#' with different data structures (atomic vector, list, data frame, etc.). +#' +#' Here are a few reasons to prefer the `[[` operator over `[` or `$` when you want to extract +#' an element from a data frame or a list: +#' +#' - Subsetting a list with `[` always returns a smaller list, while `[[` returns +#' the list element. +#' +#' - Subsetting a named atomic vector with `[` returns a named vector, while `[[` returns +#' the vector element. +#' +#' - Subsetting a data frame (but not tibble) with `[` is type unstable; it can return +#' a vector or a data frame. `[[`, on the other hand, always returns a vector. +#' +#' - For a data frame (but not tibble), `$` does partial matching (e.g. `df$a` will subset +#' `df$abc`), which can be a source of bugs. `[[` doesn't do partial matching. +#' +#' For data frames (and tibbles), irrespective of the size, the `[[` operator is slower than `$`. +#' For lists, however, the reverse is true. +#' +#' @examples +#' library(lintr) +#' +#' # will produce lints +#' lint( +#' text = "iris['Species']", +#' linters = extraction_operator_linter() +#' ) +#' +#' lint( +#' text = "iris$Species", +#' linters = extraction_operator_linter() +#' ) +#' +#' # okay +#' lint( +#' text = "iris[['Species']]", +#' linters = extraction_operator_linter() +#' ) +#' +#' @references +#' - Subsetting [chapter](https://adv-r.hadley.nz/subsetting.html) from _Advanced R_ (Wickham, 2019). #' #' @evalRd rd_tags("extraction_operator_linter") #' @seealso [linters] for a complete list of linters available in lintr. diff --git a/R/redundant_equals_linter.R b/R/redundant_equals_linter.R index 8c4b6105b..f6a8abae1 100644 --- a/R/redundant_equals_linter.R +++ b/R/redundant_equals_linter.R @@ -1,11 +1,11 @@ #' Block usage of `==`, `!=` on logical vectors #' #' Testing `x == TRUE` is redundant if `x` is a logical vector. Wherever this is -#' used to improve readability, the solution should instead be to improve the -#' naming of the object to better indicate that its contents are logical. This -#' can be done using prefixes (is, has, can, etc.). For example, `is_child`, -#' `has_parent_supervision`, `can_watch_horror_movie` clarify their logical -#' nature, while `child`, `parent_supervision`, `watch_horror_movie` don't. +#' used to improve readability, the solution should instead be to improve the +#' naming of the object to better indicate that its contents are logical. This +#' can be done using prefixes (is, has, can, etc.). For example, `is_child`, +#' `has_parent_supervision`, `can_watch_horror_movie` clarify their logical +#' nature, while `child`, `parent_supervision`, `watch_horror_movie` don't. #' @export redundant_equals_linter <- function() { xpath <- paste0( diff --git a/man/extraction_operator_linter.Rd b/man/extraction_operator_linter.Rd index 95d6e824d..6b6a84cf3 100644 --- a/man/extraction_operator_linter.Rd +++ b/man/extraction_operator_linter.Rd @@ -7,8 +7,54 @@ extraction_operator_linter() } \description{ -Check that the \code{[[} operator is used when extracting a single element from an object, not \code{[} (subsetting) nor \code{$} -(interactive use). +Check that the \code{[[} operator is used when extracting a single element from an object, +not \code{[} (subsetting) nor \code{$} (interactive use). +} +\details{ +There are three subsetting operators in R (\code{[[}, \code{[}, and \code{$}) and they interact differently +with different data structures (atomic vector, list, data frame, etc.). + +Here are a few reasons to prefer the \code{[[} operator over \code{[} or \code{$} when you want to extract +an element from a data frame or a list: +\itemize{ +\item Subsetting a list with \code{[} always returns a smaller list, while \code{[[} returns +the list element. +\item Subsetting a named atomic vector with \code{[} returns a named vector, while \code{[[} returns +the vector element. +\item Subsetting a data frame (but not tibble) with \code{[} is type unstable; it can return +a vector or a data frame. \code{[[}, on the other hand, always returns a vector. +\item For a data frame (but not tibble), \code{$} does partial matching (e.g. \code{df$a} will subset +\code{df$abc}), which can be a source of bugs. \code{[[} doesn't do partial matching. +} + +For data frames (and tibbles), irrespective of the size, the \code{[[} operator is slower than \code{$}. +For lists, however, the reverse is true. +} +\examples{ +library(lintr) + +# will produce lints +lint( + text = "iris['Species']", + linters = extraction_operator_linter() +) + +lint( + text = "iris$Species", + linters = extraction_operator_linter() +) + +# okay +lint( + text = "iris[['Species']]", + linters = extraction_operator_linter() +) + +} +\references{ +\itemize{ +\item Subsetting \href{https://adv-r.hadley.nz/subsetting.html}{chapter} from \emph{Advanced R} (Wickham, 2019). +} } \seealso{ \link{linters} for a complete list of linters available in lintr.