From a936cf6828411eafb0c4ff8c1cdd3c5e3f6869f3 Mon Sep 17 00:00:00 2001 From: AshesITR Date: Thu, 8 Dec 2022 07:01:52 +0100 Subject: [PATCH] safer Lint(), xml_nodes_to_lints() and lint() (#1788) * safer Lint(), xml_nodes_to_lints() and lint() * add missing newline * add more checks to Lint() fixes #763 * fix surfaced errors in our own code * fix cyclocomp * avoid apply(..., simplify = FALSE) (R >= 4.1), try fixing r-devel * delint * line should have been line_number * more line fixups * reorder column == 0 case * fix column number problems in r-devel move column_number to nchar(line) + 1L if it's larger anchor end-of-line parse error lints to nchar(line) + 1 as well instead of nchar(line) * feedback * fix tests * fix apparent typo Co-authored-by: Michael Chirico --- NEWS.md | 5 ++ R/get_source_expressions.R | 54 ++++++++++++++----- R/indentation_linter.R | 18 +++++-- R/lint.R | 44 +++++++++++++++ R/xml_nodes_to_lints.R | 12 +++++ tests/testthat/test-indentation_linter.R | 5 ++ tests/testthat/test-lint.R | 27 ++++++++++ tests/testthat/test-methods.R | 2 +- tests/testthat/test-rstudio_markers.R | 2 +- .../test-spaces_left_parentheses_linter.R | 12 +++++ tests/testthat/test-xml_nodes_to_lints.R | 48 +++++++++++++++++ 11 files changed, 211 insertions(+), 18 deletions(-) create mode 100644 tests/testthat/test-lint.R diff --git a/NEWS.md b/NEWS.md index 8683517c4..c6be365c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,11 @@ * `lint_package()` correctly finds a package from within a subdir if the `path` points to anywhere within the package (#1759, @AshesITR) +* Improved error behavior in `Lint()`, `lint()` and `xml_nodes_to_lints()` (#1427, #763, @AshesITR) + + `Lint()` validates its inputs more thoroughly, preventing errors during `print.Lints` like "Error in rep.int(character, length) : invalid 'times' value:". + + `lint()` no longer tries to create an expression tree with unexpected end of input errors, because they can be broken. + + `xml_nodes_to_lints()` warns if it can't find lint locations and uses dummy locations as a fallback. + * `linters_with_defaults()` no longer erroneously marks linter factories as linters (#1725, @AshesITR). ## Changes to defaults diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R index 0495599d2..d21898954 100644 --- a/R/get_source_expressions.R +++ b/R/get_source_expressions.R @@ -83,7 +83,7 @@ get_source_expressions <- function(filename, lines = NULL) { source_expression$content <- get_content(source_expression$lines) parsed_content <- get_source_expression(source_expression, error = function(e) lint_parse_error(e, source_expression)) - if (inherits(e, "lint") && (is.na(e$line) || !nzchar(e$line))) { + if (inherits(e, "lint") && (is.na(e$line) || !nzchar(e$line) || e$message == "unexpected end of input")) { # Don't create expression list if it's unreliable (invalid encoding or unhandled parse error) expressions <- list() } else { @@ -152,6 +152,30 @@ lint_parse_error <- function(e, source_expression) { lint_parse_error_nonstandard(e, source_expression) } +#' Ensure a string is valid for printing +#' +#' Helper to ensure a valid string is provided as line where necessary. +#' Handles two cases: +#' +#' 1. NA lines, as generated by running outside of code blocks in Rmd documents +#' 2. invalid string lines, as generated by invalid encoding settings +#' +#' [nchar()] can detect both cases: 1. returns `NA_integer_`, 2. errors +#' +#' @param line Possibly misencoded or NA line +#' +#' @return `line` if it's a valid non-NA string, otherwise an empty string. +#' +#' @noRd +fixup_line <- function(line) { + nchars <- tryCatch(nchar(line, type = "chars"), error = function(e) NA_integer_) + if (is.na(nchars)) { + "" + } else { + line + } +} + #' Convert an R >= 4.3.0 classed parseError with metadata into a lint #' #' @param e A parse error of class `parseError` generated by R >= 4.3.0 @@ -162,7 +186,7 @@ lint_parse_error <- function(e, source_expression) { #' @noRd lint_parse_error_r43 <- function(e, source_expression) { msg <- rex::re_substitutes(e$message, rex::rex(" (", except_some_of(")"), ")", end), "") - line <- e$lineno + line_number <- e$lineno column <- e$colno substr(msg, 1L, 1L) <- toupper(substr(msg, 1L, 1L)) msg <- paste0(msg, ".") @@ -172,23 +196,27 @@ lint_parse_error_r43 <- function(e, source_expression) { } if (column == 0L) { - line <- line - 1L - column <- nchar(source_expression$lines[[line]]) + line_number <- line_number - 1L } - if (line < 1L || line > length(source_expression$lines)) { + if (line_number < 1L || line_number > length(source_expression$lines)) { # Safely handle invalid location info - line <- 1L + line_number <- 1L column <- 1L } + line <- fixup_line(source_expression$lines[[line_number]]) + if (column == 0L || column > nchar(line) + 1L) { + column <- nchar(line) + 1L + } + Lint( filename = source_expression$filename, - line_number = line, + line_number = line_number, column_number = column, type = "error", message = msg, - line = source_expression$lines[[line]] + line = line ) } @@ -208,10 +236,10 @@ lint_parse_error_r42 <- function(message_info, source_expression) { # end of the previous line if (column_number %==% 0L) { line_number <- line_number - 1L - line <- source_expression$lines[[line_number]] - column_number <- nchar(line) + line <- fixup_line(source_expression$lines[[line_number]]) + column_number <- nchar(line) + 1L } else { - line <- source_expression$lines[[line_number]] + line <- fixup_line(source_expression$lines[[line_number]]) } Lint( @@ -249,7 +277,7 @@ lint_parse_error_nonstandard <- function(e, source_expression) { column_number = 1L, type = "error", message = "Invalid multibyte character in parser. Is the encoding correct?", - line = source_expression$lines[[l]] + line = fixup_line(source_expression$lines[[l]]) ) ) } else if (grepl("invalid multibyte string, element", e$message, fixed = TRUE)) { @@ -284,7 +312,7 @@ lint_parse_error_nonstandard <- function(e, source_expression) { column_number = 1L, type = "error", message = sprintf("Repeated formal argument '%s'.", sym), - line = source_expression$lines[[l]] + line = fixup_line(source_expression$lines[[l]]) ) ) } diff --git a/R/indentation_linter.R b/R/indentation_linter.R index 34c5c6a10..6fc59f2e7 100644 --- a/R/indentation_linter.R +++ b/R/indentation_linter.R @@ -184,7 +184,7 @@ indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "al change_type <- find_indent_type(change) change_begin <- as.integer(xml2::xml_attr(change, "line1")) + 1L change_end <- xml2::xml_find_num(change, xp_block_ends) - if (change_begin <= change_end) { + if (isTRUE(change_begin <= change_end)) { to_indent <- seq(from = change_begin, to = change_end) if (change_type == "hanging") { expected_indent_levels[to_indent] <- as.integer(xml2::xml_attr(change, "col2")) @@ -231,7 +231,12 @@ indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "al lint_lines <- unname(as.integer(names(source_expression$file_lines)[bad_lines])) lint_ranges <- cbind( pmin(expected_indent_levels[bad_lines] + 1L, indent_levels[bad_lines]), - pmax(expected_indent_levels[bad_lines], indent_levels[bad_lines]) + # If the expected indent is larger than the current line width, the lint range would become invalid. + # Therefor, limit range end to end of line. + pmin( + pmax(expected_indent_levels[bad_lines], indent_levels[bad_lines]), + nchar(source_expression$file_lines[bad_lines]) + 1L + ) ) Map( Lint, @@ -241,7 +246,14 @@ indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "al type = "style", message = lint_messages, line = unname(source_expression$file_lines[bad_lines]), - ranges = apply(lint_ranges, 1L, list, simplify = FALSE) + # TODO(AshesITR) when updating supported R version to R >= 4.1: + # replace by ranges = apply(lint_ranges, 1L, list, simplify = FALSE) + ranges = lapply( + seq_along(bad_lines), + function(i) { + list(lint_ranges[i, ]) + } + ) ) } else { list() diff --git a/R/lint.R b/R/lint.R index 2a2e8a10a..feb27aaf0 100644 --- a/R/lint.R +++ b/R/lint.R @@ -402,6 +402,21 @@ Lint <- function(filename, line_number = 1L, column_number = 1L, # nolint: objec ) } + if (length(line) != 1L || !is.character(line)) { + stop("`line` must be a string.") + } + max_col <- max(nchar(line) + 1L, 1L, na.rm = TRUE) + if (!is_number(column_number) || column_number < 0L || column_number > max_col) { + stop(sprintf( + "`column_number` must be an integer between 0 and nchar(line) + 1 (%d). It was %s.", + max_col, column_number + )) + } + if (!is_number(line_number) || line_number < 1L) { + stop(sprintf("`line_number` must be a positive integer. It was %s.", line_number)) + } + check_ranges(ranges, max_col) + type <- match.arg(type) obj <- list( @@ -418,6 +433,35 @@ Lint <- function(filename, line_number = 1L, column_number = 1L, # nolint: objec obj } +is_number <- function(number, n = 1L) { + length(number) == n && is.numeric(number) && !anyNA(number) +} + +is_valid_range <- function(range, max_col) { + 0L <= range[[1L]] && + range[[1L]] <= range[[2L]] && + range[[2L]] <= max_col +} + +check_ranges <- function(ranges, max_col) { + if (is.null(ranges)) { + return() + } + if (!is.list(ranges)) { + stop("`ranges` must be NULL or a list.") + } + + for (range in ranges) { + if (!is_number(range, 2L)) { + stop("`ranges` must only contain length 2 integer vectors without NAs.") + } else if (!is_valid_range(range, max_col)) { + stop(sprintf( + "All entries in `ranges` must satisfy 0 <= range[1L] <= range[2L] <= nchar(line) + 1 (%d).", max_col + )) + } + } +} + rstudio_source_markers <- function(lints) { if (!requireNamespace("rstudioapi", quietly = TRUE)) { stop("'rstudioapi' is required for rstudio_source_markers().") # nocov diff --git a/R/xml_nodes_to_lints.R b/R/xml_nodes_to_lints.R index 6842c5734..759cf7554 100644 --- a/R/xml_nodes_to_lints.R +++ b/R/xml_nodes_to_lints.R @@ -61,17 +61,29 @@ xml_nodes_to_lints <- function(xml, source_expression, lint_message, type <- match.arg(type, c("style", "warning", "error")) line1 <- xml2::xml_attr(xml, "line1") col1 <- xp_find_location(xml, range_start_xpath) + if (is.na(col1)) { + warning("Could not find range start for lint. Defaulting to start of line.") + col1 <- 1L + } lines <- source_expression[["lines"]] if (is.null(lines)) lines <- source_expression[["file_lines"]] if (xml2::xml_attr(xml, "line2") == line1) { col2 <- xp_find_location(xml, range_end_xpath) + if (is.na(col2)) { + warning("Could not find range end for lint. Defaulting to width 1.") + col2 <- col1 + } } else { col2 <- nchar(lines[[line1]]) } column_number <- xp_find_location(xml, column_number_xpath) + if (is.na(column_number)) { + warning("Could not find location for lint. Defaulting to start of range.") + column_number <- col1 + } Lint( filename = source_expression$filename, diff --git a/tests/testthat/test-indentation_linter.R b/tests/testthat/test-indentation_linter.R index 57cfb19c9..b50b676f0 100644 --- a/tests/testthat/test-indentation_linter.R +++ b/tests/testthat/test-indentation_linter.R @@ -690,3 +690,8 @@ test_that("native pipe is supported", { linter ) }) + +test_that("it doesn't error on invalid code", { + # Part of #1427 + expect_lint("function() {)", list(linter = "error", message = rex::rex("unexpected ')'")), indentation_linter()) +}) diff --git a/tests/testthat/test-lint.R b/tests/testthat/test-lint.R new file mode 100644 index 000000000..3d081a1af --- /dev/null +++ b/tests/testthat/test-lint.R @@ -0,0 +1,27 @@ +test_that("Lint() errors on invalid input", { + dummy_line <- "abc" + expect_error( + Lint("dummy.R", line = dummy_line, column_number = NA_integer_), + rex::rex("`column_number` must be an integer between 0 and nchar(line) + 1 (4). It was NA.") + ) + expect_error( + Lint("dummy.R", line = dummy_line, line_number = 0L), + rex::rex("`line_number` must be a positive integer. It was 0.") + ) + expect_error( + Lint("dummy.R", ranges = c(1L, 3L)), + rex::rex("`ranges` must be NULL or a list.") + ) + expect_error( + Lint("dummy.R", ranges = list(1L)), + rex::rex("`ranges` must only contain length 2 integer vectors without NAs.") + ) + expect_error( + Lint("dummy.R", ranges = list(c(1L, NA_integer_))), + rex::rex("`ranges` must only contain length 2 integer vectors without NAs.") + ) + expect_error( + Lint("dummy.R", line = dummy_line, ranges = list(c(1L, 2L), c(1L, 5L))), + rex::rex("All entries in `ranges` must satisfy 0 <= range[1L] <= range[2L] <= nchar(line) + 1 (4).") + ) +}) diff --git a/tests/testthat/test-methods.R b/tests/testthat/test-methods.R index 26c77ba23..719667a1c 100644 --- a/tests/testthat/test-methods.R +++ b/tests/testthat/test-methods.R @@ -42,7 +42,7 @@ test_that("as.data.frame.lints", { type = "error", message = "Under no circumstances is the use of foobar allowed.", line = "a <- 1", - ranges = list(c(1L, 2L), c(10L, 20L)) + ranges = list(c(1L, 2L), c(6L, 7L)) ), "lint" ) diff --git a/tests/testthat/test-rstudio_markers.R b/tests/testthat/test-rstudio_markers.R index b6896780e..ca21a35fe 100644 --- a/tests/testthat/test-rstudio_markers.R +++ b/tests/testthat/test-rstudio_markers.R @@ -40,7 +40,7 @@ test_that("it returns markers which match lints", { Lint( filename = "test_file2", line_number = 10L, - column_number = 5L, + column_number = 1L, type = "warning", message = "test a message" ) diff --git a/tests/testthat/test-spaces_left_parentheses_linter.R b/tests/testthat/test-spaces_left_parentheses_linter.R index 707d42102..a26a03f44 100644 --- a/tests/testthat/test-spaces_left_parentheses_linter.R +++ b/tests/testthat/test-spaces_left_parentheses_linter.R @@ -73,3 +73,15 @@ test_that("doesn't produce a warning", { ) ) }) + +test_that("it doesn't produce invalid lints", { + # Part of #1427 + expect_warning( + expect_lint( + "function() {)", + list(list(linter = "function_left_parentheses_linter", ranges = list(c(9L, 9L))), list(linter = "error")), + function_left_parentheses_linter() + ), + rex::rex("Could not find range end for lint. Defaulting to width 1.") + ) +}) diff --git a/tests/testthat/test-xml_nodes_to_lints.R b/tests/testthat/test-xml_nodes_to_lints.R index 9abbbb7fd..3a6a89c1e 100644 --- a/tests/testthat/test-xml_nodes_to_lints.R +++ b/tests/testthat/test-xml_nodes_to_lints.R @@ -86,6 +86,54 @@ test_that("it handles multi-line lints correctly", { expect_identical(l$ranges, list(as.integer(c(xml2::xml_attr(node, "col1"), nchar(code[1L]))))) }) +test_that("it doesn't produce invalid lints", { + # Part of #1427 + code <- "before %+% after" + tmpfile <- withr::local_tempfile(lines = code) + expr <- get_source_expressions(tmpfile)$expressions[[2L]] + xml <- expr$full_xml_parsed_content + node <- xml2::xml_find_first(xml, "//SPECIAL") + + # We can produce invalid location xpaths by requiring any non-existent node + xp_column_number <- "number(./preceding-sibling::*[1]/@col2 + 1)" + xp_range_start <- "number(./preceding-sibling::*[1]/@col1)" + xp_range_end <- "number(./following-sibling::*[1]/@col2)" + xp_invalid <- "number(./DOES-NOT-EXIST/@col1)" + + expect_warning(l_invalid_loc1 <- xml_nodes_to_lints( + xml = node, + source_expression = expr, + lint_message = "lint_msg", + column_number_xpath = xp_column_number, + range_start_xpath = xp_invalid, + range_end_xpath = xp_range_end + ), rex::rex("Could not find range start for lint. Defaulting to start of line.")) + expect_identical(l_invalid_loc1$column_number, nchar("before") + 1L) + expect_identical(l_invalid_loc1$ranges, list(c(1L, nchar(code)))) + + expect_warning(l_invalid_loc2 <- xml_nodes_to_lints( + xml = node, + source_expression = expr, + lint_message = "lint_msg", + column_number_xpath = xp_column_number, + range_start_xpath = xp_range_start, + range_end_xpath = xp_invalid + ), rex::rex("Could not find range end for lint. Defaulting to width 1.")) + expect_identical(l_invalid_loc2$column_number, nchar("before") + 1L) + expect_identical(l_invalid_loc2$ranges, list(c(1L, 1L))) + + expect_warning(l_invalid_col <- xml_nodes_to_lints( + xml = node, + source_expression = expr, + lint_message = "lint_msg", + column_number_xpath = xp_invalid, + range_start_xpath = xp_range_start, + range_end_xpath = xp_range_end + ), rex::rex("Could not find location for lint. Defaulting to start of range.")) + expect_identical(l_invalid_col$column_number, 1L) + expect_identical(l_invalid_col$ranges, list(c(1L, nchar(code)))) +}) + test_that("erroneous input errors", { expect_error(xml_nodes_to_lints(1L), "Expected an xml_nodeset", fixed = TRUE) })